mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-11-03 19:17:02 -05:00 
			
		
		
		
	merging with trunk
This commit is contained in:
		
						commit
						07e888f764
					
				
							
								
								
									
										145
									
								
								Changelog.yaml
									
									
									
									
									
								
							
							
						
						
									
										145
									
								
								Changelog.yaml
									
									
									
									
									
								
							@ -4,6 +4,151 @@
 | 
				
			|||||||
# for important features/bug fixes.
 | 
					# for important features/bug fixes.
 | 
				
			||||||
# Also, each release can have new and improved recipes.
 | 
					# Also, each release can have new and improved recipes.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					- version: 0.6.44
 | 
				
			||||||
 | 
					  date: 2010-03-05
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  new features:
 | 
				
			||||||
 | 
					    - title: "Experimental support for conversion of CHM files"
 | 
				
			||||||
 | 
					      type: major
 | 
				
			||||||
 | 
					      description : >
 | 
				
			||||||
 | 
					        "Conversion and reading of metadata from CHM files is now supported. This feature is
 | 
				
			||||||
 | 
					        still experimental, with more testing needed. Building from source on linux now 
 | 
				
			||||||
 | 
					        requires chmlib."
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    - title: "Experimental support for fetching annotations from the Kindles"
 | 
				
			||||||
 | 
					      type: major
 | 
				
			||||||
 | 
					      description: >
 | 
				
			||||||
 | 
					        "calibre can now fetch annotations from your kindle and put them into the
 | 
				
			||||||
 | 
					        comments field. To fetch annotations, click the arrow next to the 
 | 
				
			||||||
 | 
					        'send to device' button and select 'Fetch Annotations', with your Kindle
 | 
				
			||||||
 | 
					        connected."
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    - title: "Support FreeBSD out of the box (except USB)"
 | 
				
			||||||
 | 
					      type: major
 | 
				
			||||||
 | 
					      tickets: [4715]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    - title: "News download scheduler: Don't try to download news when no active internet connection is present (linux/windows only)"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    - title: "EPUB to EPUB conversion: Preserve font encryption"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    - title: "calibre-server: Add --pidfile and --daemonize (unix only) options"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    - title: "Plugins: When loading a plugin zip file extract to temp dir and add to sys.path, if the zip file contains binary code (pyd/dll/so/dylib), instead of just adding the zip file to the path, as python cannot load compiled code from a zip file"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  bug fixes:
 | 
				
			||||||
 | 
					    - title: "Ebook-viewer: Handle non-ascii CSS files when doing font substitutions"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    - title: "Conversion pipeline: Ignore non-integral play orders when parsing NCX files"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    - title: "When decoding NCX toc files, if no encoding is declared and detection has less than 100% confidence, assume UTF-8."
 | 
				
			||||||
 | 
					      tickets: [5039]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    - title: "PML chapter definitions missing from toc.ncx"
 | 
				
			||||||
 | 
					      tickets: [4990]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    - title: "Unicode string for cover causes calibredb --output-format stanza to fail"
 | 
				
			||||||
 | 
					      ticket: [5035]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    - title: "Search cover:False fails, cover:True succeeds"
 | 
				
			||||||
 | 
					      tickets: [5034]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    - title: "Plugins: correctly use context"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    - title: "MOBI Input: Don't lose cover if it is also referred to in main text"
 | 
				
			||||||
 | 
					      ticket: [5020]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    - title: "RTF Output: Don't choke on PNG images"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  new recipes:
 | 
				
			||||||
 | 
					    - title: Journal of Hospital Medicine, San Francisco Bay Guardian, Smithsonian Magazine
 | 
				
			||||||
 | 
					      author: Krittika Goyal
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    - title: Astronomy Picture of the Day, Epicurious
 | 
				
			||||||
 | 
					      author: Starson17
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    - title: Diario Vasco, Various Chilean newspapers
 | 
				
			||||||
 | 
					      author: Darko Miletic
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    - title: Kukuburi
 | 
				
			||||||
 | 
					      author: Mori
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  improved recipes:
 | 
				
			||||||
 | 
					    - Ars Technica
 | 
				
			||||||
 | 
					    - Fudzilla
 | 
				
			||||||
 | 
					    - The Atlantic
 | 
				
			||||||
 | 
					    - The Economist
 | 
				
			||||||
 | 
					    - Huffington Post
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					- version: 0.6.43
 | 
				
			||||||
 | 
					  date: 2010-02-26
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  new features:
 | 
				
			||||||
 | 
					    - title: "Support for the Teclast K3 and Elonex e-book readers"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    - title: "Add 'Recently Read' category to catalog if Kindle is connected when catalog is generated"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    - title: "When adding PRC/MOBI files that are actually Topaz files, change detected file type to Topaz"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    - title: "MOBI Output: If the SVG rasterizer is not available continue anyway"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    - title: "News download: When using the debug pipeline options, create a zip file named periodical.downloaded_recipe in the debug directory. This can be passed to ebook-convert to directly convert a previous download into an e-book."
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    - title: "Add Apply button to catalog generation dialog"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  bug fixes:
 | 
				
			||||||
 | 
					    - title: "When fetching metadata in the edit metadata dialog, use a python thread instead of a Qt thread. Hopefully this will fix the reports of crashes when fetching metadata"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    - title: "Refresh cover browser when a cover is changed via the edit meta information dialog"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    - title: "More device detection debug output on OS X"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    - title: "Download only covers should not also set social metadata"
 | 
				
			||||||
 | 
					      tickets: [4966]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    - title: "Content server: If fail to bind to 0.0.0.0 try detecting and binding only to interface used for outgoing traffic"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    - title: "Handle poorly designed import plugins that return None on error"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    - title: "Move logic for removing inline navbars out of the BasicNewsRecipe class"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    - title: "MOBI metadata: When setting title, set in both PalmDoc and EXTH headers"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    - title: "MOBI metadata: Do not try to extract embedded metadata from MOBI files larger than 4MB"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    - title: "Handle PDB files that contain PDF files"
 | 
				
			||||||
 | 
					      tickets: [4971]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    - title: "PML Input: Various fixes"
 | 
				
			||||||
 | 
					      tickets: [4959,4961]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    - title: "Fix reading MOBI metadata from files in zip/rar archives"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    - title: "Make extracting single files from RAR archives more efficient"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    - title: "No longer need Qt to generate default cover for news downloads"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    - title: "Catalog generation: fix for EPUB anchors beginning with numbers in Recently Added"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    - title: "Searching: Handle uppercase keywords correctly"
 | 
				
			||||||
 | 
					      tickets: [4951]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  new recipes:
 | 
				
			||||||
 | 
					    - title: Gamasutra
 | 
				
			||||||
 | 
					      author: Darko Miletic
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  improved recipes:
 | 
				
			||||||
 | 
					    - "Strategy+Business"
 | 
				
			||||||
 | 
					    - Arizona Daily Star
 | 
				
			||||||
 | 
					    - Heise
 | 
				
			||||||
 | 
					    - New Scientist
 | 
				
			||||||
 | 
					    - Various Serbian news feeds
 | 
				
			||||||
 | 
					    - Houston and San Francisco Chronicles
 | 
				
			||||||
 | 
					
 | 
				
			||||||
- version: 0.6.42
 | 
					- version: 0.6.42
 | 
				
			||||||
  date: 2010-02-20
 | 
					  date: 2010-02-20
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -79,9 +79,24 @@ p.unread_book {
 | 
				
			|||||||
	text-indent:-2em;
 | 
						text-indent:-2em;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					p.date_read {
 | 
				
			||||||
 | 
						text-align:left;
 | 
				
			||||||
 | 
						margin-top:0px;
 | 
				
			||||||
 | 
						margin-bottom:0px;
 | 
				
			||||||
 | 
						margin-left:6em;
 | 
				
			||||||
 | 
						text-indent:-6em;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
hr.series_divider {
 | 
					hr.series_divider {
 | 
				
			||||||
	width:50%;
 | 
						width:50%;
 | 
				
			||||||
	margin-left:1em;
 | 
						margin-left:1em;
 | 
				
			||||||
	margin-top:0em;
 | 
						margin-top:0em;
 | 
				
			||||||
	margin-bottom:0em;
 | 
						margin-bottom:0em;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					hr.annotations_divider {
 | 
				
			||||||
 | 
						width:50%;
 | 
				
			||||||
 | 
						margin-left:1em;
 | 
				
			||||||
 | 
						margin-top:0em;
 | 
				
			||||||
 | 
						margin-bottom:0em;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
				
			|||||||
										
											Binary file not shown.
										
									
								
							| 
		 Before Width: | Height: | Size: 116 KiB After Width: | Height: | Size: 124 KiB  | 
							
								
								
									
										
											BIN
										
									
								
								resources/images/news/diariovasco.png
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								resources/images/news/diariovasco.png
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							| 
		 After Width: | Height: | Size: 766 B  | 
							
								
								
									
										
											BIN
										
									
								
								resources/images/news/gamasutra_fa.png
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								resources/images/news/gamasutra_fa.png
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							| 
		 After Width: | Height: | Size: 956 B  | 
							
								
								
									
										
											BIN
										
									
								
								resources/images/news/gamasutra_news.png
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								resources/images/news/gamasutra_news.png
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							| 
		 After Width: | Height: | Size: 956 B  | 
@ -1,7 +1,6 @@
 | 
				
			|||||||
#!/usr/bin/env  python
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
__license__   = 'GPL v3'
 | 
					__license__   = 'GPL v3'
 | 
				
			||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
 | 
					__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
'''
 | 
					'''
 | 
				
			||||||
24sata.rs
 | 
					24sata.rs
 | 
				
			||||||
@ -9,7 +8,6 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
import re
 | 
					import re
 | 
				
			||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
 | 
					from calibre.web.feeds.recipes import BasicNewsRecipe
 | 
				
			||||||
from calibre.ebooks.BeautifulSoup import Tag
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
class Ser24Sata(BasicNewsRecipe):
 | 
					class Ser24Sata(BasicNewsRecipe):
 | 
				
			||||||
    title                 = '24 Sata - Sr'
 | 
					    title                 = '24 Sata - Sr'
 | 
				
			||||||
@ -17,22 +15,20 @@ class Ser24Sata(BasicNewsRecipe):
 | 
				
			|||||||
    description           = '24 sata portal vesti iz Srbije'
 | 
					    description           = '24 sata portal vesti iz Srbije'
 | 
				
			||||||
    publisher             = 'Ringier d.o.o.'
 | 
					    publisher             = 'Ringier d.o.o.'
 | 
				
			||||||
    category              = 'news, politics, entertainment, Serbia'
 | 
					    category              = 'news, politics, entertainment, Serbia'
 | 
				
			||||||
    oldest_article        = 7
 | 
					    oldest_article        = 2
 | 
				
			||||||
    max_articles_per_feed = 100
 | 
					    max_articles_per_feed = 100
 | 
				
			||||||
    no_stylesheets        = True
 | 
					    no_stylesheets        = True
 | 
				
			||||||
    encoding              = 'utf-8'
 | 
					    encoding              = 'utf-8'
 | 
				
			||||||
    use_embedded_content  = False
 | 
					    use_embedded_content  = False
 | 
				
			||||||
    language = 'sr'
 | 
					    language              = 'sr'
 | 
				
			||||||
 | 
					    extra_css             = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
 | 
				
			||||||
    lang                  = 'sr-Latn-RS'
 | 
					 | 
				
			||||||
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    conversion_options = {
 | 
					    conversion_options = {
 | 
				
			||||||
                          'comment'          : description
 | 
					                          'comment'          : description
 | 
				
			||||||
                        , 'tags'             : category
 | 
					                        , 'tags'             : category
 | 
				
			||||||
                        , 'publisher'        : publisher
 | 
					                        , 'publisher'        : publisher
 | 
				
			||||||
                        , 'language'         : lang
 | 
					                        , 'language'         : language
 | 
				
			||||||
                        , 'pretty_print'     : True
 | 
					                        , 'linearize_tables' : True
 | 
				
			||||||
                        }
 | 
					                        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
 | 
					    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
 | 
				
			||||||
@ -40,25 +36,6 @@ class Ser24Sata(BasicNewsRecipe):
 | 
				
			|||||||
    feeds = [(u'Vesti Dana', u'http://www.24sata.rs/rss.php')]
 | 
					    feeds = [(u'Vesti Dana', u'http://www.24sata.rs/rss.php')]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def preprocess_html(self, soup):
 | 
					    def preprocess_html(self, soup):
 | 
				
			||||||
        soup.html['xml:lang'] = self.lang
 | 
					 | 
				
			||||||
        soup.html['lang']     = self.lang
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        attribs = [  'style','font','valign'
 | 
					 | 
				
			||||||
                    ,'colspan','width','height'
 | 
					 | 
				
			||||||
                    ,'rowspan','summary','align'
 | 
					 | 
				
			||||||
                    ,'cellspacing','cellpadding'
 | 
					 | 
				
			||||||
                    ,'frames','rules','border'
 | 
					 | 
				
			||||||
                  ]
 | 
					 | 
				
			||||||
        for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
 | 
					 | 
				
			||||||
            item.name = 'div'
 | 
					 | 
				
			||||||
            for attrib in attribs:
 | 
					 | 
				
			||||||
                if item.has_key(attrib):
 | 
					 | 
				
			||||||
                   del item[attrib]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
 | 
					 | 
				
			||||||
        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
 | 
					 | 
				
			||||||
        soup.head.insert(0,mlang)
 | 
					 | 
				
			||||||
        soup.head.insert(1,mcharset)
 | 
					 | 
				
			||||||
        return self.adeify_images(soup)
 | 
					        return self.adeify_images(soup)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def print_version(self, url):
 | 
					    def print_version(self, url):
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										37
									
								
								resources/recipes/apod.recipe
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										37
									
								
								resources/recipes/apod.recipe
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,37 @@
 | 
				
			|||||||
 | 
					from calibre.web.feeds.news import BasicNewsRecipe
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class APOD(BasicNewsRecipe):
 | 
				
			||||||
 | 
					    title          = u'Astronomy Picture of the Day'
 | 
				
			||||||
 | 
					    __author__  = 'Starson17'
 | 
				
			||||||
 | 
					    description = 'Astronomy Pictures'
 | 
				
			||||||
 | 
					    language = 'en'
 | 
				
			||||||
 | 
					    use_embedded_content    = False
 | 
				
			||||||
 | 
					    no_stylesheets        = True
 | 
				
			||||||
 | 
					    cover_url     = 'http://apod.nasa.gov/apod/image/1003/m78_torregrosa.jpg'
 | 
				
			||||||
 | 
					    remove_javascript = True
 | 
				
			||||||
 | 
					    recursions = 0
 | 
				
			||||||
 | 
					    oldest_article        = 14
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    feeds = [
 | 
				
			||||||
 | 
					             (u'Astronomy Picture of the Day', u'http://apod.nasa.gov/apod.rss')
 | 
				
			||||||
 | 
					             ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    extra_css = '''
 | 
				
			||||||
 | 
					                    h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
 | 
				
			||||||
 | 
					                    h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
 | 
				
			||||||
 | 
					                    p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
 | 
				
			||||||
 | 
					                    body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
 | 
				
			||||||
 | 
							'''
 | 
				
			||||||
 | 
					    def postprocess_html(self, soup, first_fetch):
 | 
				
			||||||
 | 
					        center_tags = soup.findAll(['center'])
 | 
				
			||||||
 | 
					        p_tags = soup.findAll(['p'])
 | 
				
			||||||
 | 
					        last_center = center_tags[-1:]
 | 
				
			||||||
 | 
					        last_center[0].extract()
 | 
				
			||||||
 | 
					        first_p = p_tags[:1]
 | 
				
			||||||
 | 
					        for tag in first_p:
 | 
				
			||||||
 | 
					            tag.extract()
 | 
				
			||||||
 | 
					        last2_p = p_tags[-2:]
 | 
				
			||||||
 | 
					        for tag in last2_p:
 | 
				
			||||||
 | 
					            tag.extract()
 | 
				
			||||||
 | 
					        return soup
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -5,6 +5,7 @@ __copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
 | 
				
			|||||||
arstechnica.com
 | 
					arstechnica.com
 | 
				
			||||||
'''
 | 
					'''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import re
 | 
				
			||||||
from calibre.web.feeds.news import BasicNewsRecipe
 | 
					from calibre.web.feeds.news import BasicNewsRecipe
 | 
				
			||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
 | 
					from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -20,7 +21,7 @@ class ArsTechnica2(BasicNewsRecipe):
 | 
				
			|||||||
    no_stylesheets        = True
 | 
					    no_stylesheets        = True
 | 
				
			||||||
    encoding              = 'utf-8'
 | 
					    encoding              = 'utf-8'
 | 
				
			||||||
    use_embedded_content  = False
 | 
					    use_embedded_content  = False
 | 
				
			||||||
    extra_css             = ' body {font-family: sans-serif} .byline{font-weight: bold; line-height: 1em; font-size: 0.625em; text-decoration: none} '
 | 
					    extra_css             = ' body {font-family: Arial,Helvetica,sans-serif} .title{text-align: left} .byline{font-weight: bold; line-height: 1em; font-size: 0.625em; text-decoration: none} '
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    conversion_options = {
 | 
					    conversion_options = {
 | 
				
			||||||
                             'comments'  : description
 | 
					                             'comments'  : description
 | 
				
			||||||
@ -30,6 +31,10 @@ class ArsTechnica2(BasicNewsRecipe):
 | 
				
			|||||||
                         }
 | 
					                         }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    preprocess_regexps = [
 | 
				
			||||||
 | 
					                (re.compile(r'<div class="news-item-figure', re.DOTALL|re.IGNORECASE),lambda match: '<div class="news-item-figure"')
 | 
				
			||||||
 | 
					               ,(re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE),lambda match: '</title></head>')
 | 
				
			||||||
 | 
					                         ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    keep_only_tags = [dict(name='div', attrs={'id':['story','etc-story']})]
 | 
					    keep_only_tags = [dict(name='div', attrs={'id':['story','etc-story']})]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -37,7 +42,7 @@ class ArsTechnica2(BasicNewsRecipe):
 | 
				
			|||||||
                     dict(name=['object','link','embed'])
 | 
					                     dict(name=['object','link','embed'])
 | 
				
			||||||
                    ,dict(name='div', attrs={'class':'read-more-link'})
 | 
					                    ,dict(name='div', attrs={'class':'read-more-link'})
 | 
				
			||||||
                  ]
 | 
					                  ]
 | 
				
			||||||
 | 
					    remove_attributes=['width','height']
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    feeds = [
 | 
					    feeds = [
 | 
				
			||||||
              (u'Infinite Loop (Apple content)'        , u'http://feeds.arstechnica.com/arstechnica/apple/'      )
 | 
					              (u'Infinite Loop (Apple content)'        , u'http://feeds.arstechnica.com/arstechnica/apple/'      )
 | 
				
			||||||
@ -90,3 +95,5 @@ class ArsTechnica2(BasicNewsRecipe):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        return soup
 | 
					        return soup
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def get_article_url(self, article):
 | 
				
			||||||
 | 
					        return article.get('guid',  None).rpartition('?')[0]
 | 
				
			||||||
 | 
				
			|||||||
@ -5,76 +5,103 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 | 
				
			|||||||
'''
 | 
					'''
 | 
				
			||||||
theatlantic.com
 | 
					theatlantic.com
 | 
				
			||||||
'''
 | 
					'''
 | 
				
			||||||
import re
 | 
					import string
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from calibre.web.feeds.news import BasicNewsRecipe
 | 
					from calibre.web.feeds.news import BasicNewsRecipe
 | 
				
			||||||
 | 
					from calibre.ebooks.BeautifulSoup import Tag, NavigableString
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class TheAtlantic(BasicNewsRecipe):
 | 
					class TheAtlantic(BasicNewsRecipe):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    title      = 'The Atlantic'
 | 
					    title      = 'The Atlantic'
 | 
				
			||||||
    __author__ = 'Kovid Goyal and Sujata Raman'
 | 
					    __author__ = 'Kovid Goyal and Sujata Raman'
 | 
				
			||||||
    description = 'Current affairs and politics focussed on the US'
 | 
					    description = 'Current affairs and politics focussed on the US'
 | 
				
			||||||
    INDEX = 'http://www.theatlantic.com/doc/current'
 | 
					    INDEX = 'http://www.theatlantic.com/magazine/toc/0/'
 | 
				
			||||||
    language = 'en'
 | 
					    language = 'en'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    remove_tags_before = dict(name='div', id='storytop')
 | 
					    remove_tags_before = dict(name='div', id='articleHead')
 | 
				
			||||||
    remove_tags        = [
 | 
					    remove_tags_after  = dict(id='copyright')
 | 
				
			||||||
                        dict(name='div', id=['seealso','storybottom',  'footer', 'ad_banner_top', 'sidebar','articletoolstop','subcontent',]),
 | 
					    remove_tags        = [dict(id=['header', 'printAds', 'pageControls'])]
 | 
				
			||||||
                        dict(name='p', attrs={'id':["pagination"]}),
 | 
					    no_stylesheets = True
 | 
				
			||||||
                        dict(name='table',attrs={'class':"tools"}),
 | 
					
 | 
				
			||||||
                        dict(name='style'),
 | 
					
 | 
				
			||||||
                        dict(name='a', href='/a/newsletters.mhtml')
 | 
					    def print_version(self, url):
 | 
				
			||||||
                         ]
 | 
					        return url.replace('/archive/', '/print/')
 | 
				
			||||||
    remove_attributes = ['icap', 'callout', 'style']
 | 
					 | 
				
			||||||
    no_stylesheets     = True
 | 
					 | 
				
			||||||
    conversion_options = { 'linearize_tables':True }
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    extra_css = '''
 | 
					 | 
				
			||||||
                    #timestamp{font-family:Arial,Helvetica,sans-serif; color:#666666 ;font-size:x-small}
 | 
					 | 
				
			||||||
                    #storytype{font-family:Arial,Helvetica,sans-serif; color:#D52B1E ;font-weight:bold; font-size:x-small}
 | 
					 | 
				
			||||||
                    h2{font-family:georgia,serif; font-style:italic;font-size:x-small;font-weight:normal;}
 | 
					 | 
				
			||||||
                    h1{font-family:georgia,serif; font-weight:bold; font-size:large}
 | 
					 | 
				
			||||||
                    #byline{font-family:georgia,serif; font-weight:bold; font-size:x-small}
 | 
					 | 
				
			||||||
                    #topgraf{font-family:Arial,Helvetica,sans-serif;font-size:x-small;font-weight:bold;}
 | 
					 | 
				
			||||||
                    .artsans{{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
 | 
					 | 
				
			||||||
                '''
 | 
					 | 
				
			||||||
    def parse_index(self):
 | 
					    def parse_index(self):
 | 
				
			||||||
        articles = []
 | 
					        articles = []
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        soup = self.index_to_soup(self.INDEX)
 | 
					        soup = self.index_to_soup(self.INDEX)
 | 
				
			||||||
 | 
					        sectit = soup.find('h1', attrs={'class':'sectionTitle'})
 | 
				
			||||||
 | 
					        if sectit is not None:
 | 
				
			||||||
 | 
					            texts = sectit.findAll('cufontext')
 | 
				
			||||||
 | 
					            texts = map(self.tag_to_string, texts[-2:])
 | 
				
			||||||
 | 
					            self.timefmt = ' [%s]'%(''.join(texts))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        issue = soup.find('span', attrs={'class':'issue'})
 | 
					        cover = soup.find('img', src=True, attrs={'class':'cover'})
 | 
				
			||||||
        if issue:
 | 
					 | 
				
			||||||
            self.timefmt = ' [%s]'%self.tag_to_string(issue).rpartition('|')[-1].strip().replace('/', '-')
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        cover = soup.find('img', alt=re.compile('Cover'), src=True)
 | 
					 | 
				
			||||||
        if cover is not None:
 | 
					        if cover is not None:
 | 
				
			||||||
            self.cover_url = 'http://theatlantic.com'+cover['src']
 | 
					            self.cover_url = cover['src']
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        for item in soup.findAll('div', attrs={'class':'item'}):
 | 
					        feeds = []
 | 
				
			||||||
            a = item.find('a')
 | 
					        for section in soup.findAll('div', attrs={'class':'magazineSection'}):
 | 
				
			||||||
            if a and a.has_key('href'):
 | 
					            section_title = section.find(attrs={'class':'sectionHeader'})
 | 
				
			||||||
 | 
					            section_title = string.capwords(self.tag_to_string(section_title))
 | 
				
			||||||
 | 
					            self.log('Found section:', section_title)
 | 
				
			||||||
 | 
					            articles = []
 | 
				
			||||||
 | 
					            for post in section.findAll('div', attrs={'class':'post'}):
 | 
				
			||||||
 | 
					                h = post.find(['h3', 'h4'])
 | 
				
			||||||
 | 
					                title = self.tag_to_string(h)
 | 
				
			||||||
 | 
					                a = post.find('a', href=True)
 | 
				
			||||||
                url = a['href']
 | 
					                url = a['href']
 | 
				
			||||||
                if not url.startswith('http://'):
 | 
					                if url.startswith('/'):
 | 
				
			||||||
                    url = 'http://www.theatlantic.com/'+url
 | 
					                    url = 'http://www.theatlantic.com'+url
 | 
				
			||||||
                url = url.replace('/doc/', '/doc/print/')
 | 
					                p = post.find('p', attrs={'class':'dek'})
 | 
				
			||||||
                title = self.tag_to_string(a)
 | 
					                desc = None
 | 
				
			||||||
                if title in ('VIDEO', 'AUDIO', 'INTERACTIVE MAP', 'SIDEBAR', 'RECIPES'):
 | 
					                self.log('\tFound article:', title, 'at', url)
 | 
				
			||||||
                    continue
 | 
					                if p is not None:
 | 
				
			||||||
                title = title.replace('&', '&')
 | 
					                    desc = self.tag_to_string(p)
 | 
				
			||||||
                byline = item.find(attrs={'class':'byline'})
 | 
					                    self.log('\t\t', desc)
 | 
				
			||||||
                date = self.tag_to_string(byline) if byline else ''
 | 
					                articles.append({'title':title, 'url':url, 'description':desc,
 | 
				
			||||||
                description = ''
 | 
					                    'date':''})
 | 
				
			||||||
 | 
					            feeds.append((section_title, articles))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                self.log('\tFound article:', title)
 | 
					        poems = []
 | 
				
			||||||
                self.log('\t\t', url)
 | 
					        self.log('Found section: Poems')
 | 
				
			||||||
 | 
					        for poem in soup.findAll('div', attrs={'class':'poem'}):
 | 
				
			||||||
 | 
					            title = self.tag_to_string(poem.find('h4'))
 | 
				
			||||||
 | 
					            desc  = self.tag_to_string(poem.find(attrs={'class':'author'}))
 | 
				
			||||||
 | 
					            url   = 'http://www.theatlantic.com'+poem.find('a')['href']
 | 
				
			||||||
 | 
					            self.log('\tFound article:', title, 'at', url)
 | 
				
			||||||
 | 
					            self.log('\t\t', desc)
 | 
				
			||||||
 | 
					            poems.append({'title':title, 'url':url, 'description':desc,
 | 
				
			||||||
 | 
					                    'date':''})
 | 
				
			||||||
 | 
					        if poems:
 | 
				
			||||||
 | 
					            feeds.append(('Poems', poems))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                articles.append({
 | 
					        self.log('Found section: Advice')
 | 
				
			||||||
                                 'title':title,
 | 
					        div = soup.find(id='advice')
 | 
				
			||||||
                                 'date':date,
 | 
					        title = self.tag_to_string(div.find('h4'))
 | 
				
			||||||
                                 'url':url,
 | 
					        url = 'http://www.theatlantic.com'+div.find('a')['href']
 | 
				
			||||||
                                 'description':description
 | 
					        desc = self.tag_to_string(div.find('p'))
 | 
				
			||||||
                            })
 | 
					        self.log('\tFound article:', title, 'at', url)
 | 
				
			||||||
 | 
					        self.log('\t\t', desc)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        feeds.append(('Advice', [{'title':title, 'url':url, 'description':desc,
 | 
				
			||||||
 | 
					                    'date':''}]))
 | 
				
			||||||
 | 
					        return feeds
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def postprocess_html(self, soup, first):
 | 
				
			||||||
 | 
					        for table in soup.findAll('table', align='right'):
 | 
				
			||||||
 | 
					            img = table.find('img')
 | 
				
			||||||
 | 
					            if img is not None:
 | 
				
			||||||
 | 
					                img.extract()
 | 
				
			||||||
 | 
					                caption = self.tag_to_string(table).strip()
 | 
				
			||||||
 | 
					                div = Tag(soup, 'div')
 | 
				
			||||||
 | 
					                div['style'] = 'text-align:center'
 | 
				
			||||||
 | 
					                div.insert(0, img)
 | 
				
			||||||
 | 
					                div.insert(1, Tag(soup, 'br'))
 | 
				
			||||||
 | 
					                if caption:
 | 
				
			||||||
 | 
					                    div.insert(2, NavigableString(caption))
 | 
				
			||||||
 | 
					                table.replaceWith(div)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        return soup
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        return [('Current Issue', articles)]
 | 
					 | 
				
			||||||
 | 
				
			|||||||
@ -1,10 +1,10 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
__license__   = 'GPL v3'
 | 
					__license__   = 'GPL v3'
 | 
				
			||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
 | 
					__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
 | 
				
			||||||
'''
 | 
					'''
 | 
				
			||||||
www.azstarnet.com
 | 
					azstarnet.com
 | 
				
			||||||
'''
 | 
					'''
 | 
				
			||||||
 | 
					import urllib
 | 
				
			||||||
from calibre.web.feeds.news import BasicNewsRecipe
 | 
					from calibre.web.feeds.news import BasicNewsRecipe
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class Azstarnet(BasicNewsRecipe):
 | 
					class Azstarnet(BasicNewsRecipe):
 | 
				
			||||||
@ -14,12 +14,12 @@ class Azstarnet(BasicNewsRecipe):
 | 
				
			|||||||
    language              = 'en'
 | 
					    language              = 'en'
 | 
				
			||||||
    publisher             = 'azstarnet.com'
 | 
					    publisher             = 'azstarnet.com'
 | 
				
			||||||
    category              = 'news, politics, Arizona, USA'
 | 
					    category              = 'news, politics, Arizona, USA'
 | 
				
			||||||
    delay                 = 1
 | 
					 | 
				
			||||||
    oldest_article        = 3
 | 
					    oldest_article        = 3
 | 
				
			||||||
    max_articles_per_feed = 100
 | 
					    max_articles_per_feed = 100
 | 
				
			||||||
    no_stylesheets        = True
 | 
					    no_stylesheets        = True
 | 
				
			||||||
    use_embedded_content  = False
 | 
					    use_embedded_content  = False
 | 
				
			||||||
    encoding              = 'utf-8'
 | 
					    encoding              = 'utf-8'
 | 
				
			||||||
 | 
					    masthead_url          = 'http://azstarnet.com/content/tncms/live/global/resources/images/logo.gif'
 | 
				
			||||||
    needs_subscription    = True
 | 
					    needs_subscription    = True
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    conversion_options = {
 | 
					    conversion_options = {
 | 
				
			||||||
@ -32,31 +32,27 @@ class Azstarnet(BasicNewsRecipe):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    def get_browser(self):
 | 
					    def get_browser(self):
 | 
				
			||||||
        br = BasicNewsRecipe.get_browser()
 | 
					        br = BasicNewsRecipe.get_browser()
 | 
				
			||||||
 | 
					        br.open('http://azstarnet.com/')
 | 
				
			||||||
        if self.username is not None and self.password is not None:
 | 
					        if self.username is not None and self.password is not None:
 | 
				
			||||||
            br.open('http://azstarnet.com/registration/retro.php')
 | 
					            data = urllib.urlencode({ 'm':'login'
 | 
				
			||||||
            br.select_form(nr=1)
 | 
					                                     ,'u':self.username
 | 
				
			||||||
            br['email'] = self.username
 | 
					                                     ,'p':self.password
 | 
				
			||||||
            br['pass' ] = self.password
 | 
					                                     ,'z':'http://azstarnet.com/'
 | 
				
			||||||
            br.submit()
 | 
					                                   })
 | 
				
			||||||
 | 
					            br.open('http://azstarnet.com/app/registration/proxy.php',data)
 | 
				
			||||||
        return br
 | 
					        return br
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    remove_tags = [dict(name=['object','link','iframe','base','img'])]
 | 
				
			||||||
    keep_only_tags = [dict(name='div', attrs={'id':'storycontent'})]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    remove_tags = [
 | 
					 | 
				
			||||||
                     dict(name=['object','link','iframe','base','img'])
 | 
					 | 
				
			||||||
                    ,dict(name='div',attrs={'class':'bannerinstory'})
 | 
					 | 
				
			||||||
                  ]
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    feeds = [
 | 
					    feeds = [
 | 
				
			||||||
               (u'Tucson Region', u'http://rss.azstarnet.com/index.php?site=metro')
 | 
					               (u'Local News'    , u'http://azstarnet.com/search/?f=rss&t=article&c=news/local&l=25&s=start_time&sd=desc')
 | 
				
			||||||
              ,(u'Sports'       , u'http://rss.azstarnet.com/index.php?site=sports')
 | 
					              ,(u'National News' , u'http://azstarnet.com/search/?f=rss&t=article&c=news/national&l=25&s=start_time&sd=desc')
 | 
				
			||||||
              ,(u'Business'     , u'http://rss.azstarnet.com/index.php?site=biz-topheadlines')
 | 
					              ,(u'World News'    , u'http://azstarnet.com/search/?f=rss&t=article&c=news/world&l=25&s=start_time&sd=desc')
 | 
				
			||||||
              ,(u'Nation-World' , u'http://rss.azstarnet.com/index.php?site=news')
 | 
					              ,(u'Sports'        , u'http://azstarnet.com/search/?f=rss&t=article&c=sports&l=25&s=start_time&sd=desc')
 | 
				
			||||||
              ,(u'Opinion'      , u'http://rss.azstarnet.com/index.php?site=opinion')
 | 
					              ,(u'Opinion'       , u'http://azstarnet.com/search/?f=rss&t=article&c=news/opinion&l=25&s=start_time&sd=desc')
 | 
				
			||||||
              ,(u'Lifestyle'    , u'http://rss.azstarnet.com/index.php?site=accent')
 | 
					              ,(u'Movies'        , u'http://azstarnet.com/search/?f=rss&t=article&c=entertainment/movies&l=25&s=start_time&sd=desc')
 | 
				
			||||||
              ,(u'Food'         , u'http://rss.azstarnet.com/index.php?site=food')
 | 
					              ,(u'Food'          , u'http://azstarnet.com/search/?f=rss&t=article&c=lifestyles/food-and-cooking&l=25&s=start_time&sd=desc')
 | 
				
			||||||
            ]
 | 
					            ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def preprocess_html(self, soup):
 | 
					    def preprocess_html(self, soup):
 | 
				
			||||||
@ -64,4 +60,6 @@ class Azstarnet(BasicNewsRecipe):
 | 
				
			|||||||
            del item['style']
 | 
					            del item['style']
 | 
				
			||||||
        return soup
 | 
					        return soup
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def print_version(self, url):
 | 
				
			||||||
 | 
					        return url + '?print=1'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -1,7 +1,6 @@
 | 
				
			|||||||
#!/usr/bin/env  python
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
__license__   = 'GPL v3'
 | 
					__license__   = 'GPL v3'
 | 
				
			||||||
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
 | 
					__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
 | 
				
			||||||
'''
 | 
					'''
 | 
				
			||||||
b92.net
 | 
					b92.net
 | 
				
			||||||
'''
 | 
					'''
 | 
				
			||||||
@ -19,16 +18,15 @@ class B92(BasicNewsRecipe):
 | 
				
			|||||||
    no_stylesheets        = True
 | 
					    no_stylesheets        = True
 | 
				
			||||||
    use_embedded_content  = False
 | 
					    use_embedded_content  = False
 | 
				
			||||||
    encoding              = 'cp1250'
 | 
					    encoding              = 'cp1250'
 | 
				
			||||||
    language = 'sr'
 | 
					    language              = 'sr'
 | 
				
			||||||
 | 
					    extra_css             = ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif} '
 | 
				
			||||||
    lang                  = 'sr-Latn-RS'
 | 
					 | 
				
			||||||
    extra_css             = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
 | 
					 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    conversion_options = {
 | 
					    conversion_options = {
 | 
				
			||||||
                          'comment'          : description
 | 
					                          'comment'          : description
 | 
				
			||||||
                        , 'tags'             : category
 | 
					                        , 'tags'             : category
 | 
				
			||||||
                        , 'publisher'        : publisher
 | 
					                        , 'publisher'        : publisher
 | 
				
			||||||
                        , 'language'         : lang
 | 
					                        , 'language'         : language
 | 
				
			||||||
 | 
					                        , 'linearize_tables' : True
 | 
				
			||||||
                        }
 | 
					                        }
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
 | 
					    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
 | 
				
			||||||
@ -50,20 +48,5 @@ class B92(BasicNewsRecipe):
 | 
				
			|||||||
        return url + '&version=print'
 | 
					        return url + '&version=print'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def preprocess_html(self, soup):
 | 
					    def preprocess_html(self, soup):
 | 
				
			||||||
        del soup.body['onload']
 | 
					        return self.adeify_images(soup)
 | 
				
			||||||
        for item in soup.findAll('font'):
 | 
					
 | 
				
			||||||
            item.name='div'
 | 
					 | 
				
			||||||
            if item.has_key('size'):
 | 
					 | 
				
			||||||
               del item['size']
 | 
					 | 
				
			||||||
        attribs = [  'style','font','valign'
 | 
					 | 
				
			||||||
                    ,'colspan','width','height'
 | 
					 | 
				
			||||||
                    ,'rowspan','summary','align'
 | 
					 | 
				
			||||||
                    ,'cellspacing','cellpadding'
 | 
					 | 
				
			||||||
                    ,'frames','rules','border'
 | 
					 | 
				
			||||||
                  ]
 | 
					 | 
				
			||||||
        for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
 | 
					 | 
				
			||||||
            item.name = 'div'
 | 
					 | 
				
			||||||
            for attrib in attribs:
 | 
					 | 
				
			||||||
                if item.has_key(attrib):
 | 
					 | 
				
			||||||
                   del item[attrib]                           
 | 
					 | 
				
			||||||
        return soup
 | 
					 | 
				
			||||||
 | 
				
			|||||||
@ -1,13 +1,11 @@
 | 
				
			|||||||
#!/usr/bin/env  python
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
__license__   = 'GPL v3'
 | 
					__license__   = 'GPL v3'
 | 
				
			||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
 | 
					__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
 | 
				
			||||||
'''
 | 
					'''
 | 
				
			||||||
beta.rs
 | 
					beta.rs
 | 
				
			||||||
'''
 | 
					'''
 | 
				
			||||||
import re
 | 
					import re
 | 
				
			||||||
from calibre.web.feeds.news import BasicNewsRecipe
 | 
					from calibre.web.feeds.news import BasicNewsRecipe
 | 
				
			||||||
from calibre.ebooks.BeautifulSoup import Tag
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
class Danas(BasicNewsRecipe):
 | 
					class Danas(BasicNewsRecipe):
 | 
				
			||||||
    title                 = 'BETA'
 | 
					    title                 = 'BETA'
 | 
				
			||||||
@ -19,18 +17,14 @@ class Danas(BasicNewsRecipe):
 | 
				
			|||||||
    max_articles_per_feed = 100
 | 
					    max_articles_per_feed = 100
 | 
				
			||||||
    no_stylesheets        = False
 | 
					    no_stylesheets        = False
 | 
				
			||||||
    use_embedded_content  = True
 | 
					    use_embedded_content  = True
 | 
				
			||||||
    language = 'sr'
 | 
					    language              = 'sr'
 | 
				
			||||||
 | 
					    extra_css             = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} img{margin-bottom: 0.8em} '
 | 
				
			||||||
    lang                  = 'sr-Latn-RS'
 | 
					 | 
				
			||||||
    direction             = 'ltr'
 | 
					 | 
				
			||||||
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    conversion_options = {
 | 
					    conversion_options = {
 | 
				
			||||||
                          'comment'          : description
 | 
					                          'comment'          : description
 | 
				
			||||||
                        , 'tags'             : category
 | 
					                        , 'tags'             : category
 | 
				
			||||||
                        , 'publisher'        : publisher
 | 
					                        , 'publisher'        : publisher
 | 
				
			||||||
                        , 'language'         : lang
 | 
					                        , 'language'         : language
 | 
				
			||||||
                        , 'pretty_print'     : True
 | 
					 | 
				
			||||||
                        }
 | 
					                        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -43,9 +37,4 @@ class Danas(BasicNewsRecipe):
 | 
				
			|||||||
                     ]
 | 
					                     ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def preprocess_html(self, soup):
 | 
					    def preprocess_html(self, soup):
 | 
				
			||||||
        soup.html['lang'] = self.lang
 | 
					 | 
				
			||||||
        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
 | 
					 | 
				
			||||||
        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
 | 
					 | 
				
			||||||
        soup.head.insert(0,mlang)
 | 
					 | 
				
			||||||
        soup.head.insert(1,mcharset)
 | 
					 | 
				
			||||||
        return self.adeify_images(soup)
 | 
					        return self.adeify_images(soup)
 | 
				
			||||||
 | 
				
			|||||||
@ -14,14 +14,13 @@ class Blic(BasicNewsRecipe):
 | 
				
			|||||||
    description           = 'Blic.rs online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja'
 | 
					    description           = 'Blic.rs online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja'
 | 
				
			||||||
    publisher             = 'RINGIER d.o.o.'
 | 
					    publisher             = 'RINGIER d.o.o.'
 | 
				
			||||||
    category              = 'news, politics, Serbia'
 | 
					    category              = 'news, politics, Serbia'
 | 
				
			||||||
    delay                 = 1
 | 
					 | 
				
			||||||
    oldest_article        = 2
 | 
					    oldest_article        = 2
 | 
				
			||||||
    max_articles_per_feed = 100
 | 
					    max_articles_per_feed = 100
 | 
				
			||||||
    no_stylesheets        = True
 | 
					    no_stylesheets        = True
 | 
				
			||||||
    use_embedded_content  = False
 | 
					    use_embedded_content  = False
 | 
				
			||||||
 | 
					    masthead_url          = 'http://www.blic.rs/resources/images/header/header_back.png'
 | 
				
			||||||
    language              = 'sr'
 | 
					    language              = 'sr'
 | 
				
			||||||
 | 
					    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: Georgia, serif1, serif} .article_description{font-family: Arial, sans1, sans-serif} .img_full{float: none} img{margin-bottom: 0.8em} '
 | 
				
			||||||
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} '
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    conversion_options = {
 | 
					    conversion_options = {
 | 
				
			||||||
                          'comment'  : description
 | 
					                          'comment'  : description
 | 
				
			||||||
@ -31,13 +30,15 @@ class Blic(BasicNewsRecipe):
 | 
				
			|||||||
                        }
 | 
					                        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
 | 
					    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
 | 
				
			||||||
 | 
					 | 
				
			||||||
    remove_tags_before = dict(name='div', attrs={'id':'article_info'})
 | 
					    remove_tags_before = dict(name='div', attrs={'id':'article_info'})
 | 
				
			||||||
 | 
					    remove_tags        = [dict(name=['object','link'])]
 | 
				
			||||||
 | 
					    remove_attributes  = ['width','height']
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    feeds              = [(u'Danasnje Vesti', u'http://www.blic.rs/rss/danasnje-vesti')]
 | 
					    feeds              = [(u'Danasnje Vesti', u'http://www.blic.rs/rss/danasnje-vesti')]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    remove_tags        = [dict(name=['object','link'])]
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def print_version(self, url):
 | 
					    def print_version(self, url):
 | 
				
			||||||
        return url + '/print'
 | 
					        return url + '/print'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def preprocess_html(self, soup):
 | 
				
			||||||
 | 
					        return self.adeify_images(soup)
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										36
									
								
								resources/recipes/cetnixploitation.recipe
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										36
									
								
								resources/recipes/cetnixploitation.recipe
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,36 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 | 
					__license__   = 'GPL v3'
 | 
				
			||||||
 | 
					__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					chetnixploitation.blogspot.com
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import re
 | 
				
			||||||
 | 
					from calibre.web.feeds.news import BasicNewsRecipe
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class Chetnixploitation(BasicNewsRecipe):
 | 
				
			||||||
 | 
					    title                 = 'Chetnixploitation'
 | 
				
			||||||
 | 
					    __author__            = 'Darko Miletic'
 | 
				
			||||||
 | 
					    description           = 'Filmski blog'    
 | 
				
			||||||
 | 
					    oldest_article        = 7
 | 
				
			||||||
 | 
					    max_articles_per_feed = 100
 | 
				
			||||||
 | 
					    language              = 'sr'
 | 
				
			||||||
 | 
					    encoding              = 'utf-8'
 | 
				
			||||||
 | 
					    no_stylesheets        = True
 | 
				
			||||||
 | 
					    use_embedded_content  = True
 | 
				
			||||||
 | 
					    extra_css             = ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: "Trebuchet MS",Trebuchet,Verdana,sans1,sans-serif} .article_description{font-family: sans1, sans-serif} img{margin-bottom: 0.8em; border: 1px solid #333333; padding: 4px } '
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    conversion_options = {
 | 
				
			||||||
 | 
					                          'comment'  : description
 | 
				
			||||||
 | 
					                        , 'tags'     : 'film, blog, cetnici, srbija, ex-yu'
 | 
				
			||||||
 | 
					                        , 'publisher': 'Son of Man'
 | 
				
			||||||
 | 
					                        , 'language' : language
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
 | 
				
			||||||
 | 
					    feeds = [(u'Posts', u'http://chetnixploitation.blogspot.com/feeds/posts/default')]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def preprocess_html(self, soup):
 | 
				
			||||||
 | 
					        return self.adeify_images(soup)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -20,7 +20,7 @@ class Danas(BasicNewsRecipe):
 | 
				
			|||||||
    encoding              = 'utf-8'
 | 
					    encoding              = 'utf-8'
 | 
				
			||||||
    masthead_url          = 'http://www.danas.rs/images/basic/danas.gif'
 | 
					    masthead_url          = 'http://www.danas.rs/images/basic/danas.gif'
 | 
				
			||||||
    language              = 'sr'
 | 
					    language              = 'sr'
 | 
				
			||||||
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .article_description,body,.lokacija{font-family: Tahoma,Arial,Helvetica,sans1,sans-serif} .nadNaslov,h1,.preamble{font-family: Georgia,"Times New Roman",Times,serif1,serif} .antrfileText{border-left: 2px solid #999999; color:#666666; margin-left: 0.8em; padding-left: 1.2em; margin-bottom: 0; margin-top: 0} h2,.datum,.lokacija,.autor{font-size: small} .antrfileNaslov{border-left: 2px solid #999999; color:#666666; margin-left: 0.8em; padding-left: 1.2em; font-weight:bold; margin-bottom: 0; margin-top: 0} img{margin-bottom: 0.8em} '
 | 
					    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .article_description,body,.lokacija{font-family: Tahoma,Arial,Helvetica,sans1,sans-serif} .nadNaslov,h1,.preamble{font-family: Georgia,"Times New Roman",Times,serif1,serif} .antrfileText{border-left: 2px solid #999999; margin-left: 0.8em; padding-left: 1.2em; margin-bottom: 0; margin-top: 0} h2,.datum,.lokacija,.autor{font-size: small} .antrfileNaslov{border-left: 2px solid #999999; margin-left: 0.8em; padding-left: 1.2em; font-weight:bold; margin-bottom: 0; margin-top: 0} img{margin-bottom: 0.8em} '
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    conversion_options = {
 | 
					    conversion_options = {
 | 
				
			||||||
                          'comment'          : description
 | 
					                          'comment'          : description
 | 
				
			||||||
@ -38,7 +38,7 @@ class Danas(BasicNewsRecipe):
 | 
				
			|||||||
                    ,dict(name=['object','link','iframe'])
 | 
					                    ,dict(name=['object','link','iframe'])
 | 
				
			||||||
                  ]
 | 
					                  ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    feeds          = [
 | 
					    feeds          = [ 
 | 
				
			||||||
                        (u'Politika' , u'http://www.danas.rs/rss/rss.asp?column_id=27')
 | 
					                        (u'Politika' , u'http://www.danas.rs/rss/rss.asp?column_id=27')
 | 
				
			||||||
                       ,(u'Hronika'  , u'http://www.danas.rs/rss/rss.asp?column_id=2' )
 | 
					                       ,(u'Hronika'  , u'http://www.danas.rs/rss/rss.asp?column_id=2' )
 | 
				
			||||||
                       ,(u'Drustvo'  , u'http://www.danas.rs/rss/rss.asp?column_id=24')
 | 
					                       ,(u'Drustvo'  , u'http://www.danas.rs/rss/rss.asp?column_id=24')
 | 
				
			||||||
@ -60,4 +60,4 @@ class Danas(BasicNewsRecipe):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    def print_version(self, url):
 | 
					    def print_version(self, url):
 | 
				
			||||||
        return url + '&action=print'
 | 
					        return url + '&action=print'
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										50
									
								
								resources/recipes/diariovasco.recipe
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										50
									
								
								resources/recipes/diariovasco.recipe
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,50 @@
 | 
				
			|||||||
 | 
					__license__   = 'GPL v3'
 | 
				
			||||||
 | 
					__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					www.diariovasco.com
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from calibre.web.feeds.news import BasicNewsRecipe
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class DiarioVasco(BasicNewsRecipe):
 | 
				
			||||||
 | 
					    title                 = 'Diario Vasco'
 | 
				
			||||||
 | 
					    __author__            = 'Darko Miletic'
 | 
				
			||||||
 | 
					    description           = 'Noticias de pais Vasco y el resto del mundo'
 | 
				
			||||||
 | 
					    publisher             = 'Diario Vasco'
 | 
				
			||||||
 | 
					    category              = 'news, politics, Spain'
 | 
				
			||||||
 | 
					    oldest_article        = 2
 | 
				
			||||||
 | 
					    max_articles_per_feed = 200
 | 
				
			||||||
 | 
					    no_stylesheets        = True
 | 
				
			||||||
 | 
					    encoding              = 'cp1252'
 | 
				
			||||||
 | 
					    use_embedded_content  = False
 | 
				
			||||||
 | 
					    language              = 'es'
 | 
				
			||||||
 | 
					    remove_empty_feeds    = True
 | 
				
			||||||
 | 
					    masthead_url          = 'http://www.diariovasco.com/img/rd.logotipo2_dvasco.gif'
 | 
				
			||||||
 | 
					    extra_css             = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .photo-caption{font-size: x-small} '
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    conversion_options = {
 | 
				
			||||||
 | 
					                          'comment'   : description
 | 
				
			||||||
 | 
					                        , 'tags'      : category
 | 
				
			||||||
 | 
					                        , 'publisher' : publisher
 | 
				
			||||||
 | 
					                        , 'language'  : language
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    keep_only_tags = [
 | 
				
			||||||
 | 
					                         dict(attrs={'id':'title'})
 | 
				
			||||||
 | 
					                        ,dict(attrs={'class':['overhead','headline','subhead','date','text','noticia_cont','desarrollo']})
 | 
				
			||||||
 | 
					                     ]
 | 
				
			||||||
 | 
					    remove_tags = [dict(name='ul')]
 | 
				
			||||||
 | 
					    remove_attributes = ['width','height']
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    feeds = [
 | 
				
			||||||
 | 
					              (u'Ultimas Noticias' , u'http://www.diariovasco.com/rss/feeds/ultima.xml'       )
 | 
				
			||||||
 | 
					             ,(u'Portada'          , u'http://www.diariovasco.com/portada.xml'                )
 | 
				
			||||||
 | 
					             ,(u'Politica'         , u'http://www.diariovasco.com/rss/feeds/politica.xml'     )
 | 
				
			||||||
 | 
					             ,(u'Deportes'         , u'http://www.diariovasco.com/rss/feeds/deportes.xml'     )
 | 
				
			||||||
 | 
					             ,(u'Economia'         , u'http://www.diariovasco.com/rss/feeds/economia.xml'     )
 | 
				
			||||||
 | 
					             ,(u'Mundo'            , u'http://www.diariovasco.com/rss/feeds/mundo.xml'        )
 | 
				
			||||||
 | 
					             ,(u'Cultura'          , u'http://www.diariovasco.com/rss/feeds/cultura.xml'      )
 | 
				
			||||||
 | 
					             ,(u'Gente'            , u'http://www.diariovasco.com/rss/feeds/gente.xml'        )
 | 
				
			||||||
 | 
					             ,(u'Contraportada'    , u'http://www.diariovasco.com/rss/feeds/contraportada.xml')
 | 
				
			||||||
 | 
					            ]
 | 
				
			||||||
@ -1,7 +1,5 @@
 | 
				
			|||||||
#!/usr/bin/env  python
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
__license__   = 'GPL v3'
 | 
					__license__   = 'GPL v3'
 | 
				
			||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
 | 
					__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
'''
 | 
					'''
 | 
				
			||||||
e-novine.com
 | 
					e-novine.com
 | 
				
			||||||
@ -9,7 +7,6 @@ e-novine.com
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
import re
 | 
					import re
 | 
				
			||||||
from calibre.web.feeds.news import BasicNewsRecipe
 | 
					from calibre.web.feeds.news import BasicNewsRecipe
 | 
				
			||||||
from calibre.ebooks.BeautifulSoup import Tag
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
class E_novine(BasicNewsRecipe):
 | 
					class E_novine(BasicNewsRecipe):
 | 
				
			||||||
    title                 = 'E-Novine'
 | 
					    title                 = 'E-Novine'
 | 
				
			||||||
@ -20,40 +17,38 @@ class E_novine(BasicNewsRecipe):
 | 
				
			|||||||
    oldest_article        = 2
 | 
					    oldest_article        = 2
 | 
				
			||||||
    max_articles_per_feed = 100
 | 
					    max_articles_per_feed = 100
 | 
				
			||||||
    no_stylesheets        = True
 | 
					    no_stylesheets        = True
 | 
				
			||||||
    encoding              = 'cp1250'
 | 
					    encoding              = 'utf-8'
 | 
				
			||||||
    use_embedded_content  = False
 | 
					    use_embedded_content  = False
 | 
				
			||||||
    language = 'sr'
 | 
					    language              = 'sr'
 | 
				
			||||||
 | 
					    masthead_url          = 'http://www.e-novine.com/themes/e_novine/img/logo.gif'
 | 
				
			||||||
    lang                  = 'sr'
 | 
					    extra_css = ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .article_description,body{font-family: Arial,Helvetica,sans1,sans-serif} img{float: none; margin-bottom: 0.8em} '
 | 
				
			||||||
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    conversion_options = {
 | 
					    conversion_options = {
 | 
				
			||||||
                          'comment'          : description
 | 
					                          'comment'          : description
 | 
				
			||||||
                        , 'tags'             : category
 | 
					                        , 'tags'             : category
 | 
				
			||||||
                        , 'publisher'        : publisher
 | 
					                        , 'publisher'        : publisher
 | 
				
			||||||
                        , 'language'         : lang
 | 
					                        , 'language'         : language
 | 
				
			||||||
                        , 'pretty_print'     : True
 | 
					 | 
				
			||||||
                        }
 | 
					                        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
 | 
					    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    keep_only_tags = [dict(name='div', attrs={'id':['css_47_0_2844H']})]
 | 
					    keep_only_tags = [
 | 
				
			||||||
 | 
					                         dict(name='div', attrs={'class':'article_head'})
 | 
				
			||||||
 | 
					                        ,dict(name='div', attrs={'id':'article_body'})
 | 
				
			||||||
 | 
					                     ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    remove_tags = [dict(name=['object','link','embed','iframe'])]
 | 
					    remove_tags = [
 | 
				
			||||||
 | 
					                     dict(name=['object','link','embed','iframe'])
 | 
				
			||||||
 | 
					                    ,dict(attrs={'id':'box_article_tools'})
 | 
				
			||||||
 | 
					                  ]
 | 
				
			||||||
 | 
					    remove_attributes = ['height','width','lang']
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    feeds = [(u'Sve vesti', u'http://www.e-novine.com/rss/e-novine.xml' )]
 | 
					    feeds = [(u'Sve vesti', u'http://www.e-novine.com/feed/index.1.rss' )]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def preprocess_html(self, soup):
 | 
					    def preprocess_html(self, soup):
 | 
				
			||||||
        soup.html['xml:lang'] = self.lang
 | 
					 | 
				
			||||||
        soup.html['lang']     = self.lang
 | 
					 | 
				
			||||||
        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
 | 
					 | 
				
			||||||
        soup.head.insert(0,mlang)
 | 
					 | 
				
			||||||
        for item in soup.findAll(style=True):
 | 
					        for item in soup.findAll(style=True):
 | 
				
			||||||
            del item['style']
 | 
					            del item['style']
 | 
				
			||||||
        ftag = soup.find('div', attrs={'id':'css_47_0_2844H'})
 | 
					        return self.adeify_images(soup)
 | 
				
			||||||
        if ftag:
 | 
					
 | 
				
			||||||
           it = ftag.div
 | 
					    def print_version(self, url):
 | 
				
			||||||
           it.extract()
 | 
					        return url + '?print'
 | 
				
			||||||
           ftag.div.extract()
 | 
					 | 
				
			||||||
           ftag.insert(0,it)
 | 
					 | 
				
			||||||
        return soup
 | 
					 | 
				
			||||||
 | 
				
			|||||||
@ -119,6 +119,8 @@ class Economist(BasicNewsRecipe):
 | 
				
			|||||||
            ns = NavigableString(self.tag_to_string(caption))
 | 
					            ns = NavigableString(self.tag_to_string(caption))
 | 
				
			||||||
            div.insert(0, ns)
 | 
					            div.insert(0, ns)
 | 
				
			||||||
            div.insert(1, Tag(soup, 'br'))
 | 
					            div.insert(1, Tag(soup, 'br'))
 | 
				
			||||||
 | 
					            del img['width']
 | 
				
			||||||
 | 
					            del img['height']
 | 
				
			||||||
            img.extract()
 | 
					            img.extract()
 | 
				
			||||||
            div.insert(2, img)
 | 
					            div.insert(2, img)
 | 
				
			||||||
            table.replaceWith(div)
 | 
					            table.replaceWith(div)
 | 
				
			||||||
 | 
				
			|||||||
@ -123,6 +123,8 @@ class Economist(BasicNewsRecipe):
 | 
				
			|||||||
            div.insert(0, ns)
 | 
					            div.insert(0, ns)
 | 
				
			||||||
            div.insert(1, Tag(soup, 'br'))
 | 
					            div.insert(1, Tag(soup, 'br'))
 | 
				
			||||||
            img.extract()
 | 
					            img.extract()
 | 
				
			||||||
 | 
					            del img['width']
 | 
				
			||||||
 | 
					            del img['height']
 | 
				
			||||||
            div.insert(2, img)
 | 
					            div.insert(2, img)
 | 
				
			||||||
            table.replaceWith(div)
 | 
					            table.replaceWith(div)
 | 
				
			||||||
        return soup
 | 
					        return soup
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										58
									
								
								resources/recipes/epicurious.recipe
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										58
									
								
								resources/recipes/epicurious.recipe
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,58 @@
 | 
				
			|||||||
 | 
					#!/usr/bin/env  python
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					__license__   = 'GPL v3'
 | 
				
			||||||
 | 
					__copyright__ = '2010, Starson17'
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					www.epicurious.com
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					import re
 | 
				
			||||||
 | 
					from calibre.web.feeds.news import BasicNewsRecipe
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class Epicurious(BasicNewsRecipe):
 | 
				
			||||||
 | 
					    title          = u'Epicurious'
 | 
				
			||||||
 | 
					    __author__  = 'Starson17'
 | 
				
			||||||
 | 
					    description = 'Food and Recipes from Epicurious'
 | 
				
			||||||
 | 
					    cover_url     = 'http://up6.podbean.com/image-logos/21849_logo.jpg'
 | 
				
			||||||
 | 
					    publisher      = 'Epicurious'
 | 
				
			||||||
 | 
					    tags           = 'news, food, gourmet, recipes'
 | 
				
			||||||
 | 
					    language = 'en'
 | 
				
			||||||
 | 
					    use_embedded_content    = False
 | 
				
			||||||
 | 
					    no_stylesheets        = True
 | 
				
			||||||
 | 
					    remove_javascript = True
 | 
				
			||||||
 | 
					    recursions = 3
 | 
				
			||||||
 | 
					    oldest_article        = 14
 | 
				
			||||||
 | 
					    max_articles_per_feed = 20
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    keep_only_tags = [dict(name='div', attrs={'class':['mainconsolewrapper','videoheader','content_unit','entry-content','see_more_block']}),
 | 
				
			||||||
 | 
					                      dict(name='div', attrs={'id':['headline','introBlock','ingredients','preparation','articleContent','in_categories_block']})
 | 
				
			||||||
 | 
					                           ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    remove_tags = [{'id':['printShoppingList','addnoteLnk','btnUploadVideo','enlarge_image']},
 | 
				
			||||||
 | 
					                   {'class':['subLnk','sbmWrapper','detail_division','entry-footer','comment-footer']},
 | 
				
			||||||
 | 
					                   dict(name='div', attrs={'class':['tagged','comments']})
 | 
				
			||||||
 | 
					                   ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    remove_tags_after = [dict(name='div', attrs={'class':'entry-content'})]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    feeds = [
 | 
				
			||||||
 | 
					             (u'Recipes: Healthy dinner ', u'http://feeds.epicurious.com/healthy_recipes'),
 | 
				
			||||||
 | 
					             (u'New Recipes ', u'http://feeds.epicurious.com/newrecipes'),
 | 
				
			||||||
 | 
					             (u'Features ', u'http://feeds.epicurious.com/latestfeatures'),
 | 
				
			||||||
 | 
					             (u'Blogs ', u'http://feeds.feedburner.com/epicurious/epiblog')
 | 
				
			||||||
 | 
					             ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    match_regexps = [
 | 
				
			||||||
 | 
					                     r'http://www.epicurious.com/.*recipes/.*/views'
 | 
				
			||||||
 | 
					                     ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    preprocess_regexps = [
 | 
				
			||||||
 | 
					        (re.compile(r'/\n', re.DOTALL|re.IGNORECASE), lambda match: '/'),
 | 
				
			||||||
 | 
					        (re.compile(r'_116.jpg', re.DOTALL|re.IGNORECASE), lambda match: '.jpg'),
 | 
				
			||||||
 | 
					        (re.compile('<div class=\"comments\".*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>')
 | 
				
			||||||
 | 
					        ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def postprocess_html(self, soup, first_fetch):
 | 
				
			||||||
 | 
					        for t in soup.findAll(['table', 'tr', 'td']):
 | 
				
			||||||
 | 
					            t.name = 'div'
 | 
				
			||||||
 | 
					        return soup
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -1,27 +1,41 @@
 | 
				
			|||||||
#!/usr/bin/env  python
 | 
					#!/usr/bin/env  python
 | 
				
			||||||
 | 
					
 | 
				
			||||||
__license__   = 'GPL v3'
 | 
					__license__   = 'GPL v3'
 | 
				
			||||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
 | 
					__copyright__ = '2010 Starson17'
 | 
				
			||||||
'''
 | 
					'''
 | 
				
			||||||
fudzilla.com
 | 
					fudzilla.com
 | 
				
			||||||
'''
 | 
					'''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import re
 | 
				
			||||||
from calibre.web.feeds.news import BasicNewsRecipe
 | 
					from calibre.web.feeds.news import BasicNewsRecipe
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class Fudzilla(BasicNewsRecipe):
 | 
					class Fudzilla(BasicNewsRecipe):
 | 
				
			||||||
    title                 = u'Fudzilla'
 | 
					    title                 = u'Fudzilla'
 | 
				
			||||||
    __author__            = 'Darko Miletic'
 | 
					    __author__            = 'Starson17'
 | 
				
			||||||
    language = 'en'
 | 
					    language = 'en'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    description           = 'Tech news'
 | 
					    description           = 'Tech news'
 | 
				
			||||||
    oldest_article        = 7
 | 
					    oldest_article        = 7
 | 
				
			||||||
 | 
					    remove_javascript = True
 | 
				
			||||||
    max_articles_per_feed = 100
 | 
					    max_articles_per_feed = 100
 | 
				
			||||||
    no_stylesheets        = True
 | 
					    no_stylesheets        = True
 | 
				
			||||||
    use_embedded_content  = False
 | 
					    use_embedded_content  = False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    feeds = [ (u'Posts', u'http://www.fudzilla.com/index.php?option=com_rss&feed=RSS2.0&no_html=1')]
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def print_version(self, url):
 | 
					    remove_tags_before = dict(name='div', attrs={'class':['padding']})
 | 
				
			||||||
        nurl = url.replace('http://www.fudzilla.com/index.php','http://www.fudzilla.com/index2.php')
 | 
					
 | 
				
			||||||
        nmain, nsep, nrest = nurl.partition('&Itemid=')
 | 
					    remove_tags = [dict(name='td', attrs={'class':['left','right']}),
 | 
				
			||||||
        return  nmain + '&pop=1&page=0&Itemid=1'
 | 
					                   dict(name='div', attrs={'id':['toolbar','buttons']}), 
 | 
				
			||||||
 | 
					                   dict(name='div', attrs={'class':['artbannersxtd','back_button']}), 
 | 
				
			||||||
 | 
					                   dict(name='span', attrs={'class':['pathway']}), 
 | 
				
			||||||
 | 
					                   dict(name='th', attrs={'class':['pagenav_next','pagenav_prev']}), 
 | 
				
			||||||
 | 
					                   dict(name='table', attrs={'class':['headlines']}), 
 | 
				
			||||||
 | 
					                   ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    feeds = [
 | 
				
			||||||
 | 
					             (u'Posts', u'http://www.fudzilla.com/index.php?option=com_rss&feed=RSS2.0&no_html=1')
 | 
				
			||||||
 | 
					             ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    preprocess_regexps = [
 | 
				
			||||||
 | 
					        (re.compile(r'<p class="MsoNormal"> Welcome.*</p> ', re.DOTALL|re.IGNORECASE), lambda match: '')
 | 
				
			||||||
 | 
					        ]
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										56
									
								
								resources/recipes/gamasutra_fa.recipe
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										56
									
								
								resources/recipes/gamasutra_fa.recipe
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,56 @@
 | 
				
			|||||||
 | 
					__license__   = 'GPL v3'
 | 
				
			||||||
 | 
					__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					gamasutra.com
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import re
 | 
				
			||||||
 | 
					from calibre.web.feeds.news import BasicNewsRecipe
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class Gamasutra(BasicNewsRecipe):
 | 
				
			||||||
 | 
					    title                 = 'Gamasutra Featured articles'
 | 
				
			||||||
 | 
					    __author__            = 'Darko Miletic'
 | 
				
			||||||
 | 
					    description           = 'The Art and Business of Making Games'
 | 
				
			||||||
 | 
					    publisher             = 'Gamasutra'
 | 
				
			||||||
 | 
					    category              = 'news, games, IT'
 | 
				
			||||||
 | 
					    oldest_article        = 2
 | 
				
			||||||
 | 
					    max_articles_per_feed = 200
 | 
				
			||||||
 | 
					    no_stylesheets        = True
 | 
				
			||||||
 | 
					    encoding              = 'cp1252'
 | 
				
			||||||
 | 
					    use_embedded_content  = False
 | 
				
			||||||
 | 
					    language              = 'en'
 | 
				
			||||||
 | 
					    remove_empty_feeds    = True
 | 
				
			||||||
 | 
					    masthead_url          = 'http://www.gamasutra.com/images/gamasutra_logo.gif'
 | 
				
			||||||
 | 
					    extra_css             = ' body{font-family: Verdana,Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .title{font-size: x-large; font-weight: bold} '
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    conversion_options = {
 | 
				
			||||||
 | 
					                          'comment'          : description
 | 
				
			||||||
 | 
					                        , 'tags'             : category
 | 
				
			||||||
 | 
					                        , 'publisher'        : publisher
 | 
				
			||||||
 | 
					                        , 'language'         : language
 | 
				
			||||||
 | 
					                        , 'linearize_tables' : True
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					    preprocess_regexps = [
 | 
				
			||||||
 | 
					                           (re.compile(r'<head>.*?<title>', re.DOTALL|re.IGNORECASE),lambda match: '<head><title>')
 | 
				
			||||||
 | 
					                          ,(re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE),lambda match: '</title></head>')
 | 
				
			||||||
 | 
					                          ,(re.compile(r'</head>', re.DOTALL|re.IGNORECASE),lambda match: '</head><body>')
 | 
				
			||||||
 | 
					                         ]
 | 
				
			||||||
 | 
					    remove_tags       = [
 | 
				
			||||||
 | 
					                          dict(name=['object','embed','iframe'])
 | 
				
			||||||
 | 
					                         ,dict(attrs={'class':'adBox'})
 | 
				
			||||||
 | 
					                         ]
 | 
				
			||||||
 | 
					    remove_tags_before = dict(attrs={'class':'title'})
 | 
				
			||||||
 | 
					    remove_attributes = ['width','height','name']
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    feeds = [(u'Feature Articles', u'http://feeds.feedburner.com/GamasutraFeatureArticles')]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def print_version(self, url):
 | 
				
			||||||
 | 
					        return url + '?print=1'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def get_article_url(self, article):
 | 
				
			||||||
 | 
					        return article.get('guid',  None)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def preprocess_html(self, soup):
 | 
				
			||||||
 | 
					        for item in soup.findAll(style=True):
 | 
				
			||||||
 | 
					            del item['style']
 | 
				
			||||||
 | 
					        return self.adeify_images(soup)
 | 
				
			||||||
							
								
								
									
										45
									
								
								resources/recipes/gamasutra_news.recipe
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										45
									
								
								resources/recipes/gamasutra_news.recipe
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,45 @@
 | 
				
			|||||||
 | 
					__license__   = 'GPL v3'
 | 
				
			||||||
 | 
					__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					gamasutra.com
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from calibre.web.feeds.news import BasicNewsRecipe
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class Gamasutra(BasicNewsRecipe):
 | 
				
			||||||
 | 
					    title                 = 'Gamasutra News'
 | 
				
			||||||
 | 
					    __author__            = 'Darko Miletic'
 | 
				
			||||||
 | 
					    description           = 'The Art and Business of Making Games'
 | 
				
			||||||
 | 
					    publisher             = 'Gamasutra'
 | 
				
			||||||
 | 
					    category              = 'news, games, IT'
 | 
				
			||||||
 | 
					    oldest_article        = 2
 | 
				
			||||||
 | 
					    max_articles_per_feed = 200
 | 
				
			||||||
 | 
					    no_stylesheets        = True
 | 
				
			||||||
 | 
					    encoding              = 'cp1252'
 | 
				
			||||||
 | 
					    use_embedded_content  = False
 | 
				
			||||||
 | 
					    language              = 'en'
 | 
				
			||||||
 | 
					    remove_empty_feeds    = True
 | 
				
			||||||
 | 
					    masthead_url          = 'http://www.gamasutra.com/images/gamasutra_logo.gif'
 | 
				
			||||||
 | 
					    extra_css             = ' body{font-family: Verdana,Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .newsTitle{font-size: xx-large; font-weight: bold} '
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    conversion_options = {
 | 
				
			||||||
 | 
					                          'comment'          : description
 | 
				
			||||||
 | 
					                        , 'tags'             : category
 | 
				
			||||||
 | 
					                        , 'publisher'        : publisher
 | 
				
			||||||
 | 
					                        , 'language'         : language
 | 
				
			||||||
 | 
					                        , 'linearize_tables' : True
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    remove_tags       = [dict(attrs={'class':['relatedNews','adBox']})]
 | 
				
			||||||
 | 
					    keep_only_tags    = [dict(attrs={'class':['newsTitle','newsAuth','newsDate','newsText']})]
 | 
				
			||||||
 | 
					    remove_attributes = ['width','height']
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    feeds = [(u'News', u'http://feeds.feedburner.com/GamasutraNews')]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def get_article_url(self, article):
 | 
				
			||||||
 | 
					        return article.get('guid',  None)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def preprocess_html(self, soup):
 | 
				
			||||||
 | 
					        for item in soup.findAll(style=True):
 | 
				
			||||||
 | 
					            del item['style']
 | 
				
			||||||
 | 
					        return self.adeify_images(soup)
 | 
				
			||||||
@ -1,7 +1,6 @@
 | 
				
			|||||||
#!/usr/bin/env  python
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
__license__   = 'GPL v3'
 | 
					__license__   = 'GPL v3'
 | 
				
			||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
 | 
					__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
'''
 | 
					'''
 | 
				
			||||||
glassrpske.com
 | 
					glassrpske.com
 | 
				
			||||||
@ -9,7 +8,6 @@ glassrpske.com
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
import re
 | 
					import re
 | 
				
			||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
 | 
					from calibre.web.feeds.recipes import BasicNewsRecipe
 | 
				
			||||||
from calibre.ebooks.BeautifulSoup import Tag
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
class GlasSrpske(BasicNewsRecipe):
 | 
					class GlasSrpske(BasicNewsRecipe):
 | 
				
			||||||
    title                 = 'Glas Srpske'
 | 
					    title                 = 'Glas Srpske'
 | 
				
			||||||
@ -22,20 +20,16 @@ class GlasSrpske(BasicNewsRecipe):
 | 
				
			|||||||
    no_stylesheets        = True
 | 
					    no_stylesheets        = True
 | 
				
			||||||
    encoding              = 'utf-8'
 | 
					    encoding              = 'utf-8'
 | 
				
			||||||
    use_embedded_content  = False
 | 
					    use_embedded_content  = False
 | 
				
			||||||
    cover_url             = 'http://www.glassrpske.com/var/slike/glassrpske-logo.png'
 | 
					    masthead_url          = 'http://www.glassrpske.com/var/slike/glassrpske-logo.png'
 | 
				
			||||||
    lang                  = 'sr-BA'
 | 
					    language              = 'sr'
 | 
				
			||||||
    language = 'sr'
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    INDEX                 = 'http://www.glassrpske.com'
 | 
					    INDEX                 = 'http://www.glassrpske.com'
 | 
				
			||||||
 | 
					    extra_css             = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif} img{margin-bottom: 0.8em} '
 | 
				
			||||||
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    conversion_options = {
 | 
					    conversion_options = {
 | 
				
			||||||
                          'comment'          : description
 | 
					                          'comment'          : description
 | 
				
			||||||
                        , 'tags'             : category
 | 
					                        , 'tags'             : category
 | 
				
			||||||
                        , 'publisher'        : publisher
 | 
					                        , 'publisher'        : publisher
 | 
				
			||||||
                        , 'language'         : lang
 | 
					                        , 'language'         : language
 | 
				
			||||||
                        , 'pretty_print'     : True
 | 
					 | 
				
			||||||
                        }
 | 
					                        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
 | 
					    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
 | 
				
			||||||
@ -63,11 +57,7 @@ class GlasSrpske(BasicNewsRecipe):
 | 
				
			|||||||
            ]
 | 
					            ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def preprocess_html(self, soup):
 | 
					    def preprocess_html(self, soup):
 | 
				
			||||||
        soup.html['xml:lang'] = self.lang
 | 
					        return self.adeify_images(soup)
 | 
				
			||||||
        soup.html['lang']     = self.lang
 | 
					 | 
				
			||||||
        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
 | 
					 | 
				
			||||||
        soup.head.insert(0,mlang)
 | 
					 | 
				
			||||||
        return soup
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def parse_index(self):
 | 
					    def parse_index(self):
 | 
				
			||||||
        totalfeeds = []
 | 
					        totalfeeds = []
 | 
				
			||||||
 | 
				
			|||||||
@ -1,7 +1,6 @@
 | 
				
			|||||||
#!/usr/bin/env  python
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
__license__   = 'GPL v3'
 | 
					__license__   = 'GPL v3'
 | 
				
			||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
 | 
					__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
 | 
				
			||||||
'''
 | 
					'''
 | 
				
			||||||
www.glas-javnosti.rs
 | 
					www.glas-javnosti.rs
 | 
				
			||||||
'''
 | 
					'''
 | 
				
			||||||
@ -18,18 +17,14 @@ class GlasJavnosti(BasicNewsRecipe):
 | 
				
			|||||||
    max_articles_per_feed = 100
 | 
					    max_articles_per_feed = 100
 | 
				
			||||||
    no_stylesheets        = False
 | 
					    no_stylesheets        = False
 | 
				
			||||||
    use_embedded_content  = False
 | 
					    use_embedded_content  = False
 | 
				
			||||||
    language = 'sr'
 | 
					    language              = 'sr'
 | 
				
			||||||
 | 
					    extra_css             = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} img{margin-bottom: 0.8em} '
 | 
				
			||||||
    lang                  = 'sr-Latn-RS'
 | 
					 | 
				
			||||||
    direction             = 'ltr'
 | 
					 | 
				
			||||||
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    conversion_options = {
 | 
					    conversion_options = {
 | 
				
			||||||
                          'comment'          : description
 | 
					                          'comment'          : description
 | 
				
			||||||
                        , 'tags'             : category
 | 
					                        , 'tags'             : category
 | 
				
			||||||
                        , 'publisher'        : publisher
 | 
					                        , 'publisher'        : publisher
 | 
				
			||||||
                        , 'language'         : lang
 | 
					                        , 'language'         : language
 | 
				
			||||||
                        , 'pretty_print'     : True
 | 
					 | 
				
			||||||
                        }
 | 
					                        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -9,17 +9,15 @@ from calibre.web.feeds.news import BasicNewsRecipe
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class heiseDe(BasicNewsRecipe):
 | 
					class heiseDe(BasicNewsRecipe):
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
    title = 'heise'
 | 
					    title = 'heise'
 | 
				
			||||||
    description = 'Computernews from Germany'
 | 
					    description = 'Computernews from Germany'
 | 
				
			||||||
    __author__ = 'Oliver Niesner'
 | 
					    __author__ = 'Oliver Niesner'
 | 
				
			||||||
    language = 'de'
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    use_embedded_content   = False
 | 
					    use_embedded_content   = False
 | 
				
			||||||
    timefmt = ' [%d %b %Y]'
 | 
					    timefmt = ' [%d %b %Y]'
 | 
				
			||||||
    max_articles_per_feed = 40
 | 
					    max_articles_per_feed = 40
 | 
				
			||||||
    no_stylesheets = True
 | 
					    no_stylesheets = True
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
    remove_tags = [dict(id='navi_top'),
 | 
					    remove_tags = [dict(id='navi_top'),
 | 
				
			||||||
		   dict(id='navi_bottom'),
 | 
							   dict(id='navi_bottom'),
 | 
				
			||||||
		   dict(id='logo'),
 | 
							   dict(id='logo'),
 | 
				
			||||||
@ -35,12 +33,10 @@ class heiseDe(BasicNewsRecipe):
 | 
				
			|||||||
		   dict(name='div', attrs={'class':'bcadv ISI_IGNORE'}),
 | 
							   dict(name='div', attrs={'class':'bcadv ISI_IGNORE'}),
 | 
				
			||||||
		   dict(name='p', attrs={'class':'news_option'}),
 | 
							   dict(name='p', attrs={'class':'news_option'}),
 | 
				
			||||||
		   dict(name='p', attrs={'class':'news_navi'}),
 | 
							   dict(name='p', attrs={'class':'news_navi'}),
 | 
				
			||||||
		   dict(name='p', attrs={'class':'news_foren'})]
 | 
							   dict(name='div', attrs={'class':'news_foren'})]
 | 
				
			||||||
    remove_tags_after = [dict(name='p', attrs={'class':'news_foren'})]
 | 
					    remove_tags_after = [dict(name='div', attrs={'class':'news_foren'})]
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
    feeds =  [ ('heise', 'http://www.heise.de/newsticker/heise.rdf') ]
 | 
					    feeds =  [ ('heise', 'http://www.heise.de/newsticker/heise.rdf') ] 
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -1,17 +1,41 @@
 | 
				
			|||||||
 | 
					#!/usr/bin/env python
 | 
				
			||||||
 | 
					# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 | 
				
			||||||
from calibre.web.feeds.news import BasicNewsRecipe
 | 
					from calibre.web.feeds.news import BasicNewsRecipe
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class HoustonChronicle(BasicNewsRecipe):
 | 
					class HoustonChronicle(BasicNewsRecipe):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    title          = u'The Houston Chronicle'
 | 
					    title          = u'The Houston Chronicle'
 | 
				
			||||||
    description    = 'News from Houston, Texas'
 | 
					    description    = 'News from Houston, Texas'
 | 
				
			||||||
    __author__	   = 'Kovid Goyal'
 | 
					    __author__	   = 'Kovid Goyal and Sujata Raman'
 | 
				
			||||||
    language       = 'en'
 | 
					    language       = 'en'
 | 
				
			||||||
    timefmt        = ' [%a, %d %b, %Y]'
 | 
					    timefmt        = ' [%a, %d %b, %Y]'
 | 
				
			||||||
    no_stylesheets = True
 | 
					    no_stylesheets = True
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    keep_only_tags = [dict(id=['story-head', 'story'])]
 | 
					    keep_only_tags = [
 | 
				
			||||||
    remove_tags = [dict(id=['share-module', 'resource-box',
 | 
					                        dict(id=['story-head', 'story'])
 | 
				
			||||||
        'resource-box-header'])]
 | 
					                     ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    remove_tags    = [
 | 
				
			||||||
 | 
					                        dict(id=['share-module', 'resource-box',
 | 
				
			||||||
 | 
					                        'resource-box-header'])
 | 
				
			||||||
 | 
					                     ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    extra_css      = '''
 | 
				
			||||||
 | 
					                        h1{font-family :Arial,Helvetica,sans-serif; font-size:large;}
 | 
				
			||||||
 | 
					                        h2{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#666666;}
 | 
				
			||||||
 | 
					                        h3{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;}
 | 
				
			||||||
 | 
					                        h4{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
 | 
				
			||||||
 | 
					                        p{font-family :Arial,Helvetica,sans-serif; font-size:x-small;}
 | 
				
			||||||
 | 
					                        #story-head h1{font-family :Arial,Helvetica,sans-serif; font-size: xx-large;}
 | 
				
			||||||
 | 
					                        #story-head h2{font-family :Arial,Helvetica,sans-serif; font-size: small; color:#000000;}
 | 
				
			||||||
 | 
					                        #story-head h3{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
 | 
				
			||||||
 | 
					                        #story-head h4{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
 | 
				
			||||||
 | 
					                        #story{font-family :Arial,Helvetica,sans-serif; font-size:xx-small;}
 | 
				
			||||||
 | 
					                        #Text-TextSubhed BoldCond PoynterAgateZero h3{color:#444444;font-family :Arial,Helvetica,sans-serif; font-size:small;}
 | 
				
			||||||
 | 
					                        .p260x p{font-family :Arial,Helvetica,serif; font-size:x-small;font-style:italic;}
 | 
				
			||||||
 | 
					                        .p260x h6{color:#777777;font-family :Arial,Helvetica,sans-serif; font-size:xx-small;}
 | 
				
			||||||
 | 
					                     '''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def parse_index(self):
 | 
					    def parse_index(self):
 | 
				
			||||||
        soup = self.index_to_soup('http://www.chron.com/news/')
 | 
					        soup = self.index_to_soup('http://www.chron.com/news/')
 | 
				
			||||||
@ -64,3 +88,6 @@ class HoustonChronicle(BasicNewsRecipe):
 | 
				
			|||||||
            feeds.append((current_section, current_articles))
 | 
					            feeds.append((current_section, current_articles))
 | 
				
			||||||
        return feeds
 | 
					        return feeds
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -3,7 +3,7 @@ import re
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
class HuffingtonPostRecipe(BasicNewsRecipe):
 | 
					class HuffingtonPostRecipe(BasicNewsRecipe):
 | 
				
			||||||
    __license__  = 'GPL v3'
 | 
					    __license__  = 'GPL v3'
 | 
				
			||||||
    __author__ = 'kwetal'
 | 
					    __author__ = 'kwetal and Archana Raman'
 | 
				
			||||||
    language = 'en'
 | 
					    language = 'en'
 | 
				
			||||||
    version = 2
 | 
					    version = 2
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -14,70 +14,89 @@ class HuffingtonPostRecipe(BasicNewsRecipe):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    oldest_article = 1.1
 | 
					    oldest_article = 1.1
 | 
				
			||||||
    max_articles_per_feed = 100
 | 
					    max_articles_per_feed = 100
 | 
				
			||||||
    use_embedded_content = True
 | 
					    #use_embedded_content = True
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    encoding = 'utf-8'
 | 
					    encoding = 'utf-8'
 | 
				
			||||||
    remove_empty_feeds = True
 | 
					    remove_empty_feeds = True
 | 
				
			||||||
 | 
					    no_stylesheets = True
 | 
				
			||||||
 | 
					    remove_javascript = True
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Feeds from: http://www.huffingtonpost.com/syndication/
 | 
					    # Feeds from: http://www.huffingtonpost.com/syndication/
 | 
				
			||||||
    feeds = []
 | 
					    feeds = []
 | 
				
			||||||
    feeds.append((u'Latest News', u'http://feeds.huffingtonpost.com/huffingtonpost/LatestNews'))
 | 
					    feeds.append((u'Latest News', u'http://feeds.huffingtonpost.com/huffingtonpost/LatestNews'))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    #feeds.append((u'Politics', u'http://www.huffingtonpost.com/feeds/verticals/politics/index.xml'))
 | 
					    feeds.append((u'Politics', u'http://www.huffingtonpost.com/feeds/verticals/politics/index.xml'))
 | 
				
			||||||
    feeds.append((u'Politics: News', u'http://www.huffingtonpost.com/feeds/verticals/politics/news.xml'))
 | 
					    #feeds.append((u'Politics: News', u'http://www.huffingtonpost.com/feeds/verticals/politics/news.xml'))
 | 
				
			||||||
    feeds.append((u'Politics: Blog', u'http://www.huffingtonpost.com/feeds/verticals/politics/blog.xml'))
 | 
					    #feeds.append((u'Politics: Blog', u'http://www.huffingtonpost.com/feeds/verticals/politics/blog.xml'))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    #feeds.append((u'Media', u'http://www.huffingtonpost.com/feeds/verticals/media/index.xml'))
 | 
					    feeds.append((u'Media', u'http://www.huffingtonpost.com/feeds/verticals/media/index.xml'))
 | 
				
			||||||
    feeds.append((u'Media: News', u'http://www.huffingtonpost.com/feeds/verticals/media/news.xml'))
 | 
					    #feeds.append((u'Media: News', u'http://www.huffingtonpost.com/feeds/verticals/media/news.xml'))
 | 
				
			||||||
    feeds.append((u'Media: Blog', u'http://www.huffingtonpost.com/feeds/verticals/media/blog.xml'))
 | 
					    #feeds.append((u'Media: Blog', u'http://www.huffingtonpost.com/feeds/verticals/media/blog.xml'))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    #feeds.append((u'Business', u'http://www.huffingtonpost.com/feeds/verticals/business/index.xml'))
 | 
					    feeds.append((u'Business', u'http://www.huffingtonpost.com/feeds/verticals/business/index.xml'))
 | 
				
			||||||
    feeds.append((u'Business: News', u'http://www.huffingtonpost.com/feeds/verticals/business/news.xml'))
 | 
					    #feeds.append((u'Business: News', u'http://www.huffingtonpost.com/feeds/verticals/business/news.xml'))
 | 
				
			||||||
    feeds.append((u'Business: Blogs', u'http://www.huffingtonpost.com/feeds/verticals/business/blog.xml'))
 | 
					    #feeds.append((u'Business: Blogs', u'http://www.huffingtonpost.com/feeds/verticals/business/blog.xml'))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    #feeds.append((u'Entertainment', u'http://www.huffingtonpost.com/feeds/verticals/entertainment/index.xml'))
 | 
					    feeds.append((u'Entertainment', u'http://www.huffingtonpost.com/feeds/verticals/entertainment/index.xml'))
 | 
				
			||||||
    feeds.append((u'Entertainment: News', u'http://www.huffingtonpost.com/feeds/verticals/business/news.xml'))
 | 
					    #feeds.append((u'Entertainment: News', u'http://www.huffingtonpost.com/feeds/verticals/business/news.xml'))
 | 
				
			||||||
    feeds.append((u'Entertainment: Blog', u'http://www.huffingtonpost.com/feeds/verticals/entertainment/blog.xml'))
 | 
					    #feeds.append((u'Entertainment: Blog', u'http://www.huffingtonpost.com/feeds/verticals/entertainment/blog.xml'))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    #feeds.append((u'Living', u'http://www.huffingtonpost.com/feeds/verticals/living/index.xml'))
 | 
					    feeds.append((u'Living', u'http://www.huffingtonpost.com/feeds/verticals/living/index.xml'))
 | 
				
			||||||
    feeds.append((u'Living: News', u'http://www.huffingtonpost.com/feeds/verticals/living/news.xml'))
 | 
					    #feeds.append((u'Living: News', u'http://www.huffingtonpost.com/feeds/verticals/living/news.xml'))
 | 
				
			||||||
    feeds.append((u'Living: Blog', u'http://www.huffingtonpost.com/feeds/verticals/living/blog.xml'))
 | 
					    #feeds.append((u'Living: Blog', u'http://www.huffingtonpost.com/feeds/verticals/living/blog.xml'))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    #feeds.append((u'Style', u'http://www.huffingtonpost.com/feeds/verticals/style/index.xml'))
 | 
					    feeds.append((u'Style', u'http://www.huffingtonpost.com/feeds/verticals/style/index.xml'))
 | 
				
			||||||
    feeds.append((u'Style: News', u'http://www.huffingtonpost.com/feeds/verticals/style/news.xml'))
 | 
					    #feeds.append((u'Style: News', u'http://www.huffingtonpost.com/feeds/verticals/style/news.xml'))
 | 
				
			||||||
    feeds.append((u'Style: Blog', u'http://www.huffingtonpost.com/feeds/verticals/style/blog.xml'))
 | 
					    #feeds.append((u'Style: Blog', u'http://www.huffingtonpost.com/feeds/verticals/style/blog.xml'))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    #feeds.append((u'Green', u'http://www.huffingtonpost.com/feeds/verticals/green/index.xml'))
 | 
					    feeds.append((u'Green', u'http://www.huffingtonpost.com/feeds/verticals/green/index.xml'))
 | 
				
			||||||
    feeds.append((u'Green: News', u'http://www.huffingtonpost.com/feeds/verticals/green/news.xml'))
 | 
					    #feeds.append((u'Green: News', u'http://www.huffingtonpost.com/feeds/verticals/green/news.xml'))
 | 
				
			||||||
    feeds.append((u'Green: Blog', u'http://www.huffingtonpost.com/feeds/verticals/green/blog.xml'))
 | 
					    #feeds.append((u'Green: Blog', u'http://www.huffingtonpost.com/feeds/verticals/green/blog.xml'))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    #feeds.append((u'Technology', u'http://www.huffingtonpost.com/feeds/verticals/technology/index.xml'))
 | 
					    feeds.append((u'Technology', u'http://www.huffingtonpost.com/feeds/verticals/technology/index.xml'))
 | 
				
			||||||
    feeds.append((u'Technology: News', u'http://www.huffingtonpost.com/feeds/verticals/technology/news.xml'))
 | 
					    #feeds.append((u'Technology: News', u'http://www.huffingtonpost.com/feeds/verticals/technology/news.xml'))
 | 
				
			||||||
    feeds.append((u'Technology: Blog', u'http://www.huffingtonpost.com/feeds/verticals/technology/blog.xml'))
 | 
					    #feeds.append((u'Technology: Blog', u'http://www.huffingtonpost.com/feeds/verticals/technology/blog.xml'))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    #feeds.append((u'Comedy', u'http://www.huffingtonpost.com/feeds/verticals/comedy/index.xml'))
 | 
					    feeds.append((u'Comedy', u'http://www.huffingtonpost.com/feeds/verticals/comedy/index.xml'))
 | 
				
			||||||
    feeds.append((u'Comedy: News', u'http://www.huffingtonpost.com/feeds/verticals/comedy/news.xml'))
 | 
					    #feeds.append((u'Comedy: News', u'http://www.huffingtonpost.com/feeds/verticals/comedy/news.xml'))
 | 
				
			||||||
    feeds.append((u'Comedy: Blog', u'http://www.huffingtonpost.com/feeds/verticals/comedy/blog.xml'))
 | 
					    #feeds.append((u'Comedy: Blog', u'http://www.huffingtonpost.com/feeds/verticals/comedy/blog.xml'))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    #feeds.append((u'World', u'http://www.huffingtonpost.com/feeds/verticals/world/index.xml'))
 | 
					    feeds.append((u'World', u'http://www.huffingtonpost.com/feeds/verticals/world/index.xml'))
 | 
				
			||||||
    feeds.append((u'World: News', u'http://www.huffingtonpost.com/feeds/verticals/world/news.xml'))
 | 
					    #feeds.append((u'World: News', u'http://www.huffingtonpost.com/feeds/verticals/world/news.xml'))
 | 
				
			||||||
    feeds.append((u'World: Blog', u'http://www.huffingtonpost.com/feeds/verticals/world/blog.xml'))
 | 
					    #feeds.append((u'World: Blog', u'http://www.huffingtonpost.com/feeds/verticals/world/blog.xml'))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    feeds.append((u'Original Reporting', u'http://www.huffingtonpost.com/tag/huffpolitics/feed'))
 | 
					    feeds.append((u'Original Reporting', u'http://www.huffingtonpost.com/tag/huffpolitics/feed'))
 | 
				
			||||||
    feeds.append((u'Original Posts', u'http://www.huffingtonpost.com/feeds/original_posts/index.xml'))
 | 
					    #feeds.append((u'Original Posts', u'http://www.huffingtonpost.com/feeds/original_posts/index.xml'))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    remove_tags = []
 | 
					    remove_tags = []
 | 
				
			||||||
    remove_tags.append(dict(name='a', attrs={'href' : re.compile('http://feedads\.g\.doubleclick.net.*')}))
 | 
					    remove_tags.append(dict(name='a', attrs={'href' : re.compile('http://feedads\.g\.doubleclick.net.*')}))
 | 
				
			||||||
    remove_tags.append(dict(name='div', attrs={'class' : 'feedflare'}))
 | 
					    remove_tags.append(dict(name='div', attrs={'class' : 'feedflare'}))
 | 
				
			||||||
 | 
					    remove_tags.append(dict(name='a', attrs={'class' : 'home_pixie'}))
 | 
				
			||||||
 | 
					    remove_tags.append(dict(name='div', attrs={'id' : ["top_nav",'threeup_top_wrapper','breaking_news_container',"hp_social_network"]}))
 | 
				
			||||||
 | 
					    remove_tags.append(dict(name='img', alt="Connect"))
 | 
				
			||||||
 | 
					    remove_tags.append(dict(name='div', attrs={'class' : ['logo']}))    #'share_boxes_box_block_b_wraper',
 | 
				
			||||||
 | 
					    remove_tags.append(dict(name='div', attrs={'class' :[ 'read_more with_verticals','chicklets_box_outter_v05','blogger_menu_content','chicklets_bar']}))
 | 
				
			||||||
 | 
					    remove_tags.append(dict(name='div', attrs={'class' : ['sidebar_blog_first_design','sidebar_blog_second_design',]}))
 | 
				
			||||||
 | 
					    remove_tags.append(dict(name='div', attrs={'class' : ['main_big_news_ontop','login-menu','sidebar_blog_third_design','read_more']}))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    remove_tags_after = [dict(name='div', attrs={'class' : 'entry_content'}) ]
 | 
				
			||||||
 | 
					   # remove_attributes = ['style']
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    remove_attributes = ['style']
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    extra_css = '''
 | 
					    extra_css = '''
 | 
				
			||||||
 | 
					                    h1{font-family :Arial,Helvetica,sans-serif; font-size:large;}
 | 
				
			||||||
 | 
					                    h2{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;}
 | 
				
			||||||
 | 
					                    h3{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;}
 | 
				
			||||||
                    body{font-family:verdana,arial,helvetica,geneva,sans-serif ;}
 | 
					                    body{font-family:verdana,arial,helvetica,geneva,sans-serif ;}
 | 
				
			||||||
                    h2{font-size: x-large; font-weight: bold; padding: 0em; margin-bottom: 0.2em;}
 | 
					                    #title_permalink{color:black;font-size:large;}
 | 
				
			||||||
                    a[href]{color: blue; text-decoration: none; cursor: pointer;}
 | 
					                    .date{color:#858585;font-family:"Times New Roman",sans-serif;}
 | 
				
			||||||
 | 
					                    .comments_datetime v05{color:#696969;}
 | 
				
			||||||
 | 
					                    .teaser_permalink{font-style:italic;font-size:xx-small;}
 | 
				
			||||||
 | 
					                    .blog_posted_date{color:#696969;font-size:xx-small;font-weight: bold;}
 | 
				
			||||||
                    '''
 | 
					                    '''
 | 
				
			||||||
 | 
					#a[href]{color: blue; text-decoration: none; cursor: pointer;}
 | 
				
			||||||
    def get_article_url(self, article):
 | 
					    def get_article_url(self, article):
					        """
					            Return the URL of a feed entry, working around Feedparser behaviour.

					            If an item has more than one <link/> element, the entry's 'link'
					            value is empty and the candidates are available as a list of
					            dictionaries under 'links'. In that case the first dictionary
					            with a non-empty 'href' supplies the URL.

					            :param article: parsed feed entry; only its 'link' and 'links'
					                            keys are read, both through ``.get()``.
					            :return: the article URL, or the (falsy) value of 'link' when
					                     no usable href can be found.
					        """
					        link = article.get('link')
					        if link:
					            return link

					        # link may be None here, so it must not be used in string operations.
					        # Search the whole list instead of relying on fixed indexes, which
					        # fail when the list is shorter than expected.
					        for entry in article.get('links') or []:
					            href = entry.get('href')
					            if href:
					                return href

					        return link
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def postprocess_html(self, soup, first_fetch):
					        """
					            Drop the "What's Your Reaction?" prompt and every blockquote
					            from the page, then hand the same soup object back.

					            :param soup: parsed page; only ``findAll`` is called on it.
					            :param first_fetch: accepted for the hook signature, not used here.
					            :return: the soup that was passed in, modified in place.
					        """
					        # NOTE(review): with a text= filter the search may match the string
					        # rather than the enclosing <div> - confirm which node is intended.
					        reaction_prompts = soup.findAll('div', text="What's Your Reaction?")
					        for prompt in reaction_prompts:
					            prompt.extract()

					        quoted_blocks = soup.findAll('blockquote')
					        for quoted in quoted_blocks:
					            quoted.extract()

					        return soup
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										87
									
								
								resources/recipes/johm.recipe
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										87
									
								
								resources/recipes/johm.recipe
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,87 @@
 | 
				
			|||||||
 | 
					# -*- coding: utf-8 -*-
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from calibre.web.feeds.recipes import BasicNewsRecipe
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class JournalofHospitalMedicine(BasicNewsRecipe):
					    """
					    Recipe for the Journal of Hospital Medicine on Wiley InterScience.

					    Logs in with the configured username and password, reads the journal
					    home page, groups the articles found there under their <h4> section
					    headings and rewrites each article link from its '/HTMLSTART' form to
					    '/main.html,ftx_abs'.
					    """

					    title       = 'Journal of Hospital Medicine'
					    __author__  = 'Krittika Goyal'
					    description = 'Medical news'
					    timefmt = ' [%d %b, %Y]'
					    needs_subscription = True

					    no_stylesheets = True
					    remove_tags = [
					        dict(name='iframe'),
					        dict(name='div', attrs={'class':'subContent'}),
					        dict(name='div', attrs={'id':['contentFrame']}),
					    ]

					    def get_browser(self):
					        """
					        Return a browser that has submitted the site login form.

					        Raises Exception when the page returned after login still
					        contains 'userName = ""'.
					        """
					        # NOTE(review): called on the class without self - relies on
					        # get_browser being callable that way; confirm against the
					        # calibre version in use.
					        br = BasicNewsRecipe.get_browser()
					        br.open('http://www3.interscience.wiley.com/cgi-bin/home')
					        br.select_form(name='siteLogin')
					        br['LoginName'] = self.username
					        br['Password'] = self.password
					        raw = br.submit().read()
					        # Presumably the site leaves userName blank when the login was
					        # rejected - TODO confirm.
					        if 'userName = ""' in raw:
					            raise Exception('Login failed. Check your username and password')
					        return br

					    def johm_get_index(self):
					        """Return the parsed journal home page, which holds the article TOC."""
					        return self.index_to_soup('http://www3.interscience.wiley.com/journal/111081937/home')

					    def parse_index(self):
					        """
					        Build the feed list from the journal home page.

					        Every <h4> inside the element with id 'contentCell' starts a new
					        section; every <strong> after a section heading is treated as an
					        article title whose link is the '/HTMLSTART' anchor found under
					        the title's grandparent element.

					        :return: list of (section title, list of article dicts) tuples;
					                 sections without articles are left out.
					        :raises ValueError: when the home page has no 'contentCell' element.
					        """
					        parse_soup = self.johm_get_index()

					        div = parse_soup.find(id='contentCell')
					        if div is None:
					            # Fail with a clear message instead of an AttributeError on None.
					            raise ValueError('Could not find the table of contents '
					                             '(id="contentCell") on the journal home page')

					        current_section = None
					        current_articles = []
					        feeds = []
					        for tag in div.findAll(True):
					            if tag.name == 'h4':
					                # Section heading found: flush the previous section first.
					                if current_articles and current_section:
					                    feeds.append((current_section, current_articles))
					                current_section = self.tag_to_string(tag)
					                current_articles = []
					                self.log('\tFound section:', current_section)
					                continue

					            if current_section is None or tag.name != 'strong':
					                continue

					            title = self.tag_to_string(tag)
					            anchor = tag.parent.parent.find(
					                'a', href=lambda href: href and '/HTMLSTART' in href)
					            if anchor is None:
					                continue
					            url = anchor.get('href', False)
					            if not url or not title:
					                continue
					            if url.startswith('/'):
					                url = 'http://www3.interscience.wiley.com'+url
					            url = url.replace('/HTMLSTART', '/main.html,ftx_abs')
					            self.log('\t\tFound article:', title)
					            self.log('\t\t\t', url)
					            current_articles.append({'title': title, 'url':url,
					                'description':'', 'date':''})

					        # Flush the last section, which no later heading closes.
					        if current_articles and current_section:
					            feeds.append((current_section, current_articles))

					        return feeds

					    def preprocess_html(self, soup):
					        """
					        Replace 'tfig' with 'nfig' in the src of every image and return
					        the soup.
					        """
					        # Presumably switches thumbnail figures to their normal-size
					        # versions - TODO confirm against the site.
					        for img in soup.findAll('img', src=True):
					            img['src'] = img['src'].replace('tfig', 'nfig')
					        return soup
 | 
				
			||||||
 | 
					
 | 
				
			||||||
							
								
								
									
										37
									
								
								resources/recipes/kukuburi.recipe
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										37
									
								
								resources/recipes/kukuburi.recipe
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,37 @@
 | 
				
			|||||||
 | 
					#!/usr/bin/env  python
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					__license__ = 'GPL v3'
 | 
				
			||||||
 | 
					__author__ = 'Mori'
 | 
				
			||||||
 | 
					__version__ = 'v. 0.1'
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					Kukuburi.com
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from calibre.web.feeds.news import BasicNewsRecipe
 | 
				
			||||||
 | 
					import re
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class KukuburiRecipe(BasicNewsRecipe):
					    """
					    Recipe for Kukuburi.com, read from its FeedBurner feed.

					    Before parsing, HTML comments and the 'feedflare' div are stripped
					    from the fetched markup via preprocess_regexps.
					    """

					    __author__ = 'Mori'
					    language = 'en'

					    title = u'Kukuburi'
					    # Non-ASCII characters are written as unicode code point escapes so the
					    # values do not depend on how the source file is decoded. UTF-8 byte
					    # escapes (\xc3\xb3) inside a unicode literal would produce mojibake.
					    publisher = u'Ram\xf3n P\xe9rez'
					    description = u'KUKUBURI by Ram\xf3n P\xe9rez'

					    no_stylesheets = True
					    remove_javascript = True

					    oldest_article = 100
					    max_articles_per_feed = 100

					    feeds = [
					        (u'Kukuburi', u'http://feeds2.feedburner.com/Kukuburi')
					    ]

					    # (compiled pattern, replacement callable) pairs; every match is
					    # replaced by the empty string. Spelled out explicitly so no loop
					    # variable ends up as a class attribute.
					    preprocess_regexps = [
					        (re.compile(r'<!--.*?-->', re.IGNORECASE | re.DOTALL),
					         lambda match: ''),
					        (re.compile(r'<div class="feedflare".*?</div>', re.IGNORECASE | re.DOTALL),
					         lambda match: ''),
					    ]
 | 
				
			||||||
@ -1,7 +1,5 @@
 | 
				
			|||||||
#!/usr/bin/env  python
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
__license__   = 'GPL v3'
 | 
					__license__   = 'GPL v3'
 | 
				
			||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
 | 
					__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
 | 
				
			||||||
'''
 | 
					'''
 | 
				
			||||||
lasegunda.com
 | 
					lasegunda.com
 | 
				
			||||||
'''
 | 
					'''
 | 
				
			||||||
@ -19,43 +17,38 @@ class LaSegunda(BasicNewsRecipe):
 | 
				
			|||||||
    no_stylesheets        = True
 | 
					    no_stylesheets        = True
 | 
				
			||||||
    use_embedded_content  = False
 | 
					    use_embedded_content  = False
 | 
				
			||||||
    encoding              = 'cp1252'
 | 
					    encoding              = 'cp1252'
 | 
				
			||||||
    cover_url             = 'http://www.lasegunda.com/imagenes/logotipo_lasegunda_Oli.gif'
 | 
					    masthead_url          = 'http://www.lasegunda.com/imagenes/logotipo_lasegunda_Oli.gif'
 | 
				
			||||||
    remove_javascript     = True
 | 
					    remove_empty_feeds    = True
 | 
				
			||||||
    language = 'es'
 | 
					    language              = 'es'
 | 
				
			||||||
 | 
					    extra_css             = ' .titulonegritastop{font-size: xx-large; font-weight: bold} '            
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    
 | 
					    conversion_options = {
 | 
				
			||||||
    html2lrf_options = [
 | 
					                          'comment'          : description
 | 
				
			||||||
                          '--comment', description
 | 
					                        , 'tags'             : category
 | 
				
			||||||
                        , '--category', category
 | 
					                        , 'publisher'        : publisher
 | 
				
			||||||
                        , '--publisher', publisher
 | 
					                        , 'language'         : language
 | 
				
			||||||
                        , '--ignore-tables'
 | 
											, 'linearize_tables' : True
 | 
				
			||||||
                        ]
 | 
					                        }
 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} "' 
 | 
					 | 
				
			||||||
                        
 | 
					                        
 | 
				
			||||||
    keep_only_tags = [dict(name='table')]
 | 
					    remove_tags_before = dict(attrs={'class':'titulonegritastop'})
 | 
				
			||||||
 | 
					    remove_tags        = [dict(name='img')]
 | 
				
			||||||
 | 
					    remove_attributes  = ['width','height']
 | 
				
			||||||
 | 
						
 | 
				
			||||||
                        
 | 
					                        
 | 
				
			||||||
    feeds = [ 
 | 
					    feeds = [ 
 | 
				
			||||||
               (u'Noticias de ultima hora', u'http://www.lasegunda.com/rss20/index.asp?canal=0')
 | 
					               (u'Noticias de ultima hora', u'http://www.lasegunda.com/rss20/index.asp?canal=0')
 | 
				
			||||||
              ,(u'Politica', u'http://www.lasegunda.com/rss20/index.asp?canal=21')
 | 
					              ,(u'Politica'               , u'http://www.lasegunda.com/rss20/index.asp?canal=21')
 | 
				
			||||||
              ,(u'Cronica', u'http://www.lasegunda.com/rss20/index.asp?canal=20')
 | 
					              ,(u'Cronica'                , u'http://www.lasegunda.com/rss20/index.asp?canal=20')
 | 
				
			||||||
              ,(u'Internacional', u'http://www.lasegunda.com/rss20/index.asp?canal=23')
 | 
					              ,(u'Internacional'          , u'http://www.lasegunda.com/rss20/index.asp?canal=23')
 | 
				
			||||||
              ,(u'Deportes', u'http://www.lasegunda.com/rss20/index.asp?canal=24')
 | 
					              ,(u'Deportes'               , u'http://www.lasegunda.com/rss20/index.asp?canal=24')
 | 
				
			||||||
              ,(u'Epectaculos/Cultura', u'http://www.lasegunda.com/rss20/index.asp?canal=25')
 | 
					              ,(u'Epectaculos/Cultura'    , u'http://www.lasegunda.com/rss20/index.asp?canal=25')
 | 
				
			||||||
              ,(u'Educacion', u'http://www.lasegunda.com/rss20/index.asp?canal=26')
 | 
					              ,(u'Educacion'              , u'http://www.lasegunda.com/rss20/index.asp?canal=26')
 | 
				
			||||||
              ,(u'Ciencia y Tecnologia', u'http://www.lasegunda.com/rss20/index.asp?canal=27')
 | 
					              ,(u'Ciencia y Tecnologia'   , u'http://www.lasegunda.com/rss20/index.asp?canal=27')
 | 
				
			||||||
              ,(u'Solidaridad', u'http://www.lasegunda.com/rss20/index.asp?canal=28')
 | 
					              ,(u'Solidaridad'            , u'http://www.lasegunda.com/rss20/index.asp?canal=28')
 | 
				
			||||||
              ,(u'Buena Vida', u'http://www.lasegunda.com/rss20/index.asp?canal=32')
 | 
					              ,(u'Buena Vida'             , u'http://www.lasegunda.com/rss20/index.asp?canal=32')
 | 
				
			||||||
            ]
 | 
					            ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def print_version(self, url):
 | 
					    def print_version(self, url):
					        """
					        Map an article URL to the URL of its print page.

					        The article id is whatever follows 'index.asp?idnoticia=' in
					        the given URL.

					        :param url: article URL taken from the feed.
					        :return: the print-page URL for that article id, or ``url``
					                 unchanged when no article id can be extracted from it.
					        """
					        _, marker, article_id = url.partition('index.asp?idnoticia=')
					        if not marker or not article_id:
					            # Without an id the print URL would not point at any article.
					            return url
					        return u'http://www.lasegunda.com/edicionOnline/include/secciones/_detalle_impresion.asp?idnoticia=' + article_id
 | 
				
			||||||
 | 
					 | 
				
			||||||
    def preprocess_html(self, soup):
 | 
					 | 
				
			||||||
        mtag = '<meta http-equiv="Content-Language" content="es-CL"/>'
 | 
					 | 
				
			||||||
        soup.head.insert(0,mtag)
 | 
					 | 
				
			||||||
        for item in soup.findAll(style=True):
 | 
					 | 
				
			||||||
            del item['style']
 | 
					 | 
				
			||||||
        return soup
 | 
					 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
 | 
				
			|||||||
@ -1,7 +1,5 @@
 | 
				
			|||||||
#!/usr/bin/env  python
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
__license__   = 'GPL v3'
 | 
					__license__   = 'GPL v3'
 | 
				
			||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
 | 
					__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
 | 
				
			||||||
'''
 | 
					'''
 | 
				
			||||||
latercera.com
 | 
					latercera.com
 | 
				
			||||||
'''
 | 
					'''
 | 
				
			||||||
@ -18,32 +16,32 @@ class LaTercera(BasicNewsRecipe):
 | 
				
			|||||||
    max_articles_per_feed = 100
 | 
					    max_articles_per_feed = 100
 | 
				
			||||||
    no_stylesheets        = True
 | 
					    no_stylesheets        = True
 | 
				
			||||||
    encoding              = 'cp1252'
 | 
					    encoding              = 'cp1252'
 | 
				
			||||||
    remove_javascript     = True
 | 
					 | 
				
			||||||
    use_embedded_content  = False
 | 
					    use_embedded_content  = False
 | 
				
			||||||
 | 
					    remove_empty_feeds    = True
 | 
				
			||||||
    html2lrf_options = [
 | 
					    language              = 'es'
 | 
				
			||||||
                          '--comment', description
 | 
					 
 | 
				
			||||||
                        , '--category', category
 | 
					    conversion_options = {
 | 
				
			||||||
                        , '--publisher', publisher
 | 
					                          'comment'          : description
 | 
				
			||||||
                        ]
 | 
					                        , 'tags'             : category
 | 
				
			||||||
 | 
					                        , 'publisher'        : publisher
 | 
				
			||||||
    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
 | 
					                        , 'language'         : language
 | 
				
			||||||
 | 
											, 'linearize_tables' : True
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    keep_only_tags = [dict(name='div', attrs={'class':['span-16 articulo border','span-16 border','span-16']}) ]
 | 
					    keep_only_tags = [dict(name='div', attrs={'class':['span-16 articulo border','span-16 border','span-16']}) ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    remove_tags = [
 | 
					    remove_tags = [
 | 
				
			||||||
                     dict(name='script')
 | 
					                     dict(name=['ul','input','base'])
 | 
				
			||||||
                    ,dict(name='ul')
 | 
					 | 
				
			||||||
                    ,dict(name='div', attrs={'id':['boxComentarios','shim','enviarAmigo']})
 | 
					                    ,dict(name='div', attrs={'id':['boxComentarios','shim','enviarAmigo']})
 | 
				
			||||||
                    ,dict(name='div', attrs={'class':['ad640','span-10 imgSet A','infoRelCol']})
 | 
					                    ,dict(name='div', attrs={'class':['ad640','span-10 imgSet A','infoRelCol']})
 | 
				
			||||||
                    ,dict(name='input')
 | 
					 | 
				
			||||||
                    ,dict(name='p', attrs={'id':['mensajeError','mensajeEnviandoNoticia','mensajeExito']})
 | 
					                    ,dict(name='p', attrs={'id':['mensajeError','mensajeEnviandoNoticia','mensajeExito']})
 | 
				
			||||||
                  ]
 | 
					                  ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    feeds = [
 | 
					    feeds = [
 | 
				
			||||||
               (u'Noticias de ultima hora', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&ul=1')
 | 
					               (u'Noticias de ultima hora', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&ul=1')
 | 
				
			||||||
              ,(u'Pais', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=654')
 | 
					              ,(u'Nacional', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=680')
 | 
				
			||||||
 | 
					              ,(u'Politica', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=674')              
 | 
				
			||||||
              ,(u'Mundo', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=678')
 | 
					              ,(u'Mundo', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=678')
 | 
				
			||||||
              ,(u'Deportes', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=656')
 | 
					              ,(u'Deportes', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=656')
 | 
				
			||||||
              ,(u'Negocios', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=655')
 | 
					              ,(u'Negocios', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=655')
 | 
				
			||||||
@ -55,10 +53,6 @@ class LaTercera(BasicNewsRecipe):
 | 
				
			|||||||
            ]
 | 
					            ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def preprocess_html(self, soup):
        """Strip the inline ``style`` attribute from every tag that has one.

        Returns the same *soup* object after it has been modified in place.
        """
        styled_tags = soup.findAll(style=True)
        for tag in styled_tags:
            del tag['style']
        return soup
 | 
				
			||||||
 | 
					 | 
				
			||||||
    language = 'es'
 | 
					 | 
				
			||||||
 | 
				
			|||||||
@ -1,11 +1,11 @@
 | 
				
			|||||||
#!/usr/bin/env  python
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
__license__   = 'GPL v3'
 | 
					__license__   = 'GPL v3'
 | 
				
			||||||
__copyright__ = '2008-2009, AprilHare, Darko Miletic <darko.miletic at gmail.com>'
 | 
					__copyright__ = '2008-2010, AprilHare, Darko Miletic <darko.miletic at gmail.com>'
 | 
				
			||||||
'''
 | 
					'''
 | 
				
			||||||
newscientist.com
 | 
					newscientist.com
 | 
				
			||||||
'''
 | 
					'''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import re
 | 
				
			||||||
from calibre.web.feeds.news import BasicNewsRecipe
 | 
					from calibre.web.feeds.news import BasicNewsRecipe
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class NewScientist(BasicNewsRecipe):
 | 
					class NewScientist(BasicNewsRecipe):
 | 
				
			||||||
@ -15,12 +15,14 @@ class NewScientist(BasicNewsRecipe):
 | 
				
			|||||||
    language              = 'en'
 | 
					    language              = 'en'
 | 
				
			||||||
    publisher             = 'New Scientist'
 | 
					    publisher             = 'New Scientist'
 | 
				
			||||||
    category              = 'science news, science articles, science jobs, drugs, cancer, depression, computer software'
 | 
					    category              = 'science news, science articles, science jobs, drugs, cancer, depression, computer software'
 | 
				
			||||||
    delay                 = 3
 | 
					 | 
				
			||||||
    oldest_article        = 7
 | 
					    oldest_article        = 7
 | 
				
			||||||
    max_articles_per_feed = 100
 | 
					    max_articles_per_feed = 100
 | 
				
			||||||
    no_stylesheets        = True
 | 
					    no_stylesheets        = True
 | 
				
			||||||
    use_embedded_content  = False
 | 
					    use_embedded_content  = False
 | 
				
			||||||
 | 
					    cover_url             = 'http://www.newscientist.com/currentcover.jpg'
 | 
				
			||||||
 | 
					    masthead_url          = 'http://www.newscientist.com/img/misc/ns_logo.jpg'
 | 
				
			||||||
    encoding              = 'utf-8'
 | 
					    encoding              = 'utf-8'
 | 
				
			||||||
 | 
					    extra_css             = ' body{font-family: Arial,sans-serif} img{margin-bottom: 0.8em} '
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    conversion_options = {
 | 
					    conversion_options = {
 | 
				
			||||||
                          'comment'          : description
 | 
					                          'comment'          : description
 | 
				
			||||||
@ -28,14 +30,18 @@ class NewScientist(BasicNewsRecipe):
 | 
				
			|||||||
                        , 'publisher'        : publisher
 | 
					                        , 'publisher'        : publisher
 | 
				
			||||||
                        , 'language'         : language
 | 
					                        , 'language'         : language
 | 
				
			||||||
                        }
 | 
					                        }
 | 
				
			||||||
 | 
					    preprocess_regexps = [(re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE),lambda match: '</title></head>')]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    keep_only_tags = [dict(name='div', attrs={'id':['pgtop','maincol']})]
 | 
					    keep_only_tags = [dict(name='div', attrs={'id':['pgtop','maincol','nsblgposts','hldgalcols']})]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    remove_tags = [
 | 
					    remove_tags = [
 | 
				
			||||||
                     dict(name='div', attrs={'class':['hldBd','adline','pnl','infotext' ]})
 | 
					                     dict(name='div'  , attrs={'class':['hldBd','adline','pnl','infotext' ]})
 | 
				
			||||||
                    ,dict(name='div', attrs={'id'   :['compnl','artIssueInfo','artTools']})
 | 
					                    ,dict(name='div'  , attrs={'id'   :['compnl','artIssueInfo','artTools']})
 | 
				
			||||||
                    ,dict(name='p'  , attrs={'class':['marker','infotext'               ]})
 | 
					                    ,dict(name='p'    , attrs={'class':['marker','infotext'               ]})
 | 
				
			||||||
 | 
					                    ,dict(name='meta' , attrs={'name' :'description'                       })
 | 
				
			||||||
                  ]
 | 
					                  ]
 | 
				
			||||||
 | 
					    remove_tags_after = dict(attrs={'class':'nbpcopy'})
 | 
				
			||||||
 | 
					    remove_attributes = ['height','width']
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    feeds          = [
 | 
					    feeds          = [
 | 
				
			||||||
                        (u'Latest Headlines'        , u'http://feeds.newscientist.com/science-news'              )
 | 
					                        (u'Latest Headlines'        , u'http://feeds.newscientist.com/science-news'              )
 | 
				
			||||||
@ -50,9 +56,15 @@ class NewScientist(BasicNewsRecipe):
 | 
				
			|||||||
                     ]
 | 
					                     ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def get_article_url(self, article):
        """Use the feed entry's ``guid`` as the article URL (None when absent)."""
        guid = article.get('guid')
        return guid
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def print_version(self, url):
        """Return *url* with the ``?full=true&print=true`` query appended."""
        print_query = '?full=true&print=true'
        return url + print_query
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def preprocess_html(self, soup):
					        """Remove the parent of the first link labelled 'Home', then adeify images.

					        Only the first matching link is handled; later 'Home' links are left alone.
					        """
					        home_link = None
					        for anchor in soup.findAll('a'):
					            if anchor.string == 'Home':
					                home_link = anchor
					                break
					        if home_link is not None:
					            home_link.parent.extract()
					        return self.adeify_images(soup)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -1,7 +1,5 @@
 | 
				
			|||||||
#!/usr/bin/env  python
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
__license__   = 'GPL v3'
 | 
					__license__   = 'GPL v3'
 | 
				
			||||||
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
 | 
					__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
 | 
				
			||||||
'''
 | 
					'''
 | 
				
			||||||
nspm.rs
 | 
					nspm.rs
 | 
				
			||||||
'''
 | 
					'''
 | 
				
			||||||
@ -21,17 +19,16 @@ class Nspm(BasicNewsRecipe):
 | 
				
			|||||||
    use_embedded_content  = False
 | 
					    use_embedded_content  = False
 | 
				
			||||||
    INDEX                 = 'http://www.nspm.rs/?alphabet=l'
 | 
					    INDEX                 = 'http://www.nspm.rs/?alphabet=l'
 | 
				
			||||||
    encoding              = 'utf-8'
 | 
					    encoding              = 'utf-8'
 | 
				
			||||||
    language = 'sr'
 | 
					    language              = 'sr'
 | 
				
			||||||
 | 
					    masthead_url          = 'http://www.nspm.rs/templates/jsn_epic_pro/images/logol.jpg'
 | 
				
			||||||
    lang                  = 'sr-Latn-RS'
 | 
					 | 
				
			||||||
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
 | 
					    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    conversion_options = {
 | 
					    conversion_options = {
 | 
				
			||||||
                          'comment'          : description
 | 
					                          'comment'          : description
 | 
				
			||||||
                        , 'tags'             : category
 | 
					                        , 'tags'             : category
 | 
				
			||||||
                        , 'publisher'        : publisher
 | 
					                        , 'publisher'        : publisher
 | 
				
			||||||
                        , 'language'         : lang
 | 
					                        , 'language'         : language
 | 
				
			||||||
                        , 'pretty_print'     : True
 | 
					                        , 'linearize_tables' : True
 | 
				
			||||||
                        }
 | 
					                        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
 | 
					    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
 | 
				
			||||||
@ -39,6 +36,8 @@ class Nspm(BasicNewsRecipe):
 | 
				
			|||||||
                            dict(name=['link','object','embed'])
 | 
					                            dict(name=['link','object','embed'])
 | 
				
			||||||
                           ,dict(name='td', attrs={'class':'buttonheading'})
 | 
					                           ,dict(name='td', attrs={'class':'buttonheading'})
 | 
				
			||||||
                         ]
 | 
					                         ]
 | 
				
			||||||
 | 
					    remove_tags_after = dict(attrs={'class':'article_separator'})
 | 
				
			||||||
 | 
					    remove_attributes = ['width','height']
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def get_browser(self):
 | 
					    def get_browser(self):
 | 
				
			||||||
        br = BasicNewsRecipe.get_browser()
 | 
					        br = BasicNewsRecipe.get_browser()
 | 
				
			||||||
@ -51,17 +50,6 @@ class Nspm(BasicNewsRecipe):
 | 
				
			|||||||
        return url.replace('.html','/stampa.html')
 | 
					        return url.replace('.html','/stampa.html')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def preprocess_html(self, soup):
        """Drop inline ``style`` attributes below <body>, then adeify images.

        Returns whatever ``self.adeify_images`` returns for the cleaned soup.
        """
        body = soup.body
        for styled in body.findAll(style=True):
            del styled['style']
        return self.adeify_images(soup)
 | 
				
			||||||
 | 
				
			|||||||
@ -7,10 +7,11 @@ sfgate.com
 | 
				
			|||||||
'''
 | 
					'''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from calibre.web.feeds.news import BasicNewsRecipe
 | 
					from calibre.web.feeds.news import BasicNewsRecipe
 | 
				
			||||||
 | 
					import re
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class SanFranciscoChronicle(BasicNewsRecipe):
 | 
					class SanFranciscoChronicle(BasicNewsRecipe):
 | 
				
			||||||
    title                 = u'San Francisco Chronicle'
 | 
					    title                 = u'San Francisco Chronicle'
 | 
				
			||||||
    __author__            = u'Darko Miletic'
 | 
					    __author__            = u'Darko Miletic and Sujata Raman'
 | 
				
			||||||
    description           = u'San Francisco news'
 | 
					    description           = u'San Francisco news'
 | 
				
			||||||
    language = 'en'
 | 
					    language = 'en'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -19,13 +20,56 @@ class SanFranciscoChronicle(BasicNewsRecipe):
 | 
				
			|||||||
    no_stylesheets        = True
 | 
					    no_stylesheets        = True
 | 
				
			||||||
    use_embedded_content  = False
 | 
					    use_embedded_content  = False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    remove_tags_before = {'class':'articleheadings'}
 | 
					
 | 
				
			||||||
    remove_tags_after =  dict(name='div', attrs={'id':'articlecontent' })
 | 
					
 | 
				
			||||||
    remove_tags = [
 | 
					    remove_tags_before  = {'id':'printheader'}
 | 
				
			||||||
                     dict(name='div', attrs={'class':'tools tools_top'})
 | 
					
 | 
				
			||||||
                    ,dict(name='div', attrs={'id':'articlebox'        })
 | 
					    remove_tags         = [
 | 
				
			||||||
                  ]
 | 
					                            dict(name='div',attrs={'id':'printheader'})
 | 
				
			||||||
 | 
					                           ,dict(name='a', attrs={'href':re.compile('http://ads\.pheedo\.com.*')})
 | 
				
			||||||
 | 
					                           ,dict(name='div',attrs={'id':'footer'})
 | 
				
			||||||
 | 
					                          ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    extra_css       = '''
 | 
				
			||||||
 | 
					                        h1{font-family :Arial,Helvetica,sans-serif; font-size:large;}
 | 
				
			||||||
 | 
					                        h2{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#666666;}
 | 
				
			||||||
 | 
					                        h3{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;}
 | 
				
			||||||
 | 
					                        h4{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
 | 
				
			||||||
 | 
					                        p{font-family :Arial,Helvetica,sans-serif; font-size:x-small;}
 | 
				
			||||||
 | 
					                        .byline{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
 | 
				
			||||||
 | 
					                        .date{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
 | 
				
			||||||
 | 
					                        .dtlcomment{font-style:italic;}
 | 
				
			||||||
 | 
					                        .georgia h3{font-family :Arial,Helvetica,sans-serif; font-size:x-small; color:#000000;}
 | 
				
			||||||
 | 
					                     '''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    feeds          = [
 | 
					    feeds          = [
 | 
				
			||||||
                         (u'Top News Stories', u'http://www.sfgate.com/rss/feeds/news.xml')
 | 
					                         (u'Top News Stories', u'http://www.sfgate.com/rss/feeds/news.xml')
 | 
				
			||||||
                     ]
 | 
					                     ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def print_version(self,url):
					        """Return *url* with the ``&type=printable`` parameter appended."""
					        return url + "&type=printable"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def get_article_url(self, article):
					        """Build the sfgate.com article URL for a feed entry.

					        The entry's ``guid`` is appended to the article.cgi endpoint.  An empty
					        string is returned for sponsored entries (title contains
					        "Presented By:") and for entries that carry no ``guid``.
					        """
					        title = article['title_detail']['value']
					        # Test the title as-is: wrapping it in str() fails on non-ASCII titles under Python 2
					        if "Presented By:" in title:
					            return ''
					        guid = article.get('guid', None)
					        if not guid:
					            # Nothing to append; concatenating None would raise TypeError
					            return ''
					        return "http://www.sfgate.com/cgi-bin/article.cgi?f=" + guid
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										42
									
								
								resources/recipes/sfbg.recipe
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								resources/recipes/sfbg.recipe
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,42 @@
 | 
				
			|||||||
 | 
					from calibre.web.feeds.news import BasicNewsRecipe
 | 
				
			||||||
 | 
					from calibre.ebooks.BeautifulSoup import BeautifulSoup
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class SanFranciscoBayGuardian(BasicNewsRecipe):
					    """Recipe that keeps only the ``story_body`` div of each downloaded article."""

					    title          = u'San Francisco Bay Guardian'
					    language       = 'en'
					    __author__     = 'Krittika Goyal'
					    oldest_article = 1 #days
					    max_articles_per_feed = 25
					    #encoding = 'latin1'

					    no_stylesheets = True
					    remove_tags_before = dict(name='div', attrs={'id':'story_header'})
					    remove_tags_after  = dict(name='div', attrs={'id':'shirttail'})
					    remove_tags = [
					       dict(name='iframe'),
					       #dict(name='div', attrs={'class':'related-articles'}),
					        dict(name='div', attrs={'id':['story_tools', 'toolbox', 'shirttail', 'comment_widget']}),
					       #dict(name='ul', attrs={'class':'article-tools'}),
					       dict(name='ul', attrs={'id':'story_tabs'}),
					    ]

					    # NOTE(review): every feed below is hosted on newsobserver.com, not on a
					    # Bay Guardian site -- this looks copied from another recipe; confirm the URLs.
					    feeds = [
					        ('Cover', 'http://www.newsobserver.com/100/index.rss'),
					        ('News', 'http://www.newsobserver.com/102/index.rss'),
					        ('Politics', 'http://www.newsobserver.com/105/index.rss'),
					        ('Business', 'http://www.newsobserver.com/104/index.rss'),
					        ('Sports', 'http://www.newsobserver.com/103/index.rss'),
					        ('College Sports', 'http://www.newsobserver.com/119/index.rss'),
					        ('Lifestyles', 'http://www.newsobserver.com/106/index.rss'),
					        ('Editorials', 'http://www.newsobserver.com/158/index.rss')]

					    def preprocess_html(self, soup):
					        """Rebuild the page as a minimal document holding only the story body.

					        If the page has no ``<div id="story_body">`` the soup is returned
					        untouched, because there is nothing to move into the new document.
					        """
					        story = soup.find(name='div', attrs={'id':'story_body'})
					        if story is None:
					            # Inserting None into the new body would fail; keep the page as downloaded
					            return soup
					        skeleton = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
					        skeleton.find(name='body').insert(0, story)
					        return skeleton
 | 
				
			||||||
							
								
								
									
										52
									
								
								resources/recipes/smith.recipe
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										52
									
								
								resources/recipes/smith.recipe
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,52 @@
 | 
				
			|||||||
 | 
					from calibre.web.feeds.news import BasicNewsRecipe
 | 
				
			||||||
 | 
					from calibre.ebooks.BeautifulSoup import BeautifulSoup
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class SmithsonianMagazine(BasicNewsRecipe):
					    """Recipe for Smithsonian Magazine; follows '&page=N' links one level deep."""

					    title          = u'Smithsonian Magazine'
					    language       = 'en'
					    __author__     = 'Krittika Goyal'
					    oldest_article = 31#days
					    max_articles_per_feed = 50
					    #encoding = 'latin1'
					    recursions = 1
					    match_regexps = ['&page=[2-9]$']

					    # The recipe option is spelled no_stylesheets; 'remove_stylesheets' is not
					    # an option BasicNewsRecipe reads, so the original setting had no effect.
					    no_stylesheets = True
					    #remove_tags_before = dict(name='h1', attrs={'class':'heading'})
					    remove_tags_after  = dict(name='p', attrs={'id':'articlePaginationWrapper'})
					    remove_tags = [
					       dict(name='iframe'),
					       dict(name='div', attrs={'class':'article_sidebar_border'}),
					       dict(name='div', attrs={'id':['article_sidebar_border', 'most-popular_large']}),
					       #dict(name='ul', attrs={'class':'article-tools'}),
					       dict(name='ul', attrs={'class':'cat-breadcrumb col three last'}),
					    ]

					    feeds          = [
					        ('History and Archeology',
					         'http://feeds.feedburner.com/smithsonianmag/history-archaeology'),
					        ('People and Places',
					         'http://feeds.feedburner.com/smithsonianmag/people-places'),
					        ('Science and Nature',
					         'http://feeds.feedburner.com/smithsonianmag/science-nature'),
					        ('Arts and Culture',
					         'http://feeds.feedburner.com/smithsonianmag/arts-culture'),
					        ('Travel',
					         'http://feeds.feedburner.com/smithsonianmag/travel'),
					    ]

					    def preprocess_html(self, soup):
					        """Rebuild the page as a minimal document holding only the article column.

					        If the page has no ``<div id="article-left">`` the soup is returned
					        untouched, because there is nothing to move into the new document.
					        """
					        story = soup.find(name='div', attrs={'id':'article-left'})
					        if story is None:
					            # Inserting None into the new body would fail; keep the page as downloaded
					            return soup
					        skeleton = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
					        skeleton.find(name='body').insert(0, story)
					        return skeleton

					    def postprocess_html(self, soup, first):
					        """Remove pagination controls, and the article head on every page but the first."""
					        for pager in soup.findAll(id='articlePaginationWrapper'):
					            pager.extract()
					        if not first:
					            for head in soup.findAll(id='article-head'):
					                head.extract()
					        return soup
 | 
				
			||||||
@ -9,16 +9,35 @@ class StrategyBusinessRecipe(BasicNewsRecipe):
 | 
				
			|||||||
    title = u'Strategy+Business'
 | 
					    title = u'Strategy+Business'
 | 
				
			||||||
    publisher = u' Booz & Company'
 | 
					    publisher = u' Booz & Company'
 | 
				
			||||||
    category = u'Business'
 | 
					    category = u'Business'
 | 
				
			||||||
    description = u'Business magazine for senior business executives and the people who influence them.'
 | 
					    description = (u'Business magazine for senior business executives and the people who influence them.'
 | 
				
			||||||
 | 
					            'Go to http://www.strategy-business.com/registration to sign up for a free account')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    oldest_article = 13 * 7 # 3 months
 | 
					    oldest_article = 13 * 7 # 3 months
 | 
				
			||||||
    max_articles_per_feed = 100
 | 
					    max_articles_per_feed = 100
 | 
				
			||||||
    use_embedded_content = False
 | 
					    use_embedded_content = False
 | 
				
			||||||
    remove_empty_feeds = True
 | 
					    remove_empty_feeds = True
 | 
				
			||||||
 | 
					    needs_subscription = True
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    no_stylesheets = True
 | 
					    no_stylesheets = True
 | 
				
			||||||
    remove_javascript = True
 | 
					    remove_javascript = True
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def get_browser(self):
					        """Return a browser that has logged in to strategy-business.com.

					        Opens the registration page, selects the first form whose name contains
					        'gatekeeper_edit', fills its '*_email' and '*_password' controls from
					        self.username / self.password and submits it.  If no such form exists
					        the browser is returned without a login attempt.

					        Raises:
					            ValueError: the page returned by the submit lacks the '>Logout' marker.
					        """
					        br = BasicNewsRecipe.get_browser(self)
					        br.open('http://www.strategy-business.com/registration')
					        for form in br.forms():
					            # A form or control without a name attribute reports None as its name
					            if 'gatekeeper_edit' not in (form.name or ''):
					                continue
					            br.select_form(name=form.name)
					            for control in form.controls:
					                control_name = control.name or ''
					                if control_name.endswith('_email'):
					                    br[control.name] = self.username
					                elif control_name.endswith('_password'):
					                    br[control.name] = self.password
					            raw = br.submit().read()
					            if '>Logout' not in raw:
					                raise ValueError('Failed to login, check your username and password')
					            # Logged in: the remaining forms belong to the page that was just left
					            break
					        return br
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    extra_css = '''
 | 
					    extra_css = '''
 | 
				
			||||||
                body{font-family:verdana,arial,helvetica,geneva,sans-serif ;}
 | 
					                body{font-family:verdana,arial,helvetica,geneva,sans-serif ;}
 | 
				
			||||||
                a {text-decoration: none; color: blue;}
 | 
					                a {text-decoration: none; color: blue;}
 | 
				
			||||||
 | 
				
			|||||||
@ -12,7 +12,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
class al(BasicNewsRecipe):
 | 
					class al(BasicNewsRecipe):
 | 
				
			||||||
    author        = 'Lorenzo Vigentini'
 | 
					    author        = 'Lorenzo Vigentini'
 | 
				
			||||||
    description   = 'the Escapist Magazine'
 | 
					    description   = 'The Escapist Magazine'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    cover_url      = 'http://cdn.themis-media.com/themes/escapistmagazine/default/images/logo.png'
 | 
					    cover_url      = 'http://cdn.themis-media.com/themes/escapistmagazine/default/images/logo.png'
 | 
				
			||||||
    title          = u'the Escapist Magazine'
 | 
					    title          = u'the Escapist Magazine'
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										39
									
								
								resources/recipes/thecultofghoul.recipe
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										39
									
								
								resources/recipes/thecultofghoul.recipe
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,39 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 | 
					__license__   = 'GPL v3'
 | 
				
			||||||
 | 
					__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					cultofghoul.blogspot.com
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import re
 | 
				
			||||||
 | 
					from calibre.web.feeds.news import BasicNewsRecipe
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class TheCultOfGhoul(BasicNewsRecipe):
					    """News recipe for the blog at cultofghoul.blogspot.com."""

					    # Identification
					    title = 'The Cult of Ghoul'
					    __author__ = 'Darko Miletic'
					    description = 'Filmski blog'
					    language = 'sr'
					    encoding = 'utf-8'

					    # Fetch limits
					    oldest_article = 7
					    max_articles_per_feed = 100

					    # The feed carries the full article text; site stylesheets are dropped.
					    use_embedded_content = True
					    no_stylesheets = True
					    extra_css = ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: "Trebuchet MS",Trebuchet,Verdana,sans1,sans-serif} .article_description{font-family: sans1, sans-serif} img{margin-bottom: 0.8em; border: 1px solid #333333; padding: 4px } '

					    feeds = [(u'Posts', u'http://cultofghoul.blogspot.com/feeds/posts/default')]

					    # Substitute U+00D0 for every U+0110 in the downloaded markup.
					    preprocess_regexps = [(re.compile(u'\u0110'), lambda _found: u'\u00D0')]

					    conversion_options = {
					        'comment': description,
					        'tags': 'film, blog, srbija, strava, uzas',
					        'publisher': 'Dejan Ognjanovic',
					        'language': language,
					    }

					    def preprocess_html(self, soup):
					        """Remove every inline style attribute, then pass the soup to adeify_images."""
					        styled_tags = soup.findAll(style=True)
					        for tag in styled_tags:
					            del tag['style']
					        cleaned = self.adeify_images(soup)
					        return cleaned
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -50,7 +50,11 @@ class WallStreetJournal(BasicNewsRecipe):
 | 
				
			|||||||
                br.select_form(nr=0)
 | 
					                br.select_form(nr=0)
 | 
				
			||||||
                br['user']   = self.username
 | 
					                br['user']   = self.username
 | 
				
			||||||
                br['password'] = self.password
 | 
					                br['password'] = self.password
 | 
				
			||||||
                br.submit()
 | 
					                res = br.submit()
 | 
				
			||||||
 | 
					                raw = res.read()
 | 
				
			||||||
 | 
					                if 'Welcome,' not in raw:
 | 
				
			||||||
 | 
					                    raise ValueError('Failed to log in to wsj.com, check your '
 | 
				
			||||||
 | 
					                            'username and password')
 | 
				
			||||||
            return br
 | 
					            return br
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        def postprocess_html(self, soup, first):
 | 
					        def postprocess_html(self, soup, first):
 | 
				
			||||||
@ -69,8 +73,10 @@ class WallStreetJournal(BasicNewsRecipe):
 | 
				
			|||||||
            soup = self.wsj_get_index()
 | 
					            soup = self.wsj_get_index()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            year = strftime('%Y')
 | 
					            year = strftime('%Y')
 | 
				
			||||||
            for x in soup.findAll('td', attrs={'class':'b14'}):
 | 
					            for x in soup.findAll('td', height='25', attrs={'class':'b14'}):
 | 
				
			||||||
                txt = self.tag_to_string(x).strip()
 | 
					                txt = self.tag_to_string(x).strip()
 | 
				
			||||||
 | 
					                txt = txt.replace(u'\xa0', ' ')
 | 
				
			||||||
 | 
					                txt = txt.encode('ascii', 'ignore')
 | 
				
			||||||
                if year in txt:
 | 
					                if year in txt:
 | 
				
			||||||
                    self.timefmt = ' [%s]'%txt
 | 
					                    self.timefmt = ' [%s]'%txt
 | 
				
			||||||
                    break
 | 
					                    break
 | 
				
			||||||
 | 
				
			|||||||
@ -11,7 +11,8 @@ import sys, re, os, platform
 | 
				
			|||||||
is64bit = platform.architecture()[0] == '64bit'
 | 
					is64bit = platform.architecture()[0] == '64bit'
 | 
				
			||||||
iswindows = re.search('win(32|64)', sys.platform)
 | 
					iswindows = re.search('win(32|64)', sys.platform)
 | 
				
			||||||
isosx = 'darwin' in sys.platform
 | 
					isosx = 'darwin' in sys.platform
 | 
				
			||||||
islinux = not isosx and not iswindows
 | 
					isfreebsd = 'freebsd' in sys.platform
 | 
				
			||||||
 | 
					islinux = not isosx and not iswindows and not isfreebsd
 | 
				
			||||||
SRC = os.path.abspath('src')
 | 
					SRC = os.path.abspath('src')
 | 
				
			||||||
sys.path.insert(0, SRC)
 | 
					sys.path.insert(0, SRC)
 | 
				
			||||||
sys.resources_location = os.path.join(os.path.dirname(SRC), 'resources')
 | 
					sys.resources_location = os.path.join(os.path.dirname(SRC), 'resources')
 | 
				
			||||||
@ -117,7 +118,7 @@ class Command(object):
 | 
				
			|||||||
        self.real_user = os.environ.get('SUDO_USER', None)
 | 
					        self.real_user = os.environ.get('SUDO_USER', None)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def drop_privileges(self):
 | 
					    def drop_privileges(self):
 | 
				
			||||||
        if not islinux or isosx:
 | 
					        if not islinux or isosx or isfreebsd:
 | 
				
			||||||
            return
 | 
					            return
 | 
				
			||||||
        if self.real_user is not None:
 | 
					        if self.real_user is not None:
 | 
				
			||||||
            self.info('Dropping privileges to those of', self.real_user+':',
 | 
					            self.info('Dropping privileges to those of', self.real_user+':',
 | 
				
			||||||
@ -128,7 +129,7 @@ class Command(object):
 | 
				
			|||||||
            os.seteuid(int(self.real_uid))
 | 
					            os.seteuid(int(self.real_uid))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def regain_privileges(self):
 | 
					    def regain_privileges(self):
 | 
				
			||||||
        if not islinux or isosx:
 | 
					        if not islinux or isosx or isfreebsd:
 | 
				
			||||||
            return
 | 
					            return
 | 
				
			||||||
        if os.geteuid() != 0 and self.orig_euid == 0:
 | 
					        if os.geteuid() != 0 and self.orig_euid == 0:
 | 
				
			||||||
            self.info('Trying to get root privileges')
 | 
					            self.info('Trying to get root privileges')
 | 
				
			||||||
 | 
				
			|||||||
@ -89,6 +89,7 @@ fc_inc = '/usr/include/fontconfig'
 | 
				
			|||||||
fc_lib = '/usr/lib'
 | 
					fc_lib = '/usr/lib'
 | 
				
			||||||
podofo_inc = '/usr/include/podofo'
 | 
					podofo_inc = '/usr/include/podofo'
 | 
				
			||||||
podofo_lib = '/usr/lib'
 | 
					podofo_lib = '/usr/lib'
 | 
				
			||||||
 | 
					chmlib_inc_dirs = chmlib_lib_dirs = []
 | 
				
			||||||
 | 
					
 | 
				
			||||||
if iswindows:
 | 
					if iswindows:
 | 
				
			||||||
    prefix  = r'C:\cygwin\home\kovid\sw'
 | 
					    prefix  = r'C:\cygwin\home\kovid\sw'
 | 
				
			||||||
@ -96,6 +97,10 @@ if iswindows:
 | 
				
			|||||||
    sw_lib_dir  = os.path.join(prefix, 'lib')
 | 
					    sw_lib_dir  = os.path.join(prefix, 'lib')
 | 
				
			||||||
    fc_inc = os.path.join(sw_inc_dir, 'fontconfig')
 | 
					    fc_inc = os.path.join(sw_inc_dir, 'fontconfig')
 | 
				
			||||||
    fc_lib = sw_lib_dir
 | 
					    fc_lib = sw_lib_dir
 | 
				
			||||||
 | 
					    chmlib_inc_dirs = consolidate('CHMLIB_INC_DIR', os.path.join(prefix,
 | 
				
			||||||
 | 
					        'build', 'chmlib-0.40', 'src'))
 | 
				
			||||||
 | 
					    chmlib_lib_dirs = consolidate('CHMLIB_LIB_DIR', os.path.join(prefix,
 | 
				
			||||||
 | 
					        'build', 'chmlib-0.40', 'src', 'Release'))
 | 
				
			||||||
    png_inc_dirs = [sw_inc_dir]
 | 
					    png_inc_dirs = [sw_inc_dir]
 | 
				
			||||||
    png_lib_dirs = [sw_lib_dir]
 | 
					    png_lib_dirs = [sw_lib_dir]
 | 
				
			||||||
    png_libs = ['png12']
 | 
					    png_libs = ['png12']
 | 
				
			||||||
 | 
				
			|||||||
@ -11,15 +11,16 @@ from distutils import sysconfig
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
from PyQt4.pyqtconfig import QtGuiModuleMakefile
 | 
					from PyQt4.pyqtconfig import QtGuiModuleMakefile
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from setup import Command, islinux, isosx, SRC, iswindows
 | 
					from setup import Command, islinux, isfreebsd, isosx, SRC, iswindows
 | 
				
			||||||
from setup.build_environment import fc_inc, fc_lib, \
 | 
					from setup.build_environment import fc_inc, fc_lib, chmlib_inc_dirs, \
 | 
				
			||||||
        fc_error, poppler_libs, poppler_lib_dirs, poppler_inc_dirs, podofo_inc, \
 | 
					        fc_error, poppler_libs, poppler_lib_dirs, poppler_inc_dirs, podofo_inc, \
 | 
				
			||||||
        podofo_lib, podofo_error, poppler_error, pyqt, OSX_SDK, NMAKE, \
 | 
					        podofo_lib, podofo_error, poppler_error, pyqt, OSX_SDK, NMAKE, \
 | 
				
			||||||
        QMAKE, msvc, MT, win_inc, win_lib, png_inc_dirs, win_ddk, \
 | 
					        QMAKE, msvc, MT, win_inc, win_lib, png_inc_dirs, win_ddk, \
 | 
				
			||||||
        magick_inc_dirs, magick_lib_dirs, png_lib_dirs, png_libs, \
 | 
					        magick_inc_dirs, magick_lib_dirs, png_lib_dirs, png_libs, \
 | 
				
			||||||
        magick_error, magick_libs, ft_lib_dirs, ft_libs, jpg_libs, jpg_lib_dirs
 | 
					        magick_error, magick_libs, ft_lib_dirs, ft_libs, jpg_libs, \
 | 
				
			||||||
 | 
					        jpg_lib_dirs, chmlib_lib_dirs
 | 
				
			||||||
MT
 | 
					MT
 | 
				
			||||||
isunix = islinux or isosx
 | 
					isunix = islinux or isosx or isfreebsd
 | 
				
			||||||
 | 
					
 | 
				
			||||||
make = 'make' if isunix else NMAKE
 | 
					make = 'make' if isunix else NMAKE
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -56,6 +57,22 @@ if iswindows:
 | 
				
			|||||||
    pdfreflow_libs = ['advapi32', 'User32', 'Gdi32']
 | 
					    pdfreflow_libs = ['advapi32', 'User32', 'Gdi32']
 | 
				
			||||||
 | 
					
 | 
				
			||||||
extensions = [
 | 
					extensions = [
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Extension('chmlib',
 | 
				
			||||||
 | 
					            ['calibre/utils/chm/swig_chm.c'],
 | 
				
			||||||
 | 
					            libraries=['ChmLib' if iswindows else 'chm'],
 | 
				
			||||||
 | 
					            inc_dirs=chmlib_inc_dirs,
 | 
				
			||||||
 | 
					            lib_dirs=chmlib_lib_dirs,
 | 
				
			||||||
 | 
					            cflags=["-DSWIG_COBJECT_TYPES"]),
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Extension('chm_extra',
 | 
				
			||||||
 | 
					            ['calibre/utils/chm/extra.c'],
 | 
				
			||||||
 | 
					            libraries=['ChmLib' if iswindows else 'chm'],
 | 
				
			||||||
 | 
					            inc_dirs=chmlib_inc_dirs,
 | 
				
			||||||
 | 
					            lib_dirs=chmlib_lib_dirs,
 | 
				
			||||||
 | 
					            cflags=["-D__PYTHON__"]),
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    Extension('pdfreflow',
 | 
					    Extension('pdfreflow',
 | 
				
			||||||
                reflow_sources,
 | 
					                reflow_sources,
 | 
				
			||||||
                headers=reflow_headers,
 | 
					                headers=reflow_headers,
 | 
				
			||||||
@ -126,7 +143,7 @@ extensions = [
 | 
				
			|||||||
if iswindows:
 | 
					if iswindows:
 | 
				
			||||||
    extensions.append(Extension('winutil',
 | 
					    extensions.append(Extension('winutil',
 | 
				
			||||||
                ['calibre/utils/windows/winutil.c'],
 | 
					                ['calibre/utils/windows/winutil.c'],
 | 
				
			||||||
                libraries=['shell32', 'setupapi'],
 | 
					                libraries=['shell32', 'setupapi', 'wininet'],
 | 
				
			||||||
                cflags=['/X']
 | 
					                cflags=['/X']
 | 
				
			||||||
                ))
 | 
					                ))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -154,6 +171,13 @@ if islinux:
 | 
				
			|||||||
    ldflags.append('-lpython'+sysconfig.get_python_version())
 | 
					    ldflags.append('-lpython'+sysconfig.get_python_version())
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if isfreebsd:
 | 
				
			||||||
 | 
					    cflags.append('-pthread')
 | 
				
			||||||
 | 
					    ldflags.append('-shared')
 | 
				
			||||||
 | 
					    cflags.append('-I'+sysconfig.get_python_inc())
 | 
				
			||||||
 | 
					    ldflags.append('-lpython'+sysconfig.get_python_version())
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
if isosx:
 | 
					if isosx:
 | 
				
			||||||
    x, p = ('i386', 'ppc')
 | 
					    x, p = ('i386', 'ppc')
 | 
				
			||||||
    archs = ['-arch', x, '-arch', p, '-isysroot',
 | 
					    archs = ['-arch', x, '-arch', p, '-isysroot',
 | 
				
			||||||
 | 
				
			|||||||
@ -8,7 +8,7 @@ __docformat__ = 'restructuredtext en'
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
import sys, os, textwrap, subprocess, shutil, tempfile, atexit, stat, shlex
 | 
					import sys, os, textwrap, subprocess, shutil, tempfile, atexit, stat, shlex
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from setup import Command, islinux, basenames, modules, functions, \
 | 
					from setup import Command, islinux, isfreebsd, basenames, modules, functions, \
 | 
				
			||||||
        __appname__, __version__
 | 
					        __appname__, __version__
 | 
				
			||||||
 | 
					
 | 
				
			||||||
HEADER = '''\
 | 
					HEADER = '''\
 | 
				
			||||||
@ -116,7 +116,7 @@ class Develop(Command):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def pre_sub_commands(self, opts):
 | 
					    def pre_sub_commands(self, opts):
 | 
				
			||||||
        if not islinux:
 | 
					        if not (islinux or isfreebsd):
 | 
				
			||||||
            self.info('\nSetting up a source based development environment is only '
 | 
					            self.info('\nSetting up a source based development environment is only '
 | 
				
			||||||
                    'supported on linux. On other platforms, see the User Manual'
 | 
					                    'supported on linux. On other platforms, see the User Manual'
 | 
				
			||||||
                    ' for help with setting up a development environment.')
 | 
					                    ' for help with setting up a development environment.')
 | 
				
			||||||
@ -156,7 +156,7 @@ class Develop(Command):
 | 
				
			|||||||
            self.warn('Failed to compile mount helper. Auto mounting of',
 | 
					            self.warn('Failed to compile mount helper. Auto mounting of',
 | 
				
			||||||
                ' devices will not work')
 | 
					                ' devices will not work')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if os.geteuid() != 0:
 | 
					        if not isfreebsd and os.geteuid() != 0:
 | 
				
			||||||
            return self.warn('Must be run as root to compile mount helper. Auto '
 | 
					            return self.warn('Must be run as root to compile mount helper. Auto '
 | 
				
			||||||
                    'mounting of devices will not work.')
 | 
					                    'mounting of devices will not work.')
 | 
				
			||||||
        src = os.path.join(self.SRC, 'calibre', 'devices', 'linux_mount_helper.c')
 | 
					        src = os.path.join(self.SRC, 'calibre', 'devices', 'linux_mount_helper.c')
 | 
				
			||||||
@ -168,9 +168,10 @@ class Develop(Command):
 | 
				
			|||||||
        ret = p.wait()
 | 
					        ret = p.wait()
 | 
				
			||||||
        if ret != 0:
 | 
					        if ret != 0:
 | 
				
			||||||
            return warn()
 | 
					            return warn()
 | 
				
			||||||
        os.chown(dest, 0, 0)
 | 
					        if not isfreebsd:
 | 
				
			||||||
        os.chmod(dest, stat.S_ISUID|stat.S_ISGID|stat.S_IRUSR|stat.S_IWUSR|\
 | 
					            os.chown(dest, 0, 0)
 | 
				
			||||||
                stat.S_IXUSR|stat.S_IXGRP|stat.S_IXOTH)
 | 
					            os.chmod(dest, stat.S_ISUID|stat.S_ISGID|stat.S_IRUSR|stat.S_IWUSR|\
 | 
				
			||||||
 | 
					                    stat.S_IXUSR|stat.S_IXGRP|stat.S_IXOTH)
 | 
				
			||||||
        self.manifest.append(dest)
 | 
					        self.manifest.append(dest)
 | 
				
			||||||
        return dest
 | 
					        return dest
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -42,6 +42,7 @@ class LinuxFreeze(Command):
 | 
				
			|||||||
                        '/usr/lib/liblcms.so.1',
 | 
					                        '/usr/lib/liblcms.so.1',
 | 
				
			||||||
                        '/tmp/calibre-mount-helper',
 | 
					                        '/tmp/calibre-mount-helper',
 | 
				
			||||||
                        '/usr/lib/libunrar.so',
 | 
					                        '/usr/lib/libunrar.so',
 | 
				
			||||||
 | 
					                        '/usr/lib/libchm.so.0',
 | 
				
			||||||
                        '/usr/lib/libsqlite3.so.0',
 | 
					                        '/usr/lib/libsqlite3.so.0',
 | 
				
			||||||
                        '/usr/lib/libsqlite3.so.0',
 | 
					                        '/usr/lib/libsqlite3.so.0',
 | 
				
			||||||
                        '/usr/lib/libmng.so.1',
 | 
					                        '/usr/lib/libmng.so.1',
 | 
				
			||||||
 | 
				
			|||||||
@ -459,7 +459,7 @@ class Py2App(object):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    @flush
 | 
					    @flush
 | 
				
			||||||
    def add_misc_libraries(self):
 | 
					    def add_misc_libraries(self):
 | 
				
			||||||
        for x in ('usb', 'unrar', 'readline.6.0', 'wmflite-0.2.7'):
 | 
					        for x in ('usb', 'unrar', 'readline.6.0', 'wmflite-0.2.7', 'chm.0'):
 | 
				
			||||||
            info('\nAdding', x)
 | 
					            info('\nAdding', x)
 | 
				
			||||||
            x = 'lib%s.dylib'%x
 | 
					            x = 'lib%s.dylib'%x
 | 
				
			||||||
            shutil.copy2(join(SW, 'lib', x), self.frameworks_dir)
 | 
					            shutil.copy2(join(SW, 'lib', x), self.frameworks_dir)
 | 
				
			||||||
 | 
				
			|||||||
@ -12,7 +12,7 @@ warnings.simplefilter('ignore', DeprecationWarning)
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from calibre.startup import plugins, winutil, winutilerror
 | 
					from calibre.startup import plugins, winutil, winutilerror
 | 
				
			||||||
from calibre.constants import iswindows, isosx, islinux, isfrozen, \
 | 
					from calibre.constants import iswindows, isosx, islinux, isfreebsd, isfrozen, \
 | 
				
			||||||
                              terminal_controller, preferred_encoding, \
 | 
					                              terminal_controller, preferred_encoding, \
 | 
				
			||||||
                              __appname__, __version__, __author__, \
 | 
					                              __appname__, __version__, __author__, \
 | 
				
			||||||
                              win32event, win32api, winerror, fcntl, \
 | 
					                              win32event, win32api, winerror, fcntl, \
 | 
				
			||||||
@ -22,7 +22,7 @@ import mechanize
 | 
				
			|||||||
if False:
 | 
					if False:
 | 
				
			||||||
    winutil, winutilerror, __appname__, islinux, __version__
 | 
					    winutil, winutilerror, __appname__, islinux, __version__
 | 
				
			||||||
    fcntl, win32event, isfrozen, __author__, terminal_controller
 | 
					    fcntl, win32event, isfrozen, __author__, terminal_controller
 | 
				
			||||||
    winerror, win32api
 | 
					    winerror, win32api, isfreebsd
 | 
				
			||||||
 | 
					
 | 
				
			||||||
mimetypes.add_type('application/epub+zip',                '.epub')
 | 
					mimetypes.add_type('application/epub+zip',                '.epub')
 | 
				
			||||||
mimetypes.add_type('text/x-sony-bbeb+xml',                '.lrs')
 | 
					mimetypes.add_type('text/x-sony-bbeb+xml',                '.lrs')
 | 
				
			||||||
 | 
				
			|||||||
@ -2,7 +2,7 @@ __license__   = 'GPL v3'
 | 
				
			|||||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 | 
					__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 | 
				
			||||||
__docformat__ = 'restructuredtext en'
 | 
					__docformat__ = 'restructuredtext en'
 | 
				
			||||||
__appname__   = 'calibre'
 | 
					__appname__   = 'calibre'
 | 
				
			||||||
__version__   = '0.6.42'
 | 
					__version__   = '0.6.44'
 | 
				
			||||||
__author__    = "Kovid Goyal <kovid@kovidgoyal.net>"
 | 
					__author__    = "Kovid Goyal <kovid@kovidgoyal.net>"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import re
 | 
					import re
 | 
				
			||||||
@ -22,7 +22,8 @@ terminal_controller = TerminalController(sys.stdout)
 | 
				
			|||||||
iswindows = 'win32' in sys.platform.lower() or 'win64' in sys.platform.lower()
 | 
					iswindows = 'win32' in sys.platform.lower() or 'win64' in sys.platform.lower()
 | 
				
			||||||
isosx     = 'darwin' in sys.platform.lower()
 | 
					isosx     = 'darwin' in sys.platform.lower()
 | 
				
			||||||
isnewosx = isosx and getattr(sys, 'new_app_bundle', False)
 | 
					isnewosx = isosx and getattr(sys, 'new_app_bundle', False)
 | 
				
			||||||
islinux   = not(iswindows or isosx)
 | 
					isfreebsd = 'freebsd' in sys.platform.lower()
 | 
				
			||||||
 | 
					islinux   = not(iswindows or isosx or isfreebsd)
 | 
				
			||||||
isfrozen  = hasattr(sys, 'frozen')
 | 
					isfrozen  = hasattr(sys, 'frozen')
 | 
				
			||||||
isunix = isosx or islinux
 | 
					isunix = isosx or islinux
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -56,7 +57,8 @@ if plugins is None:
 | 
				
			|||||||
        sys.path.insert(0, plugin_path)
 | 
					        sys.path.insert(0, plugin_path)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        for plugin in ['pictureflow', 'lzx', 'msdes', 'podofo', 'cPalmdoc',
 | 
					        for plugin in ['pictureflow', 'lzx', 'msdes', 'podofo', 'cPalmdoc',
 | 
				
			||||||
            'fontconfig', 'pdfreflow', 'progress_indicator'] + \
 | 
					            'fontconfig', 'pdfreflow', 'progress_indicator', 'chmlib',
 | 
				
			||||||
 | 
					            'chm_extra'] + \
 | 
				
			||||||
                    (['winutil'] if iswindows else []) + \
 | 
					                    (['winutil'] if iswindows else []) + \
 | 
				
			||||||
                    (['usbobserver'] if isosx else []):
 | 
					                    (['usbobserver'] if isosx else []):
 | 
				
			||||||
            try:
 | 
					            try:
 | 
				
			||||||
 | 
				
			|||||||
@ -119,11 +119,34 @@ class Plugin(object):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    def __enter__(self, *args):
 | 
					    def __enter__(self, *args):
 | 
				
			||||||
        if self.plugin_path is not None:
 | 
					        if self.plugin_path is not None:
 | 
				
			||||||
            sys.path.insert(0, self.plugin_path)
 | 
					            from calibre.utils.zipfile import ZipFile
 | 
				
			||||||
 | 
					            zf = ZipFile(self.plugin_path)
 | 
				
			||||||
 | 
					            extensions = set([x.rpartition('.')[-1].lower() for x in
 | 
				
			||||||
 | 
					                zf.namelist()])
 | 
				
			||||||
 | 
					            zip_safe = True
 | 
				
			||||||
 | 
					            for ext in ('pyd', 'so', 'dll', 'dylib'):
 | 
				
			||||||
 | 
					                if ext in extensions:
 | 
				
			||||||
 | 
					                    zip_safe = False
 | 
				
			||||||
 | 
					            if zip_safe:
 | 
				
			||||||
 | 
					                sys.path.insert(0, self.plugin_path)
 | 
				
			||||||
 | 
					                self.sys_insertion_path = self.plugin_path
 | 
				
			||||||
 | 
					            else:
 | 
				
			||||||
 | 
					                from calibre.ptempfile import TemporaryDirectory
 | 
				
			||||||
 | 
					                self._sys_insertion_tdir = TemporaryDirectory('plugin_unzip')
 | 
				
			||||||
 | 
					                self.sys_insertion_path = self._sys_insertion_tdir.__enter__(*args)
 | 
				
			||||||
 | 
					                zf.extractall(self.sys_insertion_path)
 | 
				
			||||||
 | 
					                sys.path.insert(0, self.sys_insertion_path)
 | 
				
			||||||
 | 
					            zf.close()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def __exit__(self, *args):
 | 
					    def __exit__(self, *args):
 | 
				
			||||||
        if self.plugin_path in sys.path:
 | 
					        ip, it = getattr(self, 'sys_insertion_path', None), getattr(self,
 | 
				
			||||||
            sys.path.remove(self.plugin_path)
 | 
					                '_sys_insertion_tdir', None)
 | 
				
			||||||
 | 
					        if ip in sys.path:
 | 
				
			||||||
 | 
					            sys.path.remove(ip)
 | 
				
			||||||
 | 
					        if hasattr(it, '__exit__'):
 | 
				
			||||||
 | 
					            it.__exit__(*args)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class FileTypePlugin(Plugin):
 | 
					class FileTypePlugin(Plugin):
 | 
				
			||||||
 | 
				
			|||||||
@ -70,9 +70,10 @@ class PML2PMLZ(FileTypePlugin):
 | 
				
			|||||||
        pmlz = zipfile.ZipFile(of.name, 'w')
 | 
					        pmlz = zipfile.ZipFile(of.name, 'w')
 | 
				
			||||||
        pmlz.write(pmlfile, os.path.basename(pmlfile))
 | 
					        pmlz.write(pmlfile, os.path.basename(pmlfile))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        pml_img = os.path.basename(pmlfile)[0] + '_img'
 | 
					        pml_img = os.path.splitext(pmlfile)[0] + '_img'
 | 
				
			||||||
        img_dir = pml_img if os.path.exists(pml_img) else 'images' if \
 | 
					        i_img = os.path.join(os.path.dirname(pmlfile),'images')
 | 
				
			||||||
            os.path.exists('images') else ''
 | 
					        img_dir = pml_img if os.path.isdir(pml_img) else i_img if \
 | 
				
			||||||
 | 
					            os.path.isdir(i_img) else ''
 | 
				
			||||||
        if img_dir:
 | 
					        if img_dir:
 | 
				
			||||||
            for image in glob.glob(os.path.join(img_dir, '*.png')):
 | 
					            for image in glob.glob(os.path.join(img_dir, '*.png')):
 | 
				
			||||||
                pmlz.write(image, os.path.join('images', (os.path.basename(image))))
 | 
					                pmlz.write(image, os.path.join('images', (os.path.basename(image))))
 | 
				
			||||||
@ -81,17 +82,6 @@ class PML2PMLZ(FileTypePlugin):
 | 
				
			|||||||
        return of.name
 | 
					        return of.name
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# CHM MODIFIED
 | 
					 | 
				
			||||||
class CHMMetadataReader(MetadataReaderPlugin):
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    name        = 'Read CHM metadata'
 | 
					 | 
				
			||||||
    file_types  = set(['chm'])
 | 
					 | 
				
			||||||
    description = _('Read metadata from %s files') % 'CHM'
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def get_metadata(self, stream, ftype):
 | 
					 | 
				
			||||||
        from calibre.ebooks.metadata.chm import get_metadata
 | 
					 | 
				
			||||||
        return get_metadata(stream)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class ComicMetadataReader(MetadataReaderPlugin):
 | 
					class ComicMetadataReader(MetadataReaderPlugin):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    name = 'Read comic metadata'
 | 
					    name = 'Read comic metadata'
 | 
				
			||||||
@ -113,6 +103,17 @@ class ComicMetadataReader(MetadataReaderPlugin):
 | 
				
			|||||||
            mi.cover_data = (ext.lower(), data)
 | 
					            mi.cover_data = (ext.lower(), data)
 | 
				
			||||||
        return mi
 | 
					        return mi
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class CHMMetadataReader(MetadataReaderPlugin):
					    """Metadata reader plugin registered for the 'chm' file type."""

					    name = 'Read CHM metadata'
					    file_types = set(('chm',))
					    description = _('Read metadata from %s files') % 'CHM'

					    def get_metadata(self, stream, ftype):
					        """Return whatever the CHM metadata module extracts from stream.

					        ftype is accepted but not used.
					        """
					        # Imported inside the method, so the CHM module is only
					        # loaded when this is called.
					        from calibre.ebooks.chm.metadata import get_metadata as read_chm
					        return read_chm(stream)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class EPUBMetadataReader(MetadataReaderPlugin):
 | 
					class EPUBMetadataReader(MetadataReaderPlugin):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    name        = 'Read EPUB metadata'
 | 
					    name        = 'Read EPUB metadata'
 | 
				
			||||||
@ -394,7 +395,7 @@ from calibre.ebooks.rtf.input import RTFInput
 | 
				
			|||||||
from calibre.ebooks.tcr.input import TCRInput
 | 
					from calibre.ebooks.tcr.input import TCRInput
 | 
				
			||||||
from calibre.ebooks.txt.input import TXTInput
 | 
					from calibre.ebooks.txt.input import TXTInput
 | 
				
			||||||
from calibre.ebooks.lrf.input import LRFInput
 | 
					from calibre.ebooks.lrf.input import LRFInput
 | 
				
			||||||
from calibre.ebooks.chm.input import CHMInput # CHM MODIFIED
 | 
					from calibre.ebooks.chm.input import CHMInput
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from calibre.ebooks.epub.output import EPUBOutput
 | 
					from calibre.ebooks.epub.output import EPUBOutput
 | 
				
			||||||
from calibre.ebooks.fb2.output import FB2Output
 | 
					from calibre.ebooks.fb2.output import FB2Output
 | 
				
			||||||
@ -418,7 +419,7 @@ from calibre.devices.blackberry.driver import BLACKBERRY
 | 
				
			|||||||
from calibre.devices.cybook.driver import CYBOOK
 | 
					from calibre.devices.cybook.driver import CYBOOK
 | 
				
			||||||
from calibre.devices.eb600.driver import EB600, COOL_ER, SHINEBOOK, \
 | 
					from calibre.devices.eb600.driver import EB600, COOL_ER, SHINEBOOK, \
 | 
				
			||||||
                POCKETBOOK360, GER2, ITALICA, ECLICTO, DBOOK, INVESBOOK, \
 | 
					                POCKETBOOK360, GER2, ITALICA, ECLICTO, DBOOK, INVESBOOK, \
 | 
				
			||||||
                BOOQ
 | 
					                BOOQ, ELONEX
 | 
				
			||||||
from calibre.devices.iliad.driver import ILIAD
 | 
					from calibre.devices.iliad.driver import ILIAD
 | 
				
			||||||
from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800
 | 
					from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800
 | 
				
			||||||
from calibre.devices.jetbook.driver import JETBOOK
 | 
					from calibre.devices.jetbook.driver import JETBOOK
 | 
				
			||||||
@ -433,6 +434,7 @@ from calibre.devices.nuut2.driver import NUUT2
 | 
				
			|||||||
from calibre.devices.iriver.driver import IRIVER_STORY
 | 
					from calibre.devices.iriver.driver import IRIVER_STORY
 | 
				
			||||||
from calibre.devices.binatone.driver import README
 | 
					from calibre.devices.binatone.driver import README
 | 
				
			||||||
from calibre.devices.hanvon.driver import N516, EB511
 | 
					from calibre.devices.hanvon.driver import N516, EB511
 | 
				
			||||||
 | 
					from calibre.devices.teclast.driver import TECLAST_K3
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon
 | 
					from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon
 | 
				
			||||||
from calibre.library.catalog import CSV_XML, EPUB_MOBI
 | 
					from calibre.library.catalog import CSV_XML, EPUB_MOBI
 | 
				
			||||||
@ -454,7 +456,7 @@ plugins += [
 | 
				
			|||||||
    TCRInput,
 | 
					    TCRInput,
 | 
				
			||||||
    TXTInput,
 | 
					    TXTInput,
 | 
				
			||||||
    LRFInput,
 | 
					    LRFInput,
 | 
				
			||||||
    CHMInput, # CHM MODIFIED
 | 
					    CHMInput,
 | 
				
			||||||
]
 | 
					]
 | 
				
			||||||
plugins += [
 | 
					plugins += [
 | 
				
			||||||
    EPUBOutput,
 | 
					    EPUBOutput,
 | 
				
			||||||
@ -508,6 +510,8 @@ plugins += [
 | 
				
			|||||||
    README,
 | 
					    README,
 | 
				
			||||||
    N516,
 | 
					    N516,
 | 
				
			||||||
    EB511,
 | 
					    EB511,
 | 
				
			||||||
 | 
					    ELONEX,
 | 
				
			||||||
 | 
					    TECLAST_K3
 | 
				
			||||||
]
 | 
					]
 | 
				
			||||||
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
 | 
					plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
 | 
				
			||||||
                                        x.__name__.endswith('MetadataReader')]
 | 
					                                        x.__name__.endswith('MetadataReader')]
 | 
				
			||||||
 | 
				
			|||||||
@ -214,8 +214,21 @@ class InputFormatPlugin(Plugin):
 | 
				
			|||||||
        return ret
 | 
					        return ret
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def postprocess_book(self, oeb, opts, log):
 | 
					    def postprocess_book(self, oeb, opts, log):
 | 
				
			||||||
 | 
					        '''
 | 
				
			||||||
 | 
					        Called to allow the input plugin to perform postprocessing after
 | 
				
			||||||
 | 
					        the book has been parsed.
 | 
				
			||||||
 | 
					        '''
 | 
				
			||||||
        pass
 | 
					        pass
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def specialize(self, oeb, opts, log, output_fmt):
 | 
				
			||||||
 | 
					        '''
 | 
				
			||||||
 | 
					        Called to allow the input plugin to specialize the parsed book
 | 
				
			||||||
 | 
					        for a particular output format. Called after postprocess_book
 | 
				
			||||||
 | 
					        and before any transforms are performed on the parsed book.
 | 
				
			||||||
 | 
					        '''
 | 
				
			||||||
 | 
					        pass
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class OutputFormatPlugin(Plugin):
 | 
					class OutputFormatPlugin(Plugin):
 | 
				
			||||||
    '''
 | 
					    '''
 | 
				
			||||||
    OutputFormatPlugins are responsible for converting an OEB document
 | 
					    OutputFormatPlugins are responsible for converting an OEB document
 | 
				
			||||||
 | 
				
			|||||||
@ -235,7 +235,7 @@ class SonyReaderOutput(OutputProfile):
 | 
				
			|||||||
    description = _('This profile is intended for the SONY PRS line. '
 | 
					    description = _('This profile is intended for the SONY PRS line. '
 | 
				
			||||||
                    'The 500/505/600/700 etc.')
 | 
					                    'The 500/505/600/700 etc.')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    screen_size               = (600, 775)
 | 
					    screen_size               = (590, 775)
 | 
				
			||||||
    dpi                       = 168.451
 | 
					    dpi                       = 168.451
 | 
				
			||||||
    fbase                     = 12
 | 
					    fbase                     = 12
 | 
				
			||||||
    fsizes                    = [7.5, 9, 10, 12, 15.5, 20, 22, 24]
 | 
					    fsizes                    = [7.5, 9, 10, 12, 15.5, 20, 22, 24]
 | 
				
			||||||
 | 
				
			|||||||
@ -235,6 +235,8 @@ def _run_filetype_plugins(path_to_file, ft=None, occasion='preprocess'):
 | 
				
			|||||||
        with plugin:
 | 
					        with plugin:
 | 
				
			||||||
            try:
 | 
					            try:
 | 
				
			||||||
                nfp = plugin.run(path_to_file)
 | 
					                nfp = plugin.run(path_to_file)
 | 
				
			||||||
 | 
					                if not nfp:
 | 
				
			||||||
 | 
					                    nfp = path_to_file
 | 
				
			||||||
            except:
 | 
					            except:
 | 
				
			||||||
                print 'Running file type plugin %s failed with traceback:'%plugin.name
 | 
					                print 'Running file type plugin %s failed with traceback:'%plugin.name
 | 
				
			||||||
                traceback.print_exc()
 | 
					                traceback.print_exc()
 | 
				
			||||||
@ -399,7 +401,7 @@ def initialize_plugins():
 | 
				
			|||||||
                plugin = load_plugin(zfp) if not isinstance(zfp, type) else zfp
 | 
					                plugin = load_plugin(zfp) if not isinstance(zfp, type) else zfp
 | 
				
			||||||
            except PluginNotFound:
 | 
					            except PluginNotFound:
 | 
				
			||||||
                continue
 | 
					                continue
 | 
				
			||||||
            plugin = initialize_plugin(plugin, zfp if not isinstance(zfp, type) else zfp)
 | 
					            plugin = initialize_plugin(plugin, None if isinstance(zfp, type) else zfp)
 | 
				
			||||||
            _initialized_plugins.append(plugin)
 | 
					            _initialized_plugins.append(plugin)
 | 
				
			||||||
        except:
 | 
					        except:
 | 
				
			||||||
            print 'Failed to initialize plugin...'
 | 
					            print 'Failed to initialize plugin...'
 | 
				
			||||||
 | 
				
			|||||||
@ -23,6 +23,8 @@ Run an embedded python interpreter.
 | 
				
			|||||||
                      help='Debug the specified device driver.')
 | 
					                      help='Debug the specified device driver.')
 | 
				
			||||||
    parser.add_option('-g', '--gui',  default=False, action='store_true',
 | 
					    parser.add_option('-g', '--gui',  default=False, action='store_true',
 | 
				
			||||||
                      help='Run the GUI',)
 | 
					                      help='Run the GUI',)
 | 
				
			||||||
 | 
					    parser.add_option('-w', '--viewer',  default=False, action='store_true',
 | 
				
			||||||
 | 
					                      help='Run the ebook viewer',)
 | 
				
			||||||
    parser.add_option('--paths', default=False, action='store_true',
 | 
					    parser.add_option('--paths', default=False, action='store_true',
 | 
				
			||||||
            help='Output the paths necessary to setup the calibre environment')
 | 
					            help='Output the paths necessary to setup the calibre environment')
 | 
				
			||||||
    parser.add_option('--migrate', action='store_true', default=False,
 | 
					    parser.add_option('--migrate', action='store_true', default=False,
 | 
				
			||||||
@ -98,6 +100,12 @@ def main(args=sys.argv):
 | 
				
			|||||||
    if opts.gui:
 | 
					    if opts.gui:
 | 
				
			||||||
        from calibre.gui2.main import main
 | 
					        from calibre.gui2.main import main
 | 
				
			||||||
        main(['calibre'])
 | 
					        main(['calibre'])
 | 
				
			||||||
 | 
					    elif opts.viewer:
 | 
				
			||||||
 | 
					        from calibre.gui2.viewer.main import main
 | 
				
			||||||
 | 
					        vargs = ['ebook-viewer', '--debug-javascript']
 | 
				
			||||||
 | 
					        if len(args) > 1:
 | 
				
			||||||
 | 
					            vargs.append(args[-1])
 | 
				
			||||||
 | 
					        main(vargs)
 | 
				
			||||||
    elif opts.command:
 | 
					    elif opts.command:
 | 
				
			||||||
        sys.argv = args[:1]
 | 
					        sys.argv = args[:1]
 | 
				
			||||||
        exec opts.command
 | 
					        exec opts.command
 | 
				
			||||||
 | 
				
			|||||||
@ -60,8 +60,10 @@ def debug(ioreg_to_tmp=False, buf=None):
 | 
				
			|||||||
        if isosx:
 | 
					        if isosx:
 | 
				
			||||||
            from calibre.devices.usbms.device import Device
 | 
					            from calibre.devices.usbms.device import Device
 | 
				
			||||||
            mount = repr(Device.osx_run_mount())
 | 
					            mount = repr(Device.osx_run_mount())
 | 
				
			||||||
            ioreg = Device.run_ioreg()
 | 
					            drives = pprint.pformat(Device.osx_get_usb_drives())
 | 
				
			||||||
            ioreg = 'Output from mount:\n\n'+mount+'\n\n'+ioreg
 | 
					            ioreg = 'Output from mount:\n'+mount+'\n\n'
 | 
				
			||||||
 | 
					            ioreg += 'Output from osx_get_usb_drives:\n'+drives+'\n\n'
 | 
				
			||||||
 | 
					            ioreg += Device.run_ioreg()
 | 
				
			||||||
        connected_devices = []
 | 
					        connected_devices = []
 | 
				
			||||||
        for dev in device_plugins():
 | 
					        for dev in device_plugins():
 | 
				
			||||||
            out('Looking for', dev.__class__.__name__)
 | 
					            out('Looking for', dev.__class__.__name__)
 | 
				
			||||||
 | 
				
			|||||||
@ -15,7 +15,7 @@ class ANDROID(USBMS):
 | 
				
			|||||||
    supported_platforms = ['windows', 'osx', 'linux']
 | 
					    supported_platforms = ['windows', 'osx', 'linux']
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Ordered list of supported formats
 | 
					    # Ordered list of supported formats
 | 
				
			||||||
    FORMATS     = ['epub']
 | 
					    FORMATS     = ['epub', 'pdf']
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    VENDOR_ID   = {
 | 
					    VENDOR_ID   = {
 | 
				
			||||||
            0x0bb4 : { 0x0c02 : [0x100], 0x0c01 : [0x100]},
 | 
					            0x0bb4 : { 0x0c02 : [0x100], 0x0c01 : [0x100]},
 | 
				
			||||||
 | 
				
			|||||||
@ -195,3 +195,15 @@ class BOOQ(EB600):
 | 
				
			|||||||
    WINDOWS_MAIN_MEM = 'EB600'
 | 
					    WINDOWS_MAIN_MEM = 'EB600'
 | 
				
			||||||
    WINDOWS_CARD_A_MEM = 'EB600'
 | 
					    WINDOWS_CARD_A_MEM = 'EB600'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class ELONEX(EB600):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    name = 'Elonex 600EB'
 | 
				
			||||||
 | 
					    gui_name = 'Elonex'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    FORMATS = ['epub', 'pdf', 'txt', 'html']
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    VENDOR_NAME = 'ELONEX'
 | 
				
			||||||
 | 
					    WINDOWS_MAIN_MEM = 'EBOOK'
 | 
				
			||||||
 | 
					    WINDOWS_CARD_A_MEM = 'EBOOK'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -6,6 +6,7 @@ the GUI. A device backend must subclass the L{Device} class. See prs500.py for
 | 
				
			|||||||
a backend that implement the Device interface for the SONY PRS500 Reader.
 | 
					a backend that implement the Device interface for the SONY PRS500 Reader.
 | 
				
			||||||
"""
 | 
					"""
 | 
				
			||||||
import os
 | 
					import os
 | 
				
			||||||
 | 
					from collections import namedtuple
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from calibre.customize import Plugin
 | 
					from calibre.customize import Plugin
 | 
				
			||||||
from calibre.constants import iswindows
 | 
					from calibre.constants import iswindows
 | 
				
			||||||
@ -43,6 +44,9 @@ class DevicePlugin(Plugin):
 | 
				
			|||||||
    #: Icon for this device
 | 
					    #: Icon for this device
 | 
				
			||||||
    icon = I('reader.svg')
 | 
					    icon = I('reader.svg')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Used by gui2.ui:annotations_fetched() and devices.kindle.driver:get_annotations()
 | 
				
			||||||
 | 
					    UserAnnotation = namedtuple('Annotation','type, bookmark')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @classmethod
 | 
					    @classmethod
 | 
				
			||||||
    def get_gui_name(cls):
 | 
					    def get_gui_name(cls):
 | 
				
			||||||
        if hasattr(cls, 'gui_name'):
 | 
					        if hasattr(cls, 'gui_name'):
 | 
				
			||||||
 | 
				
			|||||||
@ -7,10 +7,9 @@ __docformat__ = 'restructuredtext en'
 | 
				
			|||||||
'''
 | 
					'''
 | 
				
			||||||
Device driver for Amazon's Kindle
 | 
					Device driver for Amazon's Kindle
 | 
				
			||||||
'''
 | 
					'''
 | 
				
			||||||
 | 
					import os, re, sys
 | 
				
			||||||
import os
 | 
					from cStringIO import StringIO
 | 
				
			||||||
import re
 | 
					from struct import unpack
 | 
				
			||||||
import sys
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
from calibre.devices.usbms.driver import USBMS
 | 
					from calibre.devices.usbms.driver import USBMS
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -44,6 +43,7 @@ class KINDLE(USBMS):
 | 
				
			|||||||
    EBOOK_DIR_CARD_A = 'documents'
 | 
					    EBOOK_DIR_CARD_A = 'documents'
 | 
				
			||||||
    DELETE_EXTS = ['.mbp']
 | 
					    DELETE_EXTS = ['.mbp']
 | 
				
			||||||
    SUPPORTS_SUB_DIRS = True
 | 
					    SUPPORTS_SUB_DIRS = True
 | 
				
			||||||
 | 
					    SUPPORTS_ANNOTATIONS = True
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    WIRELESS_FILE_NAME_PATTERN = re.compile(
 | 
					    WIRELESS_FILE_NAME_PATTERN = re.compile(
 | 
				
			||||||
    r'(?P<title>[^-]+)-asin_(?P<asin>[a-zA-Z\d]{10,})-type_(?P<type>\w{4})-v_(?P<index>\d+).*')
 | 
					    r'(?P<title>[^-]+)-asin_(?P<asin>[a-zA-Z\d]{10,})-type_(?P<type>\w{4})-v_(?P<index>\d+).*')
 | 
				
			||||||
@ -60,6 +60,73 @@ class KINDLE(USBMS):
 | 
				
			|||||||
                                               'replace')
 | 
					                                               'replace')
 | 
				
			||||||
        return mi
 | 
					        return mi
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def get_annotations(self, path_map):
 | 
				
			||||||
 | 
					        MBP_FORMATS = [u'azw', u'mobi', u'prc', u'txt']
 | 
				
			||||||
 | 
					        TAN_FORMATS = [u'tpz', u'azw1']
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        mbp_formats = set()
 | 
				
			||||||
 | 
					        for fmt in MBP_FORMATS:
 | 
				
			||||||
 | 
					            mbp_formats.add(fmt)
 | 
				
			||||||
 | 
					        tan_formats = set()
 | 
				
			||||||
 | 
					        for fmt in TAN_FORMATS:
 | 
				
			||||||
 | 
					            tan_formats.add(fmt)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        def get_storage():
 | 
				
			||||||
 | 
					            storage = []
 | 
				
			||||||
 | 
					            if self._main_prefix:
 | 
				
			||||||
 | 
					                storage.append(os.path.join(self._main_prefix, self.EBOOK_DIR_MAIN))
 | 
				
			||||||
 | 
					            if self._card_a_prefix:
 | 
				
			||||||
 | 
					                storage.append(os.path.join(self._card_a_prefix, self.EBOOK_DIR_CARD_A))
 | 
				
			||||||
 | 
					            if self._card_b_prefix:
 | 
				
			||||||
 | 
					                storage.append(os.path.join(self._card_b_prefix, self.EBOOK_DIR_CARD_B))
 | 
				
			||||||
 | 
					            return storage
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        def resolve_bookmark_paths(storage, path_map):
 | 
				
			||||||
 | 
					            pop_list = []
 | 
				
			||||||
 | 
					            book_ext = {}
 | 
				
			||||||
 | 
					            for id in path_map:
 | 
				
			||||||
 | 
					                file_fmts = set()
 | 
				
			||||||
 | 
					                for fmt in path_map[id]['fmts']:
 | 
				
			||||||
 | 
					                    file_fmts.add(fmt)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                bookmark_extension = None
 | 
				
			||||||
 | 
					                if file_fmts.intersection(mbp_formats):
 | 
				
			||||||
 | 
					                    book_extension = list(file_fmts.intersection(mbp_formats))[0]
 | 
				
			||||||
 | 
					                    bookmark_extension = 'mbp'
 | 
				
			||||||
 | 
					                elif file_fmts.intersection(tan_formats):
 | 
				
			||||||
 | 
					                    book_extension = list(file_fmts.intersection(tan_formats))[0]
 | 
				
			||||||
 | 
					                    bookmark_extension = 'tan'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                if bookmark_extension:
 | 
				
			||||||
 | 
					                    for vol in storage:
 | 
				
			||||||
 | 
					                        bkmk_path = path_map[id]['path'].replace(os.path.abspath('/<storage>'),vol)
 | 
				
			||||||
 | 
					                        bkmk_path = bkmk_path.replace('bookmark',bookmark_extension)
 | 
				
			||||||
 | 
					                        if os.path.exists(bkmk_path):
 | 
				
			||||||
 | 
					                            path_map[id] = bkmk_path
 | 
				
			||||||
 | 
					                            book_ext[id] = book_extension
 | 
				
			||||||
 | 
					                            break
 | 
				
			||||||
 | 
					                    else:
 | 
				
			||||||
 | 
					                        pop_list.append(id)
 | 
				
			||||||
 | 
					                else:
 | 
				
			||||||
 | 
					                    pop_list.append(id)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            # Remove non-existent bookmark templates
 | 
				
			||||||
 | 
					            for id in pop_list:
 | 
				
			||||||
 | 
					                path_map.pop(id)
 | 
				
			||||||
 | 
					            return path_map, book_ext
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        storage = get_storage()
 | 
				
			||||||
 | 
					        path_map, book_ext = resolve_bookmark_paths(storage, path_map)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        bookmarked_books = {}
 | 
				
			||||||
 | 
					        for id in path_map:
 | 
				
			||||||
 | 
					            bookmark_ext = path_map[id].rpartition('.')[2]
 | 
				
			||||||
 | 
					            myBookmark = Bookmark(path_map[id], id, book_ext[id], bookmark_ext)
 | 
				
			||||||
 | 
					            bookmarked_books[id] = self.UserAnnotation(type='kindle', bookmark=myBookmark)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # This returns as job.result in gui2.ui.annotations_fetched(self,job)
 | 
				
			||||||
 | 
					        return bookmarked_books
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class KINDLE2(KINDLE):
 | 
					class KINDLE2(KINDLE):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -79,3 +146,213 @@ class KINDLE_DX(KINDLE2):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    PRODUCT_ID = [0x0003]
 | 
					    PRODUCT_ID = [0x0003]
 | 
				
			||||||
    BCD        = [0x0100]
 | 
					    BCD        = [0x0100]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class Bookmark():
 | 
				
			||||||
 | 
					    '''
 | 
				
			||||||
 | 
					    A simple class fetching bookmark data
 | 
				
			||||||
 | 
					    Kindle-specific
 | 
				
			||||||
 | 
					    '''
 | 
				
			||||||
 | 
					    def __init__(self, path, id, book_format, bookmark_extension):
 | 
				
			||||||
 | 
					        self.book_format = book_format
 | 
				
			||||||
 | 
					        self.bookmark_extension = bookmark_extension
 | 
				
			||||||
 | 
					        self.book_length = 0
 | 
				
			||||||
 | 
					        self.id = id
 | 
				
			||||||
 | 
					        self.last_read = 0
 | 
				
			||||||
 | 
					        self.last_read_location = 0
 | 
				
			||||||
 | 
					        self.timestamp = 0
 | 
				
			||||||
 | 
					        self.user_notes = None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        self.get_bookmark_data(path)
 | 
				
			||||||
 | 
					        self.get_book_length(path)
 | 
				
			||||||
 | 
					        try:
 | 
				
			||||||
 | 
					            self.percent_read = float(100*self.last_read / self.book_length)
 | 
				
			||||||
 | 
					        except:
 | 
				
			||||||
 | 
					            self.percent_read = 0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def record(self, n):
 | 
				
			||||||
 | 
					        from calibre.ebooks.metadata.mobi import StreamSlicer
 | 
				
			||||||
 | 
					        if n >= self.nrecs:
 | 
				
			||||||
 | 
					            raise ValueError('non-existent record %r' % n)
 | 
				
			||||||
 | 
					        offoff = 78 + (8 * n)
 | 
				
			||||||
 | 
					        start, = unpack('>I', self.data[offoff + 0:offoff + 4])
 | 
				
			||||||
 | 
					        stop = None
 | 
				
			||||||
 | 
					        if n < (self.nrecs - 1):
 | 
				
			||||||
 | 
					            stop, = unpack('>I', self.data[offoff + 8:offoff + 12])
 | 
				
			||||||
 | 
					        return StreamSlicer(self.stream, start, stop)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def get_bookmark_data(self, path):
 | 
				
			||||||
 | 
					        ''' Return the timestamp and last_read_location '''
 | 
				
			||||||
 | 
					        from calibre.ebooks.metadata.mobi import StreamSlicer
 | 
				
			||||||
 | 
					        user_notes = {}
 | 
				
			||||||
 | 
					        if self.bookmark_extension == 'mbp':
 | 
				
			||||||
 | 
					            MAGIC_MOBI_CONSTANT = 150
 | 
				
			||||||
 | 
					            with open(path,'rb') as f:
 | 
				
			||||||
 | 
					                stream = StringIO(f.read())
 | 
				
			||||||
 | 
					                data = StreamSlicer(stream)
 | 
				
			||||||
 | 
					                self.timestamp, = unpack('>I', data[0x24:0x28])
 | 
				
			||||||
 | 
					                bpar_offset, = unpack('>I', data[0x4e:0x52])
 | 
				
			||||||
 | 
					                lrlo = bpar_offset + 0x0c
 | 
				
			||||||
 | 
					                self.last_read = int(unpack('>I', data[lrlo:lrlo+4])[0])
 | 
				
			||||||
 | 
					                self.last_read_location = self.last_read/MAGIC_MOBI_CONSTANT + 1
 | 
				
			||||||
 | 
					                entries, = unpack('>I', data[0x4a:0x4e])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                # Store the annotations/locations
 | 
				
			||||||
 | 
					                bpl = bpar_offset + 4
 | 
				
			||||||
 | 
					                bpar_len, = unpack('>I', data[bpl:bpl+4])
 | 
				
			||||||
 | 
					                bpar_len += 8
 | 
				
			||||||
 | 
					                #print "bpar_len: 0x%x" % bpar_len
 | 
				
			||||||
 | 
					                eo = bpar_offset + bpar_len
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                # Walk bookmark entries
 | 
				
			||||||
 | 
					                #print " --- %s --- " % path
 | 
				
			||||||
 | 
					                current_entry = 1
 | 
				
			||||||
 | 
					                sig = data[eo:eo+4]
 | 
				
			||||||
 | 
					                previous_block = None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                while sig == 'DATA':
 | 
				
			||||||
 | 
					                    text = None
 | 
				
			||||||
 | 
					                    entry_type = None
 | 
				
			||||||
 | 
					                    rec_len, = unpack('>I', data[eo+4:eo+8])
 | 
				
			||||||
 | 
					                    if rec_len == 0:
 | 
				
			||||||
 | 
					                        current_block = "empty_data"
 | 
				
			||||||
 | 
					                    elif  data[eo+8:eo+12] == "EBAR":
 | 
				
			||||||
 | 
					                        current_block = "data_header"
 | 
				
			||||||
 | 
					                        #entry_type = "data_header"
 | 
				
			||||||
 | 
					                        location, = unpack('>I', data[eo+0x34:eo+0x38])
 | 
				
			||||||
 | 
					                        #print "data_header location: %d" % location
 | 
				
			||||||
 | 
					                    else:
 | 
				
			||||||
 | 
					                        current_block = "text_block"
 | 
				
			||||||
 | 
					                        if previous_block == 'empty_data':
 | 
				
			||||||
 | 
					                            entry_type = 'Note'
 | 
				
			||||||
 | 
					                        elif previous_block == 'data_header':
 | 
				
			||||||
 | 
					                            entry_type = 'Highlight'
 | 
				
			||||||
 | 
					                        text = data[eo+8:eo+8+rec_len].decode('utf-16-be')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                    if entry_type:
 | 
				
			||||||
 | 
					                        displayed_location = location/MAGIC_MOBI_CONSTANT + 1
 | 
				
			||||||
 | 
					                        user_notes[location] = dict(id=self.id,
 | 
				
			||||||
 | 
					                                                    displayed_location=displayed_location,
 | 
				
			||||||
 | 
					                                                    type=entry_type,
 | 
				
			||||||
 | 
					                                                    text=text)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                    eo += rec_len + 8
 | 
				
			||||||
 | 
					                    current_entry += 1
 | 
				
			||||||
 | 
					                    previous_block = current_block
 | 
				
			||||||
 | 
					                    sig = data[eo:eo+4]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                while sig == 'BKMK':
 | 
				
			||||||
 | 
					                    # Fix start location for Highlights using BKMK data
 | 
				
			||||||
 | 
					                    end_loc, = unpack('>I', data[eo+0x10:eo+0x14])
 | 
				
			||||||
 | 
					                    if end_loc in user_notes and user_notes[end_loc]['type'] == 'Highlight':
 | 
				
			||||||
 | 
					                        start, = unpack('>I', data[eo+8:eo+12])
 | 
				
			||||||
 | 
					                        user_notes[start] = user_notes[end_loc]
 | 
				
			||||||
 | 
					                        user_notes.pop(end_loc)
 | 
				
			||||||
 | 
					                    elif end_loc in user_notes and user_notes[end_loc]['type'] == 'Note':
 | 
				
			||||||
 | 
					                        # Skip duplicate bookmarks for notes
 | 
				
			||||||
 | 
					                        pass
 | 
				
			||||||
 | 
					                    else:
 | 
				
			||||||
 | 
					                        # If a bookmark coincides with a user annotation, the locs could
 | 
				
			||||||
 | 
					                        # be the same - cheat by nudging -1
 | 
				
			||||||
 | 
					                        # Skip bookmark for last_read_location
 | 
				
			||||||
 | 
					                        if end_loc != self.last_read:
 | 
				
			||||||
 | 
					                            displayed_location = end_loc/MAGIC_MOBI_CONSTANT + 1
 | 
				
			||||||
 | 
					                            user_notes[end_loc - 1] = dict(id=self.id,
 | 
				
			||||||
 | 
					                                                           displayed_location=displayed_location,
 | 
				
			||||||
 | 
					                                                           type='Bookmark',
 | 
				
			||||||
 | 
					                                                           text=None)
 | 
				
			||||||
 | 
					                    rec_len, = unpack('>I', data[eo+4:eo+8])
 | 
				
			||||||
 | 
					                    eo += rec_len + 8
 | 
				
			||||||
 | 
					                    sig = data[eo:eo+4]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        elif self.bookmark_extension == 'tan':
 | 
				
			||||||
 | 
					            # TAN bookmarks
 | 
				
			||||||
 | 
					            MAGIC_TOPAZ_CONSTANT = 33.33
 | 
				
			||||||
 | 
					            self.timestamp = os.path.getmtime(path)
 | 
				
			||||||
 | 
					            with open(path,'rb') as f:
 | 
				
			||||||
 | 
					                stream = StringIO(f.read())
 | 
				
			||||||
 | 
					                data = StreamSlicer(stream)
 | 
				
			||||||
 | 
					                self.last_read = int(unpack('>I', data[5:9])[0])
 | 
				
			||||||
 | 
					                self.last_read_location = self.last_read/MAGIC_TOPAZ_CONSTANT + 1
 | 
				
			||||||
 | 
					                entries, = unpack('>I', data[9:13])
 | 
				
			||||||
 | 
					                current_entry = 0
 | 
				
			||||||
 | 
					                e_base = 0x0d
 | 
				
			||||||
 | 
					                while current_entry < entries:
 | 
				
			||||||
 | 
					                    location, = unpack('>I', data[e_base+2:e_base+6])
 | 
				
			||||||
 | 
					                    text = None
 | 
				
			||||||
 | 
					                    text_len, = unpack('>I', data[e_base+0xA:e_base+0xE])
 | 
				
			||||||
 | 
					                    e_type, = unpack('>B', data[e_base+1])
 | 
				
			||||||
 | 
					                    if e_type == 0:
 | 
				
			||||||
 | 
					                        e_type = 'Bookmark'
 | 
				
			||||||
 | 
					                    elif e_type == 1:
 | 
				
			||||||
 | 
					                        e_type = 'Highlight'
 | 
				
			||||||
 | 
					                        text = "(Topaz highlights not yet supported)"
 | 
				
			||||||
 | 
					                    elif e_type == 2:
 | 
				
			||||||
 | 
					                        e_type = 'Note'
 | 
				
			||||||
 | 
					                        text = data[e_base+0x10:e_base+0x10+text_len]
 | 
				
			||||||
 | 
					                    else:
 | 
				
			||||||
 | 
					                        e_type = 'Unknown annotation type'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                    if self.book_format in ['tpz','azw1']:
 | 
				
			||||||
 | 
					                        # *** This needs fine-tuning
 | 
				
			||||||
 | 
					                        displayed_location = location/MAGIC_TOPAZ_CONSTANT + 1
 | 
				
			||||||
 | 
					                    elif self.book_format == 'pdf':
 | 
				
			||||||
 | 
					                        # *** This needs testing
 | 
				
			||||||
 | 
					                        displayed_location = location
 | 
				
			||||||
 | 
					                    user_notes[location] = dict(id=self.id,
 | 
				
			||||||
 | 
					                                                displayed_location=displayed_location,
 | 
				
			||||||
 | 
					                                                type=e_type,
 | 
				
			||||||
 | 
					                                                text=text)
 | 
				
			||||||
 | 
					                    if text_len == 0xFFFFFFFF:
 | 
				
			||||||
 | 
					                        e_base = e_base + 14
 | 
				
			||||||
 | 
					                    else:
 | 
				
			||||||
 | 
					                        e_base = e_base + 14 + 2 + text_len
 | 
				
			||||||
 | 
					                    current_entry += 1
 | 
				
			||||||
 | 
					                for location in user_notes:
 | 
				
			||||||
 | 
					                    if location == self.last_read:
 | 
				
			||||||
 | 
					                        user_notes.pop(location)
 | 
				
			||||||
 | 
					                        break
 | 
				
			||||||
 | 
					        else:
 | 
				
			||||||
 | 
					            print "unsupported bookmark_extension: %s" % self.bookmark_extension
 | 
				
			||||||
 | 
					        self.user_notes = user_notes
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        '''
 | 
				
			||||||
 | 
					        for location in sorted(user_notes):
 | 
				
			||||||
 | 
					            print '  Location %d: %s\n%s' % (user_notes[location]['displayed_location'],
 | 
				
			||||||
 | 
					                                                     user_notes[location]['type'],
 | 
				
			||||||
 | 
					                                    '\n'.join(self.textdump(user_notes[location]['text'])))
 | 
				
			||||||
 | 
					        '''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def get_book_length(self, path):
 | 
				
			||||||
 | 
					        from calibre.ebooks.metadata.mobi import StreamSlicer
 | 
				
			||||||
 | 
					        book_fs = path.replace('.%s' % self.bookmark_extension,'.%s' % self.book_format)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        self.book_length = 0
 | 
				
			||||||
 | 
					        if self.bookmark_extension == 'mbp':
 | 
				
			||||||
 | 
					            # Read the book len from the header
 | 
				
			||||||
 | 
					            with open(book_fs,'rb') as f:
 | 
				
			||||||
 | 
					                self.stream = StringIO(f.read())
 | 
				
			||||||
 | 
					                self.data = StreamSlicer(self.stream)
 | 
				
			||||||
 | 
					                self.nrecs, = unpack('>H', self.data[76:78])
 | 
				
			||||||
 | 
					                record0 = self.record(0)
 | 
				
			||||||
 | 
					                self.book_length = int(unpack('>I', record0[0x04:0x08])[0])
 | 
				
			||||||
 | 
					        elif self.bookmark_extension == 'tan':
 | 
				
			||||||
 | 
					            # Read bookLength from metadata
 | 
				
			||||||
 | 
					            with open(book_fs,'rb') as f:
 | 
				
			||||||
 | 
					                stream = StringIO(f.read())
 | 
				
			||||||
 | 
					                raw = stream.read(8*1024)
 | 
				
			||||||
 | 
					                if not raw.startswith('TPZ'):
 | 
				
			||||||
 | 
					                    raise ValueError('Not a Topaz file')
 | 
				
			||||||
 | 
					                first = raw.find('metadata')
 | 
				
			||||||
 | 
					                if first < 0:
 | 
				
			||||||
 | 
					                    raise ValueError('Invalid Topaz file')
 | 
				
			||||||
 | 
					                second = raw.find('metadata', first+10)
 | 
				
			||||||
 | 
					                if second < 0:
 | 
				
			||||||
 | 
					                    raise ValueError('Invalid Topaz file')
 | 
				
			||||||
 | 
					                raw = raw[second:second+1000]
 | 
				
			||||||
 | 
					                idx = raw.find('bookLength')
 | 
				
			||||||
 | 
					                if idx > -1:
 | 
				
			||||||
 | 
					                    length = ord(raw[idx+len('bookLength')])
 | 
				
			||||||
 | 
					                    self.book_length = int(raw[idx+len('bookLength')+1:idx+len('bookLength')+1+length])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        else:
 | 
				
			||||||
 | 
					            print "unsupported bookmark_extension: %s" % self.bookmark_extension
 | 
				
			||||||
 | 
				
			|||||||
@ -8,10 +8,10 @@ from ctypes import cdll, POINTER, byref, pointer, Structure as _Structure, \
 | 
				
			|||||||
                   c_ubyte, c_ushort, c_int, c_char, c_void_p, c_byte, c_uint
 | 
					                   c_ubyte, c_ushort, c_int, c_char, c_void_p, c_byte, c_uint
 | 
				
			||||||
from errno import EBUSY, ENOMEM
 | 
					from errno import EBUSY, ENOMEM
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from calibre import iswindows, isosx, load_library
 | 
					from calibre import iswindows, isosx, isfreebsd, load_library
 | 
				
			||||||
 | 
					
 | 
				
			||||||
_libusb_name = 'libusb'
 | 
					_libusb_name = 'libusb'
 | 
				
			||||||
PATH_MAX = 511 if iswindows else 1024 if isosx else 4096
 | 
					PATH_MAX = 511 if iswindows else 1024 if (isosx or isfreebsd) else 4096
 | 
				
			||||||
if iswindows:
 | 
					if iswindows:
 | 
				
			||||||
    class Structure(_Structure):
 | 
					    class Structure(_Structure):
 | 
				
			||||||
        _pack_ = 1
 | 
					        _pack_ = 1
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										10
									
								
								src/calibre/devices/teclast/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										10
									
								
								src/calibre/devices/teclast/__init__.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,10 @@
 | 
				
			|||||||
 | 
					#!/usr/bin/env python
 | 
				
			||||||
 | 
					# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 | 
				
			||||||
 | 
					from __future__ import with_statement
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					__license__   = 'GPL v3'
 | 
				
			||||||
 | 
					__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 | 
				
			||||||
 | 
					__docformat__ = 'restructuredtext en'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
							
								
								
									
										42
									
								
								src/calibre/devices/teclast/driver.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								src/calibre/devices/teclast/driver.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,42 @@
 | 
				
			|||||||
 | 
					__license__   = 'GPL v3'
 | 
				
			||||||
 | 
					__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 | 
				
			||||||
 | 
					__docformat__ = 'restructuredtext en'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from calibre.devices.usbms.driver import USBMS
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class TECLAST_K3(USBMS):
					    '''
					    USB mass storage driver settings for the Teclast K3 reader.

					    Everything except windows_sort_drives is declarative configuration
					    consumed by the USBMS base class.
					    '''

					    # Plugin description
					    name           = 'Teclast K3 Device Interface'
					    gui_name       = 'K3'
					    description    = _('Communicate with the Teclast K3 reader.')
					    author         = 'Kovid Goyal'
					    supported_platforms = ['windows', 'osx', 'linux']

					    # Ordered list of supported formats
					    FORMATS     = ['epub', 'fb2', 'doc', 'pdf', 'txt']

					    # USB identifiers used to recognise the device
					    VENDOR_ID   = [0x071b]
					    PRODUCT_ID  = [0x3203]
					    BCD         = [0x0000]

					    # Main memory and card share one Windows identifier string
					    VENDOR_NAME      = 'TECLAST'
					    WINDOWS_MAIN_MEM = 'DIGITAL_PLAYER'
					    WINDOWS_CARD_A_MEM = 'DIGITAL_PLAYER'

					    MAIN_MEMORY_VOLUME_LABEL  = 'K3 Main Memory'
					    STORAGE_CARD_VOLUME_LABEL = 'K3 Storage Card'

					    # Books are stored relative to the root of each volume
					    EBOOK_DIR_MAIN = ''
					    EBOOK_DIR_CARD_A = ''
					    SUPPORTS_SUB_DIRS = True

					    def windows_sort_drives(self, drives):
					        '''
					        Exchange the 'main' and 'carda' entries of drives when the card
					        entry compares lower than the main entry.

					        NOTE(review): presumably needed because both volumes report the
					        same Windows identifier and can only be told apart by their
					        ordering -- confirm against the USBMS detection code.

					        :param drives: dictionary that may hold 'main' and 'carda' keys.
					        :return: the same dictionary, modified in place when swapped.
					        '''
					        main_drive, card_drive = drives.get('main'), drives.get('carda')
					        # Nothing to reorder unless both volumes were detected
					        if not (card_drive and main_drive):
					            return drives
					        if card_drive < main_drive:
					            drives['main'], drives['carda'] = card_drive, main_drive
					        return drives
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -4,8 +4,7 @@ __license__ = 'GPL 3'
 | 
				
			|||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
 | 
					__copyright__ = '2009, John Schember <john@nachtimwald.com>'
 | 
				
			||||||
__docformat__ = 'restructuredtext en'
 | 
					__docformat__ = 'restructuredtext en'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import os
 | 
					import os, shutil, time
 | 
				
			||||||
import shutil
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
from calibre.devices.errors import PathError
 | 
					from calibre.devices.errors import PathError
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -50,11 +49,12 @@ class CLI(object):
 | 
				
			|||||||
        d = os.path.dirname(path)
 | 
					        d = os.path.dirname(path)
 | 
				
			||||||
        if not os.path.exists(d):
 | 
					        if not os.path.exists(d):
 | 
				
			||||||
            os.makedirs(d)
 | 
					            os.makedirs(d)
 | 
				
			||||||
        with open(path, 'wb') as dest:
 | 
					        with open(path, 'w+b') as dest:
 | 
				
			||||||
            try:
 | 
					            try:
 | 
				
			||||||
                shutil.copyfileobj(infile, dest)
 | 
					                shutil.copyfileobj(infile, dest)
 | 
				
			||||||
            except IOError:
 | 
					            except IOError:
 | 
				
			||||||
                print 'WARNING: First attempt to send file to device failed'
 | 
					                print 'WARNING: First attempt to send file to device failed'
 | 
				
			||||||
 | 
					                time.sleep(0.2)
 | 
				
			||||||
                infile.seek(0)
 | 
					                infile.seek(0)
 | 
				
			||||||
                dest.seek(0)
 | 
					                dest.seek(0)
 | 
				
			||||||
                dest.truncate()
 | 
					                dest.truncate()
 | 
				
			||||||
 | 
				
			|||||||
@ -17,6 +17,7 @@ import time
 | 
				
			|||||||
import re
 | 
					import re
 | 
				
			||||||
import sys
 | 
					import sys
 | 
				
			||||||
import glob
 | 
					import glob
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from itertools import repeat
 | 
					from itertools import repeat
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from calibre.devices.interface import DevicePlugin
 | 
					from calibre.devices.interface import DevicePlugin
 | 
				
			||||||
@ -333,10 +334,14 @@ class Device(DeviceConfig, DevicePlugin):
 | 
				
			|||||||
                    raise
 | 
					                    raise
 | 
				
			||||||
            time.sleep(2)
 | 
					            time.sleep(2)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _osx_bsd_names(self):
 | 
					    @classmethod
 | 
				
			||||||
 | 
					    def osx_get_usb_drives(cls):
 | 
				
			||||||
        if usbobserver_err:
 | 
					        if usbobserver_err:
 | 
				
			||||||
            raise RuntimeError('Failed to load usbobserver: '+usbobserver_err)
 | 
					            raise RuntimeError('Failed to load usbobserver: '+usbobserver_err)
 | 
				
			||||||
        drives = usbobserver.get_usb_drives()
 | 
					        return usbobserver.get_usb_drives()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def _osx_bsd_names(self):
 | 
				
			||||||
 | 
					        drives = self.osx_get_usb_drives()
 | 
				
			||||||
        matches = []
 | 
					        matches = []
 | 
				
			||||||
        d = self.detected_device
 | 
					        d = self.detected_device
 | 
				
			||||||
        if d.serial:
 | 
					        if d.serial:
 | 
				
			||||||
@ -394,16 +399,6 @@ class Device(DeviceConfig, DevicePlugin):
 | 
				
			|||||||
        if len(matches) > 2:
 | 
					        if len(matches) > 2:
 | 
				
			||||||
            drives['cardb'] = matches[2]
 | 
					            drives['cardb'] = matches[2]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        pat = self.OSX_MAIN_MEM_VOL_PAT
 | 
					 | 
				
			||||||
        if pat is not None and len(drives) > 1 and 'main' in drives:
 | 
					 | 
				
			||||||
            if pat.search(drives['main']) is None:
 | 
					 | 
				
			||||||
                main = drives['main']
 | 
					 | 
				
			||||||
                for x in ('carda', 'cardb'):
 | 
					 | 
				
			||||||
                    if x in drives and pat.search(drives[x]):
 | 
					 | 
				
			||||||
                        drives['main'] = drives.pop(x)
 | 
					 | 
				
			||||||
                        drives[x] = main
 | 
					 | 
				
			||||||
                        break
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        return drives
 | 
					        return drives
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def osx_bsd_names(self):
 | 
					    def osx_bsd_names(self):
 | 
				
			||||||
@ -427,6 +422,16 @@ class Device(DeviceConfig, DevicePlugin):
 | 
				
			|||||||
        if drives['main'] is None:
 | 
					        if drives['main'] is None:
 | 
				
			||||||
            print bsd_drives, mount_map, drives
 | 
					            print bsd_drives, mount_map, drives
 | 
				
			||||||
            raise DeviceError(_('Unable to detect the %s mount point. Try rebooting.')%self.__class__.__name__)
 | 
					            raise DeviceError(_('Unable to detect the %s mount point. Try rebooting.')%self.__class__.__name__)
 | 
				
			||||||
 | 
					        pat = self.OSX_MAIN_MEM_VOL_PAT
 | 
				
			||||||
 | 
					        if pat is not None and len(drives) > 1 and 'main' in drives:
 | 
				
			||||||
 | 
					            if pat.search(drives['main']) is None:
 | 
				
			||||||
 | 
					                main = drives['main']
 | 
				
			||||||
 | 
					                for x in ('carda', 'cardb'):
 | 
				
			||||||
 | 
					                    if x in drives and pat.search(drives[x]):
 | 
				
			||||||
 | 
					                        drives['main'] = drives.pop(x)
 | 
				
			||||||
 | 
					                        drives[x] = main
 | 
				
			||||||
 | 
					                        break
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        self._main_prefix = drives['main']+os.sep
 | 
					        self._main_prefix = drives['main']+os.sep
 | 
				
			||||||
        def get_card_prefix(c):
 | 
					        def get_card_prefix(c):
 | 
				
			||||||
            ans = drives.get(c, None)
 | 
					            ans = drives.get(c, None)
 | 
				
			||||||
@ -789,7 +794,13 @@ class Device(DeviceConfig, DevicePlugin):
 | 
				
			|||||||
        '''
 | 
					        '''
 | 
				
			||||||
        return components
 | 
					        return components
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def create_upload_path(self, path, mdata, fname):
 | 
					    def get_annotations(self, path_map):
					        '''
					        Resolve path_map to annotation_map of files found on the device.

					        This implementation performs no lookup: path_map is not consulted
					        and a new, empty dictionary is returned on every call.
					        '''
					        annotation_map = {}
					        return annotation_map
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def create_upload_path(self, path, mdata, fname, create_dirs=True):
 | 
				
			||||||
        path = os.path.abspath(path)
 | 
					        path = os.path.abspath(path)
 | 
				
			||||||
        extra_components = []
 | 
					        extra_components = []
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -848,7 +859,7 @@ class Device(DeviceConfig, DevicePlugin):
 | 
				
			|||||||
        filedir = os.path.dirname(filepath)
 | 
					        filedir = os.path.dirname(filepath)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if not os.path.exists(filedir):
 | 
					        if create_dirs and not os.path.exists(filedir):
 | 
				
			||||||
            os.makedirs(filedir)
 | 
					            os.makedirs(filedir)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        return filepath
 | 
					        return filepath
 | 
				
			||||||
 | 
				
			|||||||
@ -123,7 +123,7 @@ class USBMS(CLI, Device):
 | 
				
			|||||||
        '''
 | 
					        '''
 | 
				
			||||||
        :path: the full path were the associated book is located.
 | 
					        :path: the full path were the associated book is located.
 | 
				
			||||||
        :filename: the name of the book file without the extension.
 | 
					        :filename: the name of the book file without the extension.
 | 
				
			||||||
        :metatdata: metadata belonging to the book. Use metadata.thumbnail
 | 
					        :metadata: metadata belonging to the book. Use metadata.thumbnail
 | 
				
			||||||
        for cover
 | 
					        for cover
 | 
				
			||||||
        '''
 | 
					        '''
 | 
				
			||||||
        pass
 | 
					        pass
 | 
				
			||||||
 | 
				
			|||||||
@ -129,3 +129,12 @@ def render_html(path_to_html, width=590, height=750):
 | 
				
			|||||||
    del loop
 | 
					    del loop
 | 
				
			||||||
    return renderer
 | 
					    return renderer
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def check_ebook_format(stream, current_guess):
					    '''
					    Refine a guessed e-book format by inspecting the start of stream.

					    Guesses of prc, mobi, azw or azw1 (compared case-insensitively) are
					    replaced by 'tpz' when the stream starts with the signature TPZ. Any
					    other guess, including an empty one or None, is returned unchanged
					    and the stream is left untouched.

					    :param stream: seekable file-like object. Whenever its signature is
					                   examined it is left positioned at offset 0.
					    :param current_guess: format name guessed so far.
					    :return: 'tpz' or current_guess.
					    '''
					    # A missing guess cannot be refined (and has no lower() to call)
					    if not current_guess:
					        return current_guess
					    if current_guess.lower() not in ('prc', 'mobi', 'azw', 'azw1'):
					        return current_guess

					    signature = b'TPZ'
					    stream.seek(0)
					    header = stream.read(len(signature))
					    stream.seek(0)

					    # Compare like with like: byte streams against the byte signature,
					    # text streams against its decoded form.
					    if not isinstance(header, bytes):
					        signature = signature.decode('ascii')
					    if header == signature:
					        return 'tpz'
					    return current_guess
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -53,13 +53,15 @@ _CHARSET_ALIASES = { "macintosh" : "mac-roman",
 | 
				
			|||||||
                        "x-sjis" : "shift-jis" }
 | 
					                        "x-sjis" : "shift-jis" }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def force_encoding(raw, verbose):
 | 
					def force_encoding(raw, verbose, assume_utf8=False):
 | 
				
			||||||
    from calibre.constants import preferred_encoding
 | 
					    from calibre.constants import preferred_encoding
 | 
				
			||||||
    try:
 | 
					    try:
 | 
				
			||||||
        chardet = detect(raw)
 | 
					        chardet = detect(raw)
 | 
				
			||||||
    except:
 | 
					    except:
 | 
				
			||||||
        chardet = {'encoding':preferred_encoding, 'confidence':0}
 | 
					        chardet = {'encoding':preferred_encoding, 'confidence':0}
 | 
				
			||||||
    encoding = chardet['encoding']
 | 
					    encoding = chardet['encoding']
 | 
				
			||||||
 | 
					    if chardet['confidence'] < 1 and assume_utf8:
 | 
				
			||||||
 | 
					        encoding = 'utf-8'
 | 
				
			||||||
    if chardet['confidence'] < 1 and verbose:
 | 
					    if chardet['confidence'] < 1 and verbose:
 | 
				
			||||||
        print 'WARNING: Encoding detection confidence %d%%'%(chardet['confidence']*100)
 | 
					        print 'WARNING: Encoding detection confidence %d%%'%(chardet['confidence']*100)
 | 
				
			||||||
    if not encoding:
 | 
					    if not encoding:
 | 
				
			||||||
@ -73,7 +75,7 @@ def force_encoding(raw, verbose):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False,
 | 
					def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False,
 | 
				
			||||||
                   resolve_entities=False):
 | 
					                   resolve_entities=False, assume_utf8=False):
 | 
				
			||||||
    '''
 | 
					    '''
 | 
				
			||||||
    Force conversion of byte string to unicode. Tries to look for XML/HTML
 | 
					    Force conversion of byte string to unicode. Tries to look for XML/HTML
 | 
				
			||||||
    encoding declaration first, if not found uses the chardet library and
 | 
					    encoding declaration first, if not found uses the chardet library and
 | 
				
			||||||
@ -95,7 +97,7 @@ def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False,
 | 
				
			|||||||
                encoding = match.group(1)
 | 
					                encoding = match.group(1)
 | 
				
			||||||
                break
 | 
					                break
 | 
				
			||||||
        if encoding is None:
 | 
					        if encoding is None:
 | 
				
			||||||
            encoding = force_encoding(raw, verbose)
 | 
					            encoding = force_encoding(raw, verbose, assume_utf8=assume_utf8)
 | 
				
			||||||
        try:
 | 
					        try:
 | 
				
			||||||
            if encoding.lower().strip() == 'macintosh':
 | 
					            if encoding.lower().strip() == 'macintosh':
 | 
				
			||||||
                encoding = 'mac-roman'
 | 
					                encoding = 'mac-roman'
 | 
				
			||||||
 | 
				
			|||||||
@ -1,213 +1,17 @@
 | 
				
			|||||||
from __future__ import with_statement
 | 
					 | 
				
			||||||
''' CHM File decoding support '''
 | 
					''' CHM File decoding support '''
 | 
				
			||||||
__license__ = 'GPL v3'
 | 
					__license__ = 'GPL v3'
 | 
				
			||||||
__copyright__  = '2008, Kovid Goyal <kovid at kovidgoyal.net>,' \
 | 
					__copyright__  = '2008, Kovid Goyal <kovid at kovidgoyal.net>,' \
 | 
				
			||||||
                 ' and Alex Bramley <a.bramley at gmail.com>.'
 | 
					                 ' and Alex Bramley <a.bramley at gmail.com>.'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import os, shutil, uuid, re
 | 
					import os, uuid
 | 
				
			||||||
from tempfile import mkdtemp
 | 
					 | 
				
			||||||
from mimetypes import guess_type as guess_mimetype
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
from BeautifulSoup import BeautifulSoup, NavigableString
 | 
					 | 
				
			||||||
from lxml import html
 | 
					from lxml import html
 | 
				
			||||||
from pychm.chm import CHMFile
 | 
					 | 
				
			||||||
from pychm.chmlib import (
 | 
					 | 
				
			||||||
  CHM_RESOLVE_SUCCESS, CHM_ENUMERATE_NORMAL,
 | 
					 | 
				
			||||||
  chm_enumerate,
 | 
					 | 
				
			||||||
)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
 | 
					from calibre.customize.conversion import InputFormatPlugin
 | 
				
			||||||
from calibre.utils.config import OptionParser
 | 
					from calibre.ptempfile import TemporaryDirectory
 | 
				
			||||||
from calibre.ebooks.metadata.toc import TOC
 | 
					 | 
				
			||||||
from calibre.utils.localization import get_lang
 | 
					from calibre.utils.localization import get_lang
 | 
				
			||||||
from calibre.utils.filenames import ascii_filename
 | 
					from calibre.utils.filenames import ascii_filename
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
def match_string(s1, s2_already_lowered):
    '''
    Case-insensitive equality test in which only s1 still has to be
    lower-cased; s2_already_lowered is compared as given.

    Returns False when either argument is None.
    '''
    if s1 is None or s2_already_lowered is None:
        return False
    return bool(s1.lower() == s2_already_lowered)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def check_all_prev_empty(tag):
    '''
    Tell whether tag and everything before it on the same level is blank.

    Starting at tag, the previousSibling chain is followed until it ends.
    The result is False as soon as a node whose class is exactly
    NavigableString fails check_empty; nodes of any other class are stepped
    over. None (no node at all) yields True.

    The chain is walked with a loop rather than by recursion so that a long
    run of siblings cannot exhaust the interpreter's recursion limit.
    '''
    node = tag
    while node is not None:
        if node.__class__ == NavigableString and not check_empty(node):
            return False
        node = node.previousSibling
    return True
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def check_empty(s, rex=re.compile(r'\S')):
    '''
    Report whether s is blank.

    rex is searched for in s and True is returned only when nothing matches.
    The default pattern looks for one non-whitespace character, so '' and
    whitespace-only strings count as empty.
    '''
    first_hit = rex.search(s)
    if first_hit is None:
        return True
    return False
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def option_parser():
    '''
    Create the OptionParser used for 'prog [options] mybook.chm'.

    Options are registered in a fixed order: output directory and verbosity
    first, then the metadata related switches.

    NOTE(review): several help texts talk about LRF files although the usage
    line names a CHM file -- presumably copied from the LRF tools; confirm
    before relying on them.
    '''
    opts = OptionParser(usage=_('%prog [options] mybook.chm'))
    add = opts.add_option

    # Output location and verbosity
    add('--output-dir', '-d', dest='output', default='.',
        help=_('Output directory. Defaults to current directory'))
    add('--verbose', dest='verbose', action='store_true', default=False)

    # Title and author, each with an optional sort key
    add('-t', '--title', dest='title', action='store', type='string',
        help=_('Set the book title'))
    add('--title-sort', dest='title_sort', action='store', type='string',
        default=None, help=_('Set sort key for the title'))
    add('-a', '--author', dest='author', action='store', type='string',
        help=_('Set the author'))
    add('--author-sort', dest='author_sort', action='store', type='string',
        default=None, help=_('Set sort key for the author'))

    # Category, thumbnail and free text comment
    add('-c', '--category', dest='category', action='store', type='string',
        help=_('The category this book belongs to. E.g.: History'))
    add('--thumbnail', dest='thumbnail', action='store', type='string',
        help=_("Path to a graphic that will be set as this files' thumbnail"))
    add('--comment', dest='freetext', action='store', type='string',
        help=_('Path to a txt file containing a comment.'))
    add('--get-thumbnail', dest='get_thumbnail', action='store_true',
        default=False, help=_('Extract thumbnail from LRF file'))

    # Publishing details; dest is left for the parser to derive
    add('--publisher', default=None, help=_('Set the publisher'))
    add('--classification', default=None,
        help=_('Set the book classification'))
    add('--creator', default=None, help=_('Set the book creator'))
    add('--producer', default=None, help=_('Set the book producer'))

    # Cover, identifier and font tweak
    add('--get-cover', action='store_true', default=False,
        help=_('Extract cover from LRF file. Note that the LRF format has no defined cover, so we use some heuristics to guess the cover.'))
    add('--bookid', dest='book_id', action='store', type='string',
        default=None, help=_('Set book ID'))
    add('--font-delta', dest='font_delta', action='store', type='int',
        default=0, help=_('Set font delta'))

    return opts
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class CHMError(Exception):
    '''Raised when a CHM file, or an entry inside it, cannot be read.'''
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class CHMReader(CHMFile):
    '''
    CHMFile subclass that lists the entries of a CHM archive and extracts
    them to disk, tidying HTML pages on the way out.
    '''

    def __init__(self, input, log):
        '''
        Open the CHM file at input.

        :param input: path of the CHM file; handed to LoadCHM.
        :param log: stored as self.log (not used by the methods of this
                    class).
        :raises CHMError: when LoadCHM reports failure.
        '''
        CHMFile.__init__(self)
        if not self.LoadCHM(input):
            raise CHMError("Unable to open CHM file '%s'"%(input,))
        self.log = log
        self._sourcechm = input
        self._contents = None
        self._playorder = 0
        self._metadata = False
        self._extracted = False

        # location of '.hhc' file, which is the CHM TOC.
        self.root = os.path.splitext(self.topics.lstrip('/'))[0]
        self.hhc_path = self.root + ".hhc"

    def _parse_toc(self, ul, basedir=None):
        '''
        Build a TOC from the <li> children of ul, recursing into nested
        lists.

        :param ul: parsed list element; called as ul('li', recursive=False)
                   to obtain its direct items.
        :param basedir: base path given to every TOC node. Defaults to the
                        working directory at call time.
        :return: the TOC node for this list.
        '''
        if basedir is None:
            basedir = os.getcwdu()
        toc = TOC(play_order=self._playorder, base_path=basedir, text='')
        self._playorder += 1
        for li in ul('li', recursive=False):
            href = li.object('param', {'name': 'Local'})[0]['value']
            if '#' in href:
                # Split on the first '#' only, a fragment may itself hold '#'
                href, frag = href.split('#', 1)
            else:
                frag = None
            name = self._deentity(li.object('param', {'name': 'Name'})[0]['value'])
            toc.add_item(href, frag, name, play_order=self._playorder)
            self._playorder += 1
            if li.ul:
                # Nested levels share the base path of their parent
                child = self._parse_toc(li.ul, basedir)
                child.parent = toc
                toc.append(child)
        return toc

    def GetFile(self, path):
        '''
        Return the data of the archive entry path.

        :param path: entry path inside the CHM; a leading '/' is added when
                     missing.
        :raises CHMError: when the entry cannot be resolved or has size 0.
        '''
        # have to have abs paths for ResolveObject, but Contents() deliberately
        # makes them relative. So we don't have to worry, re-add the leading /.
        # note this path refers to the internal CHM structure
        if not path.startswith('/'):
            path = '/' + path
        res, ui = self.ResolveObject(path)
        if res != CHM_RESOLVE_SUCCESS:
            raise CHMError("Unable to locate '%s' within CHM file '%s'"%(path, self.filename))
        size, data = self.RetrieveObject(ui)
        if size == 0:
            raise CHMError("'%s' is zero bytes in length!"%(path,))
        return data

    def ExtractFiles(self, output_dir=None):
        '''
        Write every entry listed by Contents() below output_dir.

        Entries whose guessed MIME type is text/html are passed through
        _reformat before being written. Sets self._extracted when done.

        :param output_dir: destination directory. Defaults to the working
                           directory at call time.
        '''
        if output_dir is None:
            output_dir = os.getcwdu()
        for path in self.Contents():
            # NOTE(review): entry names come from the archive and are joined
            # unchecked; a name containing '..' could land outside
            # output_dir -- consider validating.
            lpath = os.path.join(output_dir, path)
            self._ensure_dir(lpath)
            data = self.GetFile(path)
            # Clean up before opening so a failure leaves no empty file
            if guess_mimetype(path)[0] == 'text/html':
                data = self._reformat(data)
            with open(lpath, 'wb') as f:
                f.write(data)
        self._extracted = True

    def _reformat(self, data):
        '''
        Clean up one HTML page and return it prettified.

        Removes <script> elements, a first/last <table> that sits at the
        very edge of its sibling list (treated as navigation bars), a
        leading <br> preceded only by blank text, and repairs img src
        values. When parsing raises UnicodeEncodeError the data is returned
        unchanged.
        '''
        try:
            soup = BeautifulSoup(data)
        except UnicodeEncodeError:
            # hit some strange encoding problems...
            print("Unable to parse html for cleaning, leaving it :(")
            return data
        # nuke javascript...
        for script in soup('script'):
            script.extract()
        # remove forward and back nav bars from the top/bottom of each page
        # because they break the flow of the text and generally waste space.
        tables = soup('table')
        if tables:
            first, last = tables[0], tables[-1]
            if (first.previousSibling is None
              or first.previousSibling.previousSibling is None):
                first.extract()
            if (last.nextSibling is None
              or last.nextSibling.nextSibling is None):
                last.extract()
        # for some very odd reason each page's content appears to be in a table
        # too. and this table has sub-tables for random asides... grr.

        # remove br at top of page if present after nav bars removed
        breaks = soup('br')
        if breaks and check_all_prev_empty(breaks[0].previousSibling):
            breaks[0].extract()

        # some images seem to be broken in some chm's :/
        for img in soup('img'):
            try:
                # some are supposedly "relative" but are not
                while img['src'].startswith('../'):
                    img['src'] = img['src'][3:]
                # some have ";<junk>" at the end.
                img['src'] = img['src'].split(';')[0]
            except KeyError:
                # and some don't even have a src= ?!
                pass
        # now give back some pretty html.
        return soup.prettify()

    def Contents(self):
        '''
        Return the list of entry paths in the archive, without directories
        and without a leading '/'. The list is computed once and cached.
        '''
        if self._contents is not None:
            return self._contents
        paths = []
        def get_paths(chm, ui, ctx):
            # skip directories
            # note this path refers to the internal CHM structure
            if ui.path[-1] != '/':
                # and make paths relative
                paths.append(ui.path.lstrip('/'))
        chm_enumerate(self.file, CHM_ENUMERATE_NORMAL, get_paths, None)
        self._contents = paths
        return self._contents

    def _ensure_dir(self, path):
        '''Create the directory that will contain path if it is missing.'''
        parent = os.path.dirname(path)
        # dirname is '' for a bare file name; makedirs('') would fail
        if parent and not os.path.isdir(parent):
            os.makedirs(parent)

    def extract_content(self, output_dir=None):
        '''
        Extract the whole archive below output_dir; see ExtractFiles.
        '''
        self.ExtractFiles(output_dir=output_dir)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class CHMInput(InputFormatPlugin):
 | 
					class CHMInput(InputFormatPlugin):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    name        = 'CHM Input'
 | 
					    name        = 'CHM Input'
 | 
				
			||||||
@ -215,52 +19,49 @@ class CHMInput(InputFormatPlugin):
 | 
				
			|||||||
    description = 'Convert CHM files to OEB'
 | 
					    description = 'Convert CHM files to OEB'
 | 
				
			||||||
    file_types  = set(['chm'])
 | 
					    file_types  = set(['chm'])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    options = set([
 | 
					 | 
				
			||||||
        OptionRecommendation(name='dummy_option', recommended_value=False,
 | 
					 | 
				
			||||||
            help=_('dummy option until real options are determined.')),
 | 
					 | 
				
			||||||
    ])
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def _chmtohtml(self, output_dir, chm_path, no_images, log):
 | 
					    def _chmtohtml(self, output_dir, chm_path, no_images, log):
 | 
				
			||||||
 | 
					        from calibre.ebooks.chm.reader import CHMReader
 | 
				
			||||||
        log.debug('Opening CHM file')
 | 
					        log.debug('Opening CHM file')
 | 
				
			||||||
        rdr = CHMReader(chm_path, log)
 | 
					        rdr = CHMReader(chm_path, log)
 | 
				
			||||||
        log.debug('Extracting CHM to %s' % output_dir)
 | 
					        log.debug('Extracting CHM to %s' % output_dir)
 | 
				
			||||||
        rdr.extract_content(output_dir)
 | 
					        rdr.extract_content(output_dir)
 | 
				
			||||||
 | 
					        self._chm_reader = rdr
 | 
				
			||||||
        return rdr.hhc_path
 | 
					        return rdr.hhc_path
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def convert(self, stream, options, file_ext, log, accelerators):
 | 
					    def convert(self, stream, options, file_ext, log, accelerators):
 | 
				
			||||||
        from calibre.ebooks.metadata.chm import get_metadata_
 | 
					        from calibre.ebooks.chm.metadata import get_metadata_from_reader
 | 
				
			||||||
 | 
					        from calibre.customize.ui import plugin_for_input_format
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        log.debug('Processing CHM...')
 | 
					        log.debug('Processing CHM...')
 | 
				
			||||||
        tdir = mkdtemp(prefix='chm2oeb_')
 | 
					        with TemporaryDirectory('_chm2oeb') as tdir:
 | 
				
			||||||
        from calibre.customize.ui import plugin_for_input_format
 | 
					            html_input = plugin_for_input_format('html')
 | 
				
			||||||
        html_input = plugin_for_input_format('html')
 | 
					            for opt in html_input.options:
 | 
				
			||||||
        for opt in html_input.options:
 | 
					                setattr(options, opt.option.name, opt.recommended_value)
 | 
				
			||||||
            setattr(options, opt.option.name, opt.recommended_value)
 | 
					            options.input_encoding = 'utf-8'
 | 
				
			||||||
        options.input_encoding = 'utf-8'
 | 
					            no_images = False #options.no_images
 | 
				
			||||||
        no_images = False #options.no_images
 | 
					            chm_name = stream.name
 | 
				
			||||||
        chm_name = stream.name
 | 
					            #chm_data = stream.read()
 | 
				
			||||||
        #chm_data = stream.read()
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
        #closing stream so CHM can be opened by external library
 | 
					            #closing stream so CHM can be opened by external library
 | 
				
			||||||
        stream.close()
 | 
					            stream.close()
 | 
				
			||||||
        log.debug('tdir=%s' % tdir)
 | 
					            log.debug('tdir=%s' % tdir)
 | 
				
			||||||
        log.debug('stream.name=%s' % stream.name)
 | 
					            log.debug('stream.name=%s' % stream.name)
 | 
				
			||||||
        mainname = self._chmtohtml(tdir, chm_name, no_images, log)
 | 
					            mainname = self._chmtohtml(tdir, chm_name, no_images, log)
 | 
				
			||||||
        mainpath = os.path.join(tdir, mainname)
 | 
					            mainpath = os.path.join(tdir, mainname)
 | 
				
			||||||
 | 
					            #raw_input()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        metadata = get_metadata_(tdir)
 | 
					            metadata = get_metadata_from_reader(self._chm_reader)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        odi = options.debug_pipeline
 | 
					            odi = options.debug_pipeline
 | 
				
			||||||
        options.debug_pipeline = None
 | 
					            options.debug_pipeline = None
 | 
				
			||||||
        # try a custom conversion:
 | 
					            # try a custom conversion:
 | 
				
			||||||
        #oeb = self._create_oebbook(mainpath, tdir, options, log, metadata)
 | 
					            #oeb = self._create_oebbook(mainpath, tdir, options, log, metadata)
 | 
				
			||||||
        # try using html converter:
 | 
					            # try using html converter:
 | 
				
			||||||
        htmlpath = self._create_html_root(mainpath, log)
 | 
					            htmlpath = self._create_html_root(mainpath, log)
 | 
				
			||||||
        oeb = self._create_oebbook_html(htmlpath, tdir, options, log, metadata)
 | 
					            oeb = self._create_oebbook_html(htmlpath, tdir, options, log, metadata)
 | 
				
			||||||
        options.debug_pipeline = odi
 | 
					            options.debug_pipeline = odi
 | 
				
			||||||
        #log.debug('DEBUG: Not removing tempdir %s' % tdir)
 | 
					            #log.debug('DEBUG: Not removing tempdir %s' % tdir)
 | 
				
			||||||
        shutil.rmtree(tdir)
 | 
					 | 
				
			||||||
        return oeb
 | 
					        return oeb
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _create_oebbook_html(self, htmlpath, basedir, opts, log, mi):
 | 
					    def _create_oebbook_html(self, htmlpath, basedir, opts, log, mi):
 | 
				
			||||||
@ -369,6 +170,9 @@ class CHMInput(InputFormatPlugin):
 | 
				
			|||||||
        # check that node is a normal node (not a comment, DOCTYPE, etc.)
 | 
					        # check that node is a normal node (not a comment, DOCTYPE, etc.)
 | 
				
			||||||
        # (normal nodes have string tags)
 | 
					        # (normal nodes have string tags)
 | 
				
			||||||
        if isinstance(node.tag, basestring):
 | 
					        if isinstance(node.tag, basestring):
 | 
				
			||||||
 | 
					            from calibre.ebooks.chm.reader import match_string
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            chapter_path = None
 | 
				
			||||||
            if match_string(node.tag, 'object') and match_string(node.attrib['type'], 'text/sitemap'):
 | 
					            if match_string(node.tag, 'object') and match_string(node.attrib['type'], 'text/sitemap'):
 | 
				
			||||||
                for child in node:
 | 
					                for child in node:
 | 
				
			||||||
                    if match_string(child.tag,'param') and match_string(child.attrib['name'], 'name'):
 | 
					                    if match_string(child.tag,'param') and match_string(child.attrib['name'], 'name'):
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										157
									
								
								src/calibre/ebooks/chm/metadata.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										157
									
								
								src/calibre/ebooks/chm/metadata.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,157 @@
 | 
				
			|||||||
 | 
					#!/usr/bin/env python
 | 
				
			||||||
 | 
					# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 | 
				
			||||||
 | 
					from __future__ import with_statement
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					__license__   = 'GPL v3'
 | 
				
			||||||
 | 
					__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 | 
				
			||||||
 | 
					__docformat__ = 'restructuredtext en'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import re
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from calibre.ebooks.BeautifulSoup import BeautifulSoup
 | 
				
			||||||
 | 
					from calibre.ebooks.chardet import xml_to_unicode
 | 
				
			||||||
 | 
					from calibre.ebooks.metadata import string_to_authors, MetaInformation
 | 
				
			||||||
 | 
					from calibre.utils.logging import default_log
 | 
				
			||||||
 | 
					from calibre.ptempfile import TemporaryFile
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def _clean(s):
 | 
				
			||||||
 | 
					    return s.replace(u'\u00a0', u' ')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def _detag(tag):
 | 
				
			||||||
 | 
					    str = u""
 | 
				
			||||||
 | 
					    for elem in tag:
 | 
				
			||||||
 | 
					        if hasattr(elem, "contents"):
 | 
				
			||||||
 | 
					            str += _detag(elem)
 | 
				
			||||||
 | 
					        else:
 | 
				
			||||||
 | 
					            str += _clean(elem)
 | 
				
			||||||
 | 
					    return str
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def _metadata_from_table(soup, searchfor):
					    """Look up a metadata value laid out in a table on the home page.

					    ``searchfor`` is a regular expression (matched case-insensitively)
					    for the label. Returns the stripped value text, or None when the
					    label is not found or has no value cell next to it.
					    """
					    td = soup.find('td', text=re.compile(searchfor, flags=re.I))
					    if td is None:
					        return None
					    # presumably ``find(text=...)`` yields the matching text node, hence
					    # the step up to its parent - confirm against BeautifulSoup
					    td = td.parent
					    # there appears to be multiple ways of structuring the metadata
					    # on the home page. cue some nasty special-case hacks...
					    if re.match(r'^\s*'+searchfor+r'\s*$', td.renderContents(), flags=re.I):
					        # the cell holds only the label: the value is in the next cell
					        value_cell = td.findNextSibling('td')
					        if value_cell is None:
					            # label without a value cell: nothing to report
					            return None
					        meta = _detag(value_cell)
					        return re.sub('^:', '', meta).strip()
					    else:
					        # label and value share one cell: drop everything up to the colon
					        meta = _detag(td)
					        return re.sub(r'^[^:]+:', '', meta).strip()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def _metadata_from_span(soup, searchfor):
					    """Return the text of the first ``<span>`` whose class matches ``searchfor``.

					    ``searchfor`` is a regular expression matched case-insensitively
					    against the class attribute. Returns None when no span matches.
					    """
					    span = soup.find('span', {'class': re.compile(searchfor, flags=re.I)})
					    if span is None:
					        return None
					    # Hand the tag itself to _detag: that strips nested markup and cleans
					    # the text nodes. Rendering the span to a string first kept the
					    # markup in the result and fed _detag raw encoded characters.
					    # this metadata might need some cleaning up still :/
					    return _detag(span).strip()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def _get_authors(soup):
					    """Return the list of authors found on the home page.

					    Looks for an 'author' span first and a 'by' table row second. Falls
					    back to a single translated 'Unknown' entry when nothing usable is
					    found, including an empty string.
					    """
					    aut = (_metadata_from_span(soup, r'author')
					        or _metadata_from_table(soup, r'^\s*by\s*:?\s+'))
					    if not aut:
					        return [_('Unknown')]
					    # guard against a string that yields no names at all
					    return string_to_authors(aut) or [_('Unknown')]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def _get_publisher(soup):
					    """Return the publisher from an 'imprint' span, else a 'publisher' table row."""
					    publisher = _metadata_from_span(soup, 'imprint')
					    if not publisher:
					        publisher = _metadata_from_table(soup, 'publisher')
					    return publisher
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def _get_isbn(soup):
					    """Return the ISBN from an 'isbn' span, else an 'isbn' table row."""
					    isbn = _metadata_from_span(soup, 'isbn')
					    if not isbn:
					        isbn = _metadata_from_table(soup, 'isbn')
					    return isbn
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def _get_comments(soup):
					    """Build a 'Published <date>, <n> pages.' comment from the home page.

					    Returns None unless both a publication date and a page count are
					    found.
					    """
					    date = (_metadata_from_span(soup, 'cwdate')
					        or _metadata_from_table(soup, 'pub date'))
					    pages = ( _metadata_from_span(soup, 'pages')
					        or _metadata_from_table(soup, 'pages'))
					    if date is None or pages is None:
					        return None
					    # pages often comes as '(\d+ pages)'
					    count = re.search(r'\d+', pages)
					    if count is None:
					        return None
					    try:
					        # date span can have copyright symbols in it...
					        date = date.replace(u'\u00a9', u'').strip()
					        return u'Published %s, %s pages.' % (date, count.group(0))
					    except UnicodeError:
					        # text that cannot be combined with unicode: give up quietly,
					        # the comment is optional
					        return None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def _get_cover(soup, rdr):
 | 
				
			||||||
 | 
					    ans = None
 | 
				
			||||||
 | 
					    try:
 | 
				
			||||||
 | 
					        ans = soup.find('img', alt=re.compile('cover', flags=re.I))['src']
 | 
				
			||||||
 | 
					    except TypeError:
 | 
				
			||||||
 | 
					        # meeehh, no handy alt-tag goodness, try some hackery
 | 
				
			||||||
 | 
					        # the basic idea behind this is that in general, the cover image
 | 
				
			||||||
 | 
					        # has a height:width ratio of ~1.25, whereas most of the nav
 | 
				
			||||||
 | 
					        # buttons are decidedly less than that.
 | 
				
			||||||
 | 
					        # what we do in this is work out that ratio, take 1.25 off it and
 | 
				
			||||||
 | 
					        # save the absolute value when we sort by this value, the smallest
 | 
				
			||||||
 | 
					        # one is most likely to be the cover image, hopefully.
 | 
				
			||||||
 | 
					        r = {}
 | 
				
			||||||
 | 
					        for img in soup('img'):
 | 
				
			||||||
 | 
					            try:
 | 
				
			||||||
 | 
					                r[abs(float(img['height'])/float(img['width'])-1.25)] = img['src']
 | 
				
			||||||
 | 
					            except KeyError:
 | 
				
			||||||
 | 
					                # interestingly, occasionally the only image without height
 | 
				
			||||||
 | 
					                # or width attrs is the cover...
 | 
				
			||||||
 | 
					                r[0] = img['src']
 | 
				
			||||||
 | 
					        l = r.keys()
 | 
				
			||||||
 | 
					        l.sort()
 | 
				
			||||||
 | 
					        ans = r[l[0]]
 | 
				
			||||||
 | 
					    # this link comes from the internal html, which is in a subdir
 | 
				
			||||||
 | 
					    if ans is not None:
 | 
				
			||||||
 | 
					        try:
 | 
				
			||||||
 | 
					            ans = rdr.GetFile(ans)
 | 
				
			||||||
 | 
					        except:
 | 
				
			||||||
 | 
					            ans = rdr.root + "/" + ans
 | 
				
			||||||
 | 
					            try:
 | 
				
			||||||
 | 
					                ans = rdr.GetFile(ans)
 | 
				
			||||||
 | 
					            except:
 | 
				
			||||||
 | 
					                ans = None
 | 
				
			||||||
 | 
					        if ans is not None:
 | 
				
			||||||
 | 
					            from PIL import Image
 | 
				
			||||||
 | 
					            from cStringIO import StringIO
 | 
				
			||||||
 | 
					            buf = StringIO()
 | 
				
			||||||
 | 
					            try:
 | 
				
			||||||
 | 
					                Image.open(StringIO(ans)).convert('RGB').save(buf, 'JPEG')
 | 
				
			||||||
 | 
					                ans = buf.getvalue()
 | 
				
			||||||
 | 
					            except:
 | 
				
			||||||
 | 
					                ans = None
 | 
				
			||||||
 | 
					    return ans
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def get_metadata_from_reader(rdr):
					    """Build a MetaInformation object from an opened CHM reader.

					    The reader's home page is parsed for authors, publisher, ISBN,
					    comments and a cover; the title is taken from ``rdr.title``. The
					    optional fields are only set when a value was found.
					    """
					    decoded = xml_to_unicode(rdr.GetFile(rdr.home), strip_encoding_pats=True,
					        resolve_entities=True)[0]
					    home = BeautifulSoup(decoded)

					    mi = MetaInformation(rdr.title, _get_authors(home))

					    # optional text fields, filled in only when the page provides them
					    optional = (
					        ('publisher', _get_publisher),
					        ('isbn', _get_isbn),
					        ('comments', _get_comments),
					    )
					    for field, extract in optional:
					        value = extract(home)
					        if value:
					            setattr(mi, field, value)

					    cover = _get_cover(home, rdr)
					    if cover is not None:
					        mi.cover_data = ('jpg', cover)

					    return mi
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def get_metadata(stream):
					    """Read metadata from a CHM supplied as an open binary stream.

					    The stream's content is copied into a temporary '.chm' file, which is
					    then opened with CHMReader (presumably because the reader needs a
					    file path rather than a stream - confirm).
					    """
					    with TemporaryFile('_chm_metadata.chm') as fname:
					        raw = stream.read()
					        with open(fname, 'wb') as f:
					            f.write(raw)
					        from calibre.ebooks.chm.reader import CHMReader
					        return get_metadata_from_reader(CHMReader(fname, default_log))
 | 
				
			||||||
							
								
								
									
										212
									
								
								src/calibre/ebooks/chm/reader.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										212
									
								
								src/calibre/ebooks/chm/reader.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,212 @@
 | 
				
			|||||||
 | 
					from __future__ import with_statement
 | 
				
			||||||
 | 
					''' CHM File decoding support '''
 | 
				
			||||||
 | 
					__license__ = 'GPL v3'
 | 
				
			||||||
 | 
					__copyright__  = '2008, Kovid Goyal <kovid at kovidgoyal.net>,' \
 | 
				
			||||||
 | 
					                 ' and Alex Bramley <a.bramley at gmail.com>.'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import os, re
 | 
				
			||||||
 | 
					from mimetypes import guess_type as guess_mimetype
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from BeautifulSoup import BeautifulSoup, NavigableString
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from calibre.utils.chm.chm import CHMFile
 | 
				
			||||||
 | 
					from calibre.utils.chm.chmlib import (
 | 
				
			||||||
 | 
					  CHM_RESOLVE_SUCCESS, CHM_ENUMERATE_NORMAL,
 | 
				
			||||||
 | 
					  chm_enumerate,
 | 
				
			||||||
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from calibre.utils.config import OptionParser
 | 
				
			||||||
 | 
					from calibre.ebooks.metadata.toc import TOC
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def match_string(s1, s2_already_lowered):
					    """Case-insensitively compare ``s1`` with an already lower-cased string.

					    Returns False when either argument is None.
					    """
					    if s1 is None or s2_already_lowered is None:
					        return False
					    return s1.lower() == s2_already_lowered
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def check_all_prev_empty(tag):
					    """Return True when ``tag`` and every earlier sibling carry no text.

					    Only plain NavigableString nodes are inspected; any other node is
					    stepped over. ``None`` (no node at all) counts as empty. The
					    siblings are walked in a loop so that long sibling chains cannot
					    exhaust the recursion limit.
					    """
					    while tag is not None:
					        if tag.__class__ == NavigableString and not check_empty(tag):
					            return False
					        tag = tag.previousSibling
					    return True

					def check_empty(s, rex = re.compile(r'\S')):
					    """Return True when ``s`` contains no non-whitespace character."""
					    return rex.search(s) is None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def option_parser():
					    """Build the command line option parser for the CHM tool.

					    Returns an OptionParser carrying the output, verbosity and metadata
					    options.
					    """
					    parser = OptionParser(usage=_('%prog [options] mybook.chm'))
					    add = parser.add_option

					    # output control
					    add('--output-dir', '-d', default='.',
					        help=_('Output directory. Defaults to current directory'),
					        dest='output')
					    add('--verbose', default=False, action='store_true', dest='verbose')

					    # title and author
					    add("-t", "--title", action="store", type="string",
					        dest="title", help=_("Set the book title"))
					    add('--title-sort', action='store', type='string', default=None,
					        dest='title_sort', help=_('Set sort key for the title'))
					    add("-a", "--author", action="store", type="string",
					        dest="author", help=_("Set the author"))
					    add('--author-sort', action='store', type='string', default=None,
					        dest='author_sort', help=_('Set sort key for the author'))

					    # further metadata
					    # NOTE(review): several help texts below mention LRF although this
					    # parser is for CHM; they look copied from elsewhere - confirm
					    # before rewording, since the strings are translatable.
					    add("-c", "--category", action="store", type="string",
					        dest="category",
					        help=_("The category this book belongs to. E.g.: History"))
					    add("--thumbnail", action="store", type="string",
					        dest="thumbnail",
					        help=_("Path to a graphic that will be set as this files' thumbnail"))
					    add("--comment", action="store", type="string",
					        dest="freetext",
					        help=_("Path to a txt file containing a comment."))
					    add("--get-thumbnail", action="store_true",
					        dest="get_thumbnail", default=False,
					        help=_("Extract thumbnail from LRF file"))
					    add('--publisher', default=None, help=_('Set the publisher'))
					    add('--classification', default=None,
					        help=_('Set the book classification'))
					    add('--creator', default=None, help=_('Set the book creator'))
					    add('--producer', default=None, help=_('Set the book producer'))
					    add('--get-cover', action='store_true', default=False,
					        help=_('Extract cover from LRF file. Note that the LRF format has no defined cover, so we use some heuristics to guess the cover.'))
					    add('--bookid', action='store', type='string', default=None,
					        dest='book_id', help=_('Set book ID'))
					    add('--font-delta', action='store', type='int', default=0,
					        dest='font_delta', help=_('Set font delta'))
					    return parser
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class CHMError(Exception):
					    """Raised when a CHM file cannot be opened or one of its entries cannot be read."""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class CHMReader(CHMFile):
					    """Reader for a CHM archive, built on top of CHMFile.

					    Lists and extracts the files stored in the archive, tidies extracted
					    HTML pages and records where the '.hhc' table of contents lives.
					    """

					    def __init__(self, input, log):
					        """Open the CHM at ``input``.

					        ``log`` is kept as ``self.log`` for diagnostics. Raises CHMError
					        when LoadCHM reports failure.
					        """
					        CHMFile.__init__(self)
					        if not self.LoadCHM(input):
					            raise CHMError("Unable to open CHM file '%s'"%(input,))
					        self.log = log
					        self._sourcechm = input
					        self._contents = None
					        self._playorder = 0
					        self._metadata = False
					        self._extracted = False

					        # location of '.hhc' file, which is the CHM TOC.
					        self.root = os.path.splitext(self.topics.lstrip('/'))[0]
					        self.hhc_path = self.root + ".hhc"

					    def _parse_toc(self, ul, basedir=None):
					        """Convert a ``<ul>`` of sitemap entries into a TOC tree.

					        ``basedir`` defaults to the working directory at call time and
					        is handed down unchanged to nested lists.
					        """
					        if basedir is None:
					            basedir = os.getcwdu()
					        toc = TOC(play_order=self._playorder, base_path=basedir, text='')
					        self._playorder += 1
					        for li in ul('li', recursive=False):
					            href = li.object('param', {'name': 'Local'})[0]['value']
					            # split at the first '#' only, so that a further '#' in the
					            # fragment cannot break the unpacking
					            href, sep, frag = href.partition('#')
					            if not sep:
					                frag = None
					            # NOTE(review): _deentity is not defined in this class;
					            # presumably inherited from CHMFile - confirm.
					            name = self._deentity(li.object('param', {'name': 'Name'})[0]['value'])
					            toc.add_item(href, frag, name, play_order=self._playorder)
					            self._playorder += 1
					            if li.ul:
					                child = self._parse_toc(li.ul, basedir)
					                child.parent = toc
					                toc.append(child)
					        return toc

					    def GetFile(self, path):
					        """Return the content of the entry stored at ``path`` in the CHM.

					        Raises CHMError when the entry cannot be resolved or is empty.
					        """
					        # have to have abs paths for ResolveObject, but Contents() deliberately
					        # makes them relative. So we don't have to worry, re-add the leading /.
					        # note this path refers to the internal CHM structure
					        if not path.startswith('/'):
					            path = '/' + path
					        res, ui = self.ResolveObject(path)
					        if res != CHM_RESOLVE_SUCCESS:
					            raise CHMError("Unable to locate '%s' within CHM file '%s'"%(path, self.filename))
					        size, data = self.RetrieveObject(ui)
					        if size == 0:
					            raise CHMError("'%s' is zero bytes in length!"%(path,))
					        return data

					    def ExtractFiles(self, output_dir=None):
					        """Write every file of the CHM below ``output_dir``.

					        ``output_dir`` defaults to the working directory at call time.
					        HTML pages are passed through ``_reformat`` first. Entries that
					        cannot be read are logged and skipped. Afterwards ``hhc_path``
					        is corrected when the TOC file exists under a name differing
					        only in case.
					        """
					        if output_dir is None:
					            output_dir = os.getcwdu()
					        for path in self.Contents():
					            lpath = os.path.join(output_dir, path)
					            self._ensure_dir(lpath)
					            try:
					                data = self.GetFile(path)
					            except CHMError:
					                # one unreadable (e.g. empty) entry should not abort the
					                # extraction of everything else
					                self.log.warn("Failed to extract '%s' from CHM, ignoring" % (path,))
					                continue
					            if guess_mimetype(path)[0] == 'text/html':
					                data = self._reformat(data)
					            with open(lpath, 'wb') as f:
					                f.write(data)
					        self._extracted = True
					        files = os.listdir(output_dir)
					        if self.hhc_path not in files:
					            wanted = self.hhc_path.lower()
					            for f in files:
					                if f.lower() == wanted:
					                    self.hhc_path = f
					                    break

					    def _reformat(self, data):
					        """Tidy one HTML page and return the prettified markup.

					        Scripts, leading/trailing navigation tables and a leading
					        ``<br>`` are removed and image sources are repaired. When the
					        page cannot be parsed, ``data`` is returned unchanged.
					        """
					        try:
					            soup = BeautifulSoup(data)
					        except UnicodeEncodeError:
					            # hit some strange encoding problems...
					            self.log.warn("Unable to parse html for cleaning, leaving it :(")
					            return data
					        # nuke javascript...
					        for script in soup('script'):
					            script.extract()
					        # remove forward and back nav bars from the top/bottom of each page
					        # since they disturb the flow of the text and generally waste space
					        # since we can't use [a,b] syntax to select arbitrary items from a list
					        # we'll have to do this manually...
					        t = soup('table')
					        if t:
					            if (t[0].previousSibling is None
					              or t[0].previousSibling.previousSibling is None):
					                t[0].extract()
					            if (t[-1].nextSibling is None
					              or t[-1].nextSibling.nextSibling is None):
					                t[-1].extract()
					        # for some very odd reason each page's content appears to be in a table
					        # too. and this table has sub-tables for random asides... grr.

					        # remove br at top of page if present after nav bars removed
					        br = soup('br')
					        if br:
					            if check_all_prev_empty(br[0].previousSibling):
					                br[0].extract()

					        # some images seem to be broken in some chm's :/
					        for img in soup('img'):
					            try:
					                # some are supposedly "relative"... lies.
					                while img['src'].startswith('../'):
					                    img['src'] = img['src'][3:]
					                # some have ";<junk>" at the end.
					                img['src'] = img['src'].split(';')[0]
					            except KeyError:
					                # and some don't even have a src= ?!
					                pass
					        # now give back some pretty html.
					        return soup.prettify()

					    def Contents(self):
					        """Return the relative paths of all files stored in the CHM.

					        The listing is built on the first call and cached in
					        ``self._contents``.
					        """
					        if self._contents is not None:
					            return self._contents
					        paths = []
					        def get_paths(chm, ui, ctx):
					            # skip directories
					            # note this path refers to the internal CHM structure
					            if ui.path[-1] != '/':
					                # and make paths relative
					                paths.append(ui.path.lstrip('/'))
					        chm_enumerate(self.file, CHM_ENUMERATE_NORMAL, get_paths, None)
					        self._contents = paths
					        return self._contents

					    def _ensure_dir(self, path):
					        """Create the directory that will contain ``path`` if it is missing.

					        A ``path`` without a directory component is left alone, and a
					        directory appearing between check and creation is not an error.
					        """
					        parent = os.path.dirname(path)
					        if not parent or os.path.isdir(parent):
					            return
					        try:
					            os.makedirs(parent)
					        except OSError:
					            # someone else may have created it in the meantime
					            if not os.path.isdir(parent):
					                raise

					    def extract_content(self, output_dir=None):
					        """Extract the whole CHM into ``output_dir`` (default: working directory at call time)."""
					        if output_dir is None:
					            output_dir = os.getcwdu()
					        self.ExtractFiles(output_dir=output_dir)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -13,6 +13,7 @@ from calibre.customize.ui import input_profiles, output_profiles, \
 | 
				
			|||||||
from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
 | 
					from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
 | 
				
			||||||
from calibre.ptempfile import PersistentTemporaryDirectory
 | 
					from calibre.ptempfile import PersistentTemporaryDirectory
 | 
				
			||||||
from calibre.utils.date import parse_date
 | 
					from calibre.utils.date import parse_date
 | 
				
			||||||
 | 
					from calibre.utils.zipfile import ZipFile
 | 
				
			||||||
from calibre import extract, walk
 | 
					from calibre import extract, walk
 | 
				
			||||||
 | 
					
 | 
				
			||||||
DEBUG_README=u'''
 | 
					DEBUG_README=u'''
 | 
				
			||||||
@ -726,6 +727,13 @@ OptionRecommendation(name='timestamp',
 | 
				
			|||||||
        else:
 | 
					        else:
 | 
				
			||||||
            os.makedirs(out_dir)
 | 
					            os.makedirs(out_dir)
 | 
				
			||||||
            self.dump_oeb(ret, out_dir)
 | 
					            self.dump_oeb(ret, out_dir)
 | 
				
			||||||
 | 
					        if self.input_fmt == 'recipe':
 | 
				
			||||||
 | 
					            zf = ZipFile(os.path.join(self.opts.debug_pipeline,
 | 
				
			||||||
 | 
					                'periodical.downloaded_recipe'), 'w')
 | 
				
			||||||
 | 
					            zf.add_dir(out_dir)
 | 
				
			||||||
 | 
					            with self.input_plugin:
 | 
				
			||||||
 | 
					                self.input_plugin.save_download(zf)
 | 
				
			||||||
 | 
					            zf.close()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        self.log.info('Input debug saved to:', out_dir)
 | 
					        self.log.info('Input debug saved to:', out_dir)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -773,26 +781,29 @@ OptionRecommendation(name='timestamp',
 | 
				
			|||||||
        self.ui_reporter(0.01, _('Converting input to HTML...'))
 | 
					        self.ui_reporter(0.01, _('Converting input to HTML...'))
 | 
				
			||||||
        ir = CompositeProgressReporter(0.01, 0.34, self.ui_reporter)
 | 
					        ir = CompositeProgressReporter(0.01, 0.34, self.ui_reporter)
 | 
				
			||||||
        self.input_plugin.report_progress = ir
 | 
					        self.input_plugin.report_progress = ir
 | 
				
			||||||
        self.oeb = self.input_plugin(stream, self.opts,
 | 
					        with self.input_plugin:
 | 
				
			||||||
                                    self.input_fmt, self.log,
 | 
					            self.oeb = self.input_plugin(stream, self.opts,
 | 
				
			||||||
                                    accelerators, tdir)
 | 
					                                        self.input_fmt, self.log,
 | 
				
			||||||
        if self.opts.debug_pipeline is not None:
 | 
					                                        accelerators, tdir)
 | 
				
			||||||
            self.dump_input(self.oeb, tdir)
 | 
					            if self.opts.debug_pipeline is not None:
 | 
				
			||||||
            if self.abort_after_input_dump:
 | 
					                self.dump_input(self.oeb, tdir)
 | 
				
			||||||
                return
 | 
					                if self.abort_after_input_dump:
 | 
				
			||||||
        if self.input_fmt == 'recipe':
 | 
					                    return
 | 
				
			||||||
            self.opts_to_mi(self.user_metadata)
 | 
					            if self.input_fmt in ('recipe', 'downloaded_recipe'):
 | 
				
			||||||
        if not hasattr(self.oeb, 'manifest'):
 | 
					                self.opts_to_mi(self.user_metadata)
 | 
				
			||||||
            self.oeb = create_oebbook(self.log, self.oeb, self.opts,
 | 
					            if not hasattr(self.oeb, 'manifest'):
 | 
				
			||||||
                    self.input_plugin)
 | 
					                self.oeb = create_oebbook(self.log, self.oeb, self.opts,
 | 
				
			||||||
        self.input_plugin.postprocess_book(self.oeb, self.opts, self.log)
 | 
					                        self.input_plugin)
 | 
				
			||||||
        self.opts.is_image_collection = self.input_plugin.is_image_collection
 | 
					            self.input_plugin.postprocess_book(self.oeb, self.opts, self.log)
 | 
				
			||||||
        pr = CompositeProgressReporter(0.34, 0.67, self.ui_reporter)
 | 
					            self.opts.is_image_collection = self.input_plugin.is_image_collection
 | 
				
			||||||
        self.flush()
 | 
					            pr = CompositeProgressReporter(0.34, 0.67, self.ui_reporter)
 | 
				
			||||||
        if self.opts.debug_pipeline is not None:
 | 
					            self.flush()
 | 
				
			||||||
            out_dir = os.path.join(self.opts.debug_pipeline, 'parsed')
 | 
					            if self.opts.debug_pipeline is not None:
 | 
				
			||||||
            self.dump_oeb(self.oeb, out_dir)
 | 
					                out_dir = os.path.join(self.opts.debug_pipeline, 'parsed')
 | 
				
			||||||
            self.log('Parsed HTML written to:', out_dir)
 | 
					                self.dump_oeb(self.oeb, out_dir)
 | 
				
			||||||
 | 
					                self.log('Parsed HTML written to:', out_dir)
 | 
				
			||||||
 | 
					            self.input_plugin.specialize(self.oeb, self.opts, self.log,
 | 
				
			||||||
 | 
					                    self.output_fmt)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        pr(0., _('Running transforms on ebook...'))
 | 
					        pr(0., _('Running transforms on ebook...'))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -882,7 +893,8 @@ OptionRecommendation(name='timestamp',
 | 
				
			|||||||
        our = CompositeProgressReporter(0.67, 1., self.ui_reporter)
 | 
					        our = CompositeProgressReporter(0.67, 1., self.ui_reporter)
 | 
				
			||||||
        self.output_plugin.report_progress = our
 | 
					        self.output_plugin.report_progress = our
 | 
				
			||||||
        our(0., _('Creating')+' %s'%self.output_plugin.name)
 | 
					        our(0., _('Creating')+' %s'%self.output_plugin.name)
 | 
				
			||||||
        self.output_plugin.convert(self.oeb, self.output, self.input_plugin,
 | 
					        with self.output_plugin:
 | 
				
			||||||
 | 
					            self.output_plugin.convert(self.oeb, self.output, self.input_plugin,
 | 
				
			||||||
                self.opts, self.log)
 | 
					                self.opts, self.log)
 | 
				
			||||||
        self.ui_reporter(1.)
 | 
					        self.ui_reporter(1.)
 | 
				
			||||||
        run_plugins_on_postprocess(self.output, self.output_fmt)
 | 
					        run_plugins_on_postprocess(self.output, self.output_fmt)
 | 
				
			||||||
 | 
				
			|||||||
@ -3,7 +3,7 @@ __license__ = 'GPL 3'
 | 
				
			|||||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 | 
					__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 | 
				
			||||||
__docformat__ = 'restructuredtext en'
 | 
					__docformat__ = 'restructuredtext en'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import os, re, uuid
 | 
					import os, uuid
 | 
				
			||||||
from itertools import cycle
 | 
					from itertools import cycle
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from lxml import etree
 | 
					from lxml import etree
 | 
				
			||||||
@ -19,8 +19,7 @@ class EPUBInput(InputFormatPlugin):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    recommendations = set([('page_breaks_before', '/', OptionRecommendation.MED)])
 | 
					    recommendations = set([('page_breaks_before', '/', OptionRecommendation.MED)])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @classmethod
 | 
					    def decrypt_font(self, key, path):
 | 
				
			||||||
    def decrypt_font(cls, key, path):
 | 
					 | 
				
			||||||
        raw = open(path, 'rb').read()
 | 
					        raw = open(path, 'rb').read()
 | 
				
			||||||
        crypt = raw[:1024]
 | 
					        crypt = raw[:1024]
 | 
				
			||||||
        key = cycle(iter(key))
 | 
					        key = cycle(iter(key))
 | 
				
			||||||
@ -29,13 +28,18 @@ class EPUBInput(InputFormatPlugin):
 | 
				
			|||||||
            f.write(decrypt)
 | 
					            f.write(decrypt)
 | 
				
			||||||
            f.write(raw[1024:])
 | 
					            f.write(raw[1024:])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @classmethod
 | 
					    def process_encryption(self, encfile, opf, log):
 | 
				
			||||||
    def process_encryption(cls, encfile, opf, log):
 | 
					 | 
				
			||||||
        key = None
 | 
					        key = None
 | 
				
			||||||
        m = re.search(r'(?i)(urn:uuid:[0-9a-f-]+)', open(opf, 'rb').read())
 | 
					        for item in opf.identifier_iter():
 | 
				
			||||||
        if m:
 | 
					            scheme = None
 | 
				
			||||||
            key = m.group(1)
 | 
					            for key in item.attrib.keys():
 | 
				
			||||||
            key = list(map(ord, uuid.UUID(key).bytes))
 | 
					                if key.endswith('scheme'):
 | 
				
			||||||
 | 
					                    scheme = item.get(key)
 | 
				
			||||||
 | 
					            if (scheme and scheme.lower() == 'uuid') or \
 | 
				
			||||||
 | 
					                    (item.text and item.text.startswith('urn:uuid:')):
 | 
				
			||||||
 | 
					                key = str(item.text).rpartition(':')[-1]
 | 
				
			||||||
 | 
					                key = list(map(ord, uuid.UUID(key).bytes))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        try:
 | 
					        try:
 | 
				
			||||||
            root = etree.parse(encfile)
 | 
					            root = etree.parse(encfile)
 | 
				
			||||||
            for em in root.xpath('descendant::*[contains(name(), "EncryptionMethod")]'):
 | 
					            for em in root.xpath('descendant::*[contains(name(), "EncryptionMethod")]'):
 | 
				
			||||||
@ -46,7 +50,8 @@ class EPUBInput(InputFormatPlugin):
 | 
				
			|||||||
                uri = cr.get('URI')
 | 
					                uri = cr.get('URI')
 | 
				
			||||||
                path = os.path.abspath(os.path.join(os.path.dirname(encfile), '..', *uri.split('/')))
 | 
					                path = os.path.abspath(os.path.join(os.path.dirname(encfile), '..', *uri.split('/')))
 | 
				
			||||||
                if os.path.exists(path):
 | 
					                if os.path.exists(path):
 | 
				
			||||||
                    cls.decrypt_font(key, path)
 | 
					                    self._encrypted_font_uris.append(uri)
 | 
				
			||||||
 | 
					                    self.decrypt_font(key, path)
 | 
				
			||||||
            return True
 | 
					            return True
 | 
				
			||||||
        except:
 | 
					        except:
 | 
				
			||||||
            import traceback
 | 
					            import traceback
 | 
				
			||||||
@ -115,14 +120,17 @@ class EPUBInput(InputFormatPlugin):
 | 
				
			|||||||
        if opf is None:
 | 
					        if opf is None:
 | 
				
			||||||
            raise ValueError('%s is not a valid EPUB file'%path)
 | 
					            raise ValueError('%s is not a valid EPUB file'%path)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if os.path.exists(encfile):
 | 
					 | 
				
			||||||
            if not self.process_encryption(encfile, opf, log):
 | 
					 | 
				
			||||||
                raise DRMError(os.path.basename(path))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        opf = os.path.relpath(opf, os.getcwdu())
 | 
					        opf = os.path.relpath(opf, os.getcwdu())
 | 
				
			||||||
        parts = os.path.split(opf)
 | 
					        parts = os.path.split(opf)
 | 
				
			||||||
        opf = OPF(opf, os.path.dirname(os.path.abspath(opf)))
 | 
					        opf = OPF(opf, os.path.dirname(os.path.abspath(opf)))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        self._encrypted_font_uris = []
 | 
				
			||||||
 | 
					        if os.path.exists(encfile):
 | 
				
			||||||
 | 
					            if not self.process_encryption(encfile, opf, log):
 | 
				
			||||||
 | 
					                raise DRMError(os.path.basename(path))
 | 
				
			||||||
 | 
					        self.encrypted_fonts = self._encrypted_font_uris
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if len(parts) > 1 and parts[0]:
 | 
					        if len(parts) > 1 and parts[0]:
 | 
				
			||||||
            delta = '/'.join(parts[:-1])+'/'
 | 
					            delta = '/'.join(parts[:-1])+'/'
 | 
				
			||||||
            for elem in opf.itermanifest():
 | 
					            for elem in opf.itermanifest():
 | 
				
			||||||
 | 
				
			|||||||
@ -12,8 +12,9 @@ from urllib import unquote
 | 
				
			|||||||
from calibre.customize.conversion import OutputFormatPlugin
 | 
					from calibre.customize.conversion import OutputFormatPlugin
 | 
				
			||||||
from calibre.ptempfile import TemporaryDirectory
 | 
					from calibre.ptempfile import TemporaryDirectory
 | 
				
			||||||
from calibre.constants import __appname__, __version__
 | 
					from calibre.constants import __appname__, __version__
 | 
				
			||||||
from calibre import strftime, guess_type, prepare_string_for_xml
 | 
					from calibre import strftime, guess_type, prepare_string_for_xml, CurrentDir
 | 
				
			||||||
from calibre.customize.conversion import OptionRecommendation
 | 
					from calibre.customize.conversion import OptionRecommendation
 | 
				
			||||||
 | 
					from calibre.constants import filesystem_encoding
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from lxml import etree
 | 
					from lxml import etree
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -157,11 +158,9 @@ class EPUBOutput(OutputFormatPlugin):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        self.workaround_ade_quirks()
 | 
					        self.workaround_ade_quirks()
 | 
				
			||||||
        self.workaround_webkit_quirks()
 | 
					        self.workaround_webkit_quirks()
 | 
				
			||||||
        self.workaround_sony_quirks()
 | 
					 | 
				
			||||||
        from calibre.ebooks.oeb.transforms.rescale import RescaleImages
 | 
					        from calibre.ebooks.oeb.transforms.rescale import RescaleImages
 | 
				
			||||||
        RescaleImages()(oeb, opts)
 | 
					        RescaleImages()(oeb, opts)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
        from calibre.ebooks.oeb.transforms.split import Split
 | 
					        from calibre.ebooks.oeb.transforms.split import Split
 | 
				
			||||||
        split = Split(not self.opts.dont_split_on_page_breaks,
 | 
					        split = Split(not self.opts.dont_split_on_page_breaks,
 | 
				
			||||||
                max_flow_size=self.opts.flow_size*1024
 | 
					                max_flow_size=self.opts.flow_size*1024
 | 
				
			||||||
@ -170,6 +169,21 @@ class EPUBOutput(OutputFormatPlugin):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        self.insert_cover()
 | 
					        self.insert_cover()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        self.workaround_sony_quirks()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        from calibre.ebooks.oeb.base import OPF
 | 
				
			||||||
 | 
					        identifiers = oeb.metadata['identifier']
 | 
				
			||||||
 | 
					        uuid = None
 | 
				
			||||||
 | 
					        for x in identifiers:
 | 
				
			||||||
 | 
					            if x.get(OPF('scheme'), None).lower() == 'uuid' or unicode(x).startswith('urn:uuid:'):
 | 
				
			||||||
 | 
					                uuid = unicode(x).split(':')[-1]
 | 
				
			||||||
 | 
					                break
 | 
				
			||||||
 | 
					        if uuid is None:
 | 
				
			||||||
 | 
					            self.log.warn('No UUID identifier found')
 | 
				
			||||||
 | 
					            from uuid import uuid4
 | 
				
			||||||
 | 
					            uuid = str(uuid4())
 | 
				
			||||||
 | 
					            oeb.metadata.add('identifier', uuid, scheme='uuid', id=uuid)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        with TemporaryDirectory('_epub_output') as tdir:
 | 
					        with TemporaryDirectory('_epub_output') as tdir:
 | 
				
			||||||
            from calibre.customize.ui import plugin_for_output_format
 | 
					            from calibre.customize.ui import plugin_for_output_format
 | 
				
			||||||
            oeb_output = plugin_for_output_format('oeb')
 | 
					            oeb_output = plugin_for_output_format('oeb')
 | 
				
			||||||
@ -177,10 +191,16 @@ class EPUBOutput(OutputFormatPlugin):
 | 
				
			|||||||
            opf = [x for x in os.listdir(tdir) if x.endswith('.opf')][0]
 | 
					            opf = [x for x in os.listdir(tdir) if x.endswith('.opf')][0]
 | 
				
			||||||
            self.condense_ncx([os.path.join(tdir, x) for x in os.listdir(tdir)\
 | 
					            self.condense_ncx([os.path.join(tdir, x) for x in os.listdir(tdir)\
 | 
				
			||||||
                    if x.endswith('.ncx')][0])
 | 
					                    if x.endswith('.ncx')][0])
 | 
				
			||||||
 | 
					            encrypted_fonts = getattr(input_plugin, 'encrypted_fonts', [])
 | 
				
			||||||
 | 
					            encryption = None
 | 
				
			||||||
 | 
					            if encrypted_fonts:
 | 
				
			||||||
 | 
					                encryption = self.encrypt_fonts(encrypted_fonts, tdir, uuid)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            from calibre.ebooks.epub import initialize_container
 | 
					            from calibre.ebooks.epub import initialize_container
 | 
				
			||||||
            epub = initialize_container(output_path, os.path.basename(opf))
 | 
					            epub = initialize_container(output_path, os.path.basename(opf))
 | 
				
			||||||
            epub.add_dir(tdir)
 | 
					            epub.add_dir(tdir)
 | 
				
			||||||
 | 
					            if encryption is not None:
 | 
				
			||||||
 | 
					                epub.writestr('META-INF/encryption.xml', encryption)
 | 
				
			||||||
            if opts.extract_to is not None:
 | 
					            if opts.extract_to is not None:
 | 
				
			||||||
                if os.path.exists(opts.extract_to):
 | 
					                if os.path.exists(opts.extract_to):
 | 
				
			||||||
                    shutil.rmtree(opts.extract_to)
 | 
					                    shutil.rmtree(opts.extract_to)
 | 
				
			||||||
@ -189,6 +209,52 @@ class EPUBOutput(OutputFormatPlugin):
 | 
				
			|||||||
                self.log.info('EPUB extracted to', opts.extract_to)
 | 
					                self.log.info('EPUB extracted to', opts.extract_to)
 | 
				
			||||||
            epub.close()
 | 
					            epub.close()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def encrypt_fonts(self, uris, tdir, uuid):
					        '''
					        Obfuscate font files in place and build the matching encryption.xml.

					        :param uris: '/'-separated font URIs, resolved relative to ``tdir``.
					        :param tdir: Directory holding the files; it is made the current
					                     directory while the fonts are processed.
					        :param uuid: Identifier whose hexadecimal digits form the key.
					        :return: The encryption.xml content as a UTF-8 encoded string, or
					                 ``None`` if none of the URIs refers to an existing file.
					        :raises ValueError: if ``uuid`` has fewer than 16 hex digits.

					        Up to the first 1024 bytes of every existing font are XORed with a
					        16 byte key; the rest of the file is left untouched.
					        '''
					        from binascii import unhexlify
					        from xml.sax.saxutils import escape

					        key = re.sub(r'[^a-fA-F0-9]', '', uuid)
					        if len(key) < 16:
					            raise ValueError('UUID identifier %r is invalid'%uuid)
					        # Repeat the digits so that 32 of them (16 bytes) are always available.
					        key = bytearray(unhexlify((key + key)[:32]))
					        with CurrentDir(tdir):
					            paths = [os.path.join(*x.split('/')) for x in uris]
					            uris = dict(zip(uris, paths))
					            fonts = []
					            for uri in list(uris.keys()):
					                path = uris[uri]
					                if isinstance(path, unicode):
					                    path = path.encode(filesystem_encoding)
					                if not os.path.exists(path):
					                    uris.pop(uri)
					                    continue
					                self.log.debug('Encrypting font:', uri)
					                with open(path, 'r+b') as f:
					                    # The file may be shorter than 1024 bytes, so only
					                    # transform what was actually read.
					                    head = bytearray(f.read(1024))
					                    for i in range(len(head)):
					                        head[i] ^= key[i%16]
					                    f.seek(0)
					                    f.write(bytes(head))
					                if not isinstance(uri, unicode):
					                    uri = uri.decode('utf-8')
					                # The URI is placed inside a double quoted XML attribute, so
					                # escape &, <, > and " as XML entities.
					                fonts.append(u'''
					                <enc:EncryptedData>
					                    <enc:EncryptionMethod Algorithm="http://ns.adobe.com/pdf/enc#RC"/>
					                    <enc:CipherData>
					                    <enc:CipherReference URI="%s"/>
					                    </enc:CipherData>
					                </enc:EncryptedData>
					                '''%(escape(uri, {'"': '&quot;'})))
					            if not fonts:
					                return None
					            ans = '''<encryption
					                    xmlns="urn:oasis:names:tc:opendocument:xmlns:container"
					                    xmlns:enc="http://www.w3.org/2001/04/xmlenc#"
					                    xmlns:deenc="http://ns.adobe.com/digitaleditions/enc">
					                    '''
					            ans += (u'\n'.join(fonts)).encode('utf-8')
					            ans += '\n</encryption>'
					            return ans
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def default_cover(self):
 | 
					    def default_cover(self):
 | 
				
			||||||
        '''
 | 
					        '''
 | 
				
			||||||
        Create a generic cover for books that dont have a cover
 | 
					        Create a generic cover for books that dont have a cover
 | 
				
			||||||
 | 
				
			|||||||
@ -20,7 +20,7 @@ from itertools import izip
 | 
				
			|||||||
from calibre.customize.conversion import InputFormatPlugin
 | 
					from calibre.customize.conversion import InputFormatPlugin
 | 
				
			||||||
from calibre.ebooks.chardet import xml_to_unicode
 | 
					from calibre.ebooks.chardet import xml_to_unicode
 | 
				
			||||||
from calibre.customize.conversion import OptionRecommendation
 | 
					from calibre.customize.conversion import OptionRecommendation
 | 
				
			||||||
from calibre.constants import islinux
 | 
					from calibre.constants import islinux, isfreebsd
 | 
				
			||||||
from calibre import unicode_path
 | 
					from calibre import unicode_path
 | 
				
			||||||
from calibre.utils.localization import get_lang
 | 
					from calibre.utils.localization import get_lang
 | 
				
			||||||
from calibre.utils.filenames import ascii_filename
 | 
					from calibre.utils.filenames import ascii_filename
 | 
				
			||||||
@ -346,7 +346,7 @@ class HTMLInput(InputFormatPlugin):
 | 
				
			|||||||
        self.added_resources = {}
 | 
					        self.added_resources = {}
 | 
				
			||||||
        self.log = log
 | 
					        self.log = log
 | 
				
			||||||
        for path, href in htmlfile_map.items():
 | 
					        for path, href in htmlfile_map.items():
 | 
				
			||||||
            if not islinux:
 | 
					            if not (islinux or isfreebsd):
 | 
				
			||||||
                path = path.lower()
 | 
					                path = path.lower()
 | 
				
			||||||
            self.added_resources[path] = href
 | 
					            self.added_resources[path] = href
 | 
				
			||||||
        self.urlnormalize, self.DirContainer = urlnormalize, DirContainer
 | 
					        self.urlnormalize, self.DirContainer = urlnormalize, DirContainer
 | 
				
			||||||
@ -417,7 +417,7 @@ class HTMLInput(InputFormatPlugin):
 | 
				
			|||||||
        if os.path.isdir(link):
 | 
					        if os.path.isdir(link):
 | 
				
			||||||
            self.log.warn(link_, 'is a link to a directory. Ignoring.')
 | 
					            self.log.warn(link_, 'is a link to a directory. Ignoring.')
 | 
				
			||||||
            return link_
 | 
					            return link_
 | 
				
			||||||
        if not islinux:
 | 
					        if not (islinux or isfreebsd):
 | 
				
			||||||
            link = link.lower()
 | 
					            link = link.lower()
 | 
				
			||||||
        if link not in self.added_resources:
 | 
					        if link not in self.added_resources:
 | 
				
			||||||
            bhref = os.path.basename(link)
 | 
					            bhref = os.path.basename(link)
 | 
				
			||||||
 | 
				
			|||||||
@ -215,6 +215,28 @@ def merge_results(one, two):
 | 
				
			|||||||
        else:
 | 
					        else:
 | 
				
			||||||
            one[idx].smart_update(x)
 | 
					            one[idx].smart_update(x)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class MetadataSources(object):
					    '''
					    Drive a group of metadata sources as if they were a single one.

					    Entering, exiting, calling and joining the manager performs the same
					    operation on every object in ``sources``, in order.
					    '''

					    def __init__(self, sources):
					        # sources is iterated once per operation, so it must be re-iterable
					        self.sources = sources

					    def __enter__(self):
					        for s in self.sources:
					            s.__enter__()
					        return self

					    def __exit__(self, *args):
					        # Forward the (exc_type, exc_value, traceback) triple so that every
					        # source is exited according to the context manager protocol.
					        # The return value of the sources is ignored: exceptions raised
					        # inside the with block are never suppressed.
					        for s in self.sources:
					            s.__exit__(*args)

					    def __call__(self, *args, **kwargs):
					        for s in self.sources:
					            s(*args, **kwargs)

					    def join(self):
					        for s in self.sources:
					            s.join()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def search(title=None, author=None, publisher=None, isbn=None, isbndb_key=None,
 | 
					def search(title=None, author=None, publisher=None, isbn=None, isbndb_key=None,
 | 
				
			||||||
           verbose=0):
 | 
					           verbose=0):
 | 
				
			||||||
    assert not(title is None and author is None and publisher is None and \
 | 
					    assert not(title is None and author is None and publisher is None and \
 | 
				
			||||||
@ -224,11 +246,10 @@ def search(title=None, author=None, publisher=None, isbn=None, isbndb_key=None,
 | 
				
			|||||||
    if isbn is not None:
 | 
					    if isbn is not None:
 | 
				
			||||||
        isbn = re.sub(r'[^a-zA-Z0-9]', '', isbn).upper()
 | 
					        isbn = re.sub(r'[^a-zA-Z0-9]', '', isbn).upper()
 | 
				
			||||||
    fetchers = list(metadata_sources(isbndb_key=isbndb_key))
 | 
					    fetchers = list(metadata_sources(isbndb_key=isbndb_key))
 | 
				
			||||||
 | 
					    with MetadataSources(fetchers) as manager:
 | 
				
			||||||
 | 
					        manager(title, author, publisher, isbn, verbose)
 | 
				
			||||||
 | 
					        manager.join()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for fetcher in fetchers:
 | 
					 | 
				
			||||||
        fetcher(title, author, publisher, isbn, verbose)
 | 
					 | 
				
			||||||
    for fetcher in fetchers:
 | 
					 | 
				
			||||||
        fetcher.join()
 | 
					 | 
				
			||||||
    results = list(fetchers[0].results)
 | 
					    results = list(fetchers[0].results)
 | 
				
			||||||
    for fetcher in fetchers[1:]:
 | 
					    for fetcher in fetchers[1:]:
 | 
				
			||||||
        merge_results(results, fetcher.results)
 | 
					        merge_results(results, fetcher.results)
 | 
				
			||||||
@ -243,10 +264,9 @@ def search(title=None, author=None, publisher=None, isbn=None, isbndb_key=None,
 | 
				
			|||||||
def get_social_metadata(mi, verbose=0):
 | 
					def get_social_metadata(mi, verbose=0):
 | 
				
			||||||
    from calibre.customize.ui import metadata_sources
 | 
					    from calibre.customize.ui import metadata_sources
 | 
				
			||||||
    fetchers = list(metadata_sources(metadata_type='social'))
 | 
					    fetchers = list(metadata_sources(metadata_type='social'))
 | 
				
			||||||
    for fetcher in fetchers:
 | 
					    with MetadataSources(fetchers) as manager:
 | 
				
			||||||
        fetcher(mi.title, mi.authors, mi.publisher, mi.isbn, verbose)
 | 
					        manager(mi.title, mi.authors, mi.publisher, mi.isbn, verbose)
 | 
				
			||||||
    for fetcher in fetchers:
 | 
					        manager.join()
 | 
				
			||||||
        fetcher.join()
 | 
					 | 
				
			||||||
    ratings, tags, comments = [], set([]), set([])
 | 
					    ratings, tags, comments = [], set([]), set([])
 | 
				
			||||||
    for fetcher in fetchers:
 | 
					    for fetcher in fetchers:
 | 
				
			||||||
        if fetcher.results:
 | 
					        if fetcher.results:
 | 
				
			||||||
 | 
				
			|||||||
@ -70,6 +70,17 @@ def is_recipe(filename):
 | 
				
			|||||||
        filename.rpartition('.')[0].endswith('_recipe_out')
 | 
					        filename.rpartition('.')[0].endswith('_recipe_out')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def get_metadata(stream, stream_type='lrf', use_libprs_metadata=False):
 | 
					def get_metadata(stream, stream_type='lrf', use_libprs_metadata=False):
					    '''
					    Read metadata via :func:`_get_metadata` and put the stream back
					    where it was.

					    The position reported by ``stream.tell()`` (0 if the stream has no
					    ``tell``) is recorded before reading. If the stream has a ``seek``
					    method it is moved back to that position afterwards, whether or not
					    reading the metadata raised.
					    '''
					    start = stream.tell() if hasattr(stream, 'tell') else 0
					    try:
					        return _get_metadata(stream, stream_type, use_libprs_metadata)
					    finally:
					        if hasattr(stream, 'seek'):
					            stream.seek(start)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def _get_metadata(stream, stream_type, use_libprs_metadata):
 | 
				
			||||||
    if stream_type: stream_type = stream_type.lower()
 | 
					    if stream_type: stream_type = stream_type.lower()
 | 
				
			||||||
    if stream_type in ('html', 'html', 'xhtml', 'xhtm', 'xml'):
 | 
					    if stream_type in ('html', 'html', 'xhtml', 'xhtm', 'xml'):
 | 
				
			||||||
        stream_type = 'html'
 | 
					        stream_type = 'html'
 | 
				
			||||||
 | 
				
			|||||||
@ -97,9 +97,14 @@ class MetadataUpdater(object):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        self.nrecs, = unpack('>H', data[76:78])
 | 
					        self.nrecs, = unpack('>H', data[76:78])
 | 
				
			||||||
        record0 = self.record0 = self.record(0)
 | 
					        record0 = self.record0 = self.record(0)
 | 
				
			||||||
 | 
					        mobi_header_length, = unpack('>I', record0[0x14:0x18])
 | 
				
			||||||
 | 
					        if not mobi_header_length:
 | 
				
			||||||
 | 
					            raise MobiError("Non-standard file format.  Try 'Convert E-Books' with MOBI as Input and Output formats.")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        self.encryption_type, = unpack('>H', record0[12:14])
 | 
					        self.encryption_type, = unpack('>H', record0[12:14])
 | 
				
			||||||
        codepage, = unpack('>I', record0[28:32])
 | 
					        codepage, = unpack('>I', record0[28:32])
 | 
				
			||||||
        self.codec = 'utf-8' if codepage == 65001 else 'cp1252'
 | 
					        self.codec = 'utf-8' if codepage == 65001 else 'cp1252'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        image_base, = unpack('>I', record0[108:112])
 | 
					        image_base, = unpack('>I', record0[108:112])
 | 
				
			||||||
        flags, = self.flags, = unpack('>I', record0[128:132])
 | 
					        flags, = self.flags, = unpack('>I', record0[128:132])
 | 
				
			||||||
        have_exth = self.have_exth = (flags & 0x40) != 0
 | 
					        have_exth = self.have_exth = (flags & 0x40) != 0
 | 
				
			||||||
@ -306,9 +311,10 @@ class MetadataUpdater(object):
 | 
				
			|||||||
        return StreamSlicer(self.stream, start, stop)
 | 
					        return StreamSlicer(self.stream, start, stop)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def update(self, mi):
 | 
					    def update(self, mi):
 | 
				
			||||||
        def pop_exth_record(exth_id):
 | 
					        def update_exth_record(rec):
 | 
				
			||||||
            if exth_id in self.original_exth_records:
 | 
					            recs.append(rec)
 | 
				
			||||||
                self.original_exth_records.pop(exth_id)
 | 
					            if rec[0] in self.original_exth_records:
 | 
				
			||||||
 | 
					                self.original_exth_records.pop(rec[0])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if self.type != "BOOKMOBI":
 | 
					        if self.type != "BOOKMOBI":
 | 
				
			||||||
                raise MobiError("Setting metadata only supported for MOBI files of type 'BOOK'.\n"
 | 
					                raise MobiError("Setting metadata only supported for MOBI files of type 'BOOK'.\n"
 | 
				
			||||||
@ -323,47 +329,36 @@ class MetadataUpdater(object):
 | 
				
			|||||||
            pas = False
 | 
					            pas = False
 | 
				
			||||||
        if mi.author_sort and pas:
 | 
					        if mi.author_sort and pas:
 | 
				
			||||||
            authors = mi.author_sort
 | 
					            authors = mi.author_sort
 | 
				
			||||||
            recs.append((100, authors.encode(self.codec, 'replace')))
 | 
					            update_exth_record((100, authors.encode(self.codec, 'replace')))
 | 
				
			||||||
            pop_exth_record(100)
 | 
					 | 
				
			||||||
        elif mi.authors:
 | 
					        elif mi.authors:
 | 
				
			||||||
            authors = '; '.join(mi.authors)
 | 
					            authors = '; '.join(mi.authors)
 | 
				
			||||||
            recs.append((100, authors.encode(self.codec, 'replace')))
 | 
					            update_exth_record((100, authors.encode(self.codec, 'replace')))
 | 
				
			||||||
            pop_exth_record(100)
 | 
					 | 
				
			||||||
        if mi.publisher:
 | 
					        if mi.publisher:
 | 
				
			||||||
            recs.append((101, mi.publisher.encode(self.codec, 'replace')))
 | 
					            update_exth_record((101, mi.publisher.encode(self.codec, 'replace')))
 | 
				
			||||||
            pop_exth_record(101)
 | 
					 | 
				
			||||||
        if mi.comments:
 | 
					        if mi.comments:
 | 
				
			||||||
            recs.append((103, mi.comments.encode(self.codec, 'replace')))
 | 
					            update_exth_record((103, mi.comments.encode(self.codec, 'replace')))
 | 
				
			||||||
            pop_exth_record(103)
 | 
					 | 
				
			||||||
        if mi.isbn:
 | 
					        if mi.isbn:
 | 
				
			||||||
            recs.append((104, mi.isbn.encode(self.codec, 'replace')))
 | 
					            update_exth_record((104, mi.isbn.encode(self.codec, 'replace')))
 | 
				
			||||||
            pop_exth_record(104)
 | 
					 | 
				
			||||||
        if mi.tags:
 | 
					        if mi.tags:
 | 
				
			||||||
            subjects = '; '.join(mi.tags)
 | 
					            subjects = '; '.join(mi.tags)
 | 
				
			||||||
            recs.append((105, subjects.encode(self.codec, 'replace')))
 | 
					            update_exth_record((105, subjects.encode(self.codec, 'replace')))
 | 
				
			||||||
            pop_exth_record(105)
 | 
					 | 
				
			||||||
        if mi.pubdate:
 | 
					        if mi.pubdate:
 | 
				
			||||||
            recs.append((106, str(mi.pubdate).encode(self.codec, 'replace')))
 | 
					            update_exth_record((106, str(mi.pubdate).encode(self.codec, 'replace')))
 | 
				
			||||||
            pop_exth_record(106)
 | 
					 | 
				
			||||||
        elif mi.timestamp:
 | 
					        elif mi.timestamp:
 | 
				
			||||||
            recs.append((106, str(mi.timestamp).encode(self.codec, 'replace')))
 | 
					            update_exth_record((106, str(mi.timestamp).encode(self.codec, 'replace')))
 | 
				
			||||||
            pop_exth_record(106)
 | 
					 | 
				
			||||||
        elif self.timestamp:
 | 
					        elif self.timestamp:
 | 
				
			||||||
            recs.append((106, self.timestamp))
 | 
					            update_exth_record((106, self.timestamp))
 | 
				
			||||||
            pop_exth_record(106)
 | 
					 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
            recs.append((106, nowf().isoformat().encode(self.codec, 'replace')))
 | 
					            update_exth_record((106, nowf().isoformat().encode(self.codec, 'replace')))
 | 
				
			||||||
            pop_exth_record(106)
 | 
					 | 
				
			||||||
        if self.cover_record is not None:
 | 
					        if self.cover_record is not None:
 | 
				
			||||||
            recs.append((201, pack('>I', self.cover_rindex)))
 | 
					            update_exth_record((201, pack('>I', self.cover_rindex)))
 | 
				
			||||||
            recs.append((203, pack('>I', 0)))
 | 
					            update_exth_record((203, pack('>I', 0)))
 | 
				
			||||||
            pop_exth_record(201)
 | 
					 | 
				
			||||||
            pop_exth_record(203)
 | 
					 | 
				
			||||||
        if self.thumbnail_record is not None:
 | 
					        if self.thumbnail_record is not None:
 | 
				
			||||||
            recs.append((202, pack('>I', self.thumbnail_rindex)))
 | 
					            update_exth_record((202, pack('>I', self.thumbnail_rindex)))
 | 
				
			||||||
            pop_exth_record(202)
 | 
					        if 503 in self.original_exth_records:
 | 
				
			||||||
 | 
					            update_exth_record((503, mi.title.encode(self.codec, 'replace')))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # Restore any original EXTH fields that weren't updated
 | 
					        # Include remaining original EXTH fields
 | 
				
			||||||
        for id in sorted(self.original_exth_records):
 | 
					        for id in sorted(self.original_exth_records):
 | 
				
			||||||
            recs.append((id, self.original_exth_records[id]))
 | 
					            recs.append((id, self.original_exth_records[id]))
 | 
				
			||||||
        recs = sorted(recs, key=lambda x:(x[0],x[0]))
 | 
					        recs = sorted(recs, key=lambda x:(x[0],x[0]))
 | 
				
			||||||
 | 
				
			|||||||
@ -779,6 +779,9 @@ class OPF(object):
 | 
				
			|||||||
            self.set_text(matches[0], unicode(val))
 | 
					            self.set_text(matches[0], unicode(val))
 | 
				
			||||||
        return property(fget=fget, fset=fset)
 | 
					        return property(fget=fget, fset=fset)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def identifier_iter(self):
 | 
				
			||||||
 | 
					        for item in self.identifier_path(self.metadata):
 | 
				
			||||||
 | 
					            yield item
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def guess_cover(self):
 | 
					    def guess_cover(self):
 | 
				
			||||||
        '''
 | 
					        '''
 | 
				
			||||||
 | 
				
			|||||||
@ -8,9 +8,10 @@ Read metadata from RAR archives
 | 
				
			|||||||
'''
 | 
					'''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import os
 | 
					import os
 | 
				
			||||||
from cStringIO import StringIO
 | 
					
 | 
				
			||||||
from calibre.ptempfile import PersistentTemporaryFile
 | 
					from calibre.ptempfile import PersistentTemporaryFile, TemporaryDirectory
 | 
				
			||||||
from calibre.libunrar import extract_member, names
 | 
					from calibre.libunrar import extract_member, names
 | 
				
			||||||
 | 
					from calibre import CurrentDir
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def get_metadata(stream):
 | 
					def get_metadata(stream):
 | 
				
			||||||
    from calibre.ebooks.metadata.archive import is_comic
 | 
					    from calibre.ebooks.metadata.archive import is_comic
 | 
				
			||||||
@ -32,8 +33,10 @@ def get_metadata(stream):
 | 
				
			|||||||
            stream_type = stream_type[1:]
 | 
					            stream_type = stream_type[1:]
 | 
				
			||||||
            if stream_type in ('lit', 'opf', 'prc', 'mobi', 'fb2', 'epub',
 | 
					            if stream_type in ('lit', 'opf', 'prc', 'mobi', 'fb2', 'epub',
 | 
				
			||||||
                               'rb', 'imp', 'pdf', 'lrf'):
 | 
					                               'rb', 'imp', 'pdf', 'lrf'):
 | 
				
			||||||
                data = extract_member(path, match=None, name=f)[1]
 | 
					                with TemporaryDirectory() as tdir:
 | 
				
			||||||
                stream = StringIO(data)
 | 
					                    with CurrentDir(tdir):
 | 
				
			||||||
 | 
					                       stream = extract_member(path, match=None, name=f,
 | 
				
			||||||
 | 
					                               as_file=True)[1]
 | 
				
			||||||
                return get_metadata(stream, stream_type)
 | 
					                return get_metadata(stream, stream_type)
 | 
				
			||||||
    raise ValueError('No ebook found in RAR archive')
 | 
					    raise ValueError('No ebook found in RAR archive')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -149,7 +149,8 @@ class TOC(list):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    def read_ncx_toc(self, toc):
 | 
					    def read_ncx_toc(self, toc):
 | 
				
			||||||
        self.base_path = os.path.dirname(toc)
 | 
					        self.base_path = os.path.dirname(toc)
 | 
				
			||||||
        soup = NCXSoup(xml_to_unicode(open(toc, 'rb').read())[0])
 | 
					        raw  = xml_to_unicode(open(toc, 'rb').read(), assume_utf8=True)[0]
 | 
				
			||||||
 | 
					        soup = NCXSoup(raw)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        def process_navpoint(np, dest):
 | 
					        def process_navpoint(np, dest):
 | 
				
			||||||
            play_order = np.get('playOrder', None)
 | 
					            play_order = np.get('playOrder', None)
 | 
				
			||||||
@ -160,7 +161,7 @@ class TOC(list):
 | 
				
			|||||||
            if nl is not None:
 | 
					            if nl is not None:
 | 
				
			||||||
                text = u''
 | 
					                text = u''
 | 
				
			||||||
                for txt in nl.findAll(re.compile('text')):
 | 
					                for txt in nl.findAll(re.compile('text')):
 | 
				
			||||||
                    text += ''.join([unicode(s) for s in txt.findAll(text=True)])
 | 
					                    text += u''.join([unicode(s) for s in txt.findAll(text=True)])
 | 
				
			||||||
                content = np.find(re.compile('content'))
 | 
					                content = np.find(re.compile('content'))
 | 
				
			||||||
                if content is None or not content.has_key('src') or not txt:
 | 
					                if content is None or not content.has_key('src') or not txt:
 | 
				
			||||||
                    return
 | 
					                    return
 | 
				
			||||||
 | 
				
			|||||||
@ -43,6 +43,8 @@ def read_metadata_(task, tdir, notification=lambda x,y:x):
 | 
				
			|||||||
            import_map = {}
 | 
					            import_map = {}
 | 
				
			||||||
            for format in formats:
 | 
					            for format in formats:
 | 
				
			||||||
                nfp = run_plugins_on_import(format)
 | 
					                nfp = run_plugins_on_import(format)
 | 
				
			||||||
 | 
					                if nfp is None:
 | 
				
			||||||
 | 
					                    nfp = format
 | 
				
			||||||
                nfp = os.path.abspath(nfp)
 | 
					                nfp = os.path.abspath(nfp)
 | 
				
			||||||
                if isinstance(nfp, unicode):
 | 
					                if isinstance(nfp, unicode):
 | 
				
			||||||
                    nfp.encode(filesystem_encoding)
 | 
					                    nfp.encode(filesystem_encoding)
 | 
				
			||||||
 | 
				
			|||||||
@ -3,9 +3,10 @@ __license__   = 'GPL v3'
 | 
				
			|||||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 | 
					__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import os
 | 
					import os
 | 
				
			||||||
from zipfile import ZipFile
 | 
					 | 
				
			||||||
from cStringIO import StringIO
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from calibre.utils.zipfile import ZipFile
 | 
				
			||||||
 | 
					from calibre.ptempfile import TemporaryDirectory
 | 
				
			||||||
 | 
					from calibre import CurrentDir
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def get_metadata(stream):
 | 
					def get_metadata(stream):
 | 
				
			||||||
    from calibre.ebooks.metadata.meta import get_metadata
 | 
					    from calibre.ebooks.metadata.meta import get_metadata
 | 
				
			||||||
@ -23,8 +24,10 @@ def get_metadata(stream):
 | 
				
			|||||||
            stream_type = stream_type[1:]
 | 
					            stream_type = stream_type[1:]
 | 
				
			||||||
            if stream_type in ('lit', 'opf', 'prc', 'mobi', 'fb2', 'epub',
 | 
					            if stream_type in ('lit', 'opf', 'prc', 'mobi', 'fb2', 'epub',
 | 
				
			||||||
                               'rb', 'imp', 'pdf', 'lrf'):
 | 
					                               'rb', 'imp', 'pdf', 'lrf'):
 | 
				
			||||||
                stream = StringIO(zf.read(f))
 | 
					                with TemporaryDirectory() as tdir:
 | 
				
			||||||
                return get_metadata(stream, stream_type)
 | 
					                    with CurrentDir(tdir):
 | 
				
			||||||
 | 
					                        path = zf.extract(f)
 | 
				
			||||||
 | 
					                        return get_metadata(open(path, 'rb'), stream_type)
 | 
				
			||||||
    raise ValueError('No ebook found in ZIP archive')
 | 
					    raise ValueError('No ebook found in ZIP archive')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -154,7 +154,7 @@ class MOBIOutput(OutputFormatPlugin):
 | 
				
			|||||||
                MobiWriter, PALMDOC, UNCOMPRESSED
 | 
					                MobiWriter, PALMDOC, UNCOMPRESSED
 | 
				
			||||||
        from calibre.ebooks.mobi.mobiml import MobiMLizer
 | 
					        from calibre.ebooks.mobi.mobiml import MobiMLizer
 | 
				
			||||||
        from calibre.ebooks.oeb.transforms.manglecase import CaseMangler
 | 
					        from calibre.ebooks.oeb.transforms.manglecase import CaseMangler
 | 
				
			||||||
        from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
 | 
					        from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer, Unavailable
 | 
				
			||||||
        from calibre.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder
 | 
					        from calibre.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder
 | 
				
			||||||
        from calibre.customize.ui import plugin_for_input_format
 | 
					        from calibre.customize.ui import plugin_for_input_format
 | 
				
			||||||
        imagemax = PALM_MAX_IMAGE_SIZE if opts.rescale_images else None
 | 
					        imagemax = PALM_MAX_IMAGE_SIZE if opts.rescale_images else None
 | 
				
			||||||
@ -163,8 +163,11 @@ class MOBIOutput(OutputFormatPlugin):
 | 
				
			|||||||
            tocadder(oeb, opts)
 | 
					            tocadder(oeb, opts)
 | 
				
			||||||
        mangler = CaseMangler()
 | 
					        mangler = CaseMangler()
 | 
				
			||||||
        mangler(oeb, opts)
 | 
					        mangler(oeb, opts)
 | 
				
			||||||
        rasterizer = SVGRasterizer()
 | 
					        try:
 | 
				
			||||||
        rasterizer(oeb, opts)
 | 
					            rasterizer = SVGRasterizer()
 | 
				
			||||||
 | 
					            rasterizer(oeb, opts)
 | 
				
			||||||
 | 
					        except Unavailable:
 | 
				
			||||||
 | 
					            self.log.warn('SVG rasterizer unavailable, SVG will not be converted')
 | 
				
			||||||
        mobimlizer = MobiMLizer(ignore_tables=opts.linearize_tables)
 | 
					        mobimlizer = MobiMLizer(ignore_tables=opts.linearize_tables)
 | 
				
			||||||
        mobimlizer(oeb, opts)
 | 
					        mobimlizer(oeb, opts)
 | 
				
			||||||
        self.check_for_periodical()
 | 
					        self.check_for_periodical()
 | 
				
			||||||
 | 
				
			|||||||
@ -4,12 +4,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 | 
				
			|||||||
Read data from .mobi files
 | 
					Read data from .mobi files
 | 
				
			||||||
'''
 | 
					'''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import functools
 | 
					import functools, shutil, os, re, struct, textwrap, cStringIO, sys
 | 
				
			||||||
import os
 | 
					 | 
				
			||||||
import re
 | 
					 | 
				
			||||||
import struct
 | 
					 | 
				
			||||||
import textwrap
 | 
					 | 
				
			||||||
import cStringIO
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
try:
 | 
					try:
 | 
				
			||||||
    from PIL import Image as PILImage
 | 
					    from PIL import Image as PILImage
 | 
				
			||||||
@ -619,6 +614,16 @@ class MobiReader(object):
 | 
				
			|||||||
                * opf.cover.split('/'))):
 | 
					                * opf.cover.split('/'))):
 | 
				
			||||||
                opf.cover = None
 | 
					                opf.cover = None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        cover = opf.cover
 | 
				
			||||||
 | 
					        if cover is not None:
 | 
				
			||||||
 | 
					            cover = cover.replace('/', os.sep)
 | 
				
			||||||
 | 
					            if os.path.exists(cover):
 | 
				
			||||||
 | 
					                ncover = 'images'+os.sep+'calibre_cover.jpg'
 | 
				
			||||||
 | 
					                if os.path.exists(ncover):
 | 
				
			||||||
 | 
					                    os.remove(ncover)
 | 
				
			||||||
 | 
					                shutil.copyfile(cover, ncover)
 | 
				
			||||||
 | 
					            opf.cover = ncover.replace(os.sep, '/')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        manifest = [(htmlfile, 'application/xhtml+xml'),
 | 
					        manifest = [(htmlfile, 'application/xhtml+xml'),
 | 
				
			||||||
            (os.path.abspath('styles.css'), 'text/css')]
 | 
					            (os.path.abspath('styles.css'), 'text/css')]
 | 
				
			||||||
        bp = os.path.dirname(htmlfile)
 | 
					        bp = os.path.dirname(htmlfile)
 | 
				
			||||||
@ -796,15 +801,22 @@ class MobiReader(object):
 | 
				
			|||||||
def get_metadata(stream):
 | 
					def get_metadata(stream):
 | 
				
			||||||
    from calibre.utils.logging import Log
 | 
					    from calibre.utils.logging import Log
 | 
				
			||||||
    log = Log()
 | 
					    log = Log()
 | 
				
			||||||
 | 
					 | 
				
			||||||
    mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')])
 | 
					    mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')])
 | 
				
			||||||
    try:
 | 
					    mh = MetadataHeader(stream, log)
 | 
				
			||||||
        mh = MetadataHeader(stream, log)
 | 
					    if mh.title and mh.title != _('Unknown'):
 | 
				
			||||||
 | 
					        mi.title = mh.title
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if mh.exth is not None:
 | 
					    if mh.exth is not None:
 | 
				
			||||||
            if mh.exth.mi is not None:
 | 
					        if mh.exth.mi is not None:
 | 
				
			||||||
                mi = mh.exth.mi
 | 
					            mi = mh.exth.mi
 | 
				
			||||||
        else:
 | 
					    else:
 | 
				
			||||||
 | 
					        size = sys.maxint
 | 
				
			||||||
 | 
					        if hasattr(stream, 'seek') and hasattr(stream, 'tell'):
 | 
				
			||||||
 | 
					            pos = stream.tell()
 | 
				
			||||||
 | 
					            stream.seek(0, 2)
 | 
				
			||||||
 | 
					            size = stream.tell()
 | 
				
			||||||
 | 
					            stream.seek(pos)
 | 
				
			||||||
 | 
					        if size < 4*1024*1024:
 | 
				
			||||||
            with TemporaryDirectory('_mobi_meta_reader') as tdir:
 | 
					            with TemporaryDirectory('_mobi_meta_reader') as tdir:
 | 
				
			||||||
                with CurrentDir(tdir):
 | 
					                with CurrentDir(tdir):
 | 
				
			||||||
                    mr = MobiReader(stream, log)
 | 
					                    mr = MobiReader(stream, log)
 | 
				
			||||||
@ -812,16 +824,18 @@ def get_metadata(stream):
 | 
				
			|||||||
                    mr.extract_content(tdir, parse_cache)
 | 
					                    mr.extract_content(tdir, parse_cache)
 | 
				
			||||||
                    if mr.embedded_mi is not None:
 | 
					                    if mr.embedded_mi is not None:
 | 
				
			||||||
                        mi = mr.embedded_mi
 | 
					                        mi = mr.embedded_mi
 | 
				
			||||||
        if hasattr(mh.exth, 'cover_offset'):
 | 
					    if hasattr(mh.exth, 'cover_offset'):
 | 
				
			||||||
            cover_index = mh.first_image_index + mh.exth.cover_offset
 | 
					        cover_index = mh.first_image_index + mh.exth.cover_offset
 | 
				
			||||||
            data  = mh.section_data(int(cover_index))
 | 
					        data  = mh.section_data(int(cover_index))
 | 
				
			||||||
        else:
 | 
					    else:
 | 
				
			||||||
            data  = mh.section_data(mh.first_image_index)
 | 
					        data  = mh.section_data(mh.first_image_index)
 | 
				
			||||||
        buf = cStringIO.StringIO(data)
 | 
					    buf = cStringIO.StringIO(data)
 | 
				
			||||||
 | 
					    try:
 | 
				
			||||||
        im = PILImage.open(buf)
 | 
					        im = PILImage.open(buf)
 | 
				
			||||||
        obuf = cStringIO.StringIO()
 | 
					 | 
				
			||||||
        im.convert('RGBA').save(obuf, format='JPEG')
 | 
					 | 
				
			||||||
        mi.cover_data = ('jpg', obuf.getvalue())
 | 
					 | 
				
			||||||
    except:
 | 
					    except:
 | 
				
			||||||
        log.exception()
 | 
					        log.exception('Failed to read MOBI cover')
 | 
				
			||||||
 | 
					    else:
 | 
				
			||||||
 | 
					        obuf = cStringIO.StringIO()
 | 
				
			||||||
 | 
					        im.convert('RGB').save(obuf, format='JPEG')
 | 
				
			||||||
 | 
					        mi.cover_data = ('jpg', obuf.getvalue())
 | 
				
			||||||
    return mi
 | 
					    return mi
 | 
				
			||||||
 | 
				
			|||||||
@ -152,13 +152,17 @@ class EbookIterator(object):
 | 
				
			|||||||
                        prints('Substituting font family: %s -> %s'%(bad, good))
 | 
					                        prints('Substituting font family: %s -> %s'%(bad, good))
 | 
				
			||||||
                        return match.group().replace(bad, '"%s"'%good)
 | 
					                        return match.group().replace(bad, '"%s"'%good)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            from calibre.ebooks.chardet import force_encoding
 | 
				
			||||||
            for csspath in css_files:
 | 
					            for csspath in css_files:
 | 
				
			||||||
                with open(csspath, 'r+b') as f:
 | 
					                with open(csspath, 'r+b') as f:
 | 
				
			||||||
                    css = f.read()
 | 
					                    css = f.read()
 | 
				
			||||||
                    css = font_family_pat.sub(prepend_embedded_font, css)
 | 
					                    enc = force_encoding(css, False)
 | 
				
			||||||
                    f.seek(0)
 | 
					                    css = css.decode(enc, 'replace')
 | 
				
			||||||
                    f.truncate()
 | 
					                    ncss = font_family_pat.sub(prepend_embedded_font, css)
 | 
				
			||||||
                    f.write(css)
 | 
					                    if ncss != css:
 | 
				
			||||||
 | 
					                        f.seek(0)
 | 
				
			||||||
 | 
					                        f.truncate()
 | 
				
			||||||
 | 
					                        f.write(ncss.encode(enc))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def __enter__(self, processed=False):
 | 
					    def __enter__(self, processed=False):
 | 
				
			||||||
        self.delete_on_exit = []
 | 
					        self.delete_on_exit = []
 | 
				
			||||||
@ -173,11 +177,12 @@ class EbookIterator(object):
 | 
				
			|||||||
            plumber.opts.no_process = True
 | 
					            plumber.opts.no_process = True
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        plumber.input_plugin.for_viewer = True
 | 
					        plumber.input_plugin.for_viewer = True
 | 
				
			||||||
        self.pathtoopf = plumber.input_plugin(open(plumber.input, 'rb'),
 | 
					        with plumber.input_plugin:
 | 
				
			||||||
 | 
					            self.pathtoopf = plumber.input_plugin(open(plumber.input, 'rb'),
 | 
				
			||||||
                plumber.opts, plumber.input_fmt, self.log,
 | 
					                plumber.opts, plumber.input_fmt, self.log,
 | 
				
			||||||
                {}, self.base)
 | 
					                {}, self.base)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if processed or plumber.input_fmt.lower() in ('pdf', 'rb') and \
 | 
					        if processed or plumber.input_fmt.lower() in ('pdb', 'pdf', 'rb') and \
 | 
				
			||||||
                not hasattr(self.pathtoopf, 'manifest'):
 | 
					                not hasattr(self.pathtoopf, 'manifest'):
 | 
				
			||||||
            self.pathtoopf = create_oebbook(self.log, self.pathtoopf, plumber.opts,
 | 
					            self.pathtoopf = create_oebbook(self.log, self.pathtoopf, plumber.opts,
 | 
				
			||||||
                    plumber.input_plugin)
 | 
					                    plumber.input_plugin)
 | 
				
			||||||
 | 
				
			|||||||
@ -331,7 +331,10 @@ class OEBReader(object):
 | 
				
			|||||||
            id = child.get('id')
 | 
					            id = child.get('id')
 | 
				
			||||||
            klass = child.get('class', 'chapter')
 | 
					            klass = child.get('class', 'chapter')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            po = int(child.get('playOrder', self.oeb.toc.next_play_order()))
 | 
					            try:
 | 
				
			||||||
 | 
					                po = int(child.get('playOrder', self.oeb.toc.next_play_order()))
 | 
				
			||||||
 | 
					            except:
 | 
				
			||||||
 | 
					                po = self.oeb.toc.next_play_order()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            authorElement = xpath(child,
 | 
					            authorElement = xpath(child,
 | 
				
			||||||
                    'descendant::calibre:meta[@name = "author"]')
 | 
					                    'descendant::calibre:meta[@name = "author"]')
 | 
				
			||||||
 | 
				
			|||||||
@ -190,11 +190,11 @@ class Stylizer(object):
 | 
				
			|||||||
                    selector = CSSSelector(ntext)
 | 
					                    selector = CSSSelector(ntext)
 | 
				
			||||||
                    matches = selector(tree)
 | 
					                    matches = selector(tree)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            if not matches and class_sel_pat.match(text):
 | 
					            if not matches and class_sel_pat.match(text) and text.lower() != text:
 | 
				
			||||||
                found = False
 | 
					                found = False
 | 
				
			||||||
 | 
					                ltext = text.lower()
 | 
				
			||||||
                for x in tree.xpath('//*[@class]'):
 | 
					                for x in tree.xpath('//*[@class]'):
 | 
				
			||||||
                    if text.lower().endswith('.'+x.get('class').lower()) and \
 | 
					                    if ltext.endswith('.'+x.get('class').lower()):
 | 
				
			||||||
                            text.lower() != text:
 | 
					 | 
				
			||||||
                        matches.append(x)
 | 
					                        matches.append(x)
 | 
				
			||||||
                        found = True
 | 
					                        found = True
 | 
				
			||||||
                if found:
 | 
					                if found:
 | 
				
			||||||
 | 
				
			|||||||
@ -27,11 +27,14 @@ from calibre.ebooks.oeb.stylizer import Stylizer
 | 
				
			|||||||
IMAGE_TAGS = set([XHTML('img'), XHTML('object')])
 | 
					IMAGE_TAGS = set([XHTML('img'), XHTML('object')])
 | 
				
			||||||
KEEP_ATTRS = set(['class', 'style', 'width', 'height', 'align'])
 | 
					KEEP_ATTRS = set(['class', 'style', 'width', 'height', 'align'])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class Unavailable(Exception):
 | 
				
			||||||
 | 
					    pass
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class SVGRasterizer(object):
 | 
					class SVGRasterizer(object):
 | 
				
			||||||
    def __init__(self):
 | 
					    def __init__(self):
 | 
				
			||||||
        from calibre.gui2 import is_ok_to_use_qt
 | 
					        from calibre.gui2 import is_ok_to_use_qt
 | 
				
			||||||
        if not is_ok_to_use_qt():
 | 
					        if not is_ok_to_use_qt():
 | 
				
			||||||
            raise Exception('Not OK to use Qt')
 | 
					            raise Unavailable('Not OK to use Qt')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @classmethod
 | 
					    @classmethod
 | 
				
			||||||
    def config(cls, cfg):
 | 
					    def config(cls, cfg):
 | 
				
			||||||
 | 
				
			|||||||
@ -29,7 +29,7 @@ class RescaleImages(object):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        page_width, page_height = self.opts.dest.width, self.opts.dest.height
 | 
					        page_width, page_height = self.opts.dest.width, self.opts.dest.height
 | 
				
			||||||
        if not self.opts.is_image_collection:
 | 
					        if not getattr(self.opts, 'is_image_collection', False):
 | 
				
			||||||
            page_width -= (self.opts.margin_left + self.opts.margin_right) * self.opts.dest.dpi/72.
 | 
					            page_width -= (self.opts.margin_left + self.opts.margin_right) * self.opts.dest.dpi/72.
 | 
				
			||||||
            page_height -= (self.opts.margin_top + self.opts.margin_bottom) * self.opts.dest.dpi/72.
 | 
					            page_height -= (self.opts.margin_top + self.opts.margin_bottom) * self.opts.dest.dpi/72.
 | 
				
			||||||
        for item in self.oeb.manifest:
 | 
					        for item in self.oeb.manifest:
 | 
				
			||||||
 | 
				
			|||||||
@ -11,12 +11,14 @@ class PDBError(Exception):
 | 
				
			|||||||
from calibre.ebooks.pdb.ereader.reader import Reader as ereader_reader
 | 
					from calibre.ebooks.pdb.ereader.reader import Reader as ereader_reader
 | 
				
			||||||
from calibre.ebooks.pdb.palmdoc.reader import Reader as palmdoc_reader
 | 
					from calibre.ebooks.pdb.palmdoc.reader import Reader as palmdoc_reader
 | 
				
			||||||
from calibre.ebooks.pdb.ztxt.reader import Reader as ztxt_reader
 | 
					from calibre.ebooks.pdb.ztxt.reader import Reader as ztxt_reader
 | 
				
			||||||
 | 
					from calibre.ebooks.pdb.pdf.reader import Reader as pdf_reader
 | 
				
			||||||
 | 
					
 | 
				
			||||||
FORMAT_READERS = {
 | 
					FORMAT_READERS = {
 | 
				
			||||||
    'PNPdPPrs': ereader_reader,
 | 
					    'PNPdPPrs': ereader_reader,
 | 
				
			||||||
    'PNRdPPrs': ereader_reader,
 | 
					    'PNRdPPrs': ereader_reader,
 | 
				
			||||||
    'zTXTGPlm': ztxt_reader,
 | 
					    'zTXTGPlm': ztxt_reader,
 | 
				
			||||||
    'TEXtREAd': palmdoc_reader,
 | 
					    'TEXtREAd': palmdoc_reader,
 | 
				
			||||||
 | 
					    '.pdfADBE': pdf_reader,
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from calibre.ebooks.pdb.palmdoc.writer import Writer as palmdoc_writer
 | 
					from calibre.ebooks.pdb.palmdoc.writer import Writer as palmdoc_writer
 | 
				
			||||||
@ -34,8 +36,8 @@ IDENTITY_TO_NAME = {
 | 
				
			|||||||
    'PNRdPPrs': 'eReader',
 | 
					    'PNRdPPrs': 'eReader',
 | 
				
			||||||
    'zTXTGPlm': 'zTXT',
 | 
					    'zTXTGPlm': 'zTXT',
 | 
				
			||||||
    'TEXtREAd': 'PalmDOC',
 | 
					    'TEXtREAd': 'PalmDOC',
 | 
				
			||||||
 | 
					 | 
				
			||||||
    '.pdfADBE': 'Adobe Reader',
 | 
					    '.pdfADBE': 'Adobe Reader',
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    'BVokBDIC': 'BDicty',
 | 
					    'BVokBDIC': 'BDicty',
 | 
				
			||||||
    'DB99DBOS': 'DB (Database program)',
 | 
					    'DB99DBOS': 'DB (Database program)',
 | 
				
			||||||
    'vIMGView': 'FireViewer (ImageViewer)',
 | 
					    'vIMGView': 'FireViewer (ImageViewer)',
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										0
									
								
								src/calibre/ebooks/pdb/pdf/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								src/calibre/ebooks/pdb/pdf/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
								
								
									
										37
									
								
								src/calibre/ebooks/pdb/pdf/reader.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										37
									
								
								src/calibre/ebooks/pdb/pdf/reader.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,37 @@
 | 
				
			|||||||
 | 
					# -*- coding: utf-8 -*-
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					Read content from a PDF file embedded in a pdb container.
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					__license__   = 'GPL v3'
 | 
				
			||||||
 | 
					__copyright__ = '2010, John Schember <john@nachtimwald.com>'
 | 
				
			||||||
 | 
					__docformat__ = 'restructuredtext en'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from calibre.ebooks.pdb.formatreader import FormatReader
 | 
				
			||||||
 | 
					from calibre.ptempfile import TemporaryFile
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class Reader(FormatReader):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def __init__(self, header, stream, log, options):
 | 
				
			||||||
 | 
					        self.header = header
 | 
				
			||||||
 | 
					        self.stream = stream
 | 
				
			||||||
 | 
					        self.log = log
 | 
				
			||||||
 | 
					        self.options = options
 | 
				
			||||||
 | 
					        setattr(self.options, 'new_pdf_engine', False)
 | 
				
			||||||
 | 
					        setattr(self.options, 'no_images', False)
 | 
				
			||||||
 | 
					        setattr(self.options, 'unwrap_factor', 0.5)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def extract_content(self, output_dir):
 | 
				
			||||||
 | 
					        self.log.info('Extracting PDF...')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        with TemporaryFile() as pdf_n:
 | 
				
			||||||
 | 
					            pdf = open(pdf_n, 'rwb')
 | 
				
			||||||
 | 
					            for x in xrange(self.header.section_count()):
 | 
				
			||||||
 | 
					                pdf.write(self.header.section_data(x))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            from calibre.customize.ui import plugin_for_input_format
 | 
				
			||||||
 | 
					            pdf.seek(0)
 | 
				
			||||||
 | 
					            return plugin_for_input_format('pdf').convert(pdf, self.options,
 | 
				
			||||||
 | 
					                'pdf', self.log, [])
 | 
				
			||||||
@ -13,7 +13,7 @@ from functools import partial
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
from calibre.ebooks import ConversionError, DRMError
 | 
					from calibre.ebooks import ConversionError, DRMError
 | 
				
			||||||
from calibre.ptempfile import PersistentTemporaryFile
 | 
					from calibre.ptempfile import PersistentTemporaryFile
 | 
				
			||||||
from calibre import isosx, iswindows, islinux
 | 
					from calibre import isosx, iswindows, islinux, isfreebsd
 | 
				
			||||||
from calibre import CurrentDir
 | 
					from calibre import CurrentDir
 | 
				
			||||||
 | 
					
 | 
				
			||||||
PDFTOHTML = 'pdftohtml'
 | 
					PDFTOHTML = 'pdftohtml'
 | 
				
			||||||
@ -23,7 +23,7 @@ if isosx and hasattr(sys, 'frameworks_dir'):
 | 
				
			|||||||
if iswindows and hasattr(sys, 'frozen'):
 | 
					if iswindows and hasattr(sys, 'frozen'):
 | 
				
			||||||
    PDFTOHTML = os.path.join(os.path.dirname(sys.executable), 'pdftohtml.exe')
 | 
					    PDFTOHTML = os.path.join(os.path.dirname(sys.executable), 'pdftohtml.exe')
 | 
				
			||||||
    popen = partial(subprocess.Popen, creationflags=0x08) # CREATE_NO_WINDOW=0x08 so that no ugly console is popped up
 | 
					    popen = partial(subprocess.Popen, creationflags=0x08) # CREATE_NO_WINDOW=0x08 so that no ugly console is popped up
 | 
				
			||||||
if islinux and getattr(sys, 'frozen_path', False):
 | 
					if (islinux or isfreebsd) and getattr(sys, 'frozen_path', False):
 | 
				
			||||||
    PDFTOHTML = os.path.join(getattr(sys, 'frozen_path'), 'pdftohtml')
 | 
					    PDFTOHTML = os.path.join(getattr(sys, 'frozen_path'), 'pdftohtml')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def pdftohtml(output_dir, pdf_path, no_images):
 | 
					def pdftohtml(output_dir, pdf_path, no_images):
 | 
				
			||||||
 | 
				
			|||||||
@ -72,14 +72,14 @@ class PML_HTMLizer(object):
 | 
				
			|||||||
        'ra': ('<span id="r%s"></span><a href="#%s">', '</a>'),
 | 
					        'ra': ('<span id="r%s"></span><a href="#%s">', '</a>'),
 | 
				
			||||||
        'c': ('<div style="text-align: center; margin: auto;">', '</div>'),
 | 
					        'c': ('<div style="text-align: center; margin: auto;">', '</div>'),
 | 
				
			||||||
        'r': ('<div style="text-align: right;">', '</div>'),
 | 
					        'r': ('<div style="text-align: right;">', '</div>'),
 | 
				
			||||||
        't': ('<div style="margin-left: 5%;">', '</div>'),
 | 
					        't': ('<div style="text-indent: 5%;">', '</div>'),
 | 
				
			||||||
        'T': ('<div style="margin-left: %s;">', '</div>'),
 | 
					        'T': ('<div style="text-indent: %s;">', '</div>'),
 | 
				
			||||||
        'i': ('<span style="font-style: italic;">', '</span>'),
 | 
					        'i': ('<span style="font-style: italic;">', '</span>'),
 | 
				
			||||||
        'u': ('<span style="text-decoration: underline;">', '</span>'),
 | 
					        'u': ('<span style="text-decoration: underline;">', '</span>'),
 | 
				
			||||||
        'd': ('<span style="text-decoration: line-through;">', '</span>'),
 | 
					        'd': ('<span style="text-decoration: line-through;">', '</span>'),
 | 
				
			||||||
        'b': ('<span style="font-weight: bold;">', '</span>'),
 | 
					        'b': ('<span style="font-weight: bold;">', '</span>'),
 | 
				
			||||||
        'l': ('<span style="font-size: 150%;">', '</span>'),
 | 
					        'l': ('<span style="font-size: 150%;">', '</span>'),
 | 
				
			||||||
        'k': ('<span style="font-size: 75%;">', '</span>'),
 | 
					        'k': ('<span style="font-size: 75%; font-variant: small-caps;">', '</span>'),
 | 
				
			||||||
        'FN': ('<br /><br style="page-break-after: always;" /><div id="fn-%s"><p>', '</p><<small><a href="#rfn-%s">return</a></small></div>'),
 | 
					        'FN': ('<br /><br style="page-break-after: always;" /><div id="fn-%s"><p>', '</p><<small><a href="#rfn-%s">return</a></small></div>'),
 | 
				
			||||||
        'SB': ('<br /><br style="page-break-after: always;" /><div id="sb-%s"><p>', '</p><small><a href="#rsb-%s">return</a></small></div>'),
 | 
					        'SB': ('<br /><br style="page-break-after: always;" /><div id="sb-%s"><p>', '</p><small><a href="#rsb-%s">return</a></small></div>'),
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@ -154,6 +154,11 @@ class PML_HTMLizer(object):
 | 
				
			|||||||
        self.file_name = ''
 | 
					        self.file_name = ''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def prepare_pml(self, pml):
 | 
					    def prepare_pml(self, pml):
 | 
				
			||||||
 | 
					        # Give Chapters the form \\*='text'text\\*. This is used for generating
 | 
				
			||||||
 | 
					        # the TOC later.
 | 
				
			||||||
 | 
					        pml = re.sub(r'(?<=\\x)(?P<text>.*?)(?=\\x)', lambda match: '="%s"%s' % (self.strip_pml(match.group('text')), match.group('text')), pml)
 | 
				
			||||||
 | 
					        pml = re.sub(r'(?<=\\X[0-4])(?P<text>.*?)(?=\\X[0-4])', lambda match: '="%s"%s' % (self.strip_pml(match.group('text')), match.group('text')), pml)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # Remove comments
 | 
					        # Remove comments
 | 
				
			||||||
        pml = re.sub(r'(?mus)\\v(?P<text>.*?)\\v', '', pml)
 | 
					        pml = re.sub(r'(?mus)\\v(?P<text>.*?)\\v', '', pml)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -163,7 +168,7 @@ class PML_HTMLizer(object):
 | 
				
			|||||||
        pml = re.sub(r'(?mus)(?<=.)[ ]*$', '', pml)
 | 
					        pml = re.sub(r'(?mus)(?<=.)[ ]*$', '', pml)
 | 
				
			||||||
        pml = re.sub(r'(?mus)^[ ]*$', '', pml)
 | 
					        pml = re.sub(r'(?mus)^[ ]*$', '', pml)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # Footnotes and Sidebars
 | 
					        # Footnotes and Sidebars.
 | 
				
			||||||
        pml = re.sub(r'(?mus)<footnote\s+id="(?P<target>.+?)">\s*(?P<text>.*?)\s*</footnote>', lambda match: '\\FN="%s"%s\\FN' % (match.group('target'), match.group('text')) if match.group('text') else '', pml)
 | 
					        pml = re.sub(r'(?mus)<footnote\s+id="(?P<target>.+?)">\s*(?P<text>.*?)\s*</footnote>', lambda match: '\\FN="%s"%s\\FN' % (match.group('target'), match.group('text')) if match.group('text') else '', pml)
 | 
				
			||||||
        pml = re.sub(r'(?mus)<sidebar\s+id="(?P<target>.+?)">\s*(?P<text>.*?)\s*</sidebar>', lambda match: '\\SB="%s"%s\\SB' % (match.group('target'), match.group('text')) if match.group('text') else '', pml)
 | 
					        pml = re.sub(r'(?mus)<sidebar\s+id="(?P<target>.+?)">\s*(?P<text>.*?)\s*</sidebar>', lambda match: '\\SB="%s"%s\\SB' % (match.group('target'), match.group('text')) if match.group('text') else '', pml)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -171,9 +176,7 @@ class PML_HTMLizer(object):
 | 
				
			|||||||
        # &. It will display as &
 | 
					        # &. It will display as &
 | 
				
			||||||
        pml = pml.replace('&', '&')
 | 
					        pml = pml.replace('&', '&')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        pml = re.sub(r'(?<=\\x)(?P<text>.*?)(?=\\x)', lambda match: '="%s"%s' % (self.strip_pml(match.group('text')), match.group('text')), pml)
 | 
					        # Replace \\a and \\U with either the unicode character or the entity.
 | 
				
			||||||
        pml = re.sub(r'(?<=\\X[0-4])(?P<text>.*?)(?=\\X[0-4])', lambda match: '="%s"%s' % (self.strip_pml(match.group('text')), match.group('text')), pml)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        pml = re.sub(r'\\a(?P<num>\d{3})', lambda match: '&#%s;' % match.group('num'), pml)
 | 
					        pml = re.sub(r'\\a(?P<num>\d{3})', lambda match: '&#%s;' % match.group('num'), pml)
 | 
				
			||||||
        pml = re.sub(r'\\U(?P<num>[0-9a-f]{4})', lambda match: '%s' % my_unichr(int(match.group('num'), 16)), pml)
 | 
					        pml = re.sub(r'\\U(?P<num>[0-9a-f]{4})', lambda match: '%s' % my_unichr(int(match.group('num'), 16)), pml)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -536,6 +539,7 @@ class PML_HTMLizer(object):
 | 
				
			|||||||
                        elif '%s%s' % (c, l) == 'Sd':
 | 
					                        elif '%s%s' % (c, l) == 'Sd':
 | 
				
			||||||
                            text = self.process_code('Sd', line, 'sb')
 | 
					                            text = self.process_code('Sd', line, 'sb')
 | 
				
			||||||
                    elif c in 'xXC':
 | 
					                    elif c in 'xXC':
 | 
				
			||||||
 | 
					                        empty = False
 | 
				
			||||||
                        # The PML was modified earlier so x and X put the text
 | 
					                        # The PML was modified earlier so x and X put the text
 | 
				
			||||||
                        # inside of ="" so we don't have to do special processing
 | 
					                        # inside of ="" so we don't have to do special processing
 | 
				
			||||||
                        # for C.
 | 
					                        # for C.
 | 
				
			||||||
@ -578,10 +582,7 @@ class PML_HTMLizer(object):
 | 
				
			|||||||
                else:
 | 
					                else:
 | 
				
			||||||
                    if c != ' ':
 | 
					                    if c != ' ':
 | 
				
			||||||
                        empty = False
 | 
					                        empty = False
 | 
				
			||||||
                    if self.state['k'][0]:
 | 
					                    text = c
 | 
				
			||||||
                        text = c.upper()
 | 
					 | 
				
			||||||
                    else:
 | 
					 | 
				
			||||||
                        text = c
 | 
					 | 
				
			||||||
                parsed.append(text)
 | 
					                parsed.append(text)
 | 
				
			||||||
                c = line.read(1)
 | 
					                c = line.read(1)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -131,7 +131,7 @@ class PMLMLizer(object):
 | 
				
			|||||||
                if item.href in self.link_hrefs.keys():
 | 
					                if item.href in self.link_hrefs.keys():
 | 
				
			||||||
                    toc.append('* \\q="#%s"%s\\q\n' % (self.link_hrefs[item.href], item.title))
 | 
					                    toc.append('* \\q="#%s"%s\\q\n' % (self.link_hrefs[item.href], item.title))
 | 
				
			||||||
                else:
 | 
					                else:
 | 
				
			||||||
                    self.oeb.warn('Ignoring toc item: %s not found in document.' % item)
 | 
					                    self.oeb_book.warn('Ignoring toc item: %s not found in document.' % item)
 | 
				
			||||||
        return ''.join(toc)
 | 
					        return ''.join(toc)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def get_text(self):
 | 
					    def get_text(self):
 | 
				
			||||||
 | 
				
			|||||||
@ -131,9 +131,9 @@ class RtfTokenParser():
 | 
				
			|||||||
                if isString(self.tokens[i].name, "\\'"):
 | 
					                if isString(self.tokens[i].name, "\\'"):
 | 
				
			||||||
                    i = i + 1
 | 
					                    i = i + 1
 | 
				
			||||||
                    if not isinstance(self.tokens[i], tokenData):
 | 
					                    if not isinstance(self.tokens[i], tokenData):
 | 
				
			||||||
                        raise BaseException('Error: token8bitChar without data.')
 | 
					                        raise Exception('Error: token8bitChar without data.')
 | 
				
			||||||
                    if len(self.tokens[i].data) < 2:
 | 
					                    if len(self.tokens[i].data) < 2:
 | 
				
			||||||
                        raise BaseException('Error: token8bitChar without data.')
 | 
					                        raise Exception('Error: token8bitChar without data.')
 | 
				
			||||||
                    newTokens.append(token8bitChar(self.tokens[i].data[0:2]))
 | 
					                    newTokens.append(token8bitChar(self.tokens[i].data[0:2]))
 | 
				
			||||||
                    if len(self.tokens[i].data) > 2:
 | 
					                    if len(self.tokens[i].data) > 2:
 | 
				
			||||||
                        newTokens.append(tokenData(self.tokens[i].data[2:]))
 | 
					                        newTokens.append(tokenData(self.tokens[i].data[2:]))
 | 
				
			||||||
@ -195,7 +195,7 @@ class RtfTokenParser():
 | 
				
			|||||||
                            i = i + 1
 | 
					                            i = i + 1
 | 
				
			||||||
                            j = j + 1
 | 
					                            j = j + 1
 | 
				
			||||||
                            continue
 | 
					                            continue
 | 
				
			||||||
                        raise BaseException('Error: incorect utf replacement.')
 | 
					                        raise Exception('Error: incorect utf replacement.')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                    #calibre rtf2xml does not support utfreplace
 | 
					                    #calibre rtf2xml does not support utfreplace
 | 
				
			||||||
                    replace = []
 | 
					                    replace = []
 | 
				
			||||||
@ -248,7 +248,7 @@ class RtfTokenizer():
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
            if isChar(self.rtfData[i], '\\'):
 | 
					            if isChar(self.rtfData[i], '\\'):
 | 
				
			||||||
                if i + 1 >= len(self.rtfData):
 | 
					                if i + 1 >= len(self.rtfData):
 | 
				
			||||||
                    raise BaseException('Error: Control character found at the end of the document.')
 | 
					                    raise Exception('Error: Control character found at the end of the document.')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                if lastDataStart > -1:
 | 
					                if lastDataStart > -1:
 | 
				
			||||||
                    self.tokens.append(tokenData(self.rtfData[lastDataStart : i]))
 | 
					                    self.tokens.append(tokenData(self.rtfData[lastDataStart : i]))
 | 
				
			||||||
@ -269,7 +269,7 @@ class RtfTokenizer():
 | 
				
			|||||||
                        i = i + 1
 | 
					                        i = i + 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                    if not consumed:
 | 
					                    if not consumed:
 | 
				
			||||||
                        raise BaseException('Error (at:%d): Control Word without end.'%(tokenStart))
 | 
					                        raise Exception('Error (at:%d): Control Word without end.'%(tokenStart))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                    #we have numeric argument before delimiter
 | 
					                    #we have numeric argument before delimiter
 | 
				
			||||||
                    if isChar(self.rtfData[i], '-') or isDigit(self.rtfData[i]):
 | 
					                    if isChar(self.rtfData[i], '-') or isDigit(self.rtfData[i]):
 | 
				
			||||||
@ -283,10 +283,10 @@ class RtfTokenizer():
 | 
				
			|||||||
                            l = l + 1
 | 
					                            l = l + 1
 | 
				
			||||||
                            i = i + 1
 | 
					                            i = i + 1
 | 
				
			||||||
                            if l > 10 :
 | 
					                            if l > 10 :
 | 
				
			||||||
                                raise BaseException('Error (at:%d): Too many digits in control word numeric argument.'%[tokenStart])
 | 
					                                raise Exception('Error (at:%d): Too many digits in control word numeric argument.'%[tokenStart])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                        if not consumed:
 | 
					                        if not consumed:
 | 
				
			||||||
                            raise BaseException('Error (at:%d): Control Word without numeric argument end.'%[tokenStart])
 | 
					                            raise Exception('Error (at:%d): Control Word without numeric argument end.'%[tokenStart])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                    separator = ''
 | 
					                    separator = ''
 | 
				
			||||||
                    if isChar(self.rtfData[i], ' '):
 | 
					                    if isChar(self.rtfData[i], ' '):
 | 
				
			||||||
 | 
				
			|||||||
Some files were not shown because too many files have changed in this diff Show More
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user