Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-07 10:14:46 -04:00)

Commit 07e888f764: merging with trunk

145  Changelog.yaml
@@ -4,6 +4,151 @@
 # for important features/bug fixes.
 # Also, each release can have new and improved recipes.
 
+- version: 0.6.44
+  date: 2010-03-05
+
+  new features:
+    - title: "Experimental support for conversion of CHM files"
+      type: major
+      description: >
+        "Conversion and reading of metadata from CHM files is now supported. This feature is
+        still experimental, with more testing needed. Building from source on linux now
+        requires chmlib."
+
+    - title: "Experimental support for fetching annotations from the Kindle"
+      type: major
+      description: >
+        "calibre can now fetch annotations from your Kindle and put them into the
+        comments field. To fetch annotations, click the arrow next to the
+        'send to device' button and select 'Fetch Annotations', with your Kindle
+        connected."
+
+    - title: "Support FreeBSD out of the box (except USB)"
+      type: major
+      tickets: [4715]
+
+    - title: "News download scheduler: Don't try to download news when no active internet connection is present (linux/windows only)"
+
+    - title: "EPUB to EPUB conversion: Preserve font encryption"
+
+    - title: "calibre-server: Add --pidfile and --daemonize (unix only) options"
+
+    - title: "Plugins: When loading a plugin zip file that contains binary code (pyd/dll/so/dylib), extract it to a temp dir and add that to sys.path, instead of just adding the zip file to the path, as python cannot load compiled code from a zip file"
+
+  bug fixes:
+    - title: "Ebook-viewer: Handle non-ascii CSS files when doing font substitutions"
+
+    - title: "Conversion pipeline: Ignore non-integral play orders when parsing NCX files"
+
+    - title: "When decoding NCX toc files, if no encoding is declared and detection has less than 100% confidence, assume UTF-8."
+      tickets: [5039]
+
+    - title: "PML chapter definitions missing from toc.ncx"
+      tickets: [4990]
+
+    - title: "Unicode string for cover causes calibredb --output-format stanza to fail"
+      tickets: [5035]
+
+    - title: "Search cover:False fails, cover:True succeeds"
+      tickets: [5034]
+
+    - title: "Plugins: correctly use context"
+
+    - title: "MOBI Input: Don't lose cover if it is also referred to in main text"
+      tickets: [5020]
+
+    - title: "RTF Output: Don't choke on PNG images"
+
+  new recipes:
+    - title: Journal of Hospital Medicine, San Francisco Bay Guardian, Smithsonian Magazine
+      author: Krittika Goyal
+
+    - title: Astronomy Picture of the Day, Epicurious
+      author: Starson17
+
+    - title: Diario Vasco, Various Chilean newspapers
+      author: Darko Miletic
+
+    - title: Kukuburi
+      author: Mori
+
+  improved recipes:
+    - Ars Technica
+    - Fudzilla
+    - The Atlantic
+    - The Economist
+    - Huffington Post
+
+- version: 0.6.43
+  date: 2010-02-26
+
+  new features:
+    - title: "Support for the Teclast K3 and Elonex e-book readers"
+
+    - title: "Add 'Recently Read' category to catalog if Kindle is connected when catalog is generated"
+
+    - title: "When adding PRC/MOBI files that are actually Topaz files, change detected file type to Topaz"
+
+    - title: "MOBI Output: If the SVG rasterizer is not available, continue anyway"
+
+    - title: "News download: When using the debug pipeline options, create a zip file named periodical.downloaded_recipe in the debug directory. This can be passed to ebook-convert to directly convert a previous download into an e-book."
+
+    - title: "Add Apply button to catalog generation dialog"
+
+  bug fixes:
+    - title: "When fetching metadata in the edit metadata dialog, use a python thread instead of a Qt thread. Hopefully this will fix the reports of crashes when fetching metadata"
+
+    - title: "Refresh cover browser when a cover is changed via the edit meta information dialog"
+
+    - title: "More device detection debug output on OS X"
+
+    - title: "Download only covers should not also set social metadata"
+      tickets: [4966]
+
+    - title: "Content server: If binding to 0.0.0.0 fails, try detecting and binding only to the interface used for outgoing traffic"
+
+    - title: "Handle poorly designed import plugins that return None on error"
+
+    - title: "Move logic for removing inline navbars out of the BasicNewsRecipe class"
+
+    - title: "MOBI metadata: When setting title, set it in both the PalmDoc and EXTH headers"
+
+    - title: "MOBI metadata: Do not try to extract embedded metadata from MOBI files larger than 4MB"
+
+    - title: "Handle PDB files that contain PDF files"
+      tickets: [4971]
+
+    - title: "PML Input: Various fixes"
+      tickets: [4959, 4961]
+
+    - title: "Fix reading MOBI metadata from files in zip/rar archives"
+
+    - title: "Make extracting single files from RAR archives more efficient"
+
+    - title: "No longer need Qt to generate the default cover for news downloads"
+
+    - title: "Catalog generation: fix for EPUB anchors beginning with numbers in Recently Added"
+
+    - title: "Searching: Handle uppercase keywords correctly"
+      tickets: [4951]
+
+  new recipes:
+    - title: Gamasutra
+      author: Darko Miletic
+
+  improved recipes:
+    - "Strategy+Business"
+    - Arizona Daily Star
+    - Heise
+    - New Scientist
+    - Various Serbian news feeds
+    - Houston and San Francisco Chronicles
+
 - version: 0.6.42
   date: 2010-02-20
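The plugin-loading entry above describes a workaround worth spelling out: Python's import machinery can load pure-Python modules directly from a zip archive, but it cannot load compiled extensions from one. A minimal sketch of the strategy, with a hypothetical helper name and path handling (the actual calibre implementation differs in its details):

    import sys
    import zipfile
    from tempfile import mkdtemp

    BINARY_EXTS = ('.pyd', '.dll', '.so', '.dylib')

    def add_plugin_to_path(plugin_zip):
        # Hypothetical helper: compiled extensions cannot be imported from
        # inside a zip file, so extract first whenever any are present.
        zf = zipfile.ZipFile(plugin_zip)
        if any(name.lower().endswith(BINARY_EXTS) for name in zf.namelist()):
            tdir = mkdtemp(prefix='plugin-')
            zf.extractall(tdir)
            sys.path.insert(0, tdir)
        else:
            # Pure-python plugins can stay zipped.
            sys.path.insert(0, plugin_zip)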
@@ -79,9 +79,24 @@ p.unread_book {
     text-indent:-2em;
 }
 
+p.date_read {
+    text-align:left;
+    margin-top:0px;
+    margin-bottom:0px;
+    margin-left:6em;
+    text-indent:-6em;
+}
+
 hr.series_divider {
     width:50%;
     margin-left:1em;
     margin-top:0em;
     margin-bottom:0em;
 }
+
+hr.annotations_divider {
+    width:50%;
+    margin-left:1em;
+    margin-top:0em;
+    margin-bottom:0em;
+}
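A note on the new p.date_read rule: pairing margin-left:6em with an equal negative text-indent:-6em produces a hanging indent, so the first line of each date entry sits flush left while any wrapped lines are indented by 6em, the same trick the existing p.unread_book rule uses at 2em.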
BIN  (binary file not shown; Before: 116 KiB, After: 124 KiB)
BIN  resources/images/news/diariovasco.png    Normal file (new, 766 B)
BIN  resources/images/news/gamasutra_fa.png   Normal file (new, 956 B)
BIN  resources/images/news/gamasutra_news.png Normal file (new, 956 B)
@@ -1,7 +1,6 @@
-#!/usr/bin/env python
 
 __license__   = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
 
 '''
 24sata.rs
@@ -9,7 +8,6 @@
 
 import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import Tag
 
 class Ser24Sata(BasicNewsRecipe):
     title = '24 Sata - Sr'
@@ -17,22 +15,20 @@ class Ser24Sata(BasicNewsRecipe):
     description           = '24 sata portal vesti iz Srbije'
     publisher             = 'Ringier d.o.o.'
     category              = 'news, politics, entertainment, Serbia'
-    oldest_article        = 7
+    oldest_article        = 2
     max_articles_per_feed = 100
     no_stylesheets        = True
     encoding              = 'utf-8'
     use_embedded_content  = False
     language              = 'sr'
-    lang                  = 'sr-Latn-RS'
-
-    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
+    extra_css             = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
 
     conversion_options = {
                           'comment'   : description
                         , 'tags'      : category
                         , 'publisher' : publisher
-                        , 'language'  : lang
-                        , 'pretty_print'     : True
+                        , 'language'  : language
+                        , 'linearize_tables' : True
                          }
 
     preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@@ -40,25 +36,6 @@ class Ser24Sata(BasicNewsRecipe):
     feeds = [(u'Vesti Dana', u'http://www.24sata.rs/rss.php')]
 
     def preprocess_html(self, soup):
-        soup.html['xml:lang'] = self.lang
-        soup.html['lang']     = self.lang
-
-        attribs = [ 'style','font','valign'
-                   ,'colspan','width','height'
-                   ,'rowspan','summary','align'
-                   ,'cellspacing','cellpadding'
-                   ,'frames','rules','border'
-                  ]
-        for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
-            item.name = 'div'
-            for attrib in attribs:
-                if item.has_key(attrib):
-                    del item[attrib]
-
-        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
-        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
-        soup.head.insert(0,mlang)
-        soup.head.insert(1,mcharset)
         return self.adeify_images(soup)
 
     def print_version(self, url):
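This commit applies the same simplification to several of the Serbian recipes below: the custom lang attribute, the hand-rolled table-to-div rewriting, and the manually inserted meta tags all disappear in favor of the standard language field, the linearize_tables conversion option, and adeify_images. A minimal sketch of the resulting pattern (the class name, title, and feed URL are hypothetical):

    from calibre.web.feeds.recipes import BasicNewsRecipe

    class SimplifiedRecipe(BasicNewsRecipe):
        title    = 'Example Feed'   # hypothetical
        language = 'sr'

        conversion_options = {
            'language'         : language,
            'linearize_tables' : True,  # replaces the manual table -> div loop
        }

        feeds = [(u'Vesti', u'http://example.com/rss.xml')]  # hypothetical URL

        def preprocess_html(self, soup):
            # Image fixing for Adobe viewers is all that is left to do by hand.
            return self.adeify_images(soup)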
37  resources/recipes/apod.recipe  Normal file
@@ -0,0 +1,37 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class APOD(BasicNewsRecipe):
+    title = u'Astronomy Picture of the Day'
+    __author__ = 'Starson17'
+    description = 'Astronomy Pictures'
+    language = 'en'
+    use_embedded_content = False
+    no_stylesheets = True
+    cover_url = 'http://apod.nasa.gov/apod/image/1003/m78_torregrosa.jpg'
+    remove_javascript = True
+    recursions = 0
+    oldest_article = 14
+
+    feeds = [
+        (u'Astronomy Picture of the Day', u'http://apod.nasa.gov/apod.rss')
+    ]
+
+    extra_css = '''
+        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+    '''
+
+    def postprocess_html(self, soup, first_fetch):
+        center_tags = soup.findAll(['center'])
+        p_tags = soup.findAll(['p'])
+        last_center = center_tags[-1:]
+        last_center[0].extract()
+        first_p = p_tags[:1]
+        for tag in first_p:
+            tag.extract()
+        last2_p = p_tags[-2:]
+        for tag in last2_p:
+            tag.extract()
+        return soup
@@ -5,6 +5,7 @@ __copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
 arstechnica.com
 '''
 
+import re
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
 
@@ -20,7 +21,7 @@ class ArsTechnica2(BasicNewsRecipe):
     no_stylesheets       = True
     encoding             = 'utf-8'
     use_embedded_content = False
-    extra_css = ' body {font-family: sans-serif} .byline{font-weight: bold; line-height: 1em; font-size: 0.625em; text-decoration: none} '
+    extra_css = ' body {font-family: Arial,Helvetica,sans-serif} .title{text-align: left} .byline{font-weight: bold; line-height: 1em; font-size: 0.625em; text-decoration: none} '
 
     conversion_options = {
         'comments' : description
@@ -30,6 +31,10 @@ class ArsTechnica2(BasicNewsRecipe):
     }
 
 
+    preprocess_regexps = [
+        (re.compile(r'<div class="news-item-figure', re.DOTALL|re.IGNORECASE),lambda match: '<div class="news-item-figure"')
+       ,(re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE),lambda match: '</title></head>')
+                         ]
 
     keep_only_tags = [dict(name='div', attrs={'id':['story','etc-story']})]
 
@@ -37,7 +42,7 @@ class ArsTechnica2(BasicNewsRecipe):
         dict(name=['object','link','embed'])
        ,dict(name='div', attrs={'class':'read-more-link'})
                   ]
-
+    remove_attributes=['width','height']
 
     feeds = [
         (u'Infinite Loop (Apple content)' , u'http://feeds.arstechnica.com/arstechnica/apple/' )
@@ -90,3 +95,5 @@ class ArsTechnica2(BasicNewsRecipe):
 
         return soup
 
+    def get_article_url(self, article):
+        return article.get('guid', None).rpartition('?')[0]
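The new get_article_url trims everything after the last '?' from the feed GUID; rpartition splits on the final occurrence of the separator and index [0] keeps the part before it. A quick check with a hypothetical URL:

    >>> u'http://arstechnica.com/story.ars?utm_source=rss'.rpartition('?')[0]
    u'http://arstechnica.com/story.ars'

Note that when the separator is absent, rpartition returns the whole string in the last slot, so [0] would be the empty string; the recipe evidently assumes the GUIDs always carry a query string.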
@@ -5,76 +5,103 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 '''
 theatlantic.com
 '''
-import re
+import string
 
 from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import Tag, NavigableString
 
 class TheAtlantic(BasicNewsRecipe):
 
     title = 'The Atlantic'
     __author__ = 'Kovid Goyal and Sujata Raman'
     description = 'Current affairs and politics focussed on the US'
-    INDEX = 'http://www.theatlantic.com/doc/current'
+    INDEX = 'http://www.theatlantic.com/magazine/toc/0/'
     language = 'en'
 
-    remove_tags_before = dict(name='div', id='storytop')
-    remove_tags = [
-        dict(name='div', id=['seealso','storybottom', 'footer', 'ad_banner_top', 'sidebar','articletoolstop','subcontent',]),
-        dict(name='p', attrs={'id':["pagination"]}),
-        dict(name='table',attrs={'class':"tools"}),
-        dict(name='style'),
-        dict(name='a', href='/a/newsletters.mhtml')
-    ]
-    remove_attributes = ['icap', 'callout', 'style']
-    no_stylesheets = True
-    conversion_options = { 'linearize_tables':True }
-
-    extra_css = '''
-        #timestamp{font-family:Arial,Helvetica,sans-serif; color:#666666 ;font-size:x-small}
-        #storytype{font-family:Arial,Helvetica,sans-serif; color:#D52B1E ;font-weight:bold; font-size:x-small}
-        h2{font-family:georgia,serif; font-style:italic;font-size:x-small;font-weight:normal;}
-        h1{font-family:georgia,serif; font-weight:bold; font-size:large}
-        #byline{font-family:georgia,serif; font-weight:bold; font-size:x-small}
-        #topgraf{font-family:Arial,Helvetica,sans-serif;font-size:x-small;font-weight:bold;}
-        .artsans{{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
-    '''
+    remove_tags_before = dict(name='div', id='articleHead')
+    remove_tags_after  = dict(id='copyright')
+    remove_tags        = [dict(id=['header', 'printAds', 'pageControls'])]
+    no_stylesheets     = True
+
+    def print_version(self, url):
+        return url.replace('/archive/', '/print/')
+
     def parse_index(self):
         articles = []
 
         soup = self.index_to_soup(self.INDEX)
+        sectit = soup.find('h1', attrs={'class':'sectionTitle'})
+        if sectit is not None:
+            texts = sectit.findAll('cufontext')
+            texts = map(self.tag_to_string, texts[-2:])
+            self.timefmt = ' [%s]'%(''.join(texts))
+
-        issue = soup.find('span', attrs={'class':'issue'})
-        if issue:
-            self.timefmt = ' [%s]'%self.tag_to_string(issue).rpartition('|')[-1].strip().replace('/', '-')
-
-        cover = soup.find('img', alt=re.compile('Cover'), src=True)
+        cover = soup.find('img', src=True, attrs={'class':'cover'})
         if cover is not None:
-            self.cover_url = 'http://theatlantic.com'+cover['src']
+            self.cover_url = cover['src']
 
-        for item in soup.findAll('div', attrs={'class':'item'}):
-            a = item.find('a')
-            if a and a.has_key('href'):
-                url = a['href']
-                if not url.startswith('http://'):
-                    url = 'http://www.theatlantic.com/'+url
-                url = url.replace('/doc/', '/doc/print/')
-                title = self.tag_to_string(a)
-                if title in ('VIDEO', 'AUDIO', 'INTERACTIVE MAP', 'SIDEBAR', 'RECIPES'):
-                    continue
-                title = title.replace('&amp;', '&')
-                byline = item.find(attrs={'class':'byline'})
-                date = self.tag_to_string(byline) if byline else ''
-                description = ''
-
-                self.log('\tFound article:', title)
-                self.log('\t\t', url)
-
-                articles.append({
-                    'title':title,
-                    'date':date,
-                    'url':url,
-                    'description':description
-                })
-
-        return [('Current Issue', articles)]
+        feeds = []
+        for section in soup.findAll('div', attrs={'class':'magazineSection'}):
+            section_title = section.find(attrs={'class':'sectionHeader'})
+            section_title = string.capwords(self.tag_to_string(section_title))
+            self.log('Found section:', section_title)
+            articles = []
+            for post in section.findAll('div', attrs={'class':'post'}):
+                h = post.find(['h3', 'h4'])
+                title = self.tag_to_string(h)
+                a = post.find('a', href=True)
+                url = a['href']
+                if url.startswith('/'):
+                    url = 'http://www.theatlantic.com'+url
+                p = post.find('p', attrs={'class':'dek'})
+                desc = None
+                self.log('\tFound article:', title, 'at', url)
+                if p is not None:
+                    desc = self.tag_to_string(p)
+                    self.log('\t\t', desc)
+                articles.append({'title':title, 'url':url, 'description':desc,
+                    'date':''})
+            feeds.append((section_title, articles))
+
+        poems = []
+        self.log('Found section: Poems')
+        for poem in soup.findAll('div', attrs={'class':'poem'}):
+            title = self.tag_to_string(poem.find('h4'))
+            desc  = self.tag_to_string(poem.find(attrs={'class':'author'}))
+            url   = 'http://www.theatlantic.com'+poem.find('a')['href']
+            self.log('\tFound article:', title, 'at', url)
+            self.log('\t\t', desc)
+            poems.append({'title':title, 'url':url, 'description':desc,
+                'date':''})
+        if poems:
+            feeds.append(('Poems', poems))
+
+        self.log('Found section: Advice')
+        div = soup.find(id='advice')
+        title = self.tag_to_string(div.find('h4'))
+        url = 'http://www.theatlantic.com'+div.find('a')['href']
+        desc = self.tag_to_string(div.find('p'))
+        self.log('\tFound article:', title, 'at', url)
+        self.log('\t\t', desc)
+
+        feeds.append(('Advice', [{'title':title, 'url':url, 'description':desc,
+            'date':''}]))
+        return feeds
+
+    def postprocess_html(self, soup, first):
+        for table in soup.findAll('table', align='right'):
+            img = table.find('img')
+            if img is not None:
+                img.extract()
+                caption = self.tag_to_string(table).strip()
+                div = Tag(soup, 'div')
+                div['style'] = 'text-align:center'
+                div.insert(0, img)
+                div.insert(1, Tag(soup, 'br'))
+                if caption:
+                    div.insert(2, NavigableString(caption))
+                table.replaceWith(div)
+
+        return soup
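For context, the rewritten parse_index builds the structure calibre expects from that method: a list of (feed title, article list) pairs, where each article is a dict with title, url, description and date keys. Sketched with hypothetical values:

    feeds = [
        ('Features', [
            {'title': 'An Article', 'url': 'http://example.com/a',
             'description': 'One-line summary', 'date': ''},
        ]),
    ]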
@@ -1,10 +1,10 @@
 
 __license__   = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
-www.azstarnet.com
+azstarnet.com
 '''
+import urllib
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class Azstarnet(BasicNewsRecipe):
@@ -14,12 +14,12 @@ class Azstarnet(BasicNewsRecipe):
     language = 'en'
     publisher = 'azstarnet.com'
     category = 'news, politics, Arizona, USA'
-    delay = 1
     oldest_article = 3
     max_articles_per_feed = 100
     no_stylesheets = True
     use_embedded_content = False
     encoding = 'utf-8'
+    masthead_url = 'http://azstarnet.com/content/tncms/live/global/resources/images/logo.gif'
     needs_subscription = True
 
     conversion_options = {
@@ -32,31 +32,27 @@ class Azstarnet(BasicNewsRecipe):
 
     def get_browser(self):
        br = BasicNewsRecipe.get_browser()
+       br.open('http://azstarnet.com/')
        if self.username is not None and self.password is not None:
-          br.open('http://azstarnet.com/registration/retro.php')
-          br.select_form(nr=1)
-          br['email'] = self.username
-          br['pass' ] = self.password
-          br.submit()
+          data = urllib.urlencode({ 'm':'login'
+                                   ,'u':self.username
+                                   ,'p':self.password
+                                   ,'z':'http://azstarnet.com/'
+                                  })
+          br.open('http://azstarnet.com/app/registration/proxy.php',data)
        return br
 
-    keep_only_tags = [dict(name='div', attrs={'id':'storycontent'})]
-
-    remove_tags = [
-        dict(name=['object','link','iframe','base','img'])
-       ,dict(name='div',attrs={'class':'bannerinstory'})
-                  ]
+    remove_tags = [dict(name=['object','link','iframe','base','img'])]
 
     feeds = [
-        (u'Tucson Region' , u'http://rss.azstarnet.com/index.php?site=metro')
-       ,(u'Sports'        , u'http://rss.azstarnet.com/index.php?site=sports')
-       ,(u'Business'      , u'http://rss.azstarnet.com/index.php?site=biz-topheadlines')
-       ,(u'Nation-World'  , u'http://rss.azstarnet.com/index.php?site=news')
-       ,(u'Opinion'       , u'http://rss.azstarnet.com/index.php?site=opinion')
-       ,(u'Lifestyle'     , u'http://rss.azstarnet.com/index.php?site=accent')
-       ,(u'Food'          , u'http://rss.azstarnet.com/index.php?site=food')
+        (u'Local News'    , u'http://azstarnet.com/search/?f=rss&t=article&c=news/local&l=25&s=start_time&sd=desc')
+       ,(u'National News' , u'http://azstarnet.com/search/?f=rss&t=article&c=news/national&l=25&s=start_time&sd=desc')
+       ,(u'World News'    , u'http://azstarnet.com/search/?f=rss&t=article&c=news/world&l=25&s=start_time&sd=desc')
+       ,(u'Sports'        , u'http://azstarnet.com/search/?f=rss&t=article&c=sports&l=25&s=start_time&sd=desc')
+       ,(u'Opinion'       , u'http://azstarnet.com/search/?f=rss&t=article&c=news/opinion&l=25&s=start_time&sd=desc')
+       ,(u'Movies'        , u'http://azstarnet.com/search/?f=rss&t=article&c=entertainment/movies&l=25&s=start_time&sd=desc')
+       ,(u'Food'          , u'http://azstarnet.com/search/?f=rss&t=article&c=lifestyles/food-and-cooking&l=25&s=start_time&sd=desc')
            ]
 
     def preprocess_html(self, soup):
@@ -64,4 +60,6 @@ class Azstarnet(BasicNewsRecipe):
             del item['style']
         return soup
 
+    def print_version(self, url):
+        return url + '?print=1'
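The new login flow posts credentials directly to a login endpoint instead of driving the HTML form via select_form/submit; urllib.urlencode turns the field mapping into a form-encoded body, and passing that body to the browser's open() makes it a POST. A small illustration with hypothetical credentials (a list of pairs keeps the field order deterministic for the example):

    import urllib

    print urllib.urlencode([('m', 'login'), ('u', 'user'),
                            ('p', 'secret'), ('z', 'http://azstarnet.com/')])
    # m=login&u=user&p=secret&z=http%3A%2F%2Fazstarnet.com%2F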
@@ -1,7 +1,6 @@
-#!/usr/bin/env python
 
 __license__   = 'GPL v3'
-__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 b92.net
 '''
@@ -19,16 +18,15 @@ class B92(BasicNewsRecipe):
     no_stylesheets       = True
     use_embedded_content = False
     encoding             = 'cp1250'
     language             = 'sr'
-    lang                 = 'sr-Latn-RS'
-    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
+    extra_css            = ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif} '
 
     conversion_options = {
                           'comment'   : description
                         , 'tags'      : category
                         , 'publisher' : publisher
-                        , 'language'  : lang
+                        , 'language'  : language
+                        , 'linearize_tables' : True
                          }
 
     preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@@ -50,20 +48,5 @@ class B92(BasicNewsRecipe):
         return url + '&version=print'
 
     def preprocess_html(self, soup):
-        del soup.body['onload']
-        for item in soup.findAll('font'):
-            item.name='div'
-            if item.has_key('size'):
-                del item['size']
-        attribs = [ 'style','font','valign'
-                   ,'colspan','width','height'
-                   ,'rowspan','summary','align'
-                   ,'cellspacing','cellpadding'
-                   ,'frames','rules','border'
-                  ]
-        for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
-            item.name = 'div'
-            for attrib in attribs:
-                if item.has_key(attrib):
-                    del item[attrib]
-        return soup
+        return self.adeify_images(soup)
@@ -1,13 +1,11 @@
-#!/usr/bin/env python
 
 __license__   = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 beta.rs
 '''
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import Tag
 
 class Danas(BasicNewsRecipe):
     title = 'BETA'
@@ -19,18 +17,14 @@ class Danas(BasicNewsRecipe):
     max_articles_per_feed = 100
     no_stylesheets        = False
     use_embedded_content  = True
     language              = 'sr'
-    lang                  = 'sr-Latn-RS'
-    direction             = 'ltr'
-    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
+    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} img{margin-bottom: 0.8em} '
 
     conversion_options = {
                           'comment'   : description
                         , 'tags'      : category
                         , 'publisher' : publisher
-                        , 'language'  : lang
-                        , 'pretty_print' : True
+                        , 'language'  : language
                          }
 
 
@@ -43,9 +37,4 @@ class Danas(BasicNewsRecipe):
     ]
 
     def preprocess_html(self, soup):
-        soup.html['lang'] = self.lang
-        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
-        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
-        soup.head.insert(0,mlang)
-        soup.head.insert(1,mcharset)
         return self.adeify_images(soup)
@@ -14,14 +14,13 @@ class Blic(BasicNewsRecipe):
     description = 'Blic.rs online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja'
     publisher = 'RINGIER d.o.o.'
     category = 'news, politics, Serbia'
-    delay = 1
     oldest_article = 2
     max_articles_per_feed = 100
     no_stylesheets = True
     use_embedded_content = False
+    masthead_url = 'http://www.blic.rs/resources/images/header/header_back.png'
     language = 'sr'
-    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} '
+    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: Georgia, serif1, serif} .article_description{font-family: Arial, sans1, sans-serif} .img_full{float: none} img{margin-bottom: 0.8em} '
 
     conversion_options = {
         'comment' : description
@@ -31,13 +30,15 @@ class Blic(BasicNewsRecipe):
     }
 
     preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
 
     remove_tags_before = dict(name='div', attrs={'id':'article_info'})
+    remove_tags        = [dict(name=['object','link'])]
+    remove_attributes  = ['width','height']
 
     feeds = [(u'Danasnje Vesti', u'http://www.blic.rs/rss/danasnje-vesti')]
 
-    remove_tags = [dict(name=['object','link'])]
-
     def print_version(self, url):
         return url + '/print'
+
+    def preprocess_html(self, soup):
+        return self.adeify_images(soup)
36  resources/recipes/cetnixploitation.recipe  Normal file
@@ -0,0 +1,36 @@
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+chetnixploitation.blogspot.com
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Chetnixploitation(BasicNewsRecipe):
+    title                 = 'Chetnixploitation'
+    __author__            = 'Darko Miletic'
+    description           = 'Filmski blog'
+    oldest_article        = 7
+    max_articles_per_feed = 100
+    language              = 'sr'
+    encoding              = 'utf-8'
+    no_stylesheets        = True
+    use_embedded_content  = True
+    extra_css             = ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: "Trebuchet MS",Trebuchet,Verdana,sans1,sans-serif} .article_description{font-family: sans1, sans-serif} img{margin-bottom: 0.8em; border: 1px solid #333333; padding: 4px } '
+
+    conversion_options = {
+                          'comment'  : description
+                        , 'tags'     : 'film, blog, cetnici, srbija, ex-yu'
+                        , 'publisher': 'Son of Man'
+                        , 'language' : language
+                         }
+
+    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
+    feeds = [(u'Posts', u'http://chetnixploitation.blogspot.com/feeds/posts/default')]
+
+    def preprocess_html(self, soup):
+        return self.adeify_images(soup)
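The preprocess_regexps line that recurs throughout these Serbian recipes replaces U+0110 (Đ, Latin capital D with stroke) with U+00D0 (Ð, Eth), presumably because the embedded reader fonts these recipes target lack the former glyph while the two are visually near-identical. The substitution only touches the capital letter:

    import re

    pattern, repl = re.compile(u'\u0110'), lambda match: u'\u00D0'
    print pattern.sub(repl, u'\u0110or\u0111e')  # prints Ðorđe; lowercase đ is untouched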
@@ -20,7 +20,7 @@ class Danas(BasicNewsRecipe):
     encoding = 'utf-8'
     masthead_url = 'http://www.danas.rs/images/basic/danas.gif'
     language = 'sr'
-    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .article_description,body,.lokacija{font-family: Tahoma,Arial,Helvetica,sans1,sans-serif} .nadNaslov,h1,.preamble{font-family: Georgia,"Times New Roman",Times,serif1,serif} .antrfileText{border-left: 2px solid #999999; color:#666666; margin-left: 0.8em; padding-left: 1.2em; margin-bottom: 0; margin-top: 0} h2,.datum,.lokacija,.autor{font-size: small} .antrfileNaslov{border-left: 2px solid #999999; color:#666666; margin-left: 0.8em; padding-left: 1.2em; font-weight:bold; margin-bottom: 0; margin-top: 0} img{margin-bottom: 0.8em} '
+    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .article_description,body,.lokacija{font-family: Tahoma,Arial,Helvetica,sans1,sans-serif} .nadNaslov,h1,.preamble{font-family: Georgia,"Times New Roman",Times,serif1,serif} .antrfileText{border-left: 2px solid #999999; margin-left: 0.8em; padding-left: 1.2em; margin-bottom: 0; margin-top: 0} h2,.datum,.lokacija,.autor{font-size: small} .antrfileNaslov{border-left: 2px solid #999999; margin-left: 0.8em; padding-left: 1.2em; font-weight:bold; margin-bottom: 0; margin-top: 0} img{margin-bottom: 0.8em} '
 
     conversion_options = {
         'comment' : description
@@ -38,7 +38,7 @@ class Danas(BasicNewsRecipe):
        ,dict(name=['object','link','iframe'])
                   ]
 
     feeds = [
         (u'Politika' , u'http://www.danas.rs/rss/rss.asp?column_id=27')
        ,(u'Hronika'  , u'http://www.danas.rs/rss/rss.asp?column_id=2' )
        ,(u'Drustvo'  , u'http://www.danas.rs/rss/rss.asp?column_id=24')
@@ -60,4 +60,4 @@ class Danas(BasicNewsRecipe):
 
     def print_version(self, url):
         return url + '&action=print'
 
50  resources/recipes/diariovasco.recipe  Normal file
@@ -0,0 +1,50 @@
+__license__   = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+www.diariovasco.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class DiarioVasco(BasicNewsRecipe):
+    title                 = 'Diario Vasco'
+    __author__            = 'Darko Miletic'
+    description           = 'Noticias de pais Vasco y el resto del mundo'
+    publisher             = 'Diario Vasco'
+    category              = 'news, politics, Spain'
+    oldest_article        = 2
+    max_articles_per_feed = 200
+    no_stylesheets        = True
+    encoding              = 'cp1252'
+    use_embedded_content  = False
+    language              = 'es'
+    remove_empty_feeds    = True
+    masthead_url          = 'http://www.diariovasco.com/img/rd.logotipo2_dvasco.gif'
+    extra_css             = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .photo-caption{font-size: x-small} '
+
+    conversion_options = {
+                          'comment'   : description
+                        , 'tags'      : category
+                        , 'publisher' : publisher
+                        , 'language'  : language
+                         }
+
+    keep_only_tags = [
+        dict(attrs={'id':'title'})
+       ,dict(attrs={'class':['overhead','headline','subhead','date','text','noticia_cont','desarrollo']})
+                     ]
+    remove_tags       = [dict(name='ul')]
+    remove_attributes = ['width','height']
+
+    feeds = [
+              (u'Ultimas Noticias', u'http://www.diariovasco.com/rss/feeds/ultima.xml'       )
+             ,(u'Portada'         , u'http://www.diariovasco.com/portada.xml'                )
+             ,(u'Politica'        , u'http://www.diariovasco.com/rss/feeds/politica.xml'     )
+             ,(u'Deportes'        , u'http://www.diariovasco.com/rss/feeds/deportes.xml'     )
+             ,(u'Economia'        , u'http://www.diariovasco.com/rss/feeds/economia.xml'     )
+             ,(u'Mundo'           , u'http://www.diariovasco.com/rss/feeds/mundo.xml'        )
+             ,(u'Cultura'         , u'http://www.diariovasco.com/rss/feeds/cultura.xml'      )
+             ,(u'Gente'           , u'http://www.diariovasco.com/rss/feeds/gente.xml'        )
+             ,(u'Contraportada'   , u'http://www.diariovasco.com/rss/feeds/contraportada.xml')
+            ]
@@ -1,7 +1,5 @@
-#!/usr/bin/env python
-
 __license__   = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
 
 '''
 e-novine.com
@@ -9,7 +7,6 @@ e-novine.com
 
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import Tag
 
 class E_novine(BasicNewsRecipe):
     title = 'E-Novine'
@@ -20,40 +17,38 @@ class E_novine(BasicNewsRecipe):
     oldest_article = 2
     max_articles_per_feed = 100
     no_stylesheets = True
-    encoding = 'cp1250'
+    encoding = 'utf-8'
     use_embedded_content = False
     language = 'sr'
+    masthead_url = 'http://www.e-novine.com/themes/e_novine/img/logo.gif'
-    lang = 'sr'
-    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
+    extra_css = ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .article_description,body{font-family: Arial,Helvetica,sans1,sans-serif} img{float: none; margin-bottom: 0.8em} '
 
     conversion_options = {
                           'comment'   : description
                         , 'tags'      : category
                         , 'publisher' : publisher
-                        , 'language'  : lang
-                        , 'pretty_print' : True
+                        , 'language'  : language
                          }
 
     preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
 
-    keep_only_tags = [dict(name='div', attrs={'id':['css_47_0_2844H']})]
+    keep_only_tags = [
+        dict(name='div', attrs={'class':'article_head'})
+       ,dict(name='div', attrs={'id':'article_body'})
+                     ]
 
-    remove_tags = [dict(name=['object','link','embed','iframe'])]
+    remove_tags = [
+        dict(name=['object','link','embed','iframe'])
+       ,dict(attrs={'id':'box_article_tools'})
+                  ]
+    remove_attributes = ['height','width','lang']
 
-    feeds = [(u'Sve vesti', u'http://www.e-novine.com/rss/e-novine.xml' )]
+    feeds = [(u'Sve vesti', u'http://www.e-novine.com/feed/index.1.rss' )]
 
     def preprocess_html(self, soup):
-        soup.html['xml:lang'] = self.lang
-        soup.html['lang'] = self.lang
-        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
-        soup.head.insert(0,mlang)
         for item in soup.findAll(style=True):
             del item['style']
-        ftag = soup.find('div', attrs={'id':'css_47_0_2844H'})
-        if ftag:
-            it = ftag.div
-            it.extract()
-            ftag.div.extract()
-            ftag.insert(0,it)
-        return soup
+        return self.adeify_images(soup)
+
+    def print_version(self, url):
+        return url + '?print'
@@ -119,6 +119,8 @@ class Economist(BasicNewsRecipe):
             ns = NavigableString(self.tag_to_string(caption))
             div.insert(0, ns)
             div.insert(1, Tag(soup, 'br'))
+            del img['width']
+            del img['height']
             img.extract()
             div.insert(2, img)
             table.replaceWith(div)
@@ -123,6 +123,8 @@ class Economist(BasicNewsRecipe):
             div.insert(0, ns)
             div.insert(1, Tag(soup, 'br'))
             img.extract()
+            del img['width']
+            del img['height']
             div.insert(2, img)
             table.replaceWith(div)
         return soup
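Both Economist hunks make the same fix in what appear to be two variants of the recipe (the hunk line numbers overlap, so they cannot be one file): the explicit width and height attributes are deleted from the image before it is re-inserted, so the device can scale it freely. Deleting attributes on a BeautifulSoup Tag works like dictionary deletion; a tiny illustration with hypothetical markup:

    from calibre.ebooks.BeautifulSoup import BeautifulSoup

    soup = BeautifulSoup('<img src="x.jpg" width="600" height="400" />')
    img = soup.find('img')
    del img['width']
    del img['height']
    print img  # <img src="x.jpg" /> (exact serialization may vary)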
58  resources/recipes/epicurious.recipe  Normal file
@@ -0,0 +1,58 @@
+#!/usr/bin/env python
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, Starson17'
+'''
+www.epicurious.com
+'''
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Epicurious(BasicNewsRecipe):
+    title       = u'Epicurious'
+    __author__  = 'Starson17'
+    description = 'Food and Recipes from Epicurious'
+    cover_url   = 'http://up6.podbean.com/image-logos/21849_logo.jpg'
+    publisher   = 'Epicurious'
+    tags        = 'news, food, gourmet, recipes'
+    language    = 'en'
+    use_embedded_content  = False
+    no_stylesheets        = True
+    remove_javascript     = True
+    recursions            = 3
+    oldest_article        = 14
+    max_articles_per_feed = 20
+
+    keep_only_tags = [dict(name='div', attrs={'class':['mainconsolewrapper','videoheader','content_unit','entry-content','see_more_block']}),
+                      dict(name='div', attrs={'id':['headline','introBlock','ingredients','preparation','articleContent','in_categories_block']})
+                     ]
+
+    remove_tags = [{'id':['printShoppingList','addnoteLnk','btnUploadVideo','enlarge_image']},
+                   {'class':['subLnk','sbmWrapper','detail_division','entry-footer','comment-footer']},
+                   dict(name='div', attrs={'class':['tagged','comments']})
+                  ]
+
+    remove_tags_after = [dict(name='div', attrs={'class':'entry-content'})]
+
+    feeds = [
+        (u'Recipes: Healthy dinner ', u'http://feeds.epicurious.com/healthy_recipes'),
+        (u'New Recipes ', u'http://feeds.epicurious.com/newrecipes'),
+        (u'Features ', u'http://feeds.epicurious.com/latestfeatures'),
+        (u'Blogs ', u'http://feeds.feedburner.com/epicurious/epiblog')
+    ]
+
+    match_regexps = [
+        r'http://www.epicurious.com/.*recipes/.*/views'
+    ]
+
+    preprocess_regexps = [
+        (re.compile(r'/\n', re.DOTALL|re.IGNORECASE), lambda match: '/'),
+        (re.compile(r'_116.jpg', re.DOTALL|re.IGNORECASE), lambda match: '.jpg'),
+        (re.compile('<div class=\"comments\".*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>')
+    ]
+
+    def postprocess_html(self, soup, first_fetch):
+        for t in soup.findAll(['table', 'tr', 'td']):
+            t.name = 'div'
+        return soup
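Worth noting about the Epicurious recipe above: recursions = 3 lets calibre follow links from fetched pages up to three levels deep, and match_regexps restricts which of those links are followed, so only the full recipe pages under .../recipes/.../views get pulled in. A quick check that the pattern matches a hypothetical recipe URL:

    import re

    pat = r'http://www.epicurious.com/.*recipes/.*/views'
    url = 'http://www.epicurious.com/recipes/food/views/example-12345'  # hypothetical
    print bool(re.match(pat, url))  # True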
@@ -1,27 +1,41 @@
 #!/usr/bin/env python
 
 __license__   = 'GPL v3'
-__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2010 Starson17'
 '''
 fudzilla.com
 '''
 
+import re
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class Fudzilla(BasicNewsRecipe):
     title      = u'Fudzilla'
-    __author__ = 'Darko Miletic'
+    __author__ = 'Starson17'
     language   = 'en'
 
     description = 'Tech news'
     oldest_article = 7
+    remove_javascript = True
     max_articles_per_feed = 100
     no_stylesheets = True
    use_embedded_content = False
 
-    feeds = [ (u'Posts', u'http://www.fudzilla.com/index.php?option=com_rss&feed=RSS2.0&no_html=1')]
-
-    def print_version(self, url):
-        nurl = url.replace('http://www.fudzilla.com/index.php','http://www.fudzilla.com/index2.php')
-        nmain, nsep, nrest = nurl.partition('&Itemid=')
-        return nmain + '&pop=1&page=0&Itemid=1'
+    remove_tags_before = dict(name='div', attrs={'class':['padding']})
+
+    remove_tags = [dict(name='td', attrs={'class':['left','right']}),
+                   dict(name='div', attrs={'id':['toolbar','buttons']}),
+                   dict(name='div', attrs={'class':['artbannersxtd','back_button']}),
+                   dict(name='span', attrs={'class':['pathway']}),
+                   dict(name='th', attrs={'class':['pagenav_next','pagenav_prev']}),
+                   dict(name='table', attrs={'class':['headlines']}),
+                  ]
+
+    feeds = [
+             (u'Posts', u'http://www.fudzilla.com/index.php?option=com_rss&feed=RSS2.0&no_html=1')
+            ]
+
+    preprocess_regexps = [
+        (re.compile(r'<p class="MsoNormal"> Welcome.*</p> ', re.DOTALL|re.IGNORECASE), lambda match: '')
+                         ]
56  resources/recipes/gamasutra_fa.recipe  Normal file
@@ -0,0 +1,56 @@
+__license__   = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+gamasutra.com
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Gamasutra(BasicNewsRecipe):
+    title                 = 'Gamasutra Featured articles'
+    __author__            = 'Darko Miletic'
+    description           = 'The Art and Business of Making Games'
+    publisher             = 'Gamasutra'
+    category              = 'news, games, IT'
+    oldest_article        = 2
+    max_articles_per_feed = 200
+    no_stylesheets        = True
+    encoding              = 'cp1252'
+    use_embedded_content  = False
+    language              = 'en'
+    remove_empty_feeds    = True
+    masthead_url          = 'http://www.gamasutra.com/images/gamasutra_logo.gif'
+    extra_css             = ' body{font-family: Verdana,Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .title{font-size: x-large; font-weight: bold} '
+
+    conversion_options = {
+                          'comment'          : description
+                        , 'tags'             : category
+                        , 'publisher'        : publisher
+                        , 'language'         : language
+                        , 'linearize_tables' : True
+                         }
+    preprocess_regexps = [
+        (re.compile(r'<head>.*?<title>', re.DOTALL|re.IGNORECASE),lambda match: '<head><title>')
+       ,(re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE),lambda match: '</title></head>')
+       ,(re.compile(r'</head>', re.DOTALL|re.IGNORECASE),lambda match: '</head><body>')
+                         ]
+    remove_tags = [
+        dict(name=['object','embed','iframe'])
+       ,dict(attrs={'class':'adBox'})
+                  ]
+    remove_tags_before = dict(attrs={'class':'title'})
+    remove_attributes  = ['width','height','name']
+
+    feeds = [(u'Feature Articles', u'http://feeds.feedburner.com/GamasutraFeatureArticles')]
+
+    def print_version(self, url):
+        return url + '?print=1'
+
+    def get_article_url(self, article):
+        return article.get('guid', None)
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        return self.adeify_images(soup)
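The three preprocess_regexps in the featured-articles recipe above cooperate to gut the <head> of everything except the <title>, then reopen a <body> after it. Their combined effect on a scrap of hypothetical markup:

    import re

    html = '<head><script>x</script><title>T</title><style>y</style></head><p>text</p>'
    for pat, repl in [
        (r'<head>.*?<title>',   '<head><title>'),
        (r'</title>.*?</head>', '</title></head>'),
        (r'</head>',            '</head><body>'),
    ]:
        html = re.compile(pat, re.DOTALL|re.IGNORECASE).sub(repl, html)
    print html  # <head><title>T</title></head><body><p>text</p>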
45  resources/recipes/gamasutra_news.recipe  Normal file
@@ -0,0 +1,45 @@
+__license__   = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+gamasutra.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Gamasutra(BasicNewsRecipe):
+    title                 = 'Gamasutra News'
+    __author__            = 'Darko Miletic'
+    description           = 'The Art and Business of Making Games'
+    publisher             = 'Gamasutra'
+    category              = 'news, games, IT'
+    oldest_article        = 2
+    max_articles_per_feed = 200
+    no_stylesheets        = True
+    encoding              = 'cp1252'
+    use_embedded_content  = False
+    language              = 'en'
+    remove_empty_feeds    = True
+    masthead_url          = 'http://www.gamasutra.com/images/gamasutra_logo.gif'
+    extra_css             = ' body{font-family: Verdana,Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .newsTitle{font-size: xx-large; font-weight: bold} '
+
+    conversion_options = {
+                          'comment'          : description
+                        , 'tags'             : category
+                        , 'publisher'        : publisher
+                        , 'language'         : language
+                        , 'linearize_tables' : True
+                         }
+
+    remove_tags       = [dict(attrs={'class':['relatedNews','adBox']})]
+    keep_only_tags    = [dict(attrs={'class':['newsTitle','newsAuth','newsDate','newsText']})]
+    remove_attributes = ['width','height']
+
+    feeds = [(u'News', u'http://feeds.feedburner.com/GamasutraNews')]
+
+    def get_article_url(self, article):
+        return article.get('guid', None)
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        return self.adeify_images(soup)
@@ -1,7 +1,6 @@
-#!/usr/bin/env python
 
 __license__   = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 glassrpske.com
 '''
@@ -9,7 +8,6 @@ glassrpske.com
 
 import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import Tag
 
 class GlasSrpske(BasicNewsRecipe):
     title = 'Glas Srpske'
@@ -22,20 +20,16 @@ class GlasSrpske(BasicNewsRecipe):
     no_stylesheets = True
     encoding = 'utf-8'
     use_embedded_content = False
-    cover_url = 'http://www.glassrpske.com/var/slike/glassrpske-logo.png'
-    lang = 'sr-BA'
-    language = 'sr'
+    masthead_url = 'http://www.glassrpske.com/var/slike/glassrpske-logo.png'
+    language = 'sr'
     INDEX = 'http://www.glassrpske.com'
-    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
+    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif} img{margin-bottom: 0.8em} '
 
     conversion_options = {
                           'comment'   : description
                         , 'tags'      : category
                         , 'publisher' : publisher
-                        , 'language'  : lang
-                        , 'pretty_print' : True
+                        , 'language'  : language
                          }
 
     preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@@ -63,11 +57,7 @@ class GlasSrpske(BasicNewsRecipe):
     ]
 
     def preprocess_html(self, soup):
-        soup.html['xml:lang'] = self.lang
-        soup.html['lang'] = self.lang
-        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
-        soup.head.insert(0,mlang)
-        return soup
+        return self.adeify_images(soup)
 
     def parse_index(self):
         totalfeeds = []
@ -1,7 +1,6 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
www.glas-javnosti.rs
|
www.glas-javnosti.rs
|
||||||
'''
|
'''
|
||||||
@ -18,18 +17,14 @@ class GlasJavnosti(BasicNewsRecipe):
|
|||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = False
|
no_stylesheets = False
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = 'sr'
|
language = 'sr'
|
||||||
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} img{margin-bottom: 0.8em} '
|
||||||
lang = 'sr-Latn-RS'
|
|
||||||
direction = 'ltr'
|
|
||||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment' : description
|
||||||
, 'tags' : category
|
, 'tags' : category
|
||||||
, 'publisher' : publisher
|
, 'publisher' : publisher
|
||||||
, 'language' : lang
|
, 'language' : language
|
||||||
, 'pretty_print' : True
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@@ -9,17 +9,15 @@ from calibre.web.feeds.news import BasicNewsRecipe

 class heiseDe(BasicNewsRecipe):

     title = 'heise'
     description = 'Computernews from Germany'
     __author__ = 'Oliver Niesner'
-    language = 'de'

     use_embedded_content = False
     timefmt = ' [%d %b %Y]'
     max_articles_per_feed = 40
     no_stylesheets = True

     remove_tags = [dict(id='navi_top'),
         dict(id='navi_bottom'),
         dict(id='logo'),
@@ -35,12 +33,10 @@ class heiseDe(BasicNewsRecipe):
         dict(name='div', attrs={'class':'bcadv ISI_IGNORE'}),
         dict(name='p', attrs={'class':'news_option'}),
         dict(name='p', attrs={'class':'news_navi'}),
-        dict(name='p', attrs={'class':'news_foren'})]
-    remove_tags_after = [dict(name='p', attrs={'class':'news_foren'})]
+        dict(name='div', attrs={'class':'news_foren'})]
+    remove_tags_after = [dict(name='div', attrs={'class':'news_foren'})]

     feeds = [ ('heise', 'http://www.heise.de/newsticker/heise.rdf') ]
@@ -1,17 +1,41 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 from calibre.web.feeds.news import BasicNewsRecipe

 class HoustonChronicle(BasicNewsRecipe):

     title = u'The Houston Chronicle'
     description = 'News from Houston, Texas'
-    __author__ = 'Kovid Goyal'
+    __author__ = 'Kovid Goyal and Sujata Raman'
     language = 'en'
     timefmt = ' [%a, %d %b, %Y]'
     no_stylesheets = True

-    keep_only_tags = [dict(id=['story-head', 'story'])]
-    remove_tags = [dict(id=['share-module', 'resource-box',
-        'resource-box-header'])]
+    keep_only_tags = [
+            dict(id=['story-head', 'story'])
+            ]
+
+    remove_tags = [
+            dict(id=['share-module', 'resource-box',
+                'resource-box-header'])
+            ]
+
+    extra_css = '''
+        h1{font-family :Arial,Helvetica,sans-serif; font-size:large;}
+        h2{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#666666;}
+        h3{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;}
+        h4{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
+        p{font-family :Arial,Helvetica,sans-serif; font-size:x-small;}
+        #story-head h1{font-family :Arial,Helvetica,sans-serif; font-size: xx-large;}
+        #story-head h2{font-family :Arial,Helvetica,sans-serif; font-size: small; color:#000000;}
+        #story-head h3{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
+        #story-head h4{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
+        #story{font-family :Arial,Helvetica,sans-serif; font-size:xx-small;}
+        #Text-TextSubhed BoldCond PoynterAgateZero h3{color:#444444;font-family :Arial,Helvetica,sans-serif; font-size:small;}
+        .p260x p{font-family :Arial,Helvetica,serif; font-size:x-small;font-style:italic;}
+        .p260x h6{color:#777777;font-family :Arial,Helvetica,sans-serif; font-size:xx-small;}
+        '''

     def parse_index(self):
         soup = self.index_to_soup('http://www.chron.com/news/')
@@ -64,3 +88,6 @@ class HoustonChronicle(BasicNewsRecipe):
             feeds.append((current_section, current_articles))
         return feeds
@@ -3,7 +3,7 @@ import re

 class HuffingtonPostRecipe(BasicNewsRecipe):
     __license__ = 'GPL v3'
-    __author__ = 'kwetal'
+    __author__ = 'kwetal and Archana Raman'
     language = 'en'
     version = 2

@@ -14,70 +14,89 @@ class HuffingtonPostRecipe(BasicNewsRecipe):

     oldest_article = 1.1
     max_articles_per_feed = 100
-    use_embedded_content = True
+    #use_embedded_content = True

     encoding = 'utf-8'
     remove_empty_feeds = True
+    no_stylesheets = True
+    remove_javascript = True

     # Feeds from: http://www.huffingtonpost.com/syndication/
     feeds = []
     feeds.append((u'Latest News', u'http://feeds.huffingtonpost.com/huffingtonpost/LatestNews'))

-    #feeds.append((u'Politics', u'http://www.huffingtonpost.com/feeds/verticals/politics/index.xml'))
-    feeds.append((u'Politics: News', u'http://www.huffingtonpost.com/feeds/verticals/politics/news.xml'))
-    feeds.append((u'Politics: Blog', u'http://www.huffingtonpost.com/feeds/verticals/politics/blog.xml'))
+    feeds.append((u'Politics', u'http://www.huffingtonpost.com/feeds/verticals/politics/index.xml'))
+    #feeds.append((u'Politics: News', u'http://www.huffingtonpost.com/feeds/verticals/politics/news.xml'))
+    #feeds.append((u'Politics: Blog', u'http://www.huffingtonpost.com/feeds/verticals/politics/blog.xml'))

-    #feeds.append((u'Media', u'http://www.huffingtonpost.com/feeds/verticals/media/index.xml'))
-    feeds.append((u'Media: News', u'http://www.huffingtonpost.com/feeds/verticals/media/news.xml'))
-    feeds.append((u'Media: Blog', u'http://www.huffingtonpost.com/feeds/verticals/media/blog.xml'))
+    feeds.append((u'Media', u'http://www.huffingtonpost.com/feeds/verticals/media/index.xml'))
+    #feeds.append((u'Media: News', u'http://www.huffingtonpost.com/feeds/verticals/media/news.xml'))
+    #feeds.append((u'Media: Blog', u'http://www.huffingtonpost.com/feeds/verticals/media/blog.xml'))

-    #feeds.append((u'Business', u'http://www.huffingtonpost.com/feeds/verticals/business/index.xml'))
-    feeds.append((u'Business: News', u'http://www.huffingtonpost.com/feeds/verticals/business/news.xml'))
-    feeds.append((u'Business: Blogs', u'http://www.huffingtonpost.com/feeds/verticals/business/blog.xml'))
+    feeds.append((u'Business', u'http://www.huffingtonpost.com/feeds/verticals/business/index.xml'))
+    #feeds.append((u'Business: News', u'http://www.huffingtonpost.com/feeds/verticals/business/news.xml'))
+    #feeds.append((u'Business: Blogs', u'http://www.huffingtonpost.com/feeds/verticals/business/blog.xml'))

-    #feeds.append((u'Entertainment', u'http://www.huffingtonpost.com/feeds/verticals/entertainment/index.xml'))
-    feeds.append((u'Entertainment: News', u'http://www.huffingtonpost.com/feeds/verticals/business/news.xml'))
-    feeds.append((u'Entertainment: Blog', u'http://www.huffingtonpost.com/feeds/verticals/entertainment/blog.xml'))
+    feeds.append((u'Entertainment', u'http://www.huffingtonpost.com/feeds/verticals/entertainment/index.xml'))
+    #feeds.append((u'Entertainment: News', u'http://www.huffingtonpost.com/feeds/verticals/business/news.xml'))
+    #feeds.append((u'Entertainment: Blog', u'http://www.huffingtonpost.com/feeds/verticals/entertainment/blog.xml'))

-    #feeds.append((u'Living', u'http://www.huffingtonpost.com/feeds/verticals/living/index.xml'))
-    feeds.append((u'Living: News', u'http://www.huffingtonpost.com/feeds/verticals/living/news.xml'))
-    feeds.append((u'Living: Blog', u'http://www.huffingtonpost.com/feeds/verticals/living/blog.xml'))
+    feeds.append((u'Living', u'http://www.huffingtonpost.com/feeds/verticals/living/index.xml'))
+    #feeds.append((u'Living: News', u'http://www.huffingtonpost.com/feeds/verticals/living/news.xml'))
+    #feeds.append((u'Living: Blog', u'http://www.huffingtonpost.com/feeds/verticals/living/blog.xml'))

-    #feeds.append((u'Style', u'http://www.huffingtonpost.com/feeds/verticals/style/index.xml'))
-    feeds.append((u'Style: News', u'http://www.huffingtonpost.com/feeds/verticals/style/news.xml'))
-    feeds.append((u'Style: Blog', u'http://www.huffingtonpost.com/feeds/verticals/style/blog.xml'))
+    feeds.append((u'Style', u'http://www.huffingtonpost.com/feeds/verticals/style/index.xml'))
+    #feeds.append((u'Style: News', u'http://www.huffingtonpost.com/feeds/verticals/style/news.xml'))
+    #feeds.append((u'Style: Blog', u'http://www.huffingtonpost.com/feeds/verticals/style/blog.xml'))

-    #feeds.append((u'Green', u'http://www.huffingtonpost.com/feeds/verticals/green/index.xml'))
-    feeds.append((u'Green: News', u'http://www.huffingtonpost.com/feeds/verticals/green/news.xml'))
-    feeds.append((u'Green: Blog', u'http://www.huffingtonpost.com/feeds/verticals/green/blog.xml'))
+    feeds.append((u'Green', u'http://www.huffingtonpost.com/feeds/verticals/green/index.xml'))
+    #feeds.append((u'Green: News', u'http://www.huffingtonpost.com/feeds/verticals/green/news.xml'))
+    #feeds.append((u'Green: Blog', u'http://www.huffingtonpost.com/feeds/verticals/green/blog.xml'))

-    #feeds.append((u'Technology', u'http://www.huffingtonpost.com/feeds/verticals/technology/index.xml'))
-    feeds.append((u'Technology: News', u'http://www.huffingtonpost.com/feeds/verticals/technology/news.xml'))
-    feeds.append((u'Technology: Blog', u'http://www.huffingtonpost.com/feeds/verticals/technology/blog.xml'))
+    feeds.append((u'Technology', u'http://www.huffingtonpost.com/feeds/verticals/technology/index.xml'))
+    #feeds.append((u'Technology: News', u'http://www.huffingtonpost.com/feeds/verticals/technology/news.xml'))
+    #feeds.append((u'Technology: Blog', u'http://www.huffingtonpost.com/feeds/verticals/technology/blog.xml'))

-    #feeds.append((u'Comedy', u'http://www.huffingtonpost.com/feeds/verticals/comedy/index.xml'))
-    feeds.append((u'Comedy: News', u'http://www.huffingtonpost.com/feeds/verticals/comedy/news.xml'))
-    feeds.append((u'Comedy: Blog', u'http://www.huffingtonpost.com/feeds/verticals/comedy/blog.xml'))
+    feeds.append((u'Comedy', u'http://www.huffingtonpost.com/feeds/verticals/comedy/index.xml'))
+    #feeds.append((u'Comedy: News', u'http://www.huffingtonpost.com/feeds/verticals/comedy/news.xml'))
+    #feeds.append((u'Comedy: Blog', u'http://www.huffingtonpost.com/feeds/verticals/comedy/blog.xml'))

-    #feeds.append((u'World', u'http://www.huffingtonpost.com/feeds/verticals/world/index.xml'))
-    feeds.append((u'World: News', u'http://www.huffingtonpost.com/feeds/verticals/world/news.xml'))
-    feeds.append((u'World: Blog', u'http://www.huffingtonpost.com/feeds/verticals/world/blog.xml'))
+    feeds.append((u'World', u'http://www.huffingtonpost.com/feeds/verticals/world/index.xml'))
+    #feeds.append((u'World: News', u'http://www.huffingtonpost.com/feeds/verticals/world/news.xml'))
+    #feeds.append((u'World: Blog', u'http://www.huffingtonpost.com/feeds/verticals/world/blog.xml'))

     feeds.append((u'Original Reporting', u'http://www.huffingtonpost.com/tag/huffpolitics/feed'))
-    feeds.append((u'Original Posts', u'http://www.huffingtonpost.com/feeds/original_posts/index.xml'))
+    #feeds.append((u'Original Posts', u'http://www.huffingtonpost.com/feeds/original_posts/index.xml'))

     remove_tags = []
     remove_tags.append(dict(name='a', attrs={'href' : re.compile('http://feedads\.g\.doubleclick.net.*')}))
     remove_tags.append(dict(name='div', attrs={'class' : 'feedflare'}))
+    remove_tags.append(dict(name='a', attrs={'class' : 'home_pixie'}))
+    remove_tags.append(dict(name='div', attrs={'id' : ["top_nav",'threeup_top_wrapper','breaking_news_container',"hp_social_network"]}))
+    remove_tags.append(dict(name='img', alt="Connect"))
+    remove_tags.append(dict(name='div', attrs={'class' : ['logo']})) #'share_boxes_box_block_b_wraper',
+    remove_tags.append(dict(name='div', attrs={'class' :[ 'read_more with_verticals','chicklets_box_outter_v05','blogger_menu_content','chicklets_bar']}))
+    remove_tags.append(dict(name='div', attrs={'class' : ['sidebar_blog_first_design','sidebar_blog_second_design',]}))
+    remove_tags.append(dict(name='div', attrs={'class' : ['main_big_news_ontop','login-menu','sidebar_blog_third_design','read_more']}))

-    remove_attributes = ['style']
+    remove_tags_after = [dict(name='div', attrs={'class' : 'entry_content'}) ]
+    # remove_attributes = ['style']

     extra_css = '''
+        h1{font-family :Arial,Helvetica,sans-serif; font-size:large;}
+        h2{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;}
+        h3{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;}
         body{font-family:verdana,arial,helvetica,geneva,sans-serif ;}
-        h2{font-size: x-large; font-weight: bold; padding: 0em; margin-bottom: 0.2em;}
-        a[href]{color: blue; text-decoration: none; cursor: pointer;}
+        #title_permalink{color:black;font-size:large;}
+        .date{color:#858585;font-family:"Times New Roman",sans-serif;}
+        .comments_datetime v05{color:#696969;}
+        .teaser_permalink{font-style:italic;font-size:xx-small;}
+        .blog_posted_date{color:#696969;font-size:xx-small;font-weight: bold;}
         '''
+    #a[href]{color: blue; text-decoration: none; cursor: pointer;}

     def get_article_url(self, article):
         """
         Workaround for Feedparser behaviour. If an item has more than one <link/> element, article.link is empty and
@@ -85,10 +104,21 @@ class HuffingtonPostRecipe(BasicNewsRecipe):
         Todo: refactor to searching this list to avoid the hardcoded zero-index
         """
         link = article.get('link')
+        print("Link:"+link)
         if not link:
             links = article.get('links')
             if links:
                 link = links[0]['href']
+                if not links[0]['href']:
+                    link = links[1]['href']

         return link
+
+    def postprocess_html(self, soup, first_fetch):
+        for tag in soup.findAll('div',text = "What's Your Reaction?"):
+            tag.extract()
+        for tg in soup.findAll('blockquote'):
+            tg.extract()
+        return soup
87  resources/recipes/johm.recipe  Normal file
@@ -0,0 +1,87 @@
# -*- coding: utf-8 -*-

from calibre.web.feeds.recipes import BasicNewsRecipe

class JournalofHospitalMedicine(BasicNewsRecipe):

    title = 'Journal of Hospital Medicine'
    __author__ = 'Krittika Goyal'
    description = 'Medical news'
    timefmt = ' [%d %b, %Y]'
    needs_subscription = True

    no_stylesheets = True
    #remove_tags_before = dict(name='div', attrs={'align':'center'})
    #remove_tags_after = dict(name='ol', attrs={'compact':'COMPACT'})
    remove_tags = [
        dict(name='iframe'),
        dict(name='div', attrs={'class':'subContent'}),
        dict(name='div', attrs={'id':['contentFrame']}),
        #dict(name='form', attrs={'onsubmit':"return verifySearch(this.w,'Keyword, citation, or author')"}),
        #dict(name='table', attrs={'align':'RIGHT'}),
    ]

    # TO LOGIN
    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
        br.open('http://www3.interscience.wiley.com/cgi-bin/home')
        br.select_form(name='siteLogin')
        br['LoginName'] = self.username
        br['Password'] = self.password
        response = br.submit()
        raw = response.read()
        if 'userName = ""' in raw:
            raise Exception('Login failed. Check your username and password')
        return br

    # TO GET ARTICLE TOC
    def johm_get_index(self):
        return self.index_to_soup('http://www3.interscience.wiley.com/journal/111081937/home')

    # To parse article toc
    def parse_index(self):
        parse_soup = self.johm_get_index()

        div = parse_soup.find(id='contentCell')

        current_section = None
        current_articles = []
        feeds = []
        for x in div.findAll(True):
            if x.name == 'h4':
                # Section heading found
                if current_articles and current_section:
                    feeds.append((current_section, current_articles))
                current_section = self.tag_to_string(x)
                current_articles = []
                self.log('\tFound section:', current_section)
            if current_section is not None and x.name == 'strong':
                title = self.tag_to_string(x)
                p = x.parent.parent.find('a', href=lambda x: x and '/HTMLSTART' in x)
                if p is None:
                    continue
                url = p.get('href', False)
                if not url or not title:
                    continue
                if url.startswith('/'):
                    url = 'http://www3.interscience.wiley.com'+url
                url = url.replace('/HTMLSTART', '/main.html,ftx_abs')
                self.log('\t\tFound article:', title)
                self.log('\t\t\t', url)
                #if url.startswith('/'):
                    #url = 'http://online.wsj.com'+url
                current_articles.append({'title': title, 'url':url,
                    'description':'', 'date':''})

        if current_articles and current_section:
            feeds.append((current_section, current_articles))

        return feeds

    def preprocess_html(self, soup):
        for img in soup.findAll('img', src=True):
            img['src'] = img['src'].replace('tfig', 'nfig')
        return soup
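Note: the get_browser override above is the usual calibre pattern for subscription sites: fill the site's login form with the credentials the user supplies (through the GUI, or ebook-convert's --username/--password options) and check the returned page for a failure marker, since such sites often answer a bad login with an ordinary 200 page. A standalone sketch of the same mechanize flow, with a hypothetical login URL; the form and field names mirror the recipe:

    import mechanize

    br = mechanize.Browser()
    br.open('http://example.com/login')     # hypothetical login page
    br.select_form(name='siteLogin')        # pick the form by its name attribute
    br['LoginName'] = 'user@example.com'    # fill controls by name
    br['Password'] = 's3cret'
    raw = br.submit().read()
    if 'userName = ""' in raw:              # site-specific failure marker
        raise Exception('Login failed. Check your username and password')

The Strategy+Business and Wall Street Journal changes further down apply the same marker-string check after submitting their login forms.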
37  resources/recipes/kukuburi.recipe  Normal file
@@ -0,0 +1,37 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__author__ = 'Mori'
__version__ = 'v. 0.1'
'''
Kukuburi.com
'''

from calibre.web.feeds.news import BasicNewsRecipe
import re

class KukuburiRecipe(BasicNewsRecipe):
    __author__ = 'Mori'
    language = 'en'

    title = u'Kukuburi'
    publisher = u'Ramón Pérez'
    description = u'KUKUBURI by Ram\xc3\xb3n P\xc3\xa9rez'

    no_stylesheets = True
    remove_javascript = True

    oldest_article = 100
    max_articles_per_feed = 100

    feeds = [
        (u'Kukuburi', u'http://feeds2.feedburner.com/Kukuburi')
    ]

    preprocess_regexps = [
        (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
        [
            (r'<!--.*?-->', lambda match: ''),
            (r'<div class="feedflare".*?</div>', lambda match: '')
        ]
    ]
@@ -1,7 +1,5 @@
-#!/usr/bin/env python
-
 __license__ = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 lasegunda.com
 '''
@@ -19,43 +17,38 @@ class LaSegunda(BasicNewsRecipe):
     no_stylesheets = True
     use_embedded_content = False
     encoding = 'cp1252'
-    cover_url = 'http://www.lasegunda.com/imagenes/logotipo_lasegunda_Oli.gif'
-    remove_javascript = True
+    masthead_url = 'http://www.lasegunda.com/imagenes/logotipo_lasegunda_Oli.gif'
+    remove_empty_feeds = True
     language = 'es'
+    extra_css = ' .titulonegritastop{font-size: xx-large; font-weight: bold} '

-    html2lrf_options = [
-        '--comment', description
-      , '--category', category
-      , '--publisher', publisher
-      , '--ignore-tables'
-    ]
-
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} "'
-
-    keep_only_tags = [dict(name='table')]
+    conversion_options = {
+        'comment'          : description
+      , 'tags'             : category
+      , 'publisher'        : publisher
+      , 'language'         : language
+      , 'linearize_tables' : True
+    }
+
+    remove_tags_before = dict(attrs={'class':'titulonegritastop'})
+    remove_tags = [dict(name='img')]
+    remove_attributes = ['width','height']

     feeds = [
         (u'Noticias de ultima hora', u'http://www.lasegunda.com/rss20/index.asp?canal=0')
-        ,(u'Politica', u'http://www.lasegunda.com/rss20/index.asp?canal=21')
-        ,(u'Cronica', u'http://www.lasegunda.com/rss20/index.asp?canal=20')
-        ,(u'Internacional', u'http://www.lasegunda.com/rss20/index.asp?canal=23')
-        ,(u'Deportes', u'http://www.lasegunda.com/rss20/index.asp?canal=24')
-        ,(u'Epectaculos/Cultura', u'http://www.lasegunda.com/rss20/index.asp?canal=25')
-        ,(u'Educacion', u'http://www.lasegunda.com/rss20/index.asp?canal=26')
-        ,(u'Ciencia y Tecnologia', u'http://www.lasegunda.com/rss20/index.asp?canal=27')
-        ,(u'Solidaridad', u'http://www.lasegunda.com/rss20/index.asp?canal=28')
-        ,(u'Buena Vida', u'http://www.lasegunda.com/rss20/index.asp?canal=32')
+        ,(u'Politica'             , u'http://www.lasegunda.com/rss20/index.asp?canal=21')
+        ,(u'Cronica'              , u'http://www.lasegunda.com/rss20/index.asp?canal=20')
+        ,(u'Internacional'        , u'http://www.lasegunda.com/rss20/index.asp?canal=23')
+        ,(u'Deportes'             , u'http://www.lasegunda.com/rss20/index.asp?canal=24')
+        ,(u'Epectaculos/Cultura'  , u'http://www.lasegunda.com/rss20/index.asp?canal=25')
+        ,(u'Educacion'            , u'http://www.lasegunda.com/rss20/index.asp?canal=26')
+        ,(u'Ciencia y Tecnologia' , u'http://www.lasegunda.com/rss20/index.asp?canal=27')
+        ,(u'Solidaridad'          , u'http://www.lasegunda.com/rss20/index.asp?canal=28')
+        ,(u'Buena Vida'           , u'http://www.lasegunda.com/rss20/index.asp?canal=32')
     ]

     def print_version(self, url):
         rest, sep, article_id = url.partition('index.asp?idnoticia=')
         return u'http://www.lasegunda.com/edicionOnline/include/secciones/_detalle_impresion.asp?idnoticia=' + article_id
-
-    def preprocess_html(self, soup):
-        mtag = '<meta http-equiv="Content-Language" content="es-CL"/>'
-        soup.head.insert(0,mtag)
-        for item in soup.findAll(style=True):
-            del item['style']
-        return soup
@@ -1,7 +1,5 @@
-#!/usr/bin/env python
-
 __license__ = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 latercera.com
 '''
@@ -18,32 +16,32 @@ class LaTercera(BasicNewsRecipe):
     max_articles_per_feed = 100
     no_stylesheets = True
     encoding = 'cp1252'
-    remove_javascript = True
     use_embedded_content = False
+    remove_empty_feeds = True
+    language = 'es'

-    html2lrf_options = [
-        '--comment', description
-      , '--category', category
-      , '--publisher', publisher
-    ]
-
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
+    conversion_options = {
+        'comment'          : description
+      , 'tags'             : category
+      , 'publisher'        : publisher
+      , 'language'         : language
+      , 'linearize_tables' : True
+    }

     keep_only_tags = [dict(name='div', attrs={'class':['span-16 articulo border','span-16 border','span-16']}) ]

     remove_tags = [
-        dict(name='script')
-        ,dict(name='ul')
+        dict(name=['ul','input','base'])
         ,dict(name='div', attrs={'id':['boxComentarios','shim','enviarAmigo']})
         ,dict(name='div', attrs={'class':['ad640','span-10 imgSet A','infoRelCol']})
-        ,dict(name='input')
         ,dict(name='p', attrs={'id':['mensajeError','mensajeEnviandoNoticia','mensajeExito']})
     ]

     feeds = [
         (u'Noticias de ultima hora', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&ul=1')
-        ,(u'Pais', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=654')
+        ,(u'Nacional', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=680')
+        ,(u'Politica', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=674')
         ,(u'Mundo', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=678')
         ,(u'Deportes', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=656')
         ,(u'Negocios', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=655')
@@ -55,10 +53,6 @@ class LaTercera(BasicNewsRecipe):
     ]

     def preprocess_html(self, soup):
-        mtag = '<meta http-equiv="Content-Language" content="es-CL"/>'
-        soup.head.insert(0,mtag)
         for item in soup.findAll(style=True):
             del item['style']
         return soup
-
-    language = 'es'
@@ -1,11 +1,11 @@
-#!/usr/bin/env python

 __license__ = 'GPL v3'
-__copyright__ = '2008-2009, AprilHare, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2010, AprilHare, Darko Miletic <darko.miletic at gmail.com>'
 '''
 newscientist.com
 '''

+import re
 from calibre.web.feeds.news import BasicNewsRecipe

 class NewScientist(BasicNewsRecipe):
@@ -15,12 +15,14 @@ class NewScientist(BasicNewsRecipe):
     language = 'en'
     publisher = 'New Scientist'
     category = 'science news, science articles, science jobs, drugs, cancer, depression, computer software'
-    delay = 3
     oldest_article = 7
     max_articles_per_feed = 100
     no_stylesheets = True
     use_embedded_content = False
+    cover_url = 'http://www.newscientist.com/currentcover.jpg'
+    masthead_url = 'http://www.newscientist.com/img/misc/ns_logo.jpg'
     encoding = 'utf-8'
+    extra_css = ' body{font-family: Arial,sans-serif} img{margin-bottom: 0.8em} '

     conversion_options = {
         'comment' : description
@@ -28,14 +30,18 @@ class NewScientist(BasicNewsRecipe):
       , 'publisher' : publisher
      , 'language' : language
     }
+    preprocess_regexps = [(re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE),lambda match: '</title></head>')]

-    keep_only_tags = [dict(name='div', attrs={'id':['pgtop','maincol']})]
+    keep_only_tags = [dict(name='div', attrs={'id':['pgtop','maincol','nsblgposts','hldgalcols']})]

     remove_tags = [
-        dict(name='div', attrs={'class':['hldBd','adline','pnl','infotext' ]})
-        ,dict(name='div', attrs={'id' :['compnl','artIssueInfo','artTools']})
+        dict(name='div' , attrs={'class':['hldBd','adline','pnl','infotext' ]})
+        ,dict(name='div' , attrs={'id' :['compnl','artIssueInfo','artTools']})
         ,dict(name='p' , attrs={'class':['marker','infotext' ]})
+        ,dict(name='meta' , attrs={'name' :'description' })
     ]
+    remove_tags_after = dict(attrs={'class':'nbpcopy'})
+    remove_attributes = ['height','width']

     feeds = [
         (u'Latest Headlines' , u'http://feeds.newscientist.com/science-news' )
@@ -50,9 +56,15 @@ class NewScientist(BasicNewsRecipe):
     ]

     def get_article_url(self, article):
-        url = article.get('guid', None)
-        return url
+        return article.get('guid', None)

     def print_version(self, url):
         return url + '?full=true&print=true'
+
+    def preprocess_html(self, soup):
+        for tg in soup.findAll('a'):
+            if tg.string == 'Home':
+                tg.parent.extract()
+                return self.adeify_images(soup)
+        return self.adeify_images(soup)
@@ -1,7 +1,5 @@
-#!/usr/bin/env python
-
 __license__ = 'GPL v3'
-__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 nspm.rs
 '''
@@ -21,17 +19,16 @@ class Nspm(BasicNewsRecipe):
     use_embedded_content = False
     INDEX = 'http://www.nspm.rs/?alphabet=l'
     encoding = 'utf-8'
     language = 'sr'
-    lang = 'sr-Latn-RS'
+    masthead_url = 'http://www.nspm.rs/templates/jsn_epic_pro/images/logol.jpg'
     extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'

     conversion_options = {
         'comment' : description
       , 'tags' : category
       , 'publisher' : publisher
-      , 'language' : lang
-      , 'pretty_print' : True
+      , 'language' : language
+      , 'linearize_tables' : True
     }

     preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@@ -39,6 +36,8 @@ class Nspm(BasicNewsRecipe):
         dict(name=['link','object','embed'])
         ,dict(name='td', attrs={'class':'buttonheading'})
     ]
+    remove_tags_after = dict(attrs={'class':'article_separator'})
+    remove_attributes = ['width','height']

     def get_browser(self):
         br = BasicNewsRecipe.get_browser()
@@ -51,17 +50,6 @@ class Nspm(BasicNewsRecipe):
         return url.replace('.html','/stampa.html')

     def preprocess_html(self, soup):
-        soup.html['xml:lang'] = self.lang
-        soup.html['lang'] = self.lang
-        attribs = [ 'style','font','valign'
-                   ,'colspan','width','height'
-                   ,'rowspan','summary','align'
-                   ,'cellspacing','cellpadding'
-                   ,'frames','rules','border'
-                  ]
-        for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
-            item.name = 'div'
-            for attrib in attribs:
-                if item.has_key(attrib):
-                    del item[attrib]
+        for item in soup.body.findAll(style=True):
+            del item['style']
         return self.adeify_images(soup)
@@ -7,10 +7,11 @@ sfgate.com
 '''

 from calibre.web.feeds.news import BasicNewsRecipe
+import re

 class SanFranciscoChronicle(BasicNewsRecipe):
     title = u'San Francisco Chronicle'
-    __author__ = u'Darko Miletic'
+    __author__ = u'Darko Miletic and Sujata Raman'
     description = u'San Francisco news'
     language = 'en'

@@ -19,13 +20,56 @@ class SanFranciscoChronicle(BasicNewsRecipe):
     no_stylesheets = True
     use_embedded_content = False

-    remove_tags_before = {'class':'articleheadings'}
-    remove_tags_after = dict(name='div', attrs={'id':'articlecontent' })
-    remove_tags = [
-        dict(name='div', attrs={'class':'tools tools_top'})
-        ,dict(name='div', attrs={'id':'articlebox' })
-    ]
+    remove_tags_before = {'id':'printheader'}
+
+    remove_tags = [
+        dict(name='div',attrs={'id':'printheader'})
+        ,dict(name='a', attrs={'href':re.compile('http://ads\.pheedo\.com.*')})
+        ,dict(name='div',attrs={'id':'footer'})
+    ]
+
+    extra_css = '''
+        h1{font-family :Arial,Helvetica,sans-serif; font-size:large;}
+        h2{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#666666;}
+        h3{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;}
+        h4{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
+        p{font-family :Arial,Helvetica,sans-serif; font-size:x-small;}
+        .byline{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
+        .date{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
+        .dtlcomment{font-style:italic;}
+        .georgia h3{font-family :Arial,Helvetica,sans-serif; font-size:x-small; color:#000000;}
+        '''

     feeds = [
         (u'Top News Stories', u'http://www.sfgate.com/rss/feeds/news.xml')
     ]
+
+    def print_version(self,url):
+        url= url +"&type=printable"
+        return url
+
+    def get_article_url(self, article):
+        print str(article['title_detail']['value'])
+        url = article.get('guid',None)
+        url = "http://www.sfgate.com/cgi-bin/article.cgi?f="+url
+        if "Presented By:" in str(article['title_detail']['value']):
+            url = ''
+        return url
42  resources/recipes/sfbg.recipe  Normal file
@@ -0,0 +1,42 @@
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup

class SanFranciscoBayGuardian(BasicNewsRecipe):
    title = u'San Francisco Bay Guardian'
    language = 'en'
    __author__ = 'Krittika Goyal'
    oldest_article = 1 #days
    max_articles_per_feed = 25
    #encoding = 'latin1'

    no_stylesheets = True
    remove_tags_before = dict(name='div', attrs={'id':'story_header'})
    remove_tags_after = dict(name='div', attrs={'id':'shirttail'})
    remove_tags = [
        dict(name='iframe'),
        #dict(name='div', attrs={'class':'related-articles'}),
        dict(name='div', attrs={'id':['story_tools', 'toolbox', 'shirttail', 'comment_widget']}),
        #dict(name='ul', attrs={'class':'article-tools'}),
        dict(name='ul', attrs={'id':'story_tabs'}),
    ]

    feeds = [
        ('Cover', 'http://www.newsobserver.com/100/index.rss'),
        ('News', 'http://www.newsobserver.com/102/index.rss'),
        ('Politics', 'http://www.newsobserver.com/105/index.rss'),
        ('Business', 'http://www.newsobserver.com/104/index.rss'),
        ('Sports', 'http://www.newsobserver.com/103/index.rss'),
        ('College Sports', 'http://www.newsobserver.com/119/index.rss'),
        ('Lifestyles', 'http://www.newsobserver.com/106/index.rss'),
        ('Editorials', 'http://www.newsobserver.com/158/index.rss')]

    def preprocess_html(self, soup):
        story = soup.find(name='div', attrs={'id':'story_body'})
        #td = heading.findParent(name='td')
        #td.extract()
        soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
        body = soup.find(name='body')
        body.insert(0, story)
        return soup
52  resources/recipes/smith.recipe  Normal file
@@ -0,0 +1,52 @@
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup

class SmithsonianMagazine(BasicNewsRecipe):
    title = u'Smithsonian Magazine'
    language = 'en'
    __author__ = 'Krittika Goyal'
    oldest_article = 31 #days
    max_articles_per_feed = 50
    #encoding = 'latin1'
    recursions = 1
    match_regexps = ['&page=[2-9]$']

    remove_stylesheets = True
    #remove_tags_before = dict(name='h1', attrs={'class':'heading'})
    remove_tags_after = dict(name='p', attrs={'id':'articlePaginationWrapper'})
    remove_tags = [
        dict(name='iframe'),
        dict(name='div', attrs={'class':'article_sidebar_border'}),
        dict(name='div', attrs={'id':['article_sidebar_border', 'most-popular_large']}),
        #dict(name='ul', attrs={'class':'article-tools'}),
        dict(name='ul', attrs={'class':'cat-breadcrumb col three last'}),
    ]

    feeds = [
        ('History and Archeology',
            'http://feeds.feedburner.com/smithsonianmag/history-archaeology'),
        ('People and Places',
            'http://feeds.feedburner.com/smithsonianmag/people-places'),
        ('Science and Nature',
            'http://feeds.feedburner.com/smithsonianmag/science-nature'),
        ('Arts and Culture',
            'http://feeds.feedburner.com/smithsonianmag/arts-culture'),
        ('Travel',
            'http://feeds.feedburner.com/smithsonianmag/travel'),
    ]

    def preprocess_html(self, soup):
        story = soup.find(name='div', attrs={'id':'article-left'})
        #td = heading.findParent(name='td')
        #td.extract()
        soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
        body = soup.find(name='body')
        body.insert(0, story)
        return soup

    def postprocess_html(self, soup, first):
        for p in soup.findAll(id='articlePaginationWrapper'): p.extract()
        if not first:
            for div in soup.findAll(id='article-head'): div.extract()
        return soup
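Note: the recursions/match_regexps pair in this recipe is the generic BasicNewsRecipe mechanism for multi-page articles: the fetcher follows links found in each fetched page one level deep, but only those matching the article's own pagination pattern, and postprocess_html then strips the pagination widget (and, on continuation pages, the repeated article head) so the stitched pages read as one article. A minimal sketch of just that mechanism, with an illustrative class name and regexp:

    from calibre.web.feeds.news import BasicNewsRecipe

    class PaginatedExample(BasicNewsRecipe):
        recursions = 1                       # follow links one level deep
        match_regexps = [r'&page=[2-9]$']    # but only the article's own page 2..9 links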
@@ -9,16 +9,35 @@ class StrategyBusinessRecipe(BasicNewsRecipe):
     title = u'Strategy+Business'
     publisher = u' Booz & Company'
     category = u'Business'
-    description = u'Business magazine for senior business executives and the people who influence them.'
+    description = (u'Business magazine for senior business executives and the people who influence them.'
+            'Go to http://www.strategy-business.com/registration to sign up for a free account')

     oldest_article = 13 * 7 # 3 months
     max_articles_per_feed = 100
     use_embedded_content = False
     remove_empty_feeds = True
+    needs_subscription = True

     no_stylesheets = True
     remove_javascript = True

+    def get_browser(self):
+        br = BasicNewsRecipe.get_browser(self)
+        br.open('http://www.strategy-business.com/registration')
+        for i, f in enumerate(br.forms()):
+            if 'gatekeeper_edit' in f.name:
+                br.select_form(name=f.name)
+                for c in f.controls:
+                    if c.name.endswith('_email'):
+                        br[c.name] = self.username
+                    elif c.name.endswith('_password'):
+                        br[c.name] = self.password
+        raw = br.submit().read()
+        if '>Logout' not in raw:
+            raise ValueError('Failed to login, check your username and password')
+        return br

     extra_css = '''
         body{font-family:verdana,arial,helvetica,geneva,sans-serif ;}
         a {text-decoration: none; color: blue;}
@@ -12,7 +12,7 @@ from calibre.web.feeds.news import BasicNewsRecipe

 class al(BasicNewsRecipe):
     author = 'Lorenzo Vigentini'
-    description = 'the Escapist Magazine'
+    description = 'The Escapist Magazine'

     cover_url = 'http://cdn.themis-media.com/themes/escapistmagazine/default/images/logo.png'
     title = u'the Escapist Magazine'
39  resources/recipes/thecultofghoul.recipe  Normal file
@@ -0,0 +1,39 @@

__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
cultofghoul.blogspot.com
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe

class TheCultOfGhoul(BasicNewsRecipe):
    title = 'The Cult of Ghoul'
    __author__ = 'Darko Miletic'
    description = 'Filmski blog'
    oldest_article = 7
    max_articles_per_feed = 100
    language = 'sr'
    encoding = 'utf-8'
    no_stylesheets = True
    use_embedded_content = True
    extra_css = ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: "Trebuchet MS",Trebuchet,Verdana,sans1,sans-serif} .article_description{font-family: sans1, sans-serif} img{margin-bottom: 0.8em; border: 1px solid #333333; padding: 4px } '

    conversion_options = {
        'comment'   : description
      , 'tags'      : 'film, blog, srbija, strava, uzas'
      , 'publisher' : 'Dejan Ognjanovic'
      , 'language'  : language
    }

    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    feeds = [(u'Posts', u'http://cultofghoul.blogspot.com/feeds/posts/default')]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return self.adeify_images(soup)
@@ -50,7 +50,11 @@ class WallStreetJournal(BasicNewsRecipe):
         br.select_form(nr=0)
         br['user'] = self.username
         br['password'] = self.password
-        br.submit()
+        res = br.submit()
+        raw = res.read()
+        if 'Welcome,' not in raw:
+            raise ValueError('Failed to log in to wsj.com, check your '
+                    'username and password')
         return br

     def postprocess_html(self, soup, first):
@@ -69,8 +73,10 @@ class WallStreetJournal(BasicNewsRecipe):
         soup = self.wsj_get_index()

         year = strftime('%Y')
-        for x in soup.findAll('td', attrs={'class':'b14'}):
+        for x in soup.findAll('td', height='25', attrs={'class':'b14'}):
             txt = self.tag_to_string(x).strip()
+            txt = txt.replace(u'\xa0', ' ')
+            txt = txt.encode('ascii', 'ignore')
             if year in txt:
                 self.timefmt = ' [%s]'%txt
                 break
@@ -11,7 +11,8 @@ import sys, re, os, platform
 is64bit = platform.architecture()[0] == '64bit'
 iswindows = re.search('win(32|64)', sys.platform)
 isosx = 'darwin' in sys.platform
-islinux = not isosx and not iswindows
+isfreebsd = 'freebsd' in sys.platform
+islinux = not isosx and not iswindows and not isfreebsd
 SRC = os.path.abspath('src')
 sys.path.insert(0, SRC)
 sys.resources_location = os.path.join(os.path.dirname(SRC), 'resources')
@@ -117,7 +118,7 @@ class Command(object):
         self.real_user = os.environ.get('SUDO_USER', None)

     def drop_privileges(self):
-        if not islinux or isosx:
+        if not islinux or isosx or isfreebsd:
             return
         if self.real_user is not None:
             self.info('Dropping privileges to those of', self.real_user+':',
@@ -128,7 +129,7 @@ class Command(object):
         os.seteuid(int(self.real_uid))

     def regain_privileges(self):
-        if not islinux or isosx:
+        if not islinux or isosx or isfreebsd:
             return
         if os.geteuid() != 0 and self.orig_euid == 0:
             self.info('Trying to get root privileges')
@@ -89,6 +89,7 @@ fc_inc = '/usr/include/fontconfig'
 fc_lib = '/usr/lib'
 podofo_inc = '/usr/include/podofo'
 podofo_lib = '/usr/lib'
+chmlib_inc_dirs = chmlib_lib_dirs = []

 if iswindows:
     prefix = r'C:\cygwin\home\kovid\sw'
@@ -96,6 +97,10 @@ if iswindows:
     sw_lib_dir = os.path.join(prefix, 'lib')
     fc_inc = os.path.join(sw_inc_dir, 'fontconfig')
     fc_lib = sw_lib_dir
+    chmlib_inc_dirs = consolidate('CHMLIB_INC_DIR', os.path.join(prefix,
+        'build', 'chmlib-0.40', 'src'))
+    chmlib_lib_dirs = consolidate('CHMLIB_LIB_DIR', os.path.join(prefix,
+        'build', 'chmlib-0.40', 'src', 'Release'))
     png_inc_dirs = [sw_inc_dir]
     png_lib_dirs = [sw_lib_dir]
     png_libs = ['png12']
@ -11,15 +11,16 @@ from distutils import sysconfig
|
|||||||
|
|
||||||
from PyQt4.pyqtconfig import QtGuiModuleMakefile
|
from PyQt4.pyqtconfig import QtGuiModuleMakefile
|
||||||
|
|
||||||
from setup import Command, islinux, isosx, SRC, iswindows
|
from setup import Command, islinux, isfreebsd, isosx, SRC, iswindows
|
||||||
from setup.build_environment import fc_inc, fc_lib, \
|
from setup.build_environment import fc_inc, fc_lib, chmlib_inc_dirs, \
|
||||||
fc_error, poppler_libs, poppler_lib_dirs, poppler_inc_dirs, podofo_inc, \
|
fc_error, poppler_libs, poppler_lib_dirs, poppler_inc_dirs, podofo_inc, \
|
||||||
podofo_lib, podofo_error, poppler_error, pyqt, OSX_SDK, NMAKE, \
|
podofo_lib, podofo_error, poppler_error, pyqt, OSX_SDK, NMAKE, \
|
||||||
QMAKE, msvc, MT, win_inc, win_lib, png_inc_dirs, win_ddk, \
|
QMAKE, msvc, MT, win_inc, win_lib, png_inc_dirs, win_ddk, \
|
||||||
magick_inc_dirs, magick_lib_dirs, png_lib_dirs, png_libs, \
|
magick_inc_dirs, magick_lib_dirs, png_lib_dirs, png_libs, \
|
||||||
magick_error, magick_libs, ft_lib_dirs, ft_libs, jpg_libs, jpg_lib_dirs
|
magick_error, magick_libs, ft_lib_dirs, ft_libs, jpg_libs, \
|
||||||
|
jpg_lib_dirs, chmlib_lib_dirs
|
||||||
MT
|
MT
|
||||||
isunix = islinux or isosx
|
isunix = islinux or isosx or isfreebsd
|
||||||
|
|
||||||
make = 'make' if isunix else NMAKE
|
make = 'make' if isunix else NMAKE
|
||||||
|
|
||||||
@@ -56,6 +57,22 @@ if iswindows:
     pdfreflow_libs = ['advapi32', 'User32', 'Gdi32']

 extensions = [

+    Extension('chmlib',
+              ['calibre/utils/chm/swig_chm.c'],
+              libraries=['ChmLib' if iswindows else 'chm'],
+              inc_dirs=chmlib_inc_dirs,
+              lib_dirs=chmlib_lib_dirs,
+              cflags=["-DSWIG_COBJECT_TYPES"]),
+
+    Extension('chm_extra',
+              ['calibre/utils/chm/extra.c'],
+              libraries=['ChmLib' if iswindows else 'chm'],
+              inc_dirs=chmlib_inc_dirs,
+              lib_dirs=chmlib_lib_dirs,
+              cflags=["-D__PYTHON__"]),
+
+
     Extension('pdfreflow',
               reflow_sources,
               headers=reflow_headers,
@@ -126,7 +143,7 @@ extensions = [
 if iswindows:
     extensions.append(Extension('winutil',
         ['calibre/utils/windows/winutil.c'],
-        libraries=['shell32', 'setupapi'],
+        libraries=['shell32', 'setupapi', 'wininet'],
        cflags=['/X']
        ))

@@ -154,6 +171,13 @@ if islinux:
     ldflags.append('-lpython'+sysconfig.get_python_version())


+if isfreebsd:
+    cflags.append('-pthread')
+    ldflags.append('-shared')
+    cflags.append('-I'+sysconfig.get_python_inc())
+    ldflags.append('-lpython'+sysconfig.get_python_version())
+
+
 if isosx:
     x, p = ('i386', 'ppc')
     archs = ['-arch', x, '-arch', p, '-isysroot',
@@ -8,7 +8,7 @@ __docformat__ = 'restructuredtext en'

 import sys, os, textwrap, subprocess, shutil, tempfile, atexit, stat, shlex

-from setup import Command, islinux, basenames, modules, functions, \
+from setup import Command, islinux, isfreebsd, basenames, modules, functions, \
         __appname__, __version__

 HEADER = '''\
@@ -116,7 +116,7 @@ class Develop(Command):


     def pre_sub_commands(self, opts):
-        if not islinux:
+        if not (islinux or isfreebsd):
             self.info('\nSetting up a source based development environment is only '
                     'supported on linux. On other platforms, see the User Manual'
                     ' for help with setting up a development environment.')
@@ -156,7 +156,7 @@ class Develop(Command):
             self.warn('Failed to compile mount helper. Auto mounting of',
                     ' devices will not work')

-        if os.geteuid() != 0:
+        if not isfreebsd and os.geteuid() != 0:
             return self.warn('Must be run as root to compile mount helper. Auto '
                     'mounting of devices will not work.')
         src = os.path.join(self.SRC, 'calibre', 'devices', 'linux_mount_helper.c')
@@ -168,9 +168,10 @@ class Develop(Command):
         ret = p.wait()
         if ret != 0:
             return warn()
-        os.chown(dest, 0, 0)
-        os.chmod(dest, stat.S_ISUID|stat.S_ISGID|stat.S_IRUSR|stat.S_IWUSR|\
-                stat.S_IXUSR|stat.S_IXGRP|stat.S_IXOTH)
+        if not isfreebsd:
+            os.chown(dest, 0, 0)
+            os.chmod(dest, stat.S_ISUID|stat.S_ISGID|stat.S_IRUSR|stat.S_IWUSR|\
+                    stat.S_IXUSR|stat.S_IXGRP|stat.S_IXOTH)
         self.manifest.append(dest)
         return dest

@@ -42,6 +42,7 @@ class LinuxFreeze(Command):
                 '/usr/lib/liblcms.so.1',
                 '/tmp/calibre-mount-helper',
                 '/usr/lib/libunrar.so',
+                '/usr/lib/libchm.so.0',
                 '/usr/lib/libsqlite3.so.0',
                 '/usr/lib/libsqlite3.so.0',
                 '/usr/lib/libmng.so.1',
@@ -459,7 +459,7 @@ class Py2App(object):

     @flush
     def add_misc_libraries(self):
-        for x in ('usb', 'unrar', 'readline.6.0', 'wmflite-0.2.7'):
+        for x in ('usb', 'unrar', 'readline.6.0', 'wmflite-0.2.7', 'chm.0'):
             info('\nAdding', x)
             x = 'lib%s.dylib'%x
             shutil.copy2(join(SW, 'lib', x), self.frameworks_dir)
@@ -12,7 +12,7 @@ warnings.simplefilter('ignore', DeprecationWarning)


 from calibre.startup import plugins, winutil, winutilerror
-from calibre.constants import iswindows, isosx, islinux, isfrozen, \
+from calibre.constants import iswindows, isosx, islinux, isfreebsd, isfrozen, \
         terminal_controller, preferred_encoding, \
         __appname__, __version__, __author__, \
         win32event, win32api, winerror, fcntl, \
@@ -22,7 +22,7 @@ import mechanize
 if False:
     winutil, winutilerror, __appname__, islinux, __version__
     fcntl, win32event, isfrozen, __author__, terminal_controller
-    winerror, win32api
+    winerror, win32api, isfreebsd

 mimetypes.add_type('application/epub+zip', '.epub')
 mimetypes.add_type('text/x-sony-bbeb+xml', '.lrs')
@@ -2,7 +2,7 @@ __license__ = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 __appname__ = 'calibre'
-__version__ = '0.6.42'
+__version__ = '0.6.44'
 __author__ = "Kovid Goyal <kovid@kovidgoyal.net>"

 import re
@@ -22,7 +22,8 @@ terminal_controller = TerminalController(sys.stdout)
 iswindows = 'win32' in sys.platform.lower() or 'win64' in sys.platform.lower()
 isosx = 'darwin' in sys.platform.lower()
 isnewosx = isosx and getattr(sys, 'new_app_bundle', False)
-islinux = not(iswindows or isosx)
+isfreebsd = 'freebsd' in sys.platform.lower()
+islinux = not(iswindows or isosx or isfreebsd)
 isfrozen = hasattr(sys, 'frozen')
 isunix = isosx or islinux

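
Note: the order of these tests matters; islinux is now the fallback once Windows, OS X and FreeBSD have all been ruled out. A quick sketch of how the flags resolve for a few representative sys.platform strings (the strings are illustrative):

    for platform in ('win32', 'darwin', 'freebsd8', 'linux2'):
        iswindows = 'win32' in platform or 'win64' in platform
        isosx = 'darwin' in platform
        isfreebsd = 'freebsd' in platform
        islinux = not (iswindows or isosx or isfreebsd)
        print platform, iswindows, isosx, isfreebsd, islinux
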
@@ -56,7 +57,8 @@ if plugins is None:
         sys.path.insert(0, plugin_path)

         for plugin in ['pictureflow', 'lzx', 'msdes', 'podofo', 'cPalmdoc',
-                'fontconfig', 'pdfreflow', 'progress_indicator'] + \
+                'fontconfig', 'pdfreflow', 'progress_indicator', 'chmlib',
+                'chm_extra'] + \
                 (['winutil'] if iswindows else []) + \
                 (['usbobserver'] if isosx else []):
             try:
@@ -119,11 +119,34 @@ class Plugin(object):

     def __enter__(self, *args):
         if self.plugin_path is not None:
-            sys.path.insert(0, self.plugin_path)
+            from calibre.utils.zipfile import ZipFile
+            zf = ZipFile(self.plugin_path)
+            extensions = set([x.rpartition('.')[-1].lower() for x in
+                zf.namelist()])
+            zip_safe = True
+            for ext in ('pyd', 'so', 'dll', 'dylib'):
+                if ext in extensions:
+                    zip_safe = False
+            if zip_safe:
+                sys.path.insert(0, self.plugin_path)
+                self.sys_insertion_path = self.plugin_path
+            else:
+                from calibre.ptempfile import TemporaryDirectory
+                self._sys_insertion_tdir = TemporaryDirectory('plugin_unzip')
+                self.sys_insertion_path = self._sys_insertion_tdir.__enter__(*args)
+                zf.extractall(self.sys_insertion_path)
+                sys.path.insert(0, self.sys_insertion_path)
+            zf.close()


     def __exit__(self, *args):
-        if self.plugin_path in sys.path:
-            sys.path.remove(self.plugin_path)
+        ip, it = getattr(self, 'sys_insertion_path', None), getattr(self,
+                '_sys_insertion_tdir', None)
+        if ip in sys.path:
+            sys.path.remove(ip)
+        if hasattr(it, '__exit__'):
+            it.__exit__(*args)



 class FileTypePlugin(Plugin):
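
Note: a plugin zip is treated as zip-safe only when it contains no compiled extensions, since Python cannot import pyd/so/dll/dylib files from inside a zip archive. A condensed sketch of the detection step, using the stdlib zipfile as a stand-in for calibre.utils.zipfile (the file name is hypothetical):

    from zipfile import ZipFile

    zf = ZipFile('plugin.zip')
    extensions = set(name.rpartition('.')[-1].lower() for name in zf.namelist())
    zip_safe = not extensions.intersection(('pyd', 'so', 'dll', 'dylib'))
    zf.close()
    # zip_safe: add the zip itself to sys.path; otherwise extract it to a
    # temporary directory first and add that directory instead.
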
@@ -70,9 +70,10 @@ class PML2PMLZ(FileTypePlugin):
         pmlz = zipfile.ZipFile(of.name, 'w')
         pmlz.write(pmlfile, os.path.basename(pmlfile))

-        pml_img = os.path.basename(pmlfile)[0] + '_img'
-        img_dir = pml_img if os.path.exists(pml_img) else 'images' if \
-            os.path.exists('images') else ''
+        pml_img = os.path.splitext(pmlfile)[0] + '_img'
+        i_img = os.path.join(os.path.dirname(pmlfile),'images')
+        img_dir = pml_img if os.path.isdir(pml_img) else i_img if \
+            os.path.isdir(i_img) else ''
         if img_dir:
             for image in glob.glob(os.path.join(img_dir, '*.png')):
                 pmlz.write(image, os.path.join('images', (os.path.basename(image))))
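
Note: the old code took os.path.basename(pmlfile)[0], i.e. the first character of the file name, and looked for 'images' relative to the current working directory. The fix keys both candidate image directories off the PML file itself. With hypothetical paths:

    import os

    pmlfile = '/books/mybook.pml'
    pml_img = os.path.splitext(pmlfile)[0] + '_img'           # /books/mybook_img
    i_img = os.path.join(os.path.dirname(pmlfile), 'images')  # /books/images
    img_dir = pml_img if os.path.isdir(pml_img) else i_img if \
        os.path.isdir(i_img) else ''
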
@@ -81,17 +82,6 @@ class PML2PMLZ(FileTypePlugin):
         return of.name


-# CHM MODIFIED
-class CHMMetadataReader(MetadataReaderPlugin):
-
-    name = 'Read CHM metadata'
-    file_types = set(['chm'])
-    description = _('Read metadata from %s files') % 'CHM'
-
-    def get_metadata(self, stream, ftype):
-        from calibre.ebooks.metadata.chm import get_metadata
-        return get_metadata(stream)
-
 class ComicMetadataReader(MetadataReaderPlugin):

     name = 'Read comic metadata'
@@ -113,6 +103,17 @@ class ComicMetadataReader(MetadataReaderPlugin):
             mi.cover_data = (ext.lower(), data)
         return mi

+class CHMMetadataReader(MetadataReaderPlugin):
+
+    name = 'Read CHM metadata'
+    file_types = set(['chm'])
+    description = _('Read metadata from %s files') % 'CHM'
+
+    def get_metadata(self, stream, ftype):
+        from calibre.ebooks.chm.metadata import get_metadata
+        return get_metadata(stream)
+
+
 class EPUBMetadataReader(MetadataReaderPlugin):

     name = 'Read EPUB metadata'
@@ -394,7 +395,7 @@ from calibre.ebooks.rtf.input import RTFInput
 from calibre.ebooks.tcr.input import TCRInput
 from calibre.ebooks.txt.input import TXTInput
 from calibre.ebooks.lrf.input import LRFInput
-from calibre.ebooks.chm.input import CHMInput # CHM MODIFIED
+from calibre.ebooks.chm.input import CHMInput

 from calibre.ebooks.epub.output import EPUBOutput
 from calibre.ebooks.fb2.output import FB2Output
@@ -418,7 +419,7 @@ from calibre.devices.blackberry.driver import BLACKBERRY
 from calibre.devices.cybook.driver import CYBOOK
 from calibre.devices.eb600.driver import EB600, COOL_ER, SHINEBOOK, \
         POCKETBOOK360, GER2, ITALICA, ECLICTO, DBOOK, INVESBOOK, \
-        BOOQ
+        BOOQ, ELONEX
 from calibre.devices.iliad.driver import ILIAD
 from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800
 from calibre.devices.jetbook.driver import JETBOOK
@@ -433,6 +434,7 @@ from calibre.devices.nuut2.driver import NUUT2
 from calibre.devices.iriver.driver import IRIVER_STORY
 from calibre.devices.binatone.driver import README
 from calibre.devices.hanvon.driver import N516, EB511
+from calibre.devices.teclast.driver import TECLAST_K3

 from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon
 from calibre.library.catalog import CSV_XML, EPUB_MOBI
@@ -454,7 +456,7 @@ plugins += [
     TCRInput,
     TXTInput,
     LRFInput,
-    CHMInput, # CHM MODIFIED
+    CHMInput,
 ]
 plugins += [
     EPUBOutput,
@@ -508,6 +510,8 @@ plugins += [
     README,
     N516,
     EB511,
+    ELONEX,
+    TECLAST_K3
 ]
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
         x.__name__.endswith('MetadataReader')]
@@ -214,8 +214,21 @@ class InputFormatPlugin(Plugin):
         return ret

     def postprocess_book(self, oeb, opts, log):
+        '''
+        Called to allow the input plugin to perform postprocessing after
+        the book has been parsed.
+        '''
         pass

+    def specialize(self, oeb, opts, log, output_fmt):
+        '''
+        Called to allow the input plugin to specialize the parsed book
+        for a particular output format. Called after postprocess_book
+        and before any transforms are performed on the parsed book.
+        '''
+        pass
+

 class OutputFormatPlugin(Plugin):
     '''
     OutputFormatPlugins are responsible for converting an OEB document
@@ -235,7 +235,7 @@ class SonyReaderOutput(OutputProfile):
     description = _('This profile is intended for the SONY PRS line. '
             'The 500/505/600/700 etc.')

-    screen_size = (600, 775)
+    screen_size = (590, 775)
     dpi = 168.451
     fbase = 12
     fsizes = [7.5, 9, 10, 12, 15.5, 20, 22, 24]
@@ -235,6 +235,8 @@ def _run_filetype_plugins(path_to_file, ft=None, occasion='preprocess'):
     with plugin:
         try:
             nfp = plugin.run(path_to_file)
+            if not nfp:
+                nfp = path_to_file
         except:
             print 'Running file type plugin %s failed with traceback:'%plugin.name
             traceback.print_exc()
@@ -399,7 +401,7 @@ def initialize_plugins():
             plugin = load_plugin(zfp) if not isinstance(zfp, type) else zfp
         except PluginNotFound:
             continue
-        plugin = initialize_plugin(plugin, zfp if not isinstance(zfp, type) else zfp)
+        plugin = initialize_plugin(plugin, None if isinstance(zfp, type) else zfp)
         _initialized_plugins.append(plugin)
     except:
         print 'Failed to initialize plugin...'
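
Note: builtin plugins are passed to initialize_plugin() as classes, external ones as zip file paths; the old expression evaluated to zfp in both branches, so builtin plugins got a meaningless path instead of None. A sketch of the corrected dispatch (path and class are illustrative):

    def plugin_path_for(zfp):
        # None for builtin plugin classes, the zip path for external plugins
        return None if isinstance(zfp, type) else zfp

    print plugin_path_for('/plugins/myplugin.zip')     # the zip path
    print plugin_path_for(type('SomePlugin', (), {}))  # None
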
@@ -23,6 +23,8 @@ Run an embedded python interpreter.
             help='Debug the specified device driver.')
     parser.add_option('-g', '--gui', default=False, action='store_true',
             help='Run the GUI',)
+    parser.add_option('-w', '--viewer', default=False, action='store_true',
+            help='Run the ebook viewer',)
     parser.add_option('--paths', default=False, action='store_true',
             help='Output the paths necessary to setup the calibre environment')
     parser.add_option('--migrate', action='store_true', default=False,
@@ -98,6 +100,12 @@ def main(args=sys.argv):
     if opts.gui:
         from calibre.gui2.main import main
         main(['calibre'])
+    elif opts.viewer:
+        from calibre.gui2.viewer.main import main
+        vargs = ['ebook-viewer', '--debug-javascript']
+        if len(args) > 1:
+            vargs.append(args[-1])
+        main(vargs)
     elif opts.command:
         sys.argv = args[:1]
         exec opts.command
@@ -60,8 +60,10 @@ def debug(ioreg_to_tmp=False, buf=None):
     if isosx:
         from calibre.devices.usbms.device import Device
         mount = repr(Device.osx_run_mount())
-        ioreg = Device.run_ioreg()
-        ioreg = 'Output from mount:\n\n'+mount+'\n\n'+ioreg
+        drives = pprint.pformat(Device.osx_get_usb_drives())
+        ioreg = 'Output from mount:\n'+mount+'\n\n'
+        ioreg += 'Output from osx_get_usb_drives:\n'+drives+'\n\n'
+        ioreg += Device.run_ioreg()
     connected_devices = []
     for dev in device_plugins():
         out('Looking for', dev.__class__.__name__)
@@ -15,7 +15,7 @@ class ANDROID(USBMS):
     supported_platforms = ['windows', 'osx', 'linux']

     # Ordered list of supported formats
-    FORMATS = ['epub']
+    FORMATS = ['epub', 'pdf']

     VENDOR_ID = {
         0x0bb4 : { 0x0c02 : [0x100], 0x0c01 : [0x100]},
@@ -195,3 +195,15 @@ class BOOQ(EB600):
     WINDOWS_MAIN_MEM = 'EB600'
     WINDOWS_CARD_A_MEM = 'EB600'

+class ELONEX(EB600):
+
+    name = 'Elonex 600EB'
+    gui_name = 'Elonex'
+
+    FORMATS = ['epub', 'pdf', 'txt', 'html']
+
+    VENDOR_NAME = 'ELONEX'
+    WINDOWS_MAIN_MEM = 'EBOOK'
+    WINDOWS_CARD_A_MEM = 'EBOOK'
+
+
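
Note: ELONEX follows the usual pattern of subclassing an existing driver (EB600) and overriding only the identification strings, inheriting the USB vendor/product ids. A hypothetical reader built on the same electronics would be added the same way:

    class ANOTHER_EB600_CLONE(EB600):

        name = 'Another 600EB Device Interface'
        gui_name = 'Another'

        FORMATS = ['epub', 'pdf', 'txt']

        VENDOR_NAME = 'ANOTHER'
        WINDOWS_MAIN_MEM = 'EBOOK'
        WINDOWS_CARD_A_MEM = 'EBOOK'
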
@@ -6,6 +6,7 @@ the GUI. A device backend must subclass the L{Device} class. See prs500.py for
 a backend that implement the Device interface for the SONY PRS500 Reader.
 """
 import os
+from collections import namedtuple

 from calibre.customize import Plugin
 from calibre.constants import iswindows
@@ -43,6 +44,9 @@ class DevicePlugin(Plugin):
     #: Icon for this device
     icon = I('reader.svg')

+    # Used by gui2.ui:annotations_fetched() and devices.kindle.driver:get_annotations()
+    UserAnnotation = namedtuple('Annotation','type, bookmark')
+
     @classmethod
     def get_gui_name(cls):
         if hasattr(cls, 'gui_name'):
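
Note: UserAnnotation is a plain namedtuple, so drivers can hand annotation payloads to the GUI without a dedicated class. Sketch of the round trip (the bookmark object is a stand-in):

    from collections import namedtuple

    UserAnnotation = namedtuple('Annotation', 'type, bookmark')
    ann = UserAnnotation(type='kindle', bookmark=object())
    print ann.type      # 'kindle'; the bookmark field is opaque to the GUI
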
@@ -7,10 +7,9 @@ __docformat__ = 'restructuredtext en'
 '''
 Device driver for Amazon's Kindle
 '''
-import os
-import re
-import sys
+import os, re, sys
+from cStringIO import StringIO
+from struct import unpack

 from calibre.devices.usbms.driver import USBMS

@@ -44,6 +43,7 @@ class KINDLE(USBMS):
     EBOOK_DIR_CARD_A = 'documents'
     DELETE_EXTS = ['.mbp']
     SUPPORTS_SUB_DIRS = True
+    SUPPORTS_ANNOTATIONS = True

     WIRELESS_FILE_NAME_PATTERN = re.compile(
         r'(?P<title>[^-]+)-asin_(?P<asin>[a-zA-Z\d]{10,})-type_(?P<type>\w{4})-v_(?P<index>\d+).*')
@ -60,6 +60,73 @@ class KINDLE(USBMS):
|
|||||||
'replace')
|
'replace')
|
||||||
return mi
|
return mi
|
||||||
|
|
||||||
|
def get_annotations(self, path_map):
|
||||||
|
MBP_FORMATS = [u'azw', u'mobi', u'prc', u'txt']
|
||||||
|
TAN_FORMATS = [u'tpz', u'azw1']
|
||||||
|
|
||||||
|
mbp_formats = set()
|
||||||
|
for fmt in MBP_FORMATS:
|
||||||
|
mbp_formats.add(fmt)
|
||||||
|
tan_formats = set()
|
||||||
|
for fmt in TAN_FORMATS:
|
||||||
|
tan_formats.add(fmt)
|
||||||
|
|
||||||
|
def get_storage():
|
||||||
|
storage = []
|
||||||
|
if self._main_prefix:
|
||||||
|
storage.append(os.path.join(self._main_prefix, self.EBOOK_DIR_MAIN))
|
||||||
|
if self._card_a_prefix:
|
||||||
|
storage.append(os.path.join(self._card_a_prefix, self.EBOOK_DIR_CARD_A))
|
||||||
|
if self._card_b_prefix:
|
||||||
|
storage.append(os.path.join(self._card_b_prefix, self.EBOOK_DIR_CARD_B))
|
||||||
|
return storage
|
||||||
|
|
||||||
|
def resolve_bookmark_paths(storage, path_map):
|
||||||
|
pop_list = []
|
||||||
|
book_ext = {}
|
||||||
|
for id in path_map:
|
||||||
|
file_fmts = set()
|
||||||
|
for fmt in path_map[id]['fmts']:
|
||||||
|
file_fmts.add(fmt)
|
||||||
|
|
||||||
|
bookmark_extension = None
|
||||||
|
if file_fmts.intersection(mbp_formats):
|
||||||
|
book_extension = list(file_fmts.intersection(mbp_formats))[0]
|
||||||
|
bookmark_extension = 'mbp'
|
||||||
|
elif file_fmts.intersection(tan_formats):
|
||||||
|
book_extension = list(file_fmts.intersection(tan_formats))[0]
|
||||||
|
bookmark_extension = 'tan'
|
||||||
|
|
||||||
|
if bookmark_extension:
|
||||||
|
for vol in storage:
|
||||||
|
bkmk_path = path_map[id]['path'].replace(os.path.abspath('/<storage>'),vol)
|
||||||
|
bkmk_path = bkmk_path.replace('bookmark',bookmark_extension)
|
||||||
|
if os.path.exists(bkmk_path):
|
||||||
|
path_map[id] = bkmk_path
|
||||||
|
book_ext[id] = book_extension
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
pop_list.append(id)
|
||||||
|
else:
|
||||||
|
pop_list.append(id)
|
||||||
|
|
||||||
|
# Remove non-existent bookmark templates
|
||||||
|
for id in pop_list:
|
||||||
|
path_map.pop(id)
|
||||||
|
return path_map, book_ext
|
||||||
|
|
||||||
|
storage = get_storage()
|
||||||
|
path_map, book_ext = resolve_bookmark_paths(storage, path_map)
|
||||||
|
|
||||||
|
bookmarked_books = {}
|
||||||
|
for id in path_map:
|
||||||
|
bookmark_ext = path_map[id].rpartition('.')[2]
|
||||||
|
myBookmark = Bookmark(path_map[id], id, book_ext[id], bookmark_ext)
|
||||||
|
bookmarked_books[id] = self.UserAnnotation(type='kindle', bookmark=myBookmark)
|
||||||
|
|
||||||
|
# This returns as job.result in gui2.ui.annotations_fetched(self,job)
|
||||||
|
return bookmarked_books
|
||||||
|
|
||||||
|
|
||||||
class KINDLE2(KINDLE):
|
class KINDLE2(KINDLE):
|
||||||
|
|
||||||
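
Note: resolve_bookmark_paths rewrites each book's device path template: the '/<storage>' placeholder is replaced with each real storage root, and the 'bookmark' suffix with the sidecar extension ('mbp' for MOBI-family books, 'tan' for Topaz). A sketch of that rewrite with hypothetical values:

    import os

    template = '/<storage>/My Book.bookmark'
    vol = '/mnt/kindle/documents'
    bkmk_path = template.replace(os.path.abspath('/<storage>'), vol)
    bkmk_path = bkmk_path.replace('bookmark', 'mbp')
    print bkmk_path   # /mnt/kindle/documents/My Book.mbp
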
@@ -79,3 +146,213 @@ class KINDLE_DX(KINDLE2):

     PRODUCT_ID = [0x0003]
     BCD = [0x0100]
+
+class Bookmark():
+    '''
+    A simple class fetching bookmark data
+    Kindle-specific
+    '''
+    def __init__(self, path, id, book_format, bookmark_extension):
+        self.book_format = book_format
+        self.bookmark_extension = bookmark_extension
+        self.book_length = 0
+        self.id = id
+        self.last_read = 0
+        self.last_read_location = 0
+        self.timestamp = 0
+        self.user_notes = None
+
+        self.get_bookmark_data(path)
+        self.get_book_length(path)
+        try:
+            self.percent_read = float(100*self.last_read / self.book_length)
+        except:
+            self.percent_read = 0
+
+    def record(self, n):
+        from calibre.ebooks.metadata.mobi import StreamSlicer
+        if n >= self.nrecs:
+            raise ValueError('non-existent record %r' % n)
+        offoff = 78 + (8 * n)
+        start, = unpack('>I', self.data[offoff + 0:offoff + 4])
+        stop = None
+        if n < (self.nrecs - 1):
+            stop, = unpack('>I', self.data[offoff + 8:offoff + 12])
+        return StreamSlicer(self.stream, start, stop)
+
+    def get_bookmark_data(self, path):
+        ''' Return the timestamp and last_read_location '''
+        from calibre.ebooks.metadata.mobi import StreamSlicer
+        user_notes = {}
+        if self.bookmark_extension == 'mbp':
+            MAGIC_MOBI_CONSTANT = 150
+            with open(path,'rb') as f:
+                stream = StringIO(f.read())
+                data = StreamSlicer(stream)
+                self.timestamp, = unpack('>I', data[0x24:0x28])
+                bpar_offset, = unpack('>I', data[0x4e:0x52])
+                lrlo = bpar_offset + 0x0c
+                self.last_read = int(unpack('>I', data[lrlo:lrlo+4])[0])
+                self.last_read_location = self.last_read/MAGIC_MOBI_CONSTANT + 1
+                entries, = unpack('>I', data[0x4a:0x4e])
+
+                # Store the annotations/locations
+                bpl = bpar_offset + 4
+                bpar_len, = unpack('>I', data[bpl:bpl+4])
+                bpar_len += 8
+                #print "bpar_len: 0x%x" % bpar_len
+                eo = bpar_offset + bpar_len
+
+                # Walk bookmark entries
+                #print " --- %s --- " % path
+                current_entry = 1
+                sig = data[eo:eo+4]
+                previous_block = None
+
+                while sig == 'DATA':
+                    text = None
+                    entry_type = None
+                    rec_len, = unpack('>I', data[eo+4:eo+8])
+                    if rec_len == 0:
+                        current_block = "empty_data"
+                    elif data[eo+8:eo+12] == "EBAR":
+                        current_block = "data_header"
+                        #entry_type = "data_header"
+                        location, = unpack('>I', data[eo+0x34:eo+0x38])
+                        #print "data_header location: %d" % location
+                    else:
+                        current_block = "text_block"
+                        if previous_block == 'empty_data':
+                            entry_type = 'Note'
+                        elif previous_block == 'data_header':
+                            entry_type = 'Highlight'
+                        text = data[eo+8:eo+8+rec_len].decode('utf-16-be')
+
+                    if entry_type:
+                        displayed_location = location/MAGIC_MOBI_CONSTANT + 1
+                        user_notes[location] = dict(id=self.id,
+                                displayed_location=displayed_location,
+                                type=entry_type,
+                                text=text)
+
+                    eo += rec_len + 8
+                    current_entry += 1
+                    previous_block = current_block
+                    sig = data[eo:eo+4]
+
+                while sig == 'BKMK':
+                    # Fix start location for Highlights using BKMK data
+                    end_loc, = unpack('>I', data[eo+0x10:eo+0x14])
+                    if end_loc in user_notes and user_notes[end_loc]['type'] == 'Highlight':
+                        start, = unpack('>I', data[eo+8:eo+12])
+                        user_notes[start] = user_notes[end_loc]
+                        user_notes.pop(end_loc)
+                    elif end_loc in user_notes and user_notes[end_loc]['type'] == 'Note':
+                        # Skip duplicate bookmarks for notes
+                        pass
+                    else:
+                        # If a bookmark coincides with a user annotation, the locs could
+                        # be the same - cheat by nudging -1
+                        # Skip bookmark for last_read_location
+                        if end_loc != self.last_read:
+                            displayed_location = end_loc/MAGIC_MOBI_CONSTANT + 1
+                            user_notes[end_loc - 1] = dict(id=self.id,
+                                    displayed_location=displayed_location,
+                                    type='Bookmark',
+                                    text=None)
+                    rec_len, = unpack('>I', data[eo+4:eo+8])
+                    eo += rec_len + 8
+                    sig = data[eo:eo+4]
+
+        elif self.bookmark_extension == 'tan':
+            # TAN bookmarks
+            MAGIC_TOPAZ_CONSTANT = 33.33
+            self.timestamp = os.path.getmtime(path)
+            with open(path,'rb') as f:
+                stream = StringIO(f.read())
+                data = StreamSlicer(stream)
+                self.last_read = int(unpack('>I', data[5:9])[0])
+                self.last_read_location = self.last_read/MAGIC_TOPAZ_CONSTANT + 1
+                entries, = unpack('>I', data[9:13])
+                current_entry = 0
+                e_base = 0x0d
+                while current_entry < entries:
+                    location, = unpack('>I', data[e_base+2:e_base+6])
+                    text = None
+                    text_len, = unpack('>I', data[e_base+0xA:e_base+0xE])
+                    e_type, = unpack('>B', data[e_base+1])
+                    if e_type == 0:
+                        e_type = 'Bookmark'
+                    elif e_type == 1:
+                        e_type = 'Highlight'
+                        text = "(Topaz highlights not yet supported)"
+                    elif e_type == 2:
+                        e_type = 'Note'
+                        text = data[e_base+0x10:e_base+0x10+text_len]
+                    else:
+                        e_type = 'Unknown annotation type'
+
+                    if self.book_format in ['tpz','azw1']:
+                        # *** This needs fine-tuning
+                        displayed_location = location/MAGIC_TOPAZ_CONSTANT + 1
+                    elif self.book_format == 'pdf':
+                        # *** This needs testing
+                        displayed_location = location
+                    user_notes[location] = dict(id=self.id,
+                            displayed_location=displayed_location,
+                            type=e_type,
+                            text=text)
+                    if text_len == 0xFFFFFFFF:
+                        e_base = e_base + 14
+                    else:
+                        e_base = e_base + 14 + 2 + text_len
+                    current_entry += 1
+                for location in user_notes:
+                    if location == self.last_read:
+                        user_notes.pop(location)
+                        break
+        else:
+            print "unsupported bookmark_extension: %s" % self.bookmark_extension
+        self.user_notes = user_notes
+
+        '''
+        for location in sorted(user_notes):
+            print ' Location %d: %s\n%s' % (user_notes[location]['displayed_location'],
+                user_notes[location]['type'],
+                '\n'.join(self.textdump(user_notes[location]['text'])))
+        '''
+
+    def get_book_length(self, path):
+        from calibre.ebooks.metadata.mobi import StreamSlicer
+        book_fs = path.replace('.%s' % self.bookmark_extension,'.%s' % self.book_format)
+
+        self.book_length = 0
+        if self.bookmark_extension == 'mbp':
+            # Read the book len from the header
+            with open(book_fs,'rb') as f:
+                self.stream = StringIO(f.read())
+                self.data = StreamSlicer(self.stream)
+                self.nrecs, = unpack('>H', self.data[76:78])
+                record0 = self.record(0)
+                self.book_length = int(unpack('>I', record0[0x04:0x08])[0])
+        elif self.bookmark_extension == 'tan':
+            # Read bookLength from metadata
+            with open(book_fs,'rb') as f:
+                stream = StringIO(f.read())
+                raw = stream.read(8*1024)
+                if not raw.startswith('TPZ'):
+                    raise ValueError('Not a Topaz file')
+                first = raw.find('metadata')
+                if first < 0:
+                    raise ValueError('Invalid Topaz file')
+                second = raw.find('metadata', first+10)
+                if second < 0:
+                    raise ValueError('Invalid Topaz file')
+                raw = raw[second:second+1000]
+                idx = raw.find('bookLength')
+                if idx > -1:
+                    length = ord(raw[idx+len('bookLength')])
+                    self.book_length = int(raw[idx+len('bookLength')+1:idx+len('bookLength')+1+length])
+
+        else:
+            print "unsupported bookmark_extension: %s" % self.bookmark_extension
@ -8,10 +8,10 @@ from ctypes import cdll, POINTER, byref, pointer, Structure as _Structure, \
|
|||||||
c_ubyte, c_ushort, c_int, c_char, c_void_p, c_byte, c_uint
|
c_ubyte, c_ushort, c_int, c_char, c_void_p, c_byte, c_uint
|
||||||
from errno import EBUSY, ENOMEM
|
from errno import EBUSY, ENOMEM
|
||||||
|
|
||||||
from calibre import iswindows, isosx, load_library
|
from calibre import iswindows, isosx, isfreebsd, load_library
|
||||||
|
|
||||||
_libusb_name = 'libusb'
|
_libusb_name = 'libusb'
|
||||||
PATH_MAX = 511 if iswindows else 1024 if isosx else 4096
|
PATH_MAX = 511 if iswindows else 1024 if (isosx or isfreebsd) else 4096
|
||||||
if iswindows:
|
if iswindows:
|
||||||
class Structure(_Structure):
|
class Structure(_Structure):
|
||||||
_pack_ = 1
|
_pack_ = 1
|
||||||
src/calibre/devices/teclast/__init__.py (new file, 10 lines)
@@ -0,0 +1,10 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import with_statement
+
+__license__ = 'GPL v3'
+__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+
+

src/calibre/devices/teclast/driver.py (new file, 42 lines)
@@ -0,0 +1,42 @@
+__license__ = 'GPL v3'
+__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+from calibre.devices.usbms.driver import USBMS
+
+class TECLAST_K3(USBMS):
+
+    name = 'Teclast K3 Device Interface'
+    gui_name = 'K3'
+    description = _('Communicate with the Teclast K3 reader.')
+    author = 'Kovid Goyal'
+    supported_platforms = ['windows', 'osx', 'linux']
+
+    # Ordered list of supported formats
+    FORMATS = ['epub', 'fb2', 'doc', 'pdf', 'txt']
+
+    VENDOR_ID = [0x071b]
+    PRODUCT_ID = [0x3203]
+    BCD = [0x0000]
+
+    VENDOR_NAME = 'TECLAST'
+    WINDOWS_MAIN_MEM = 'DIGITAL_PLAYER'
+    WINDOWS_CARD_A_MEM = 'DIGITAL_PLAYER'
+
+    MAIN_MEMORY_VOLUME_LABEL = 'K3 Main Memory'
+    STORAGE_CARD_VOLUME_LABEL = 'K3 Storage Card'
+
+    EBOOK_DIR_MAIN = ''
+    EBOOK_DIR_CARD_A = ''
+    SUPPORTS_SUB_DIRS = True
+
+    def windows_sort_drives(self, drives):
+        main = drives.get('main', None)
+        card = drives.get('carda', None)
+        if card and main and card < main:
+            drives['main'] = card
+            drives['carda'] = main
+
+        return drives
+
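
Note: windows_sort_drives compensates for Windows sometimes assigning the storage card a lower drive letter than main memory. The swap in isolation (drive letters are illustrative):

    drives = {'main': 'F:\\', 'carda': 'E:\\'}
    main, card = drives.get('main'), drives.get('carda')
    if card and main and card < main:
        drives['main'], drives['carda'] = card, main
    print drives['main'], drives['carda']   # E:\ F:\
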
@@ -4,8 +4,7 @@ __license__ = 'GPL 3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'

-import os
-import shutil
+import os, shutil, time

 from calibre.devices.errors import PathError

@@ -50,11 +49,12 @@ class CLI(object):
             d = os.path.dirname(path)
             if not os.path.exists(d):
                 os.makedirs(d)
-        with open(path, 'wb') as dest:
+        with open(path, 'w+b') as dest:
             try:
                 shutil.copyfileobj(infile, dest)
             except IOError:
                 print 'WARNING: First attempt to send file to device failed'
+                time.sleep(0.2)
                 infile.seek(0)
                 dest.seek(0)
                 dest.truncate()
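
Note: opening with 'w+b' instead of 'wb' is what makes the retry possible, since the same handle can be rewound and truncated after a transient device I/O error; the added sleep gives the device a moment to settle. The pattern in isolation:

    import shutil, time

    def copy_with_retry(infile, dest):
        # dest must be opened 'w+b' so it can be rewound and truncated
        try:
            shutil.copyfileobj(infile, dest)
        except IOError:
            time.sleep(0.2)
            infile.seek(0)
            dest.seek(0)
            dest.truncate()   # discard the partial first attempt
            shutil.copyfileobj(infile, dest)
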
@@ -17,6 +17,7 @@ import time
 import re
 import sys
 import glob

 from itertools import repeat

 from calibre.devices.interface import DevicePlugin
@@ -333,10 +334,14 @@ class Device(DeviceConfig, DevicePlugin):
                 raise
             time.sleep(2)

-    def _osx_bsd_names(self):
+    @classmethod
+    def osx_get_usb_drives(cls):
         if usbobserver_err:
             raise RuntimeError('Failed to load usbobserver: '+usbobserver_err)
-        drives = usbobserver.get_usb_drives()
+        return usbobserver.get_usb_drives()
+
+    def _osx_bsd_names(self):
+        drives = self.osx_get_usb_drives()
         matches = []
         d = self.detected_device
         if d.serial:
@@ -394,16 +399,6 @@ class Device(DeviceConfig, DevicePlugin):
         if len(matches) > 2:
             drives['cardb'] = matches[2]

-        pat = self.OSX_MAIN_MEM_VOL_PAT
-        if pat is not None and len(drives) > 1 and 'main' in drives:
-            if pat.search(drives['main']) is None:
-                main = drives['main']
-                for x in ('carda', 'cardb'):
-                    if x in drives and pat.search(drives[x]):
-                        drives['main'] = drives.pop(x)
-                        drives[x] = main
-                        break
-
         return drives

     def osx_bsd_names(self):
@@ -427,6 +422,16 @@ class Device(DeviceConfig, DevicePlugin):
         if drives['main'] is None:
             print bsd_drives, mount_map, drives
             raise DeviceError(_('Unable to detect the %s mount point. Try rebooting.')%self.__class__.__name__)
+        pat = self.OSX_MAIN_MEM_VOL_PAT
+        if pat is not None and len(drives) > 1 and 'main' in drives:
+            if pat.search(drives['main']) is None:
+                main = drives['main']
+                for x in ('carda', 'cardb'):
+                    if x in drives and pat.search(drives[x]):
+                        drives['main'] = drives.pop(x)
+                        drives[x] = main
+                        break
+
         self._main_prefix = drives['main']+os.sep
         def get_card_prefix(c):
             ans = drives.get(c, None)
@@ -789,7 +794,13 @@ class Device(DeviceConfig, DevicePlugin):
         '''
         return components

-    def create_upload_path(self, path, mdata, fname):
+    def get_annotations(self, path_map):
+        '''
+        Resolve path_map to annotation_map of files found on the device
+        '''
+        return {}
+
+    def create_upload_path(self, path, mdata, fname, create_dirs=True):
         path = os.path.abspath(path)
         extra_components = []

@@ -848,7 +859,7 @@ class Device(DeviceConfig, DevicePlugin):
         filedir = os.path.dirname(filepath)


-        if not os.path.exists(filedir):
+        if create_dirs and not os.path.exists(filedir):
             os.makedirs(filedir)

         return filepath
@@ -123,7 +123,7 @@ class USBMS(CLI, Device):
         '''
         :path: the full path were the associated book is located.
         :filename: the name of the book file without the extension.
-        :metatdata: metadata belonging to the book. Use metadata.thumbnail
+        :metadata: metadata belonging to the book. Use metadata.thumbnail
                     for cover
         '''
         pass
@@ -129,3 +129,12 @@ def render_html(path_to_html, width=590, height=750):
     del loop
     return renderer

+def check_ebook_format(stream, current_guess):
+    ans = current_guess
+    if current_guess.lower() in ('prc', 'mobi', 'azw', 'azw1'):
+        stream.seek(0)
+        if stream.read(3) == 'TPZ':
+            ans = 'tpz'
+        stream.seek(0)
+    return ans
+
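
Note: this helper exists because Topaz books ship with MOBI-family extensions; the real format is betrayed by the 'TPZ' magic bytes, and the stream is rewound either way. Usage sketch with an in-memory stream:

    from cStringIO import StringIO

    stream = StringIO('TPZ' + '\x00' * 16)    # synthetic Topaz header
    print check_ebook_format(stream, 'azw')   # -> 'tpz'
    print check_ebook_format(stream, 'epub')  # -> 'epub', guess left alone
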
@@ -53,13 +53,15 @@ _CHARSET_ALIASES = { "macintosh" : "mac-roman",
                      "x-sjis" : "shift-jis" }


-def force_encoding(raw, verbose):
+def force_encoding(raw, verbose, assume_utf8=False):
     from calibre.constants import preferred_encoding
     try:
         chardet = detect(raw)
     except:
         chardet = {'encoding':preferred_encoding, 'confidence':0}
     encoding = chardet['encoding']
+    if chardet['confidence'] < 1 and assume_utf8:
+        encoding = 'utf-8'
     if chardet['confidence'] < 1 and verbose:
         print 'WARNING: Encoding detection confidence %d%%'%(chardet['confidence']*100)
     if not encoding:
@@ -73,7 +75,7 @@ def force_encoding(raw, verbose):


 def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False,
-        resolve_entities=False):
+        resolve_entities=False, assume_utf8=False):
     '''
     Force conversion of byte string to unicode. Tries to look for XML/HTML
     encoding declaration first, if not found uses the chardet library and
@@ -95,7 +97,7 @@ def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False,
             encoding = match.group(1)
             break
     if encoding is None:
-        encoding = force_encoding(raw, verbose)
+        encoding = force_encoding(raw, verbose, assume_utf8=assume_utf8)
     try:
         if encoding.lower().strip() == 'macintosh':
             encoding = 'mac-roman'
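
Note: assume_utf8 only overrides the chardet guess when detection is less than fully confident, which suits NCX files, where the spec requires a Unicode encoding when none is declared. Usage sketch, assuming xml_to_unicode returns a (unicode_text, encoding) pair as it does elsewhere in calibre:

    raw = '<ncx>\xc3\xa9</ncx>'   # UTF-8 bytes, no encoding declaration
    text = xml_to_unicode(raw, assume_utf8=True)[0]
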
@@ -1,213 +1,17 @@
-from __future__ import with_statement
 ''' CHM File decoding support '''
 __license__ = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>,' \
                 ' and Alex Bramley <a.bramley at gmail.com>.'

-import os, shutil, uuid, re
-from tempfile import mkdtemp
-from mimetypes import guess_type as guess_mimetype
-
-from BeautifulSoup import BeautifulSoup, NavigableString
+import os, uuid
+
 from lxml import html
-from pychm.chm import CHMFile
-from pychm.chmlib import (
-    CHM_RESOLVE_SUCCESS, CHM_ENUMERATE_NORMAL,
-    chm_enumerate,
-)

-from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
-from calibre.utils.config import OptionParser
-from calibre.ebooks.metadata.toc import TOC
+from calibre.customize.conversion import InputFormatPlugin
+from calibre.ptempfile import TemporaryDirectory
 from calibre.utils.localization import get_lang
 from calibre.utils.filenames import ascii_filename


-def match_string(s1, s2_already_lowered):
-    if s1 is not None and s2_already_lowered is not None:
-        if s1.lower()==s2_already_lowered:
-            return True
-    return False
-
-def check_all_prev_empty(tag):
-    if tag is None:
-        return True
-    if tag.__class__ == NavigableString and not check_empty(tag):
-        return False
-    return check_all_prev_empty(tag.previousSibling)
-
-def check_empty(s, rex = re.compile(r'\S')):
-    return rex.search(s) is None
-
-
-def option_parser():
-    parser = OptionParser(usage=_('%prog [options] mybook.chm'))
-    parser.add_option('--output-dir', '-d', default='.', help=_('Output directory. Defaults to current directory'), dest='output')
-    parser.add_option('--verbose', default=False, action='store_true', dest='verbose')
-    parser.add_option("-t", "--title", action="store", type="string", \
-                    dest="title", help=_("Set the book title"))
-    parser.add_option('--title-sort', action='store', type='string', default=None,
-                    dest='title_sort', help=_('Set sort key for the title'))
-    parser.add_option("-a", "--author", action="store", type="string", \
-                    dest="author", help=_("Set the author"))
-    parser.add_option('--author-sort', action='store', type='string', default=None,
-                    dest='author_sort', help=_('Set sort key for the author'))
-    parser.add_option("-c", "--category", action="store", type="string", \
-                    dest="category", help=_("The category this book belongs"
-                    " to. E.g.: History"))
-    parser.add_option("--thumbnail", action="store", type="string", \
-                    dest="thumbnail", help=_("Path to a graphic that will be"
-                    " set as this files' thumbnail"))
-    parser.add_option("--comment", action="store", type="string", \
-                    dest="freetext", help=_("Path to a txt file containing a comment."))
-    parser.add_option("--get-thumbnail", action="store_true", \
-                    dest="get_thumbnail", default=False, \
-                    help=_("Extract thumbnail from LRF file"))
-    parser.add_option('--publisher', default=None, help=_('Set the publisher'))
-    parser.add_option('--classification', default=None, help=_('Set the book classification'))
-    parser.add_option('--creator', default=None, help=_('Set the book creator'))
-    parser.add_option('--producer', default=None, help=_('Set the book producer'))
-    parser.add_option('--get-cover', action='store_true', default=False,
-                    help=_('Extract cover from LRF file. Note that the LRF format has no defined cover, so we use some heuristics to guess the cover.'))
-    parser.add_option('--bookid', action='store', type='string', default=None,
-                    dest='book_id', help=_('Set book ID'))
-    parser.add_option('--font-delta', action='store', type='int', default=0,
-                    dest='font_delta', help=_('Set font delta'))
-    return parser
-
-class CHMError(Exception):
-    pass
-
-class CHMReader(CHMFile):
-    def __init__(self, input, log):
-        CHMFile.__init__(self)
-        if not self.LoadCHM(input):
-            raise CHMError("Unable to open CHM file '%s'"%(input,))
-        self.log = log
-        self._sourcechm = input
-        self._contents = None
-        self._playorder = 0
-        self._metadata = False
-        self._extracted = False
-
-        # location of '.hhc' file, which is the CHM TOC.
-        self.root, ext = os.path.splitext(self.topics.lstrip('/'))
-        self.hhc_path = self.root + ".hhc"
-
-
-    def _parse_toc(self, ul, basedir=os.getcwdu()):
-        toc = TOC(play_order=self._playorder, base_path=basedir, text='')
-        self._playorder += 1
-        for li in ul('li', recursive=False):
-            href = li.object('param', {'name': 'Local'})[0]['value']
-            if href.count('#'):
-                href, frag = href.split('#')
-            else:
-                frag = None
-            name = self._deentity(li.object('param', {'name': 'Name'})[0]['value'])
-            #print "========>", name
-            toc.add_item(href, frag, name, play_order=self._playorder)
-            self._playorder += 1
-            if li.ul:
-                child = self._parse_toc(li.ul)
-                child.parent = toc
-                toc.append(child)
-        #print toc
-        return toc
-
-
-    def GetFile(self, path):
-        # have to have abs paths for ResolveObject, but Contents() deliberately
-        # makes them relative. So we don't have to worry, re-add the leading /.
-        # note this path refers to the internal CHM structure
-        if path[0] != '/':
-            path = '/' + path
-        res, ui = self.ResolveObject(path)
-        if res != CHM_RESOLVE_SUCCESS:
-            raise CHMError("Unable to locate '%s' within CHM file '%s'"%(path, self.filename))
-        size, data = self.RetrieveObject(ui)
-        if size == 0:
-            raise CHMError("'%s' is zero bytes in length!"%(path,))
-        return data
-
-    def ExtractFiles(self, output_dir=os.getcwdu()):
-        for path in self.Contents():
-            lpath = os.path.join(output_dir, path)
-            self._ensure_dir(lpath)
-            data = self.GetFile(path)
-            with open(lpath, 'wb') as f:
-                if guess_mimetype(path)[0] == ('text/html'):
-                    data = self._reformat(data)
-                f.write(data)
-        #subprocess.call(['extract_chmLib.exe', self._sourcechm, output_dir])
-        self._extracted = True
-
-    def _reformat(self, data):
-        try:
-            soup = BeautifulSoup(data)
-        except UnicodeEncodeError:
-            # hit some strange encoding problems...
-            print "Unable to parse html for cleaning, leaving it :("
-            return data
-        # nuke javascript...
-        [s.extract() for s in soup('script')]
-        # remove forward and back nav bars from the top/bottom of each page
-        # cos they really fuck with the flow of things and generally waste space
-        # since we can't use [a,b] syntax to select arbitrary items from a list
-        # we'll have to do this manually...
-        t = soup('table')
-        if t:
-            if (t[0].previousSibling is None
-                    or t[0].previousSibling.previousSibling is None):
-                t[0].extract()
-            if (t[-1].nextSibling is None
-                    or t[-1].nextSibling.nextSibling is None):
-                t[-1].extract()
-        # for some very odd reason each page's content appears to be in a table
-        # too. and this table has sub-tables for random asides... grr.
-
-        # remove br at top of page if present after nav bars removed
-        br = soup('br')
-        if br:
-            if check_all_prev_empty(br[0].previousSibling):
-                br[0].extract()
-
-        # some images seem to be broken in some chm's :/
-        for img in soup('img'):
-            try:
-                # some are supposedly "relative"... lies.
-                while img['src'].startswith('../'): img['src'] = img['src'][3:]
-                # some have ";<junk>" at the end.
-                img['src'] = img['src'].split(';')[0]
-            except KeyError:
-                # and some don't even have a src= ?!
-                pass
-        # now give back some pretty html.
-        return soup.prettify()
-
-    def Contents(self):
-        if self._contents is not None:
-            return self._contents
-        paths = []
-        def get_paths(chm, ui, ctx):
-            # skip directories
-            # note this path refers to the internal CHM structure
-            if ui.path[-1] != '/':
-                # and make paths relative
-                paths.append(ui.path.lstrip('/'))
-        chm_enumerate(self.file, CHM_ENUMERATE_NORMAL, get_paths, None)
-        self._contents = paths
-        return self._contents
-
-    def _ensure_dir(self, path):
-        dir = os.path.dirname(path)
-        if not os.path.isdir(dir):
-            os.makedirs(dir)
-
-    def extract_content(self, output_dir=os.getcwdu()):
-        self.ExtractFiles(output_dir=output_dir)
-
-
 class CHMInput(InputFormatPlugin):

     name = 'CHM Input'
@@ -215,52 +19,49 @@ class CHMInput(InputFormatPlugin):
     description = 'Convert CHM files to OEB'
     file_types = set(['chm'])

-    options = set([
-        OptionRecommendation(name='dummy_option', recommended_value=False,
-            help=_('dummy option until real options are determined.')),
-    ])
-
     def _chmtohtml(self, output_dir, chm_path, no_images, log):
+        from calibre.ebooks.chm.reader import CHMReader
         log.debug('Opening CHM file')
         rdr = CHMReader(chm_path, log)
         log.debug('Extracting CHM to %s' % output_dir)
         rdr.extract_content(output_dir)
+        self._chm_reader = rdr
         return rdr.hhc_path


     def convert(self, stream, options, file_ext, log, accelerators):
-        from calibre.ebooks.metadata.chm import get_metadata_
-        log.debug('Processing CHM...')
-        tdir = mkdtemp(prefix='chm2oeb_')
-        from calibre.customize.ui import plugin_for_input_format
-        html_input = plugin_for_input_format('html')
-        for opt in html_input.options:
-            setattr(options, opt.option.name, opt.recommended_value)
-        options.input_encoding = 'utf-8'
-        no_images = False #options.no_images
-        chm_name = stream.name
-        #chm_data = stream.read()
-
-        #closing stream so CHM can be opened by external library
-        stream.close()
-        log.debug('tdir=%s' % tdir)
-        log.debug('stream.name=%s' % stream.name)
-        mainname = self._chmtohtml(tdir, chm_name, no_images, log)
-        mainpath = os.path.join(tdir, mainname)
-
-        metadata = get_metadata_(tdir)
-
-        odi = options.debug_pipeline
-        options.debug_pipeline = None
-        # try a custom conversion:
-        #oeb = self._create_oebbook(mainpath, tdir, options, log, metadata)
-        # try using html converter:
-        htmlpath = self._create_html_root(mainpath, log)
-        oeb = self._create_oebbook_html(htmlpath, tdir, options, log, metadata)
-        options.debug_pipeline = odi
-        #log.debug('DEBUG: Not removing tempdir %s' % tdir)
-        shutil.rmtree(tdir)
+        from calibre.ebooks.chm.metadata import get_metadata_from_reader
+        from calibre.customize.ui import plugin_for_input_format
+
+        log.debug('Processing CHM...')
+        with TemporaryDirectory('_chm2oeb') as tdir:
+            html_input = plugin_for_input_format('html')
+            for opt in html_input.options:
+                setattr(options, opt.option.name, opt.recommended_value)
+            options.input_encoding = 'utf-8'
+            no_images = False #options.no_images
+            chm_name = stream.name
+            #chm_data = stream.read()
+
+            #closing stream so CHM can be opened by external library
+            stream.close()
+            log.debug('tdir=%s' % tdir)
+            log.debug('stream.name=%s' % stream.name)
+            mainname = self._chmtohtml(tdir, chm_name, no_images, log)
+            mainpath = os.path.join(tdir, mainname)
+            #raw_input()
+
+            metadata = get_metadata_from_reader(self._chm_reader)
+
+            odi = options.debug_pipeline
+            options.debug_pipeline = None
+            # try a custom conversion:
+            #oeb = self._create_oebbook(mainpath, tdir, options, log, metadata)
+            # try using html converter:
+            htmlpath = self._create_html_root(mainpath, log)
+            oeb = self._create_oebbook_html(htmlpath, tdir, options, log, metadata)
+            options.debug_pipeline = odi
+            #log.debug('DEBUG: Not removing tempdir %s' % tdir)
         return oeb

     def _create_oebbook_html(self, htmlpath, basedir, opts, log, mi):
@ -369,6 +170,9 @@ class CHMInput(InputFormatPlugin):
|
|||||||
# check that node is a normal node (not a comment, DOCTYPE, etc.)
|
# check that node is a normal node (not a comment, DOCTYPE, etc.)
|
||||||
# (normal nodes have string tags)
|
# (normal nodes have string tags)
|
||||||
if isinstance(node.tag, basestring):
|
if isinstance(node.tag, basestring):
|
||||||
|
from calibre.ebooks.chm.reader import match_string
|
||||||
|
|
||||||
|
chapter_path = None
|
||||||
if match_string(node.tag, 'object') and match_string(node.attrib['type'], 'text/sitemap'):
|
if match_string(node.tag, 'object') and match_string(node.attrib['type'], 'text/sitemap'):
|
||||||
for child in node:
|
for child in node:
|
||||||
if match_string(child.tag,'param') and match_string(child.attrib['name'], 'name'):
|
if match_string(child.tag,'param') and match_string(child.attrib['name'], 'name'):
|
||||||
|
src/calibre/ebooks/chm/metadata.py (new file, 157 lines)
@@ -0,0 +1,157 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import with_statement
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import re
+
+from calibre.ebooks.BeautifulSoup import BeautifulSoup
+from calibre.ebooks.chardet import xml_to_unicode
+from calibre.ebooks.metadata import string_to_authors, MetaInformation
+from calibre.utils.logging import default_log
+from calibre.ptempfile import TemporaryFile
+
+
+def _clean(s):
+    return s.replace(u'\u00a0', u' ')
+
+def _detag(tag):
+    str = u""
+    for elem in tag:
+        if hasattr(elem, "contents"):
+            str += _detag(elem)
+        else:
+            str += _clean(elem)
+    return str
+
+
+def _metadata_from_table(soup, searchfor):
+    td = soup.find('td', text=re.compile(searchfor, flags=re.I))
+    if td is None:
+        return None
+    td = td.parent
+    # there appears to be multiple ways of structuring the metadata
+    # on the home page. cue some nasty special-case hacks...
+    if re.match(r'^\s*'+searchfor+r'\s*$', td.renderContents(), flags=re.I):
+        meta = _detag(td.findNextSibling('td'))
+        return re.sub('^:', '', meta).strip()
+    else:
+        meta = _detag(td)
+        return re.sub(r'^[^:]+:', '', meta).strip()
+
+def _metadata_from_span(soup, searchfor):
+    span = soup.find('span', {'class': re.compile(searchfor, flags=re.I)})
+    if span is None:
+        return None
+    # this metadata might need some cleaning up still :/
+    return _detag(span.renderContents().strip())
+
+def _get_authors(soup):
+    aut = (_metadata_from_span(soup, r'author')
+           or _metadata_from_table(soup, r'^\s*by\s*:?\s+'))
+    ans = [_('Unknown')]
+    if aut is not None:
+        ans = string_to_authors(aut)
+    return ans
+
+def _get_publisher(soup):
+    return (_metadata_from_span(soup, 'imprint')
+            or _metadata_from_table(soup, 'publisher'))
+
+def _get_isbn(soup):
+    return (_metadata_from_span(soup, 'isbn')
+            or _metadata_from_table(soup, 'isbn'))
+
+def _get_comments(soup):
+    date = (_metadata_from_span(soup, 'cwdate')
+            or _metadata_from_table(soup, 'pub date'))
+    pages = (_metadata_from_span(soup, 'pages')
+             or _metadata_from_table(soup, 'pages'))
+    try:
+        # date span can have copyright symbols in it...
+        date = date.replace(u'\u00a9', '').strip()
+        # and pages often comes as '(\d+ pages)'
+        pages = re.search(r'\d+', pages).group(0)
+        return u'Published %s, %s pages.' % (date, pages)
+    except:
+        pass
+    return None
+
+def _get_cover(soup, rdr):
+    ans = None
+    try:
+        ans = soup.find('img', alt=re.compile('cover', flags=re.I))['src']
+    except TypeError:
+        # meeehh, no handy alt-tag goodness, try some hackery
+        # the basic idea behind this is that in general, the cover image
+        # has a height:width ratio of ~1.25, whereas most of the nav
+        # buttons are decidedly less than that.
+        # what we do in this is work out that ratio, take 1.25 off it and
+        # save the absolute value when we sort by this value, the smallest
+        # one is most likely to be the cover image, hopefully.
+        r = {}
+        for img in soup('img'):
+            try:
+                r[abs(float(img['height'])/float(img['width'])-1.25)] = img['src']
+            except KeyError:
+                # interestingly, occasionally the only image without height
+                # or width attrs is the cover...
+                r[0] = img['src']
+        l = r.keys()
+        l.sort()
+        ans = r[l[0]]
+    # this link comes from the internal html, which is in a subdir
+    if ans is not None:
+        try:
+            ans = rdr.GetFile(ans)
+        except:
+            ans = rdr.root + "/" + ans
+            try:
+                ans = rdr.GetFile(ans)
+            except:
+                ans = None
+        if ans is not None:
+            from PIL import Image
+            from cStringIO import StringIO
+            buf = StringIO()
+            try:
+                Image.open(StringIO(ans)).convert('RGB').save(buf, 'JPEG')
+                ans = buf.getvalue()
+            except:
+                ans = None
+    return ans
+
+
+def get_metadata_from_reader(rdr):
+    raw = rdr.GetFile(rdr.home)
+    home = BeautifulSoup(xml_to_unicode(raw, strip_encoding_pats=True,
+        resolve_entities=True)[0])
+
+    title = rdr.title
+    authors = _get_authors(home)
+    mi = MetaInformation(title, authors)
+    publisher = _get_publisher(home)
+    if publisher:
+        mi.publisher = publisher
+    isbn = _get_isbn(home)
+    if isbn:
+        mi.isbn = isbn
+    comments = _get_comments(home)
+    if comments:
+        mi.comments = comments
+
+    cdata = _get_cover(home, rdr)
+    if cdata is not None:
+        mi.cover_data = ('jpg', cdata)
+
+    return mi
+
+def get_metadata(stream):
+    with TemporaryFile('_chm_metadata.chm') as fname:
+        with open(fname, 'wb') as f:
+            f.write(stream.read())
+        from calibre.ebooks.chm.reader import CHMReader
+        rdr = CHMReader(fname, default_log)
+        return get_metadata_from_reader(rdr)
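An illustrative usage sketch, not part of the commit (the file name book.chm and the driver script are assumptions): get_metadata() copies the stream to a temporary .chm file, opens it with the new CHMReader, and scrapes the CHM home page for title, authors, publisher, ISBN and cover.

    # Hypothetical driver; 'book.chm' is an assumed local file.
    from calibre.ebooks.chm.metadata import get_metadata

    with open('book.chm', 'rb') as stream:
        mi = get_metadata(stream)
        print mi.title, mi.authors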
src/calibre/ebooks/chm/reader.py (new file, 212 lines)
@@ -0,0 +1,212 @@
+from __future__ import with_statement
+''' CHM File decoding support '''
+__license__ = 'GPL v3'
+__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>,' \
+                ' and Alex Bramley <a.bramley at gmail.com>.'
+
+import os, re
+from mimetypes import guess_type as guess_mimetype
+
+from BeautifulSoup import BeautifulSoup, NavigableString
+
+from calibre.utils.chm.chm import CHMFile
+from calibre.utils.chm.chmlib import (
+  CHM_RESOLVE_SUCCESS, CHM_ENUMERATE_NORMAL,
+  chm_enumerate,
+)
+
+from calibre.utils.config import OptionParser
+from calibre.ebooks.metadata.toc import TOC
+
+
+def match_string(s1, s2_already_lowered):
+    if s1 is not None and s2_already_lowered is not None:
+        if s1.lower()==s2_already_lowered:
+            return True
+    return False
+
+def check_all_prev_empty(tag):
+    if tag is None:
+        return True
+    if tag.__class__ == NavigableString and not check_empty(tag):
+        return False
+    return check_all_prev_empty(tag.previousSibling)
+
+def check_empty(s, rex=re.compile(r'\S')):
+    return rex.search(s) is None
+
+
+def option_parser():
+    parser = OptionParser(usage=_('%prog [options] mybook.chm'))
+    parser.add_option('--output-dir', '-d', default='.',
+            help=_('Output directory. Defaults to current directory'), dest='output')
+    parser.add_option('--verbose', default=False, action='store_true', dest='verbose')
+    parser.add_option('-t', '--title', action='store', type='string',
+            dest='title', help=_('Set the book title'))
+    parser.add_option('--title-sort', action='store', type='string', default=None,
+            dest='title_sort', help=_('Set sort key for the title'))
+    parser.add_option('-a', '--author', action='store', type='string',
+            dest='author', help=_('Set the author'))
+    parser.add_option('--author-sort', action='store', type='string', default=None,
+            dest='author_sort', help=_('Set sort key for the author'))
+    parser.add_option('-c', '--category', action='store', type='string',
+            dest='category', help=_('The category this book belongs'
+            ' to. E.g.: History'))
+    parser.add_option('--thumbnail', action='store', type='string',
+            dest='thumbnail', help=_('Path to a graphic that will be'
+            " set as this files' thumbnail"))
+    parser.add_option('--comment', action='store', type='string',
+            dest='freetext', help=_('Path to a txt file containing a comment.'))
+    parser.add_option('--get-thumbnail', action='store_true',
+            dest='get_thumbnail', default=False,
+            help=_('Extract thumbnail from LRF file'))
+    parser.add_option('--publisher', default=None, help=_('Set the publisher'))
+    parser.add_option('--classification', default=None, help=_('Set the book classification'))
+    parser.add_option('--creator', default=None, help=_('Set the book creator'))
+    parser.add_option('--producer', default=None, help=_('Set the book producer'))
+    parser.add_option('--get-cover', action='store_true', default=False,
+            help=_('Extract cover from LRF file. Note that the LRF format has no defined cover, so we use some heuristics to guess the cover.'))
+    parser.add_option('--bookid', action='store', type='string', default=None,
+            dest='book_id', help=_('Set book ID'))
+    parser.add_option('--font-delta', action='store', type='int', default=0,
+            dest='font_delta', help=_('Set font delta'))
+    return parser
+
+
+class CHMError(Exception):
+    pass
+
+class CHMReader(CHMFile):
+    def __init__(self, input, log):
+        CHMFile.__init__(self)
+        if not self.LoadCHM(input):
+            raise CHMError("Unable to open CHM file '%s'"%(input,))
+        self.log = log
+        self._sourcechm = input
+        self._contents = None
+        self._playorder = 0
+        self._metadata = False
+        self._extracted = False
+
+        # location of '.hhc' file, which is the CHM TOC.
+        self.root, ext = os.path.splitext(self.topics.lstrip('/'))
+        self.hhc_path = self.root + ".hhc"
+
+    def _parse_toc(self, ul, basedir=os.getcwdu()):
+        toc = TOC(play_order=self._playorder, base_path=basedir, text='')
+        self._playorder += 1
+        for li in ul('li', recursive=False):
+            href = li.object('param', {'name': 'Local'})[0]['value']
+            if href.count('#'):
+                href, frag = href.split('#')
+            else:
+                frag = None
+            name = self._deentity(li.object('param', {'name': 'Name'})[0]['value'])
+            #print "========>", name
+            toc.add_item(href, frag, name, play_order=self._playorder)
+            self._playorder += 1
+            if li.ul:
+                child = self._parse_toc(li.ul)
+                child.parent = toc
+                toc.append(child)
+        #print toc
+        return toc
+
+    def GetFile(self, path):
+        # have to have abs paths for ResolveObject, but Contents() deliberately
+        # makes them relative. So we don't have to worry, re-add the leading /.
+        # note this path refers to the internal CHM structure
+        if path[0] != '/':
+            path = '/' + path
+        res, ui = self.ResolveObject(path)
+        if res != CHM_RESOLVE_SUCCESS:
+            raise CHMError("Unable to locate '%s' within CHM file '%s'"%(path, self.filename))
+        size, data = self.RetrieveObject(ui)
+        if size == 0:
+            raise CHMError("'%s' is zero bytes in length!"%(path,))
+        return data
+
+    def ExtractFiles(self, output_dir=os.getcwdu()):
+        for path in self.Contents():
+            lpath = os.path.join(output_dir, path)
+            self._ensure_dir(lpath)
+            data = self.GetFile(path)
+            with open(lpath, 'wb') as f:
+                if guess_mimetype(path)[0] == ('text/html'):
+                    data = self._reformat(data)
+                f.write(data)
+        self._extracted = True
+        files = os.listdir(output_dir)
+        if self.hhc_path not in files:
+            for f in files:
+                if f.lower() == self.hhc_path.lower():
+                    self.hhc_path = f
+                    break
+
+    def _reformat(self, data):
+        try:
+            soup = BeautifulSoup(data)
+        except UnicodeEncodeError:
+            # hit some strange encoding problems...
+            print "Unable to parse html for cleaning, leaving it :("
+            return data
+        # nuke javascript...
+        [s.extract() for s in soup('script')]
+        # remove forward and back nav bars from the top/bottom of each page
+        # cos they really fuck with the flow of things and generally waste space
+        # since we can't use [a,b] syntax to select arbitrary items from a list
+        # we'll have to do this manually...
+        t = soup('table')
+        if t:
+            if (t[0].previousSibling is None
+              or t[0].previousSibling.previousSibling is None):
+                t[0].extract()
+            if (t[-1].nextSibling is None
+              or t[-1].nextSibling.nextSibling is None):
+                t[-1].extract()
+        # for some very odd reason each page's content appears to be in a table
+        # too. and this table has sub-tables for random asides... grr.
+
+        # remove br at top of page if present after nav bars removed
+        br = soup('br')
+        if br:
+            if check_all_prev_empty(br[0].previousSibling):
+                br[0].extract()
+
+        # some images seem to be broken in some chm's :/
+        for img in soup('img'):
+            try:
+                # some are supposedly "relative"... lies.
+                while img['src'].startswith('../'): img['src'] = img['src'][3:]
+                # some have ";<junk>" at the end.
+                img['src'] = img['src'].split(';')[0]
+            except KeyError:
+                # and some don't even have a src= ?!
+                pass
+        # now give back some pretty html.
+        return soup.prettify()
+
+    def Contents(self):
+        if self._contents is not None:
+            return self._contents
+        paths = []
+        def get_paths(chm, ui, ctx):
+            # skip directories
+            # note this path refers to the internal CHM structure
+            if ui.path[-1] != '/':
+                # and make paths relative
+                paths.append(ui.path.lstrip('/'))
+        chm_enumerate(self.file, CHM_ENUMERATE_NORMAL, get_paths, None)
+        self._contents = paths
+        return self._contents
+
+    def _ensure_dir(self, path):
+        dir = os.path.dirname(path)
+        if not os.path.isdir(dir):
+            os.makedirs(dir)
+
+    def extract_content(self, output_dir=os.getcwdu()):
+        self.ExtractFiles(output_dir=output_dir)
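A minimal sketch of how the new reader is driven (illustrative only; 'help.chm' and the output directory are assumptions): extract the archive to disk, then hand the .hhc table of contents to CHMInput for parsing.

    from calibre.ebooks.chm.reader import CHMReader
    from calibre.utils.logging import default_log

    rdr = CHMReader('help.chm', default_log)  # raises CHMError if LoadCHM fails
    rdr.extract_content('/tmp/chm_out')       # writes every internal file to disk
    print rdr.hhc_path                        # entry point for TOC parsing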
@@ -13,6 +13,7 @@ from calibre.customize.ui import input_profiles, output_profiles, \
 from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
 from calibre.ptempfile import PersistentTemporaryDirectory
 from calibre.utils.date import parse_date
+from calibre.utils.zipfile import ZipFile
 from calibre import extract, walk
 
 DEBUG_README=u'''
@@ -726,6 +727,13 @@ OptionRecommendation(name='timestamp',
         else:
             os.makedirs(out_dir)
         self.dump_oeb(ret, out_dir)
+        if self.input_fmt == 'recipe':
+            zf = ZipFile(os.path.join(self.opts.debug_pipeline,
+                'periodical.downloaded_recipe'), 'w')
+            zf.add_dir(out_dir)
+            with self.input_plugin:
+                self.input_plugin.save_download(zf)
+            zf.close()
 
         self.log.info('Input debug saved to:', out_dir)
 
@@ -773,26 +781,29 @@ OptionRecommendation(name='timestamp',
         self.ui_reporter(0.01, _('Converting input to HTML...'))
         ir = CompositeProgressReporter(0.01, 0.34, self.ui_reporter)
         self.input_plugin.report_progress = ir
-        self.oeb = self.input_plugin(stream, self.opts,
-                self.input_fmt, self.log,
-                accelerators, tdir)
-        if self.opts.debug_pipeline is not None:
-            self.dump_input(self.oeb, tdir)
-            if self.abort_after_input_dump:
-                return
-        if self.input_fmt == 'recipe':
-            self.opts_to_mi(self.user_metadata)
-        if not hasattr(self.oeb, 'manifest'):
-            self.oeb = create_oebbook(self.log, self.oeb, self.opts,
-                    self.input_plugin)
-        self.input_plugin.postprocess_book(self.oeb, self.opts, self.log)
-        self.opts.is_image_collection = self.input_plugin.is_image_collection
-        pr = CompositeProgressReporter(0.34, 0.67, self.ui_reporter)
-        self.flush()
-        if self.opts.debug_pipeline is not None:
-            out_dir = os.path.join(self.opts.debug_pipeline, 'parsed')
-            self.dump_oeb(self.oeb, out_dir)
-            self.log('Parsed HTML written to:', out_dir)
+        with self.input_plugin:
+            self.oeb = self.input_plugin(stream, self.opts,
+                    self.input_fmt, self.log,
+                    accelerators, tdir)
+            if self.opts.debug_pipeline is not None:
+                self.dump_input(self.oeb, tdir)
+                if self.abort_after_input_dump:
+                    return
+            if self.input_fmt in ('recipe', 'downloaded_recipe'):
+                self.opts_to_mi(self.user_metadata)
+            if not hasattr(self.oeb, 'manifest'):
+                self.oeb = create_oebbook(self.log, self.oeb, self.opts,
+                        self.input_plugin)
+            self.input_plugin.postprocess_book(self.oeb, self.opts, self.log)
+            self.opts.is_image_collection = self.input_plugin.is_image_collection
+            pr = CompositeProgressReporter(0.34, 0.67, self.ui_reporter)
+            self.flush()
+            if self.opts.debug_pipeline is not None:
+                out_dir = os.path.join(self.opts.debug_pipeline, 'parsed')
+                self.dump_oeb(self.oeb, out_dir)
+                self.log('Parsed HTML written to:', out_dir)
+            self.input_plugin.specialize(self.oeb, self.opts, self.log,
+                    self.output_fmt)
 
         pr(0., _('Running transforms on ebook...'))
 
@@ -882,7 +893,8 @@ OptionRecommendation(name='timestamp',
         our = CompositeProgressReporter(0.67, 1., self.ui_reporter)
         self.output_plugin.report_progress = our
         our(0., _('Creating')+' %s'%self.output_plugin.name)
-        self.output_plugin.convert(self.oeb, self.output, self.input_plugin,
-                self.opts, self.log)
+        with self.output_plugin:
+            self.output_plugin.convert(self.oeb, self.output, self.input_plugin,
+                    self.opts, self.log)
         self.ui_reporter(1.)
         run_plugins_on_postprocess(self.output, self.output_fmt)
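The pipeline changes above assume input and output plugins can be used as context managers. A minimal sketch of that protocol (this is not the real Plugin base class, which does more, e.g. adding plugin zip files to sys.path on entry):

    class PluginSketch(object):
        def __enter__(self, *args):
            return self            # set up (e.g. extend sys.path)
        def __exit__(self, *exc_info):
            pass                   # tear down, runs even if convert() raised

    with PluginSketch() as plugin:
        pass                       # conversion work happens here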
@@ -3,7 +3,7 @@ __license__ = 'GPL 3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 
-import os, re, uuid
+import os, uuid
 from itertools import cycle
 
 from lxml import etree
@@ -19,8 +19,7 @@ class EPUBInput(InputFormatPlugin):
 
     recommendations = set([('page_breaks_before', '/', OptionRecommendation.MED)])
 
-    @classmethod
-    def decrypt_font(cls, key, path):
+    def decrypt_font(self, key, path):
         raw = open(path, 'rb').read()
         crypt = raw[:1024]
         key = cycle(iter(key))
@@ -29,13 +28,18 @@ class EPUBInput(InputFormatPlugin):
             f.write(decrypt)
             f.write(raw[1024:])
 
-    @classmethod
-    def process_encryption(cls, encfile, opf, log):
+    def process_encryption(self, encfile, opf, log):
         key = None
-        m = re.search(r'(?i)(urn:uuid:[0-9a-f-]+)', open(opf, 'rb').read())
-        if m:
-            key = m.group(1)
-            key = list(map(ord, uuid.UUID(key).bytes))
+        for item in opf.identifier_iter():
+            scheme = None
+            for key in item.attrib.keys():
+                if key.endswith('scheme'):
+                    scheme = item.get(key)
+            if (scheme and scheme.lower() == 'uuid') or \
+                    (item.text and item.text.startswith('urn:uuid:')):
+                key = str(item.text).rpartition(':')[-1]
+                key = list(map(ord, uuid.UUID(key).bytes))
+
         try:
             root = etree.parse(encfile)
             for em in root.xpath('descendant::*[contains(name(), "EncryptionMethod")]'):
@@ -46,7 +50,8 @@ class EPUBInput(InputFormatPlugin):
                 uri = cr.get('URI')
                 path = os.path.abspath(os.path.join(os.path.dirname(encfile), '..', *uri.split('/')))
                 if os.path.exists(path):
-                    cls.decrypt_font(key, path)
+                    self._encrypted_font_uris.append(uri)
+                    self.decrypt_font(key, path)
             return True
         except:
             import traceback
@@ -115,14 +120,17 @@ class EPUBInput(InputFormatPlugin):
         if opf is None:
             raise ValueError('%s is not a valid EPUB file'%path)
 
-        if os.path.exists(encfile):
-            if not self.process_encryption(encfile, opf, log):
-                raise DRMError(os.path.basename(path))
-
         opf = os.path.relpath(opf, os.getcwdu())
         parts = os.path.split(opf)
         opf = OPF(opf, os.path.dirname(os.path.abspath(opf)))
 
+        self._encrypted_font_uris = []
+        if os.path.exists(encfile):
+            if not self.process_encryption(encfile, opf, log):
+                raise DRMError(os.path.basename(path))
+        self.encrypted_fonts = self._encrypted_font_uris
+
         if len(parts) > 1 and parts[0]:
             delta = '/'.join(parts[:-1])+'/'
             for elem in opf.itermanifest():
|
|||||||
from calibre.customize.conversion import OutputFormatPlugin
|
from calibre.customize.conversion import OutputFormatPlugin
|
||||||
from calibre.ptempfile import TemporaryDirectory
|
from calibre.ptempfile import TemporaryDirectory
|
||||||
from calibre.constants import __appname__, __version__
|
from calibre.constants import __appname__, __version__
|
||||||
from calibre import strftime, guess_type, prepare_string_for_xml
|
from calibre import strftime, guess_type, prepare_string_for_xml, CurrentDir
|
||||||
from calibre.customize.conversion import OptionRecommendation
|
from calibre.customize.conversion import OptionRecommendation
|
||||||
|
from calibre.constants import filesystem_encoding
|
||||||
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
|
||||||
@ -157,11 +158,9 @@ class EPUBOutput(OutputFormatPlugin):
|
|||||||
|
|
||||||
self.workaround_ade_quirks()
|
self.workaround_ade_quirks()
|
||||||
self.workaround_webkit_quirks()
|
self.workaround_webkit_quirks()
|
||||||
self.workaround_sony_quirks()
|
|
||||||
from calibre.ebooks.oeb.transforms.rescale import RescaleImages
|
from calibre.ebooks.oeb.transforms.rescale import RescaleImages
|
||||||
RescaleImages()(oeb, opts)
|
RescaleImages()(oeb, opts)
|
||||||
|
|
||||||
|
|
||||||
from calibre.ebooks.oeb.transforms.split import Split
|
from calibre.ebooks.oeb.transforms.split import Split
|
||||||
split = Split(not self.opts.dont_split_on_page_breaks,
|
split = Split(not self.opts.dont_split_on_page_breaks,
|
||||||
max_flow_size=self.opts.flow_size*1024
|
max_flow_size=self.opts.flow_size*1024
|
||||||
@ -170,6 +169,21 @@ class EPUBOutput(OutputFormatPlugin):
|
|||||||
|
|
||||||
self.insert_cover()
|
self.insert_cover()
|
||||||
|
|
||||||
|
self.workaround_sony_quirks()
|
||||||
|
|
||||||
|
from calibre.ebooks.oeb.base import OPF
|
||||||
|
identifiers = oeb.metadata['identifier']
|
||||||
|
uuid = None
|
||||||
|
for x in identifiers:
|
||||||
|
if x.get(OPF('scheme'), None).lower() == 'uuid' or unicode(x).startswith('urn:uuid:'):
|
||||||
|
uuid = unicode(x).split(':')[-1]
|
||||||
|
break
|
||||||
|
if uuid is None:
|
||||||
|
self.log.warn('No UUID identifier found')
|
||||||
|
from uuid import uuid4
|
||||||
|
uuid = str(uuid4())
|
||||||
|
oeb.metadata.add('identifier', uuid, scheme='uuid', id=uuid)
|
||||||
|
|
||||||
with TemporaryDirectory('_epub_output') as tdir:
|
with TemporaryDirectory('_epub_output') as tdir:
|
||||||
from calibre.customize.ui import plugin_for_output_format
|
from calibre.customize.ui import plugin_for_output_format
|
||||||
oeb_output = plugin_for_output_format('oeb')
|
oeb_output = plugin_for_output_format('oeb')
|
||||||
@ -177,10 +191,16 @@ class EPUBOutput(OutputFormatPlugin):
|
|||||||
opf = [x for x in os.listdir(tdir) if x.endswith('.opf')][0]
|
opf = [x for x in os.listdir(tdir) if x.endswith('.opf')][0]
|
||||||
self.condense_ncx([os.path.join(tdir, x) for x in os.listdir(tdir)\
|
self.condense_ncx([os.path.join(tdir, x) for x in os.listdir(tdir)\
|
||||||
if x.endswith('.ncx')][0])
|
if x.endswith('.ncx')][0])
|
||||||
|
encrypted_fonts = getattr(input_plugin, 'encrypted_fonts', [])
|
||||||
|
encryption = None
|
||||||
|
if encrypted_fonts:
|
||||||
|
encryption = self.encrypt_fonts(encrypted_fonts, tdir, uuid)
|
||||||
|
|
||||||
from calibre.ebooks.epub import initialize_container
|
from calibre.ebooks.epub import initialize_container
|
||||||
epub = initialize_container(output_path, os.path.basename(opf))
|
epub = initialize_container(output_path, os.path.basename(opf))
|
||||||
epub.add_dir(tdir)
|
epub.add_dir(tdir)
|
||||||
|
if encryption is not None:
|
||||||
|
epub.writestr('META-INF/encryption.xml', encryption)
|
||||||
if opts.extract_to is not None:
|
if opts.extract_to is not None:
|
||||||
if os.path.exists(opts.extract_to):
|
if os.path.exists(opts.extract_to):
|
||||||
shutil.rmtree(opts.extract_to)
|
shutil.rmtree(opts.extract_to)
|
||||||
@ -189,6 +209,52 @@ class EPUBOutput(OutputFormatPlugin):
|
|||||||
self.log.info('EPUB extracted to', opts.extract_to)
|
self.log.info('EPUB extracted to', opts.extract_to)
|
||||||
epub.close()
|
epub.close()
|
||||||
|
|
||||||
|
def encrypt_fonts(self, uris, tdir, uuid):
|
||||||
|
from binascii import unhexlify
|
||||||
|
|
||||||
|
key = re.sub(r'[^a-fA-F0-9]', '', uuid)
|
||||||
|
if len(key) < 16:
|
||||||
|
raise ValueError('UUID identifier %r is invalid'%uuid)
|
||||||
|
key = unhexlify((key + key)[:32])
|
||||||
|
key = tuple(map(ord, key))
|
||||||
|
paths = []
|
||||||
|
with CurrentDir(tdir):
|
||||||
|
paths = [os.path.join(*x.split('/')) for x in uris]
|
||||||
|
uris = dict(zip(uris, paths))
|
||||||
|
fonts = []
|
||||||
|
for uri in list(uris.keys()):
|
||||||
|
path = uris[uri]
|
||||||
|
if isinstance(path, unicode):
|
||||||
|
path = path.encode(filesystem_encoding)
|
||||||
|
if not os.path.exists(path):
|
||||||
|
uris.pop(uri)
|
||||||
|
continue
|
||||||
|
self.log.debug('Encrypting font:', uri)
|
||||||
|
with open(path, 'r+b') as f:
|
||||||
|
data = f.read(1024)
|
||||||
|
f.seek(0)
|
||||||
|
for i in range(1024):
|
||||||
|
f.write(chr(ord(data[i]) ^ key[i%16]))
|
||||||
|
if not isinstance(uri, unicode):
|
||||||
|
uri = uri.decode('utf-8')
|
||||||
|
fonts.append(u'''
|
||||||
|
<enc:EncryptedData>
|
||||||
|
<enc:EncryptionMethod Algorithm="http://ns.adobe.com/pdf/enc#RC"/>
|
||||||
|
<enc:CipherData>
|
||||||
|
<enc:CipherReference URI="%s"/>
|
||||||
|
</enc:CipherData>
|
||||||
|
</enc:EncryptedData>
|
||||||
|
'''%(uri.replace('"', '\\"')))
|
||||||
|
if fonts:
|
||||||
|
ans = '''<encryption
|
||||||
|
xmlns="urn:oasis:names:tc:opendocument:xmlns:container"
|
||||||
|
xmlns:enc="http://www.w3.org/2001/04/xmlenc#"
|
||||||
|
xmlns:deenc="http://ns.adobe.com/digitaleditions/enc">
|
||||||
|
'''
|
||||||
|
ans += (u'\n'.join(fonts)).encode('utf-8')
|
||||||
|
ans += '\n</encryption>'
|
||||||
|
return ans
|
||||||
|
|
||||||
def default_cover(self):
|
def default_cover(self):
|
||||||
'''
|
'''
|
||||||
Create a generic cover for books that dont have a cover
|
Create a generic cover for books that dont have a cover
|
||||||
|
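A worked example of the key derivation used by encrypt_fonts() above: strip everything that is not a hex digit from the UUID, double the string, keep the first 32 hex digits, and unhexlify them into the 16-byte XOR key (the identifier value below is an assumption):

    import re
    from binascii import unhexlify

    uuid = '0b7f95cc-2ad2-4a64-ad9a-f54e0d65a0c9'   # assumed identifier
    key = re.sub(r'[^a-fA-F0-9]', '', uuid)
    key = unhexlify((key + key)[:32])
    assert len(key) == 16                           # applied cyclically over 1KB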
@@ -20,7 +20,7 @@ from itertools import izip
 from calibre.customize.conversion import InputFormatPlugin
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.customize.conversion import OptionRecommendation
-from calibre.constants import islinux
+from calibre.constants import islinux, isfreebsd
 from calibre import unicode_path
 from calibre.utils.localization import get_lang
 from calibre.utils.filenames import ascii_filename
@@ -346,7 +346,7 @@ class HTMLInput(InputFormatPlugin):
         self.added_resources = {}
         self.log = log
         for path, href in htmlfile_map.items():
-            if not islinux:
+            if not (islinux or isfreebsd):
                 path = path.lower()
             self.added_resources[path] = href
         self.urlnormalize, self.DirContainer = urlnormalize, DirContainer
@@ -417,7 +417,7 @@ class HTMLInput(InputFormatPlugin):
         if os.path.isdir(link):
             self.log.warn(link_, 'is a link to a directory. Ignoring.')
             return link_
-        if not islinux:
+        if not (islinux or isfreebsd):
             link = link.lower()
         if link not in self.added_resources:
             bhref = os.path.basename(link)
|
@ -215,6 +215,28 @@ def merge_results(one, two):
|
|||||||
else:
|
else:
|
||||||
one[idx].smart_update(x)
|
one[idx].smart_update(x)
|
||||||
|
|
||||||
|
class MetadataSources(object):
|
||||||
|
|
||||||
|
def __init__(self, sources):
|
||||||
|
self.sources = sources
|
||||||
|
|
||||||
|
def __enter__(self):
|
||||||
|
for s in self.sources:
|
||||||
|
s.__enter__()
|
||||||
|
return self
|
||||||
|
|
||||||
|
def __exit__(self, *args):
|
||||||
|
for s in self.sources:
|
||||||
|
s.__exit__()
|
||||||
|
|
||||||
|
def __call__(self, *args, **kwargs):
|
||||||
|
for s in self.sources:
|
||||||
|
s(*args, **kwargs)
|
||||||
|
|
||||||
|
def join(self):
|
||||||
|
for s in self.sources:
|
||||||
|
s.join()
|
||||||
|
|
||||||
def search(title=None, author=None, publisher=None, isbn=None, isbndb_key=None,
|
def search(title=None, author=None, publisher=None, isbn=None, isbndb_key=None,
|
||||||
verbose=0):
|
verbose=0):
|
||||||
assert not(title is None and author is None and publisher is None and \
|
assert not(title is None and author is None and publisher is None and \
|
||||||
@ -224,11 +246,10 @@ def search(title=None, author=None, publisher=None, isbn=None, isbndb_key=None,
|
|||||||
if isbn is not None:
|
if isbn is not None:
|
||||||
isbn = re.sub(r'[^a-zA-Z0-9]', '', isbn).upper()
|
isbn = re.sub(r'[^a-zA-Z0-9]', '', isbn).upper()
|
||||||
fetchers = list(metadata_sources(isbndb_key=isbndb_key))
|
fetchers = list(metadata_sources(isbndb_key=isbndb_key))
|
||||||
|
with MetadataSources(fetchers) as manager:
|
||||||
|
manager(title, author, publisher, isbn, verbose)
|
||||||
|
manager.join()
|
||||||
|
|
||||||
for fetcher in fetchers:
|
|
||||||
fetcher(title, author, publisher, isbn, verbose)
|
|
||||||
for fetcher in fetchers:
|
|
||||||
fetcher.join()
|
|
||||||
results = list(fetchers[0].results)
|
results = list(fetchers[0].results)
|
||||||
for fetcher in fetchers[1:]:
|
for fetcher in fetchers[1:]:
|
||||||
merge_results(results, fetcher.results)
|
merge_results(results, fetcher.results)
|
||||||
@ -243,10 +264,9 @@ def search(title=None, author=None, publisher=None, isbn=None, isbndb_key=None,
|
|||||||
def get_social_metadata(mi, verbose=0):
|
def get_social_metadata(mi, verbose=0):
|
||||||
from calibre.customize.ui import metadata_sources
|
from calibre.customize.ui import metadata_sources
|
||||||
fetchers = list(metadata_sources(metadata_type='social'))
|
fetchers = list(metadata_sources(metadata_type='social'))
|
||||||
for fetcher in fetchers:
|
with MetadataSources(fetchers) as manager:
|
||||||
fetcher(mi.title, mi.authors, mi.publisher, mi.isbn, verbose)
|
manager(mi.title, mi.authors, mi.publisher, mi.isbn, verbose)
|
||||||
for fetcher in fetchers:
|
manager.join()
|
||||||
fetcher.join()
|
|
||||||
ratings, tags, comments = [], set([]), set([])
|
ratings, tags, comments = [], set([]), set([])
|
||||||
for fetcher in fetchers:
|
for fetcher in fetchers:
|
||||||
if fetcher.results:
|
if fetcher.results:
|
||||||
|
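An illustrative use of the new MetadataSources wrapper with stand-in fetchers (FakeFetcher is hypothetical; real sources come from calibre.customize.ui.metadata_sources). Each source must support context-manager entry/exit, a __call__ that starts the fetch, and join():

    class FakeFetcher(object):
        results = []
        def __enter__(self): return self
        def __exit__(self, *args): pass
        def __call__(self, *args): pass   # would start a download thread
        def join(self): pass              # would wait for it to finish

    with MetadataSources([FakeFetcher(), FakeFetcher()]) as manager:
        manager('Some Title', 'Some Author', None, None, 0)
        manager.join()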
@@ -70,6 +70,17 @@ def is_recipe(filename):
     filename.rpartition('.')[0].endswith('_recipe_out')
 
 def get_metadata(stream, stream_type='lrf', use_libprs_metadata=False):
+    pos = 0
+    if hasattr(stream, 'tell'):
+        pos = stream.tell()
+    try:
+        return _get_metadata(stream, stream_type, use_libprs_metadata)
+    finally:
+        if hasattr(stream, 'seek'):
+            stream.seek(pos)
+
+
+def _get_metadata(stream, stream_type, use_libprs_metadata):
     if stream_type: stream_type = stream_type.lower()
     if stream_type in ('html', 'html', 'xhtml', 'xhtm', 'xml'):
         stream_type = 'html'
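The point of the wrapper is that get_metadata() is now side-effect-free with respect to stream position: it records tell() up front and seeks back in the finally clause, so callers can probe metadata and still read the stream from the start. A small illustration of the guarantee (the in-memory stream and .epub name are assumptions):

    from cStringIO import StringIO

    stream = StringIO('fake ebook bytes')
    stream.name = 'book.epub'          # some readers rely on stream.name
    start = stream.tell()
    stream.read()                      # a format reader may consume everything
    stream.seek(start)                 # get_metadata() now restores this itself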
@@ -97,9 +97,14 @@ class MetadataUpdater(object):
 
         self.nrecs, = unpack('>H', data[76:78])
         record0 = self.record0 = self.record(0)
+        mobi_header_length, = unpack('>I', record0[0x14:0x18])
+        if not mobi_header_length:
+            raise MobiError("Non-standard file format. Try 'Convert E-Books' with MOBI as Input and Output formats.")
+
         self.encryption_type, = unpack('>H', record0[12:14])
         codepage, = unpack('>I', record0[28:32])
         self.codec = 'utf-8' if codepage == 65001 else 'cp1252'
+
         image_base, = unpack('>I', record0[108:112])
         flags, = self.flags, = unpack('>I', record0[128:132])
         have_exth = self.have_exth = (flags & 0x40) != 0
@@ -306,9 +311,10 @@ class MetadataUpdater(object):
         return StreamSlicer(self.stream, start, stop)
 
     def update(self, mi):
-        def pop_exth_record(exth_id):
-            if exth_id in self.original_exth_records:
-                self.original_exth_records.pop(exth_id)
+        def update_exth_record(rec):
+            recs.append(rec)
+            if rec[0] in self.original_exth_records:
+                self.original_exth_records.pop(rec[0])
+
         if self.type != "BOOKMOBI":
             raise MobiError("Setting metadata only supported for MOBI files of type 'BOOK'.\n"
@@ -323,47 +329,36 @@ class MetadataUpdater(object):
         pas = False
         if mi.author_sort and pas:
             authors = mi.author_sort
-            recs.append((100, authors.encode(self.codec, 'replace')))
-            pop_exth_record(100)
+            update_exth_record((100, authors.encode(self.codec, 'replace')))
         elif mi.authors:
             authors = '; '.join(mi.authors)
-            recs.append((100, authors.encode(self.codec, 'replace')))
-            pop_exth_record(100)
+            update_exth_record((100, authors.encode(self.codec, 'replace')))
         if mi.publisher:
-            recs.append((101, mi.publisher.encode(self.codec, 'replace')))
-            pop_exth_record(101)
+            update_exth_record((101, mi.publisher.encode(self.codec, 'replace')))
         if mi.comments:
-            recs.append((103, mi.comments.encode(self.codec, 'replace')))
-            pop_exth_record(103)
+            update_exth_record((103, mi.comments.encode(self.codec, 'replace')))
         if mi.isbn:
-            recs.append((104, mi.isbn.encode(self.codec, 'replace')))
-            pop_exth_record(104)
+            update_exth_record((104, mi.isbn.encode(self.codec, 'replace')))
         if mi.tags:
             subjects = '; '.join(mi.tags)
-            recs.append((105, subjects.encode(self.codec, 'replace')))
-            pop_exth_record(105)
+            update_exth_record((105, subjects.encode(self.codec, 'replace')))
         if mi.pubdate:
-            recs.append((106, str(mi.pubdate).encode(self.codec, 'replace')))
-            pop_exth_record(106)
+            update_exth_record((106, str(mi.pubdate).encode(self.codec, 'replace')))
         elif mi.timestamp:
-            recs.append((106, str(mi.timestamp).encode(self.codec, 'replace')))
-            pop_exth_record(106)
+            update_exth_record((106, str(mi.timestamp).encode(self.codec, 'replace')))
         elif self.timestamp:
-            recs.append((106, self.timestamp))
-            pop_exth_record(106)
+            update_exth_record((106, self.timestamp))
         else:
-            recs.append((106, nowf().isoformat().encode(self.codec, 'replace')))
-            pop_exth_record(106)
+            update_exth_record((106, nowf().isoformat().encode(self.codec, 'replace')))
         if self.cover_record is not None:
-            recs.append((201, pack('>I', self.cover_rindex)))
-            recs.append((203, pack('>I', 0)))
-            pop_exth_record(201)
-            pop_exth_record(203)
+            update_exth_record((201, pack('>I', self.cover_rindex)))
+            update_exth_record((203, pack('>I', 0)))
         if self.thumbnail_record is not None:
-            recs.append((202, pack('>I', self.thumbnail_rindex)))
-            pop_exth_record(202)
+            update_exth_record((202, pack('>I', self.thumbnail_rindex)))
+        if 503 in self.original_exth_records:
+            update_exth_record((503, mi.title.encode(self.codec, 'replace')))
 
-        # Restore any original EXTH fields that weren't updated
+        # Include remaining original EXTH fields
         for id in sorted(self.original_exth_records):
             recs.append((id, self.original_exth_records[id]))
         recs = sorted(recs, key=lambda x:(x[0],x[0]))
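A standalone sketch (not calibre code) of the semantics of update_exth_record(): the new record is appended and any original record with the same id is dropped, so each EXTH id ends up in the rebuilt header exactly once.

    recs = []
    original_exth_records = {100: 'Old Author', 106: 'Old Date'}

    def update_exth_record(rec):
        recs.append(rec)
        original_exth_records.pop(rec[0], None)

    update_exth_record((100, 'New Author'))
    for rec_id in sorted(original_exth_records):     # keep untouched fields
        recs.append((rec_id, original_exth_records[rec_id]))
    # recs == [(100, 'New Author'), (106, 'Old Date')]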
@@ -779,6 +779,9 @@ class OPF(object):
             self.set_text(matches[0], unicode(val))
         return property(fget=fget, fset=fset)
 
+    def identifier_iter(self):
+        for item in self.identifier_path(self.metadata):
+            yield item
 
     def guess_cover(self):
         '''
@@ -8,9 +8,10 @@ Read metadata from RAR archives
 '''
 
 import os
-from cStringIO import StringIO
-from calibre.ptempfile import PersistentTemporaryFile
+from calibre.ptempfile import PersistentTemporaryFile, TemporaryDirectory
 from calibre.libunrar import extract_member, names
+from calibre import CurrentDir
 
 def get_metadata(stream):
     from calibre.ebooks.metadata.archive import is_comic
@@ -32,8 +33,10 @@ def get_metadata(stream):
         stream_type = stream_type[1:]
         if stream_type in ('lit', 'opf', 'prc', 'mobi', 'fb2', 'epub',
                            'rb', 'imp', 'pdf', 'lrf'):
-            data = extract_member(path, match=None, name=f)[1]
-            stream = StringIO(data)
+            with TemporaryDirectory() as tdir:
+                with CurrentDir(tdir):
+                    stream = extract_member(path, match=None, name=f,
+                            as_file=True)[1]
             return get_metadata(stream, stream_type)
     raise ValueError('No ebook found in RAR archive')
@@ -149,7 +149,8 @@ class TOC(list):
 
     def read_ncx_toc(self, toc):
         self.base_path = os.path.dirname(toc)
-        soup = NCXSoup(xml_to_unicode(open(toc, 'rb').read())[0])
+        raw = xml_to_unicode(open(toc, 'rb').read(), assume_utf8=True)[0]
+        soup = NCXSoup(raw)
 
         def process_navpoint(np, dest):
             play_order = np.get('playOrder', None)
@@ -160,7 +161,7 @@ class TOC(list):
             if nl is not None:
                 text = u''
                 for txt in nl.findAll(re.compile('text')):
-                    text += ''.join([unicode(s) for s in txt.findAll(text=True)])
+                    text += u''.join([unicode(s) for s in txt.findAll(text=True)])
                 content = np.find(re.compile('content'))
                 if content is None or not content.has_key('src') or not txt:
                     return
@@ -43,6 +43,8 @@ def read_metadata_(task, tdir, notification=lambda x,y:x):
     import_map = {}
     for format in formats:
         nfp = run_plugins_on_import(format)
+        if nfp is None:
+            nfp = format
         nfp = os.path.abspath(nfp)
         if isinstance(nfp, unicode):
             nfp.encode(filesystem_encoding)
@@ -3,9 +3,10 @@ __license__ = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 
 import os
-from zipfile import ZipFile
-from cStringIO import StringIO
+
+from calibre.utils.zipfile import ZipFile
+from calibre.ptempfile import TemporaryDirectory
+from calibre import CurrentDir
 
 def get_metadata(stream):
     from calibre.ebooks.metadata.meta import get_metadata
@@ -23,8 +24,10 @@ def get_metadata(stream):
         stream_type = stream_type[1:]
         if stream_type in ('lit', 'opf', 'prc', 'mobi', 'fb2', 'epub',
                            'rb', 'imp', 'pdf', 'lrf'):
-            stream = StringIO(zf.read(f))
-            return get_metadata(stream, stream_type)
+            with TemporaryDirectory() as tdir:
+                with CurrentDir(tdir):
+                    path = zf.extract(f)
+                    return get_metadata(open(path, 'rb'), stream_type)
     raise ValueError('No ebook found in ZIP archive')
@@ -154,7 +154,7 @@ class MOBIOutput(OutputFormatPlugin):
                 MobiWriter, PALMDOC, UNCOMPRESSED
         from calibre.ebooks.mobi.mobiml import MobiMLizer
         from calibre.ebooks.oeb.transforms.manglecase import CaseMangler
-        from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
+        from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer, Unavailable
        from calibre.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder
         from calibre.customize.ui import plugin_for_input_format
         imagemax = PALM_MAX_IMAGE_SIZE if opts.rescale_images else None
@@ -163,8 +163,11 @@ class MOBIOutput(OutputFormatPlugin):
         tocadder(oeb, opts)
         mangler = CaseMangler()
         mangler(oeb, opts)
-        rasterizer = SVGRasterizer()
-        rasterizer(oeb, opts)
+        try:
+            rasterizer = SVGRasterizer()
+            rasterizer(oeb, opts)
+        except Unavailable:
+            self.log.warn('SVG rasterizer unavailable, SVG will not be converted')
         mobimlizer = MobiMLizer(ignore_tables=opts.linearize_tables)
         mobimlizer(oeb, opts)
         self.check_for_periodical()
@@ -4,12 +4,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 Read data from .mobi files
 '''
 
-import functools
-import os
-import re
-import struct
-import textwrap
-import cStringIO
+import functools, shutil, os, re, struct, textwrap, cStringIO, sys
 
 try:
     from PIL import Image as PILImage
@@ -619,6 +614,16 @@ class MobiReader(object):
                     * opf.cover.split('/'))):
                 opf.cover = None
 
+        cover = opf.cover
+        if cover is not None:
+            cover = cover.replace('/', os.sep)
+            if os.path.exists(cover):
+                ncover = 'images'+os.sep+'calibre_cover.jpg'
+                if os.path.exists(ncover):
+                    os.remove(ncover)
+                shutil.copyfile(cover, ncover)
+                opf.cover = ncover.replace(os.sep, '/')
+
         manifest = [(htmlfile, 'application/xhtml+xml'),
             (os.path.abspath('styles.css'), 'text/css')]
         bp = os.path.dirname(htmlfile)
@@ -796,15 +801,22 @@ class MobiReader(object):
 def get_metadata(stream):
     from calibre.utils.logging import Log
     log = Log()
-
     mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')])
-    try:
-        mh = MetadataHeader(stream, log)
+    mh = MetadataHeader(stream, log)
+    if mh.title and mh.title != _('Unknown'):
+        mi.title = mh.title
+
     if mh.exth is not None:
         if mh.exth.mi is not None:
             mi = mh.exth.mi
     else:
+        size = sys.maxint
+        if hasattr(stream, 'seek') and hasattr(stream, 'tell'):
+            pos = stream.tell()
+            stream.seek(0, 2)
+            size = stream.tell()
+            stream.seek(pos)
+        if size < 4*1024*1024:
             with TemporaryDirectory('_mobi_meta_reader') as tdir:
                 with CurrentDir(tdir):
                     mr = MobiReader(stream, log)
@@ -812,16 +824,18 @@ def get_metadata(stream):
                     mr.extract_content(tdir, parse_cache)
                     if mr.embedded_mi is not None:
                         mi = mr.embedded_mi
-        if hasattr(mh.exth, 'cover_offset'):
-            cover_index = mh.first_image_index + mh.exth.cover_offset
-            data = mh.section_data(int(cover_index))
-        else:
-            data = mh.section_data(mh.first_image_index)
-        buf = cStringIO.StringIO(data)
-        im = PILImage.open(buf)
-        obuf = cStringIO.StringIO()
-        im.convert('RGBA').save(obuf, format='JPEG')
-        mi.cover_data = ('jpg', obuf.getvalue())
-    except:
-        log.exception()
+    if hasattr(mh.exth, 'cover_offset'):
+        cover_index = mh.first_image_index + mh.exth.cover_offset
+        data = mh.section_data(int(cover_index))
+    else:
+        data = mh.section_data(mh.first_image_index)
+    buf = cStringIO.StringIO(data)
+    try:
+        im = PILImage.open(buf)
+    except:
+        log.exception('Failed to read MOBI cover')
+    else:
+        obuf = cStringIO.StringIO()
+        im.convert('RGB').save(obuf, format='JPEG')
+        mi.cover_data = ('jpg', obuf.getvalue())
     return mi
@@ -152,13 +152,17 @@ class EbookIterator(object):
                     prints('Substituting font family: %s -> %s'%(bad, good))
                 return match.group().replace(bad, '"%s"'%good)

+        from calibre.ebooks.chardet import force_encoding
         for csspath in css_files:
             with open(csspath, 'r+b') as f:
                 css = f.read()
-                css = font_family_pat.sub(prepend_embedded_font, css)
-                f.seek(0)
-                f.truncate()
-                f.write(css)
+                enc = force_encoding(css, False)
+                css = css.decode(enc, 'replace')
+                ncss = font_family_pat.sub(prepend_embedded_font, css)
+                if ncss != css:
+                    f.seek(0)
+                    f.truncate()
+                    f.write(ncss.encode(enc))

     def __enter__(self, processed=False):
         self.delete_on_exit = []
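The rewritten loop decodes each stylesheet before substituting font families and writes it back in the same encoding, and only when something actually changed. A sketch of that decode-transform-reencode round trip, with a deliberately naive two-codec fallback standing in for calibre's force_encoding (helper name is mine):

    def rewrite_text_file(path, transform):
        # Decode bytes -> unicode, transform, re-encode with the *same*
        # encoding, and only touch the file if the transform changed it.
        with open(path, 'r+b') as f:
            raw = f.read()
            try:
                enc = 'utf-8'
                text = raw.decode(enc)
            except UnicodeDecodeError:
                enc = 'latin-1'        # naive stand-in for real detection
                text = raw.decode(enc, 'replace')
            ntext = transform(text)
            if ntext != text:
                f.seek(0)
                f.truncate()
                f.write(ntext.encode(enc))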
@@ -173,11 +177,12 @@ class EbookIterator(object):
         plumber.opts.no_process = True

         plumber.input_plugin.for_viewer = True
-        self.pathtoopf = plumber.input_plugin(open(plumber.input, 'rb'),
-            plumber.opts, plumber.input_fmt, self.log,
-            {}, self.base)
+        with plumber.input_plugin:
+            self.pathtoopf = plumber.input_plugin(open(plumber.input, 'rb'),
+                plumber.opts, plumber.input_fmt, self.log,
+                {}, self.base)

-        if processed or plumber.input_fmt.lower() in ('pdf', 'rb') and \
+        if processed or plumber.input_fmt.lower() in ('pdb', 'pdf', 'rb') and \
                 not hasattr(self.pathtoopf, 'manifest'):
             self.pathtoopf = create_oebbook(self.log, self.pathtoopf, plumber.opts,
                     plumber.input_plugin)
@@ -331,7 +331,10 @@ class OEBReader(object):
             id = child.get('id')
             klass = child.get('class', 'chapter')

-            po = int(child.get('playOrder', self.oeb.toc.next_play_order()))
+            try:
+                po = int(child.get('playOrder', self.oeb.toc.next_play_order()))
+            except:
+                po = self.oeb.toc.next_play_order()

             authorElement = xpath(child,
                     'descendant::calibre:meta[@name = "author"]')
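NCX files in the wild sometimes carry playOrder values that are not integers, and the try/except above falls back to a generated play order instead of aborting the parse. A tiny illustration of the same rule (parse_play_order is an illustrative name):

    from itertools import count

    def parse_play_order(value, next_play_order):
        # Accept well-formed integers; anything else gets a generated order.
        try:
            return int(value)
        except (TypeError, ValueError):
            return next_play_order()

    counter = count(100)
    assert parse_play_order('3', lambda: next(counter)) == 3
    assert parse_play_order('3a', lambda: next(counter)) == 100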
@@ -190,11 +190,11 @@ class Stylizer(object):
                 selector = CSSSelector(ntext)
                 matches = selector(tree)

-                if not matches and class_sel_pat.match(text):
+                if not matches and class_sel_pat.match(text) and text.lower() != text:
                     found = False
+                    ltext = text.lower()
                     for x in tree.xpath('//*[@class]'):
-                        if text.lower().endswith('.'+x.get('class').lower()) and \
-                                text.lower() != text:
+                        if ltext.endswith('.'+x.get('class').lower()):
                             matches.append(x)
                             found = True
                     if found:
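The rewrite hoists text.lower() out of the loop and skips the manual scan entirely for selectors that are already lowercase; the manual pass only pays off when the selector contains uppercase letters. A self-contained sketch of the matching rule under that assumption (function and data are illustrative, not calibre's):

    def fallback_class_matches(selector_text, element_classes):
        # Lowercase selectors were already handled by CSSSelector; only
        # mixed-case ones need the manual, case-insensitive pass.
        if selector_text.lower() == selector_text:
            return []
        ltext = selector_text.lower()
        return [c for c in element_classes
                if ltext.endswith('.' + c.lower())]

    assert fallback_class_matches('.Calibre1', ['Calibre1', 'other']) == ['Calibre1']
    assert fallback_class_matches('.calibre1', ['Calibre1']) == []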
@@ -27,11 +27,14 @@ from calibre.ebooks.oeb.stylizer import Stylizer
 IMAGE_TAGS = set([XHTML('img'), XHTML('object')])
 KEEP_ATTRS = set(['class', 'style', 'width', 'height', 'align'])

+class Unavailable(Exception):
+    pass
+
 class SVGRasterizer(object):
     def __init__(self):
         from calibre.gui2 import is_ok_to_use_qt
         if not is_ok_to_use_qt():
-            raise Exception('Not OK to use Qt')
+            raise Unavailable('Not OK to use Qt')

     @classmethod
     def config(cls, cfg):
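Raising a dedicated Unavailable type instead of a bare Exception gives callers something specific to catch when no Qt GUI is present. A hypothetical consumer, assuming callers want to degrade rather than abort:

    class Unavailable(Exception):
        pass

    def create_rasterizer(ok_to_use_qt):
        if not ok_to_use_qt:
            raise Unavailable('Not OK to use Qt')
        return object()   # stand-in for a real SVGRasterizer instance

    try:
        rasterizer = create_rasterizer(False)
    except Unavailable:
        rasterizer = None  # skip SVG rasterization; conversion continues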
@@ -29,7 +29,7 @@ class RescaleImages(object):


         page_width, page_height = self.opts.dest.width, self.opts.dest.height
-        if not self.opts.is_image_collection:
+        if not getattr(self.opts, 'is_image_collection', False):
             page_width -= (self.opts.margin_left + self.opts.margin_right) * self.opts.dest.dpi/72.
             page_height -= (self.opts.margin_top + self.opts.margin_bottom) * self.opts.dest.dpi/72.
         for item in self.oeb.manifest:
@@ -11,12 +11,14 @@ class PDBError(Exception):
 from calibre.ebooks.pdb.ereader.reader import Reader as ereader_reader
 from calibre.ebooks.pdb.palmdoc.reader import Reader as palmdoc_reader
 from calibre.ebooks.pdb.ztxt.reader import Reader as ztxt_reader
+from calibre.ebooks.pdb.pdf.reader import Reader as pdf_reader

 FORMAT_READERS = {
     'PNPdPPrs': ereader_reader,
     'PNRdPPrs': ereader_reader,
     'zTXTGPlm': ztxt_reader,
     'TEXtREAd': palmdoc_reader,
+    '.pdfADBE': pdf_reader,
 }

 from calibre.ebooks.pdb.palmdoc.writer import Writer as palmdoc_writer
@@ -34,8 +36,8 @@ IDENTITY_TO_NAME = {
     'PNRdPPrs': 'eReader',
     'zTXTGPlm': 'zTXT',
     'TEXtREAd': 'PalmDOC',
-
     '.pdfADBE': 'Adobe Reader',
+
     'BVokBDIC': 'BDicty',
     'DB99DBOS': 'DB (Database program)',
     'vIMGView': 'FireViewer (ImageViewer)',
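FORMAT_READERS is keyed by the 8-character type/creator identity read from the PDB header, so wiring up the new PDF reader is one import plus one dictionary entry. A stripped-down sketch of the dispatch, with strings standing in for the reader classes:

    FORMAT_READERS = {
        'PNPdPPrs': 'ereader_reader',
        'PNRdPPrs': 'ereader_reader',
        'zTXTGPlm': 'ztxt_reader',
        'TEXtREAd': 'palmdoc_reader',
        '.pdfADBE': 'pdf_reader',
    }

    def reader_for(identity):
        # The real code surfaces unknown identities as a friendly error
        # via IDENTITY_TO_NAME; here we just raise.
        try:
            return FORMAT_READERS[identity]
        except KeyError:
            raise ValueError('Unknown PDB identity: %r' % identity)

    assert reader_for('.pdfADBE') == 'pdf_reader'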
|
0
src/calibre/ebooks/pdb/pdf/__init__.py
Normal file
0
src/calibre/ebooks/pdb/pdf/__init__.py
Normal file
37
src/calibre/ebooks/pdb/pdf/reader.py
Normal file
37
src/calibre/ebooks/pdb/pdf/reader.py
Normal file
@@ -0,0 +1,37 @@
+# -*- coding: utf-8 -*-
+
+'''
+Read content from palmdoc pdb file.
+'''
+
+__license__ = 'GPL v3'
+__copyright__ = '2010, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+
+from calibre.ebooks.pdb.formatreader import FormatReader
+from calibre.ptempfile import TemporaryFile
+
+class Reader(FormatReader):
+
+    def __init__(self, header, stream, log, options):
+        self.header = header
+        self.stream = stream
+        self.log = log
+        self.options = options
+        setattr(self.options, 'new_pdf_engine', False)
+        setattr(self.options, 'no_images', False)
+        setattr(self.options, 'unwrap_factor', 0.5)
+
+    def extract_content(self, output_dir):
+        self.log.info('Extracting PDF...')
+
+        with TemporaryFile() as pdf_n:
+            pdf = open(pdf_n, 'rwb')
+            for x in xrange(self.header.section_count()):
+                pdf.write(self.header.section_data(x))
+
+        from calibre.customize.ui import plugin_for_input_format
+        pdf.seek(0)
+        return plugin_for_input_format('pdf').convert(pdf, self.options,
+            'pdf', self.log, [])
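The new reader treats the PDB container as a header plus ordered data sections whose concatenation is the embedded PDF, which it then feeds to the regular PDF input plugin. A minimal sketch of the reassembly step (FakeHeader mimics the section_count/section_data interface and is not calibre code):

    import io

    class FakeHeader(object):
        def __init__(self, sections):
            self.sections = sections
        def section_count(self):
            return len(self.sections)
        def section_data(self, i):
            return self.sections[i]

    def reassemble_pdf(header, out):
        # Concatenate every PDB section, in order, to rebuild the payload.
        for i in range(header.section_count()):
            out.write(header.section_data(i))
        out.seek(0)
        return out

    pdf = reassemble_pdf(FakeHeader([b'%PDF-1.4\n', b'...content...']), io.BytesIO())
    assert pdf.read(4) == b'%PDF'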
@@ -13,7 +13,7 @@ from functools import partial

 from calibre.ebooks import ConversionError, DRMError
 from calibre.ptempfile import PersistentTemporaryFile
-from calibre import isosx, iswindows, islinux
+from calibre import isosx, iswindows, islinux, isfreebsd
 from calibre import CurrentDir

 PDFTOHTML = 'pdftohtml'
@@ -23,7 +23,7 @@ if isosx and hasattr(sys, 'frameworks_dir'):
 if iswindows and hasattr(sys, 'frozen'):
     PDFTOHTML = os.path.join(os.path.dirname(sys.executable), 'pdftohtml.exe')
     popen = partial(subprocess.Popen, creationflags=0x08) # CREATE_NO_WINDOW=0x08 so that no ugly console is popped up
-if islinux and getattr(sys, 'frozen_path', False):
+if (islinux or isfreebsd) and getattr(sys, 'frozen_path', False):
     PDFTOHTML = os.path.join(getattr(sys, 'frozen_path'), 'pdftohtml')

 def pdftohtml(output_dir, pdf_path, no_images):
@@ -72,14 +72,14 @@ class PML_HTMLizer(object):
         'ra': ('<span id="r%s"></span><a href="#%s">', '</a>'),
         'c': ('<div style="text-align: center; margin: auto;">', '</div>'),
         'r': ('<div style="text-align: right;">', '</div>'),
-        't': ('<div style="margin-left: 5%;">', '</div>'),
-        'T': ('<div style="margin-left: %s;">', '</div>'),
+        't': ('<div style="text-indent: 5%;">', '</div>'),
+        'T': ('<div style="text-indent: %s;">', '</div>'),
         'i': ('<span style="font-style: italic;">', '</span>'),
         'u': ('<span style="text-decoration: underline;">', '</span>'),
         'd': ('<span style="text-decoration: line-through;">', '</span>'),
         'b': ('<span style="font-weight: bold;">', '</span>'),
         'l': ('<span style="font-size: 150%;">', '</span>'),
-        'k': ('<span style="font-size: 75%;">', '</span>'),
+        'k': ('<span style="font-size: 75%; font-variant: small-caps;">', '</span>'),
         'FN': ('<br /><br style="page-break-after: always;" /><div id="fn-%s"><p>', '</p><<small><a href="#rfn-%s">return</a></small></div>'),
         'SB': ('<br /><br style="page-break-after: always;" /><div id="sb-%s"><p>', '</p><small><a href="#rsb-%s">return</a></small></div>'),
     }
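Each PML code maps to an (open, close) HTML pair in the table above; the changes make \t and \T indent only the first line via text-indent rather than shifting the whole block, and render \k as genuine small caps. A toy application of the pairs (dict subset, helper name mine):

    STATES_HTML = {
        't': ('<div style="text-indent: 5%;">', '</div>'),
        'k': ('<span style="font-size: 75%; font-variant: small-caps;">', '</span>'),
    }

    def wrap(code, text):
        # Emit the opening tag, the run of text, then the closing tag.
        start, end = STATES_HTML[code]
        return '%s%s%s' % (start, text, end)

    assert wrap('k', 'Acme') == \
        '<span style="font-size: 75%; font-variant: small-caps;">Acme</span>'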
@@ -154,6 +154,11 @@ class PML_HTMLizer(object):
         self.file_name = ''

     def prepare_pml(self, pml):
+        # Give Chapters the form \\*='text'text\\*. This is used for generating
+        # the TOC later.
+        pml = re.sub(r'(?<=\\x)(?P<text>.*?)(?=\\x)', lambda match: '="%s"%s' % (self.strip_pml(match.group('text')), match.group('text')), pml)
+        pml = re.sub(r'(?<=\\X[0-4])(?P<text>.*?)(?=\\X[0-4])', lambda match: '="%s"%s' % (self.strip_pml(match.group('text')), match.group('text')), pml)
+
         # Remove comments
         pml = re.sub(r'(?mus)\\v(?P<text>.*?)\\v', '', pml)

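The two added substitutions rewrite every \x and \X chapter marker so a plain-text copy of the title is embedded up front, where the later TOC pass can read it without re-parsing inline markup. A worked example on a one-line PML string (strip_pml here is a toy stand-in for the real method):

    import re

    def strip_pml(text):
        # Toy stand-in: drop every inline PML code such as \i.
        return re.sub(r'\\.', '', text)

    pml = r'\xChapter \iOne\i\x'
    pml = re.sub(r'(?<=\\x)(?P<text>.*?)(?=\\x)',
                 lambda m: '="%s"%s' % (strip_pml(m.group('text')), m.group('text')),
                 pml)
    assert pml == r'\x="Chapter One"Chapter \iOne\i\x'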
@@ -163,7 +168,7 @@ class PML_HTMLizer(object):
         pml = re.sub(r'(?mus)(?<=.)[ ]*$', '', pml)
         pml = re.sub(r'(?mus)^[ ]*$', '', pml)

-        # Footnotes and Sidebars
+        # Footnotes and Sidebars.
         pml = re.sub(r'(?mus)<footnote\s+id="(?P<target>.+?)">\s*(?P<text>.*?)\s*</footnote>', lambda match: '\\FN="%s"%s\\FN' % (match.group('target'), match.group('text')) if match.group('text') else '', pml)
         pml = re.sub(r'(?mus)<sidebar\s+id="(?P<target>.+?)">\s*(?P<text>.*?)\s*</sidebar>', lambda match: '\\SB="%s"%s\\SB' % (match.group('target'), match.group('text')) if match.group('text') else '', pml)

@@ -171,9 +176,7 @@ class PML_HTMLizer(object):
         # &amp;. It will display as &
         pml = pml.replace('&', '&amp;')

-        pml = re.sub(r'(?<=\\x)(?P<text>.*?)(?=\\x)', lambda match: '="%s"%s' % (self.strip_pml(match.group('text')), match.group('text')), pml)
-        pml = re.sub(r'(?<=\\X[0-4])(?P<text>.*?)(?=\\X[0-4])', lambda match: '="%s"%s' % (self.strip_pml(match.group('text')), match.group('text')), pml)
-
+        # Replace \\a and \\U with either the unicode character or the entity.
         pml = re.sub(r'\\a(?P<num>\d{3})', lambda match: '&#%s;' % match.group('num'), pml)
         pml = re.sub(r'\\U(?P<num>[0-9a-f]{4})', lambda match: '%s' % my_unichr(int(match.group('num'), 16)), pml)

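With the chapter-marker rewrites moved up into prepare_pml's opening lines, what remains here is the entity pass itself: \aNNN becomes a decimal character reference and \Uhhhh becomes the actual unicode character. A runnable demonstration, with a plain unichr/chr shim in place of calibre's my_unichr:

    import re

    try:
        unichr
    except NameError:   # Python 3
        unichr = chr

    pml = r'caf\a233 / caf\U00e9'
    pml = re.sub(r'\\a(?P<num>\d{3})', lambda m: '&#%s;' % m.group('num'), pml)
    pml = re.sub(r'\\U(?P<num>[0-9a-f]{4})',
                 lambda m: unichr(int(m.group('num'), 16)), pml)
    assert pml == u'caf&#233; / caf\xe9'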
@@ -536,6 +539,7 @@ class PML_HTMLizer(object):
             elif '%s%s' % (c, l) == 'Sd':
                 text = self.process_code('Sd', line, 'sb')
             elif c in 'xXC':
+                empty = False
                 # The PML was modified eariler so x and X put the text
                 # inside of ="" so we don't have do special processing
                 # for C.
@@ -578,10 +582,7 @@ class PML_HTMLizer(object):
             else:
                 if c != ' ':
                     empty = False
-                if self.state['k'][0]:
-                    text = c.upper()
-                else:
-                    text = c
+                text = c
             parsed.append(text)
             c = line.read(1)

@@ -131,7 +131,7 @@ class PMLMLizer(object):
             if item.href in self.link_hrefs.keys():
                 toc.append('* \\q="#%s"%s\\q\n' % (self.link_hrefs[item.href], item.title))
             else:
-                self.oeb.warn('Ignoring toc item: %s not found in document.' % item)
+                self.oeb_book.warn('Ignoring toc item: %s not found in document.' % item)
         return ''.join(toc)

     def get_text(self):
@@ -131,9 +131,9 @@ class RtfTokenParser():
             if isString(self.tokens[i].name, "\\'"):
                 i = i + 1
                 if not isinstance(self.tokens[i], tokenData):
-                    raise BaseException('Error: token8bitChar without data.')
+                    raise Exception('Error: token8bitChar without data.')
                 if len(self.tokens[i].data) < 2:
-                    raise BaseException('Error: token8bitChar without data.')
+                    raise Exception('Error: token8bitChar without data.')
                 newTokens.append(token8bitChar(self.tokens[i].data[0:2]))
                 if len(self.tokens[i].data) > 2:
                     newTokens.append(tokenData(self.tokens[i].data[2:]))
@@ -195,7 +195,7 @@ class RtfTokenParser():
                     i = i + 1
                     j = j + 1
                     continue
-                raise BaseException('Error: incorect utf replacement.')
+                raise Exception('Error: incorect utf replacement.')

         #calibre rtf2xml does not support utfreplace
         replace = []
@@ -248,7 +248,7 @@ class RtfTokenizer():

             if isChar(self.rtfData[i], '\\'):
                 if i + 1 >= len(self.rtfData):
-                    raise BaseException('Error: Control character found at the end of the document.')
+                    raise Exception('Error: Control character found at the end of the document.')

                 if lastDataStart > -1:
                     self.tokens.append(tokenData(self.rtfData[lastDataStart : i]))
@@ -269,7 +269,7 @@ class RtfTokenizer():
                     i = i + 1

                 if not consumed:
-                    raise BaseException('Error (at:%d): Control Word without end.'%(tokenStart))
+                    raise Exception('Error (at:%d): Control Word without end.'%(tokenStart))

                 #we have numeric argument before delimiter
                 if isChar(self.rtfData[i], '-') or isDigit(self.rtfData[i]):
@@ -283,10 +283,10 @@ class RtfTokenizer():
                         l = l + 1
                         i = i + 1
                         if l > 10 :
-                            raise BaseException('Error (at:%d): Too many digits in control word numeric argument.'%[tokenStart])
+                            raise Exception('Error (at:%d): Too many digits in control word numeric argument.'%[tokenStart])

                     if not consumed:
-                        raise BaseException('Error (at:%d): Control Word without numeric argument end.'%[tokenStart])
+                        raise Exception('Error (at:%d): Control Word without numeric argument end.'%[tokenStart])

                     separator = ''
                     if isChar(self.rtfData[i], ' '):
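Throughout the RTF tokenizer, BaseException becomes Exception, so malformed input surfaces as an ordinary catchable error: handlers written as except Exception no longer miss it, and KeyboardInterrupt/SystemExit stay untouched. A two-line illustration:

    def tokenize(data):
        raise Exception('Error: Control character found at the end of the document.')

    try:
        tokenize('\\')
    except Exception as err:   # ordinary handlers now see parser failures
        message = str(err)
    assert 'Control character' in message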
Some files were not shown because too many files have changed in this diff.