KG updates

GRiker 2011-02-15 09:43:17 -07:00
commit 0bf3e3db12
92 changed files with 147810 additions and 3530 deletions


@ -193,6 +193,33 @@ License: GPL-3
The full text of the GPL is distributed as in
/usr/share/common-licenses/GPL-3 on Debian systems.
Files: src/calibre/ebooks/unihandecode/pykakasi/*
Copyright: 2011, Hiroshi Miura <miurahr@linux.com>
Copyright: 1992, Hironobu Takahashi
License: GPL-2+
The full text of the GPL is distributed as in
/usr/share/common-licenses/GPL on Debian systems.
Files: resources/kanwadict2.db
Files: resources/itaijidict2.pickle
Copyright: 2011, Hiroshi Miura <miurahr@linux.com>
Copyright: 1992 1993 1994, Hironobu Takahashi (takahasi@tiny.or.jp),
Copyright: 1992 1993 1994, Masahiko Sato (masahiko@sato.riec.tohoku.ac.jp),
Copyright: 1992 1993 1994, Yukiyoshi Kameyama, Miki Inooka, Akihiko Sasaki, Dai Ando, Junichi Okukawa,
Copyright: 1992 1993 1994, Katsushi Sato and Nobuhiro Yamagishi
License: GPL-2+
The full text of the GPL is distributed as in
/usr/share/common-licenses/GPL on Debian systems.
Files: src/calibre/ebooks/unihandecode/*
Copyright: 2010-2011, Hiroshi Miura <miurahr@linux.com>
Copyright: 2009, John Schember
Copyright: 2007, Russell Norris
Copyright: 2001, Sean M. Burke
License: GPL-3, Perl
The full text of the GPL is distributed as in
/usr/share/common-licenses/GPL-3 on Debian systems.
Files: src/encutils/__init__.py
Copyright: 2005-2008: Christof Hoeke
License: LGPL-3+, CC-BY-3.0


@ -13,12 +13,12 @@
id="Layer_1"
x="0px"
y="0px"
width="134.77701"
height="199.99901"
viewBox="0 0 134.777 199.999"
width="200"
height="200"
viewBox="0 0 199.99999 199.99999"
enable-background="new 0 0 595.28 841.89"
xml:space="preserve"
inkscape:version="0.47 r22583"
inkscape:version="0.48.0 r9654"
sodipodi:docname="news.svg"><metadata
id="metadata26"><rdf:RDF><cc:Work
rdf:about=""><dc:format>image/svg+xml</dc:format><dc:type
@ -38,22 +38,22 @@
guidetolerance="10"
inkscape:pageopacity="0"
inkscape:pageshadow="2"
inkscape:window-width="640"
inkscape:window-height="489"
inkscape:window-width="1680"
inkscape:window-height="997"
id="namedview22"
showgrid="false"
inkscape:zoom="0.28032165"
inkscape:cx="67.389001"
inkscape:cy="99.722002"
inkscape:window-x="0"
inkscape:window-y="41"
inkscape:window-maximized="0"
inkscape:window-x="-4"
inkscape:window-y="30"
inkscape:window-maximized="1"
inkscape:current-layer="Layer_1" />
<g
id="g3"
transform="translate(-230.25101,-320.668)">
transform="translate(-194.57771,-320.66701)">
<polygon
points="360.241,366.109 345.29,359.678 345.29,343.405 329.945,343.405 324.265,329.15 309.147,335.175 297.64,323.667 286.79,334.517 272.693,328.454 266.263,343.405 249.988,343.405 249.988,358.749 235.734,364.429 241.759,379.548 230.251,391.056 241.101,401.906 235.039,416.002 249.988,422.432 249.988,438.706 265.333,438.706 271.013,452.961 277.817,450.25 277.817,475.111 252.085,475.111 297.64,520.667 343.193,475.111 317.463,475.111 317.463,451.453 322.585,453.656 329.016,438.706 345.29,438.706 345.29,423.362 359.546,417.682 353.521,402.563 365.028,391.056 354.178,380.205 "
points="286.79,334.517 272.693,328.454 266.263,343.405 249.988,343.405 249.988,358.749 235.734,364.429 241.759,379.548 230.251,391.056 241.101,401.906 235.039,416.002 249.988,422.432 249.988,438.706 265.333,438.706 271.013,452.961 277.817,450.25 277.817,475.111 252.085,475.111 297.64,520.667 343.193,475.111 317.463,475.111 317.463,451.453 322.585,453.656 329.016,438.706 345.29,438.706 345.29,423.362 359.546,417.682 353.521,402.563 365.028,391.056 354.178,380.205 360.241,366.109 345.29,359.678 345.29,343.405 329.945,343.405 324.265,329.15 309.147,335.175 297.64,323.667 "
id="polygon5"
style="fill:#ffffff" />
<linearGradient
@ -73,7 +73,7 @@
id="stop10" />
</linearGradient>
<polygon
points="360.241,363.11 345.29,356.679 345.29,340.406 329.945,340.406 324.265,326.151 309.147,332.176 297.64,320.668 286.79,331.518 272.693,325.455 266.263,340.406 249.988,340.406 249.988,355.75 235.734,361.43 241.759,376.549 230.251,388.057 241.101,398.907 235.039,413.003 249.988,419.433 249.988,435.707 265.333,435.707 271.013,449.962 277.817,447.251 277.817,472.112 252.085,472.112 297.64,517.668 343.193,472.112 317.463,472.112 317.463,448.454 322.585,450.657 329.016,435.707 345.29,435.707 345.29,420.363 359.546,414.683 353.521,399.564 365.028,388.057 354.178,377.206 "
points="286.79,331.518 272.693,325.455 266.263,340.406 249.988,340.406 249.988,355.75 235.734,361.43 241.759,376.549 230.251,388.057 241.101,398.907 235.039,413.003 249.988,419.433 249.988,435.707 265.333,435.707 271.013,449.962 277.817,447.251 277.817,472.112 252.085,472.112 297.64,517.668 343.193,472.112 317.463,472.112 317.463,448.454 322.585,450.657 329.016,435.707 345.29,435.707 345.29,420.363 359.546,414.683 353.521,399.564 365.028,388.057 354.178,377.206 360.241,363.11 345.29,356.679 345.29,340.406 329.945,340.406 324.265,326.151 309.147,332.176 297.64,320.668 "
id="polygon12"
style="fill:url(#SVGID_1_)" />
<g
@ -81,14 +81,16 @@
<path
d="m 273.311,419.168 v -56.752 h 17.935 l 9.01,17.43 c 3.115,5.641 6.399,13.22 8.926,19.873 h 0.252 c -0.842,-7.494 -1.178,-15.41 -1.178,-23.83 v -13.472 h 13.893 v 56.752 H 306.15 l -9.684,-18.861 c -3.116,-5.978 -6.82,-13.641 -9.515,-20.461 h -0.336 c 0.42,7.663 0.589,16.167 0.589,25.345 v 13.978 h -13.893 z"
id="path16"
style="fill:#993720" />
style="fill:#993720"
inkscape:connector-curvature="0" />
</g>
<g
id="g18">
<path
d="m 273.311,416.873 v -56.752 h 17.935 l 9.01,17.43 c 3.115,5.641 6.399,13.22 8.926,19.873 h 0.252 c -0.842,-7.494 -1.178,-15.41 -1.178,-23.83 v -13.472 h 13.893 v 56.752 H 306.15 l -9.684,-18.861 c -3.116,-5.978 -6.82,-13.641 -9.515,-20.461 h -0.336 c 0.42,7.663 0.589,16.167 0.589,25.345 v 13.978 h -13.893 z"
id="path20"
style="fill:#f0efef" />
style="fill:#f0efef"
inkscape:connector-curvature="0" />
</g>
</g>
</svg>

Before: 4.1 KiB | After: 4.2 KiB

@ -105,7 +105,7 @@ bool_custom_columns_are_tristate = 'yes'
# title within authors.
sort_columns_at_startup = None
#; Control how dates are displayed
#: Control how dates are displayed
# Format to be used for publication date and the timestamp (date).
# A string controlling how the publication date is displayed in the GUI
# d the day as number without a leading zero (1 to 31)
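
For context, a hedged illustration of the tweak these comment lines document (the name gui_pubdate_display_format matches calibre's tweaks.py of this period; the value is illustrative, not a recommendation):

# Show the publication date as e.g. "5 Feb 2011"; 'd' is the day
# without a leading zero, as described above.
gui_pubdate_display_format = 'd MMM yyyy'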

Binary file not shown. (Before: 5.6 KiB | After: 6.2 KiB)

Binary file not shown. (After: 834 B)


@ -0,0 +1,50 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
adevarul.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Adevarul(BasicNewsRecipe):
title = u'Adev\u0103rul'
language = 'ro'
__author__ = u'Silviu Cotoar\u0103'
description = u'\u0218tiri din Rom\u00e2nia'
publisher = 'Adevarul'
category = 'Ziare,Stiri,Romania'
oldest_article = 5
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'utf-8'
remove_javascript = True
cover_url = 'http://upload.wikimedia.org/wikipedia/en/d/d6/Logo_noul_adevarul.png'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [ dict(name='div', attrs={'class':'article_header'})
,dict(name='div', attrs={'class':'bd'})
]
remove_tags = [ dict(name='div', attrs={'class':'bb-wg-article_related_attachements'})
,dict(name='div', attrs={'class':'bb-md bb-md-article_comments'})
,dict(name='form', attrs={'id':'bb-comment-create-form'})
]
remove_tags_after = [ dict(name='form', attrs={'id':'bb-comment-create-form'}) ]
feeds = [ (u'\u0218tiri', u'http://www.adevarul.ro/rss/latest') ]
def preprocess_html(self, soup):
return self.adeify_images(soup)


@ -0,0 +1,161 @@
# -*- coding: utf-8 -*-
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class AppleDaily(BasicNewsRecipe):
title = u'蘋果日報'
__author__ = u'蘋果日報'
__publisher__ = u'蘋果日報'
description = u'蘋果日報'
masthead_url = 'http://hk.apple.nextmedia.com/template/common/header/2009/images/atnextheader_logo_appledaily.gif'
language = 'zh_TW'
encoding = 'UTF-8'
timefmt = ' [%a, %d %b, %Y]'
needs_subscription = False
remove_javascript = True
remove_tags_before = dict(name=['ul', 'h1'])
remove_tags_after = dict(name='form')
remove_tags = [dict(attrs={'class':['articleTools', 'post-tools', 'side_tool', 'nextArticleLink clearfix']}),
dict(id=['footer', 'toolsRight', 'articleInline', 'navigation', 'archive', 'side_search', 'blog_sidebar', 'side_tool', 'side_index']),
dict(name=['script', 'noscript', 'style', 'form'])]
no_stylesheets = True
extra_css = '''
@font-face {font-family: "uming", serif, sans-serif; src: url(res:///usr/share/fonts/truetype/arphic/uming.ttc); }\n
body {margin-right: 8pt; font-family: 'uming', serif;}
h1 {font-family: 'uming', serif, sans-serif}
'''
#extra_css = 'h1 {font: sans-serif large;}\n.byline {font:monospace;}'
preprocess_regexps = [
(re.compile(r'img.php?server=(?P<server>[^&]+)&path=(?P<path>[^&]+).*', re.DOTALL|re.IGNORECASE),
lambda match: 'http://' + match.group('server') + '/' + match.group('path')),
]
def get_cover_url(self):
return 'http://hk.apple.nextmedia.com/template/common/header/2009/images/atnextheader_logo_appledaily.gif'
#def get_browser(self):
#br = BasicNewsRecipe.get_browser()
#if self.username is not None and self.password is not None:
# br.open('http://www.nytimes.com/auth/login')
# br.select_form(name='login')
# br['USERID'] = self.username
# br['PASSWORD'] = self.password
# br.submit()
#return br
def preprocess_html(self, soup):
#process all the images
for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
iurl = tag['src']
#print 'checking image: ' + iurl
#img\.php?server\=(?P<server>[^&]+)&path=(?P<path>[^&]+)
p = re.compile(r'img\.php\?server=(?P<server>[^&]+)&path=(?P<path>[^&]+)', re.DOTALL|re.IGNORECASE)
m = p.search(iurl)
if m is not None:
iurl = 'http://' + m.group('server') + '/' + m.group('path')
#print 'working! new url: ' + iurl
tag['src'] = iurl
#else:
#print 'not good'
for tag in soup.findAll(lambda tag: tag.name.lower()=='a' and tag.has_key('href')):
iurl = tag['href']
#print 'checking image: ' + iurl
#img\.php?server\=(?P<server>[^&]+)&path=(?P<path>[^&]+)
p = re.compile(r'img\.php\?server=(?P<server>[^&]+)&path=(?P<path>[^&]+)', re.DOTALL|re.IGNORECASE)
m = p.search(iurl)
if m is not None:
iurl = 'http://' + m.group('server') + '/' + m.group('path')
#print 'working! new url: ' + iurl
tag['href'] = iurl
#else:
#print 'not good'
return soup
def parse_index(self):
base = 'http://news.hotpot.hk/fruit'
soup = self.index_to_soup('http://news.hotpot.hk/fruit/index.php')
#def feed_title(div):
# return ''.join(div.findAll(text=True, recursive=False)).strip()
articles = {}
key = None
ans = []
for div in soup.findAll('li'):
key = div.find(text=True, recursive=True);
#if key == u'豪情':
# continue;
print 'section=' + key
articles[key] = []
ans.append(key)
a = div.find('a', href=True)
if not a:
continue
url = base + '/' + a['href']
print 'url=' + url
if not articles.has_key(key):
articles[key] = []
else:
# sub page
subSoup = self.index_to_soup(url)
for subDiv in subSoup.findAll('li'):
subA = subDiv.find('a', href=True)
subTitle = subDiv.find(text=True, recursive=True)
subUrl = base + '/' + subA['href']
print 'subUrl=' + subUrl
articles[key].append(
dict(title=subTitle,
url=subUrl,
date='',
description='',
content=''))
# elif div['class'] in ['story', 'story headline']:
# a = div.find('a', href=True)
# if not a:
# continue
# url = re.sub(r'\?.*', '', a['href'])
# url += '?pagewanted=all'
# title = self.tag_to_string(a, use_alt=True).strip()
# description = ''
# pubdate = strftime('%a, %d %b')
# summary = div.find(True, attrs={'class':'summary'})
# if summary:
# description = self.tag_to_string(summary, use_alt=False)
#
# feed = key if key is not None else 'Uncategorized'
# if not articles.has_key(feed):
# articles[feed] = []
# if not 'podcasts' in url:
# articles[feed].append(
# dict(title=title, url=url, date=pubdate,
# description=description,
# content=''))
# ans = self.sort_index_by(ans, {'The Front Page':-1, 'Dining In, Dining Out':1, 'Obituaries':2})
ans = [(unicode(key), articles[key]) for key in ans if articles.has_key(key)]
return ans


@ -0,0 +1,44 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
capital.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Capital(BasicNewsRecipe):
title = 'Capital'
__author__ = u'Silviu Cotoar\u0103'
description = u'\u0218tiri din Rom\u00e2nia'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri,Romania'
encoding = 'utf-8'
remove_javascript = True
publisher = 'Capital'
cover_url = 'http://www.mediapress.ro/imagini/sigla-capital-s16.gif'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [ dict(name='div', attrs={'class':'single one_article'})
]
remove_tags = [ dict(name='div', attrs={'class':'single_details'})
, dict(name='div', attrs={'class':'tx-addoceansbanners-pi1'})
]
feeds = [(u'\u0218tiri', u'http://www.capital.ro/rss.html') ]
def preprocess_html(self, soup):
return self.adeify_images(soup)


@ -0,0 +1,53 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
catavencu.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Catavencu(BasicNewsRecipe):
title = u'Academia Ca\u0163avencu'
__author__ = u'Silviu Cotoar\u0103'
description = 'Tagma cum laude'
publisher = 'Catavencu'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare'
encoding = 'utf-8'
cover_url = 'http://upload.wikimedia.org/wikipedia/en/1/1e/Academia_Catavencu.jpg'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='ul', attrs={'class':'articles'})
]
remove_tags = [
dict(name='div', attrs={'class':['tools']})
, dict(name='div', attrs={'class':['share']})
, dict(name='div', attrs={'class':['category']})
, dict(name='div', attrs={'id':['comments']})
]
remove_tags_after = [
dict(name='div', attrs={'id':'comments'})
]
feeds = [
(u'Feeds', u'http://catavencu.ro/feed/rss')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)


@ -1,5 +1,5 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
standaard.be
'''
@ -9,15 +9,16 @@ from calibre.web.feeds.news import BasicNewsRecipe
class DeStandaard(BasicNewsRecipe):
title = u'De Standaard'
__author__ = u'Darko Miletic'
language = 'nl_BE'
language = 'nl_BE'
description = u'News from Belgium in Dutch'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'utf-8'
masthead_url = 'http://www.standaard.be/extra/css/images/masthead/logo_340x45.png'
publication_type = 'newspaper'
keep_only_tags = [dict(name='div' , attrs={'id':['intro','continued']})]
feeds = [(u'De Standaard Online', u'http://feeds.feedburner.com/dso-front')]
@ -27,4 +28,4 @@ class DeStandaard(BasicNewsRecipe):
return article.get('guid', None)
def print_version(self, url):
return url.replace('/Detail.aspx?','/PrintArtikel.aspx?')
return url.replace('/artikel/detail.aspx?','/Artikel/PrintArtikel.aspx?')


@ -0,0 +1,47 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
gandul.info
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Gandul(BasicNewsRecipe):
title = u'G\u00E2ndul'
__author__ = u'Silviu Cotoar\u0103'
publisher = 'Gandul'
description = 'Cotidian Online'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri,Romania'
encoding = 'utf-8'
cover_url = 'http://storage0.dms.mpinteractiv.ro/media/1/1/1706/1064063/1/logo.jpg?width=400'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='div', attrs={'class':'article'})
]
remove_tags = [
dict(name='a', attrs={'class':'photo'})
, dict(name='div', attrs={'class':'ad'})
]
feeds = [
(u'\u0218tiri', u'http://www.gandul.info/rss-stiri-prima-pagina.xml')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)


@ -0,0 +1,46 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
hotnews.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Hotnews(BasicNewsRecipe):
title = 'Hotnews'
__author__ = u'Silviu Cotoar\u0103'
description = u'\u0218tiri din Rom\u00e2nia'
publisher = 'Hotnews'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri,Romania'
encoding = 'utf-8'
cover_url = 'http://www.hotnews.ro/images/new/logo.gif'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='h1', attrs={'class':'title'})
,dict(name='div', attrs={'id':'articleContent'})
]
feeds = [ (u'\u0218tiri', u'http://www.hotnews.ro/rss/actualitate')
,(u'English', u'http://www.hotnews.ro/rss/english')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)


@ -0,0 +1,54 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
jurnalul.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class JurnalulNational(BasicNewsRecipe):
title = u'Jurnalul Na\u0163ional'
__author__ = u'Silviu Cotoar\u0103'
description = u'\u0218tiri din Rom\u00e2nia'
publisher = 'Jurnalul National'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri,Romania'
encoding = 'utf-8'
cover_url = 'http://www.jurnalul.ro/images/sigla.png'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='h1', attrs={'class':'h3 art_title'})
,dict(name='div', attrs={'class':'only_text'})
]
feeds = [
(u'\u0218tiri', u'http://www.jurnalul.ro/rss/stiri-3028.html')
,(u'Special', u'http://www.jurnalul.ro/rss/special-3001.html')
,(u'Sport', u'http://www.jurnalul.ro/rss/sport-3035.html')
,(u'Bani Afaceri', u'http://www.jurnalul.ro/rss/bani-afaceri-3006.html')
,(u'Viata Sanatoasa', u'http://www.jurnalul.ro/rss/viata-sanatoasa-3010.html')
,(u'Stiinta Tehnica', u'http://www.jurnalul.ro/rss/stiinta-tehnica-3019.html')
,(u'Timp Liber', u'http://www.jurnalul.ro/rss/timp-liber-3022.html')
,(u'Fun', u'http://www.jurnalul.ro/rss/fun-3038.html')
,(u'Acum 20 de ani', u'http://www.jurnalul.ro/rss/acum-20-de-ani-3073.html')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)


@ -8,7 +8,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class Lifehacker(BasicNewsRecipe):
title = 'Lifehacker'
__author__ = 'NA'
__author__ = 'Kovid Goyal'
description = "Computers make us more productive. Yeah, right. Lifehacker recommends the software downloads and web sites that actually save time. Don't live to geek; geek to live."
publisher = 'lifehacker.com'
category = 'news, IT, Internet, gadgets, tips and tricks, howto, diy'
@ -32,14 +32,20 @@ class Lifehacker(BasicNewsRecipe):
, 'language' : language
}
remove_attributes = ['width','height']
keep_only_tags = [dict(attrs={'class':'content permalink'})]
remove_attributes = ['width', 'height', 'style']
remove_tags_before = dict(name='h1')
remove_tags = [dict(attrs={'class':'contactinfo'})]
remove_tags_after = dict(attrs={'class':'contactinfo'})
keep_only_tags = [dict(id='container')]
remove_tags_after = dict(attrs={'class':'post-body'})
remove_tags = [
dict(id="sharemenu"),
{'class': 'related'},
]
feeds = [(u'Articles', u'http://feeds.gawker.com/lifehacker/full')]
def preprocess_html(self, soup):
return self.adeify_images(soup)
def print_version(self, url):
return url.replace('#!', '?_escaped_fragment_=')
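
The new print_version maps Gawker's hash-bang article URLs onto Google's _escaped_fragment_ (AJAX-crawling) form, which serves a crawlable static page; a sketch with an illustrative URL:

url = 'http://lifehacker.com/#!5753547/some-post'  # made-up article URL
print url.replace('#!', '?_escaped_fragment_=')
# -> http://lifehacker.com/?_escaped_fragment_=5753547/some-post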


@ -0,0 +1,52 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
mediafax.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Mediafax(BasicNewsRecipe):
title = 'Mediafax'
__author__ = u'Silviu Cotoar\u0103'
description = u'\u0218tiri din Rom\u00e2nia'
publisher = 'Mediafax'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri,Romania'
encoding = 'utf-8'
cover_url = 'http://storage0.dms.mpinteractiv.ro/media/1/1/1706/4134575/2/logo-mediafax-mass-media-news.jpg?width=400'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='div', attrs={'class':'news tabs-container'})
]
remove_tags = [
dict(name='ul', attrs={'class':['CategoryNews']})
,dict(name='div', attrs={'class':['read']})
]
remove_tags_after = [ dict(name='div', attrs={'class':'cmsItemViews'}) ]
feeds = [
(u'Actualitate', u'http://www.mediafax.ro/rss/')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)


@ -0,0 +1,54 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
money.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class MoneyRo(BasicNewsRecipe):
title = 'Money Ro'
__author__ = u'Silviu Cotoar\u0103'
description = u'\u0218tiri din Rom\u00e2nia'
publisher = 'MoneyRo'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri,Romania'
encoding = 'utf-8'
remove_javascript = True
cover_url = 'http://assets.moneyweb.ro/images/logo_money.jpg'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [ dict(name='div', attrs={'id':'titluArticol'})
, dict(name='img', attrs={'id':'objImage'})
, dict(name='div', attrs={'class':'leftColumnArticle'})
]
remove_tags_after = [ dict(name='div', attrs={'id':'articleTags'}) ]
remove_tags = [ dict(name='div', attrs={'id':'ads'})
, dict(name='div', attrs={'id':'aus'})
, dict(name='div', attrs={'id':'bb-comment-create-form'})
, dict(name='div', attrs={'id':'articleTags'})
, dict(name='div', attrs={'class':'breadcrumb'})
]
feeds = [(u'\u0218tiri', u'http://moneyro.feedsportal.com/c/32533/fe.ed/rss.money.ro/stiri.xml') ]
def preprocess_html(self, soup):
return self.adeify_images(soup)


@ -668,7 +668,7 @@ class NYTimes(BasicNewsRecipe):
try:
#remove "Related content" bar
runAroundsFound = soup.findAll('div',{'class':['articleInline runaroundLeft','articleInline doubleRule runaroundLeft','articleInline runaroundLeft firstArticleInline','articleInline runaroundLeft ']})
runAroundsFound = soup.findAll('div',{'class':['articleInline runaroundLeft','articleInline doubleRule runaroundLeft','articleInline runaroundLeft firstArticleInline','articleInline runaroundLeft ','articleInline runaroundLeft lastArticleInline']})
if runAroundsFound:
for runAround in runAroundsFound:
#find all section headers


@ -0,0 +1,49 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
prosport.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Prosport(BasicNewsRecipe):
title = 'Prosport'
__author__ = u'Silviu Cotoar\u0103'
publisher = 'Prosport'
description = u'\u0218tiri Sportive din Rom\u00e2nia'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri,Romania,Sport'
encoding = 'utf-8'
cover_url = 'http://storage0.dms.mpinteractiv.ro/media/401/581/7946/3688311/1/logo-pro.jpg?width=610'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='h1', attrs={'class':'a-title'})
,dict(name='div', attrs={'class':'a-entry'})
]
remove_tags = [ dict(name='div', attrs={'class':'utils'})
,dict(name='div', attrs={'class':'g-slide'})
]
feeds = [ (u'\u0218tiri', u'http://www.prosport.ro/rss.xml')]
def preprocess_html(self, soup):
return self.adeify_images(soup)


@ -0,0 +1,45 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
realitatea.net
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Realitatea(BasicNewsRecipe):
title = 'Realitatea'
__author__ = u'Silviu Cotoar\u0103'
publisher = 'Realitatea'
description = u'\u0218tiri din Rom\u00e2nia'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri,Romania'
encoding = 'utf-8'
cover_url = 'http://assets.realitatea.ro/images/logo.jpg'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='div', attrs={'class':'articleTitle '})
,dict(name='div', attrs={'class':'articleBody'})
]
remove_tags = [ dict(name='div', attrs={'id':'aus'}) ]
feeds = [ (u'\u0218tiri', u'http://realitatea.feedsportal.com/c/32533/fe.ed/rss.realitatea.net/stiri.xml') ]
def preprocess_html(self, soup):
return self.adeify_images(soup)


@ -1,5 +1,5 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
smh.com.au
'''
@ -22,7 +22,11 @@ class Smh_au(BasicNewsRecipe):
remove_empty_feeds = True
masthead_url = 'http://images.smh.com.au/2010/02/02/1087188/smh-620.jpg'
publication_type = 'newspaper'
extra_css = ' h1{font-family: Georgia,"Times New Roman",Times,serif } body{font-family: Arial,Helvetica,sans-serif} .cT-imageLandscape{font-size: x-small} '
extra_css = """
h1{font-family: Georgia,"Times New Roman",Times,serif }
body{font-family: Arial,Helvetica,sans-serif}
.cT-imageLandscape,.cT-imagePortrait{font-size: x-small}
"""
conversion_options = {
'comment' : description
@ -38,7 +42,11 @@ class Smh_au(BasicNewsRecipe):
]
remove_tags_after = [dict(name='div',attrs={'class':'articleBody'})]
keep_only_tags = [dict(name='div',attrs={'id':'content'})]
remove_attributes = ['width','height']
remove_tags = [
dict(attrs={'class':'hidden'}),
dict(name=['link','meta','base','embed','object','iframe'])
]
remove_attributes = ['width','height','lang']
def parse_index(self):
articles = []
@ -66,3 +74,14 @@ class Smh_au(BasicNewsRecipe):
,'description':description
})
return [(self.tag_to_string(soup.find('title')), articles)]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll('body'):
item.name = 'div'
for item in soup.findAll('img'):
if not item.has_key('alt'):
item['alt'] = 'image'
return soup


@ -0,0 +1,46 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
standard.money.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class StandardMoneyRo(BasicNewsRecipe):
title = 'Standard Money Ro'
__author__ = u'Silviu Cotoar\u0103'
publisher = 'Standard Money'
description = 'Portal de Business'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri,Romania'
encoding = 'utf-8'
cover_url = 'http://assets.standard.ro/wp-content/themes/standard/images/standard-logo.gif'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='h1', attrs={'class':'post-title'})
, dict(name='div', attrs={'class':'content_post'})
]
feeds = [
(u'Actualitate', u'http://standard.money.ro/feed')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)


@ -0,0 +1,26 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
class WorkersWorld(BasicNewsRecipe):
title = u'Workers World'
description = u'Socialist news and analysis'
__author__ = u'urslnx'
no_stylesheets = True
use_embedded_content = False
remove_javascript = True
oldest_article = 7
max_articles_per_feed = 100
encoding = 'utf8'
publisher = 'workers.org'
category = 'news, politics, USA, world'
language = 'en'
publication_type = 'newsportal'
extra_css = ' body{ font-family: Verdana,Arial,Helvetica,sans-serif; } h1{ font-size: x-large; text-align: left; margin-top:0.5em; margin-bottom:0.25em; } h2{ font-size: large; } p{ text-align: left; } .published{ font-size: small; } .byline{ font-size: small; } .copyright{ font-size: small; } '
remove_tags_before = dict(name='div', attrs={'id':'evernote'})
remove_tags_after = dict(name='div', attrs={'id':'footer'})
masthead_url='http://www.workers.org/graphics/wwlogo300.gif'
cover_url = 'http://www.workers.org/pdf/current.jpg'
feeds = [(u'Headlines', u'http://www.workers.org/rss/nonstandard_rss.xml'),
]


@ -0,0 +1,45 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
zf.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class ZiarulFinanciar(BasicNewsRecipe):
title = 'Ziarul Financiar'
__author__ = u'Silviu Cotoar\u0103'
description = u'\u0218tiri din Business'
publisher = 'Ziarul Financiar'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri,Romania'
encoding = 'utf-8'
cover_url = 'http://storage0.dms.mpinteractiv.ro/media/1/1/1706/7462721/1/ziarul-financiar-big.jpg?width=400'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='div', attrs={'class':'article'})
]
feeds = [
(u'\u0218tiri', u'http://www.zf.ro/rss/zf-24/')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)


@ -6,9 +6,10 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, cPickle
import os, cPickle, re, anydbm, shutil
from zlib import compress
from setup import Command, basenames
from setup import Command, basenames, __appname__
def get_opts_from_parser(parser):
def do_opt(opt):
@ -26,6 +27,9 @@ class Resources(Command):
description = 'Compile various needed calibre resources'
KAKASI_PATH = os.path.join(Command.SRC, __appname__,
'ebooks', 'unihandecode', 'pykakasi')
def run(self, opts):
scripts = {}
for x in ('console', 'gui'):
@ -101,11 +105,107 @@ class Resources(Command):
import json
json.dump(function_dict, open(dest, 'wb'), indent=4)
self.run_kakasi(opts)
def run_kakasi(self, opts):
self.records = {}
src = self.j(self.KAKASI_PATH, 'kakasidict.utf8')
dest = self.j(self.RESOURCES, 'localization',
'pykakasi','kanwadict2.db')
base = os.path.dirname(dest)
if not os.path.exists(base):
os.makedirs(base)
if self.newer(dest, src):
self.info('\tGenerating Kanwadict')
for line in open(src, "r"):
self.parsekdict(line)
self.kanwaout(dest)
src = self.j(self.KAKASI_PATH, 'itaijidict.utf8')
dest = self.j(self.RESOURCES, 'localization',
'pykakasi','itaijidict2.pickle')
if self.newer(dest, src):
self.info('\tGenerating Itaijidict')
self.mkitaiji(src, dest)
src = self.j(self.KAKASI_PATH, 'kanadict.utf8')
dest = self.j(self.RESOURCES, 'localization',
'pykakasi','kanadict2.pickle')
if self.newer(dest, src):
self.info('\tGenerating kanadict')
self.mkkanadict(src, dest)
return
def mkitaiji(self, src, dst):
dic = {}
for line in open(src, "r"):
line = line.decode("utf-8").strip()
if line.startswith(';;'): # skip comment
continue
if re.match(r"^$",line):
continue
pair = re.sub(r'\\u([0-9a-fA-F]{4})', lambda x:unichr(int(x.group(1),16)), line)
dic[pair[0]] = pair[1]
cPickle.dump(dic, open(dst, 'w'), protocol=-1) #pickle
def mkkanadict(self, src, dst):
dic = {}
for line in open(src, "r"):
line = line.decode("utf-8").strip()
if line.startswith(';;'): # skip comment
continue
if re.match(r"^$",line):
continue
(alpha, kana) = line.split(' ')
dic[kana] = alpha
cPickle.dump(dic, open(dst, 'w'), protocol=-1) #pickle
def parsekdict(self, line):
line = line.decode("utf-8").strip()
if line.startswith(';;'): # skip comment
return
(yomi, kanji) = line.split(' ')
if ord(yomi[-1:]) <= ord('z'):
tail = yomi[-1:]
yomi = yomi[:-1]
else:
tail = ''
self.updaterec(kanji, yomi, tail)
def updaterec(self, kanji, yomi, tail):
key = "%04x"%ord(kanji[0])
if key in self.records:
if kanji in self.records[key]:
rec = self.records[key][kanji]
rec.append((yomi,tail))
self.records[key].update( {kanji: rec} )
else:
self.records[key][kanji]=[(yomi, tail)]
else:
self.records[key] = {}
self.records[key][kanji]=[(yomi, tail)]
def kanwaout(self, out):
dic = anydbm.open(out, 'c')
for (k, v) in self.records.iteritems():
dic[k] = compress(cPickle.dumps(v, -1))
dic.close()
def clean(self):
for x in ('scripts', 'recipes', 'ebook-convert-complete'):
x = self.j(self.RESOURCES, x+'.pickle')
if os.path.exists(x):
os.remove(x)
kakasi = self.j(self.RESOURCES, 'localization', 'pykakasi')
if os.path.exists(kakasi):
shutil.rmtree(kakasi)
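
To make the on-disk format concrete, a minimal sketch of reading one record back out of the generated database, mirroring kanwaout() above (the path and the sample kanji are assumptions; the lookup only succeeds if the dictionary has an entry for that character):

import anydbm, cPickle
from zlib import decompress

db = anydbm.open('resources/localization/pykakasi/kanwadict2.db', 'r')
key = '%04x' % ord(u'\u4e9c')  # keys are the first kanji's codepoint in hex
print cPickle.loads(decompress(db[key]))  # -> {kanji: [(yomi, tail), ...]}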


@ -90,6 +90,11 @@ class Plugin(object): # {{{
an optional method validate() that takes no arguments and is called
immediately after the user clicks OK. Changes are applied if and only
if the method returns True.
If for some reason you cannot perform the configuration at this time,
return a tuple of two strings (message, details); these will be
displayed as a warning dialog to the user and the process will be
aborted.
'''
raise NotImplementedError()
@ -133,6 +138,12 @@ class Plugin(object): # {{{
except NotImplementedError:
config_widget = None
if isinstance(config_widget, tuple):
from calibre.gui2 import warning_dialog
warning_dialog(parent, _('Cannot configure'), config_widget[0],
det_msg=config_widget[1], show=True)
return False
if config_widget is not None:
v.addWidget(config_widget)
v.addWidget(button_box)
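
A minimal sketch (an entirely hypothetical plugin, not one from this commit) of the new contract: returning a (message, details) tuple from config_widget() aborts configuration with the warning dialog shown above:

from calibre.customize import Plugin

class ExamplePlugin(Plugin):  # hypothetical
    name = 'Example Plugin'
    supported_platforms = ['windows', 'osx', 'linux']

    def config_widget(self):
        device_ready = False  # stand-in for a real readiness check
        if not device_ready:
            return ('Cannot configure right now',
                    'Connect the device first, then try again.')
        return None  # real code would build and return a QWidget here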


@ -2,11 +2,13 @@ import os.path
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import textwrap, os, glob, functools
import textwrap, os, glob, functools, re
from calibre import guess_type
from calibre.customize import FileTypePlugin, MetadataReaderPlugin, \
MetadataWriterPlugin, PreferencesPlugin, InterfaceActionBase
from calibre.constants import numeric_version
from calibre.ebooks.metadata.archive import ArchiveExtract, get_cbz_metadata
from calibre.ebooks.oeb.base import OEB_IMAGES
# To archive plugins {{{
class HTML2ZIP(FileTypePlugin):
@ -82,6 +84,66 @@ class PML2PMLZ(FileTypePlugin):
return of.name
class TXT2TXTZ(FileTypePlugin):
name = 'TXT to TXTZ'
author = 'John Schember'
description = _('Create a TXTZ archive when a TXT file is imported '
'containing Markdown or Textile references to images. The referenced '
'images as well as the TXT file are added to the archive.')
version = numeric_version
file_types = set(['txt'])
supported_platforms = ['windows', 'osx', 'linux']
on_import = True
def _get_image_references(self, txt, base_dir):
images = []
# Textile
for m in re.finditer(ur'(?mu)(?:[\[{])?\!(?:\. )?(?P<path>[^\s(!]+)\s?(?:\(([^\)]+)\))?\!(?::(\S+))?(?:[\]}]|(?=\s|$))', txt):
path = m.group('path')
if path and not os.path.isabs(path) and guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)):
images.append(path)
# Markdown inline
for m in re.finditer(ur'(?mu)\!\[([^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*)\]\s*\((?P<path>[^\)]*)\)', txt):
path = m.group('path')
if path and not os.path.isabs(path) and guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)):
images.append(path)
# Markdown reference
refs = {}
for m in re.finditer(ur'(?mu)^(\ ?\ ?\ ?)\[(?P<id>[^\]]*)\]:\s*(?P<path>[^\s]*)$', txt):
if m.group('id') and m.group('path'):
refs[m.group('id')] = m.group('path')
for m in re.finditer(ur'(?mu)\!\[([^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*)\]\s*\[(?P<id>[^\]]*)\]', txt):
path = refs.get(m.group('id'), None)
if path and not os.path.isabs(path) and guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)):
images.append(path)
# Remove duplicates
return list(set(images))
def run(self, path_to_ebook):
with open(path_to_ebook, 'rb') as ebf:
txt = ebf.read()
base_dir = os.path.dirname(path_to_ebook)
images = self._get_image_references(txt, base_dir)
if images:
# Create TXTZ and put file plus images inside of it.
import zipfile
of = self.temporary_file('_plugin_txt2txtz.txtz')
txtz = zipfile.ZipFile(of.name, 'w')
txtz.write(path_to_ebook, os.path.basename(path_to_ebook), zipfile.ZIP_DEFLATED)
for image in images:
txtz.write(os.path.join(base_dir, image), image)
txtz.close()
return of.name
else:
# No images so just import the TXT file.
return path_to_ebook
# }}}
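
To make the three regexes in TXT2TXTZ above concrete, a sketch of input they are meant to match (a path survives only if it is relative, resolves to an existing file under base_dir, and has an image MIME type):

txt = (
    'Textile: !images/photo.jpg!\n'
    'Markdown inline: ![cover](images/cover.jpg)\n'
    'Markdown reference: ![logo][1]\n'
    '   [1]: images/logo.png\n'
)
# plugin._get_image_references(txt, base_dir) would return these three
# paths in some order (duplicates are removed via a set).
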
# Metadata reader plugins {{{
@ -511,14 +573,14 @@ from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \
from calibre.ebooks.metadata.douban import DoubanBooks
from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers
from calibre.ebooks.metadata.covers import OpenLibraryCovers, \
LibraryThingCovers, DoubanCovers
AmazonCovers, DoubanCovers
from calibre.library.catalog import CSV_XML, EPUB_MOBI, BIBTEX
from calibre.ebooks.epub.fix.unmanifested import Unmanifested
from calibre.ebooks.epub.fix.epubcheck import Epubcheck
plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon,
plugins = [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon,
KentDistrictLibrary, DoubanBooks, NiceBooks, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
Epubcheck, OpenLibraryCovers, LibraryThingCovers, DoubanCovers,
Epubcheck, OpenLibraryCovers, AmazonCovers, DoubanCovers,
NiceBooksCovers]
plugins += [
ComicInput,


@ -19,7 +19,7 @@ class ANDROID(USBMS):
VENDOR_ID = {
# HTC
0x0bb4 : { 0x0c02 : [0x100, 0x0227, 0x0226],
0x0bb4 : { 0x0c02 : [0x100, 0x0227, 0x0226, 0x222],
0x0c01 : [0x100, 0x0227, 0x0226],
0x0ff9 : [0x0100, 0x0227, 0x0226],
0x0c87 : [0x0100, 0x0227, 0x0226],


@ -40,6 +40,7 @@ if iswindows:
class DriverBase(DeviceConfig, DevicePlugin):
# Needed for config_widget to work
FORMATS = ['epub', 'pdf']
USER_CAN_ADD_NEW_FORMATS = False
SUPPORTS_SUB_DIRS = True # To enable second checkbox in customize widget
@classmethod


@ -32,6 +32,7 @@ class BAMBOOK(DeviceConfig, DevicePlugin):
ip = None
FORMATS = [ "snb" ]
USER_CAN_ADD_NEW_FORMATS = False
VENDOR_ID = 0x230b
PRODUCT_ID = 0x0001
BCD = None
@ -421,7 +422,7 @@ class BAMBOOK(DeviceConfig, DevicePlugin):
from calibre.gui2.device_drivers.configwidget import ConfigWidget
cw = ConfigWidget(cls.settings(), cls.FORMATS, cls.SUPPORTS_SUB_DIRS,
cls.MUST_READ_METADATA, cls.SUPPORTS_USE_AUTHOR_SORT,
cls.EXTRA_CUSTOMIZATION_MESSAGE)
cls.EXTRA_CUSTOMIZATION_MESSAGE, cls)
# Turn off the Save template
cw.opt_save_template.setVisible(False)
cw.label.setVisible(False)


@ -93,11 +93,11 @@ class MIBUK(USBMS):
VENDOR_ID = [0x0525]
PRODUCT_ID = [0xa4a5]
BCD = [0x314]
BCD = [0x314, 0x319]
SUPPORTS_SUB_DIRS = True
VENDOR_NAME = 'LINUX'
WINDOWS_MAIN_MEM = 'WOLDERMIBUK'
VENDOR_NAME = ['LINUX', 'FILE_BAC']
WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['WOLDERMIBUK', 'KED_STORAGE_GADG']
class JETBOOK_MINI(USBMS):


@ -11,22 +11,42 @@ Generates and writes an APNX page mapping file.
import struct
import uuid
from calibre.ebooks.mobi.reader import MobiReader
from calibre.ebooks.pdb.header import PdbHeaderReader
from calibre.utils.logging import default_log
class APNXBuilder(object):
'''
Currently uses the Adobe 1024 byte count equal one page formula.
Create an APNX file using a pseudo page mapping.
'''
def write_apnx(self, mobi_file_path, apnx_path):
def write_apnx(self, mobi_file_path, apnx_path, accurate=True):
# Check that this is really a MOBI file.
with open(mobi_file_path, 'rb') as mf:
phead = PdbHeaderReader(mf)
r0 = phead.section_data(0)
text_length = struct.unpack('>I', r0[4:8])[0]
ident = PdbHeaderReader(mf).identity()
if ident != 'BOOKMOBI':
raise Exception(_('Not a valid MOBI file. Reports identity of %s') % ident)
pages = self.get_pages(text_length)
# Get the pages depending on the chosen parser
pages = []
if accurate:
try:
pages = self.get_pages_accurate(mobi_file_path)
except:
# Fall back to the fast parser if we can't
# use the accurate one. Typically this is
# due to the file having DRM.
pages = self.get_pages_fast(mobi_file_path)
else:
pages = self.get_pages_fast(mobi_file_path)
if not pages:
raise Exception(_('Could not generate page mapping.'))
# Generate the APNX file from the page mapping.
apnx = self.generate_apnx(pages)
# Write the APNX.
with open(apnx_path, 'wb') as apnxf:
apnxf.write(apnx)
@ -51,18 +71,126 @@ class APNXBuilder(object):
apnx += struct.pack('>H', 32)
apnx += page_header
# write page values to apnx
# Write page values to APNX.
for page in pages:
apnx += struct.pack('>L', page)
apnx += struct.pack('>I', page)
return apnx
def get_pages(self, text_length):
def get_pages_fast(self, mobi_file_path):
'''
2300 characters of uncompressed text per page. This is
not meant to map 1 to 1 to a print book but to be a
close enough measure.
A test book was chosen and the characters were counted
on one page. This number was rounded to 2240, then 60
characters of markup were added to the total giving
2300.
Uncompressed text length is used because it's easily
accessible in MOBI files (part of the header). It's also
faster to work from the length than to decompress and
parse the actual text.
'''
text_length = 0
pages = []
count = 0
with open(mobi_file_path, 'rb') as mf:
phead = PdbHeaderReader(mf)
r0 = phead.section_data(0)
text_length = struct.unpack('>I', r0[4:8])[0]
while count < text_length:
pages.append(count)
count += 1024
count += 2300
return pages
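
A quick sanity check of the arithmetic above:

# 690,000 characters of uncompressed text at 2300 characters per
# pseudo page gives markers at offsets 0, 2300, 4600, ...
assert len(range(0, 690000, 2300)) == 300
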
def get_pages_accurate(self, mobi_file_path):
'''
A more accurate but much more resource intensive and slower
method to calculate the page length.
Parses the uncompressed text. In an average paperback book
there are 32 lines per page and a maximum of 70 characters
per line.
Each paragraph starts a new line and every 70 characters
(minus markup) in a paragraph starts a new line. The
position after every 32 lines will be marked as a new
page.
This can be made more accurate by accounting for
<div class="mbp_pagebreak" /> as a new page marker
and <br> elements as empty lines.
'''
pages = []
# Get the MOBI html.
mr = MobiReader(mobi_file_path, default_log)
if mr.book_header.encryption_type != 0:
# DRMed book
return self.get_pages_fast(mobi_file_path)
mr.extract_text()
# States
in_tag = False
in_p = False
check_p = False
closing = False
p_char_count = 0
# Get positions of every line
# A line is either a paragraph starting
# or every 70 characters in a paragraph.
lines = []
pos = -1
# We want this to be as fast as possible so we
# are going to do one pass across the text. re
# and string functions will parse the text each
# time they are called.
#
# We can use .lower() here because we are
# not modifying the text; for our purposes the case
# doesn't matter, just the absolute character and
# its position within the stream.
for c in mr.mobi_html.lower():
pos += 1
# Check if we are starting or stopping a p tag.
if check_p:
if c == '/':
closing = True
continue
elif c == 'p':
if closing:
in_p = False
else:
in_p = True
lines.append(pos - 2)
check_p = False
closing = False
continue
if c == '<':
in_tag = True
check_p = True
continue
elif c == '>':
in_tag = False
check_p = False
continue
if in_p and not in_tag:
p_char_count += 1
if p_char_count == 70:
lines.append(pos)
p_char_count = 0
# Every 32 lines is a new page
for i in xrange(0, len(lines), 32):
pages.append(lines[i])
return pages
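
A usage sketch of the new signature (the file paths are placeholders, and the import location is assumed from where the Kindle driver uses the builder):

from calibre.devices.kindle.apnx import APNXBuilder

builder = APNXBuilder()
# accurate=True parses the uncompressed text (slower); DRMed books
# silently fall back to the fast 2300-characters-per-page estimate.
builder.write_apnx('/tmp/book.mobi', '/tmp/book.mobi.apnx', accurate=True)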


@ -176,6 +176,28 @@ class KINDLE2(KINDLE):
PRODUCT_ID = [0x0002, 0x0004]
BCD = [0x0100]
EXTRA_CUSTOMIZATION_MESSAGE = [
_('Send page number information when sending books') +
':::' +
_('The Kindle 3 and newer versions can use page number information '
'in MOBI files. With this option, calibre will calculate and send'
' this information to the Kindle when uploading MOBI files by'
' USB. Note that the page numbers do not correspond to any paper'
' book.'),
_('Use slower but more accurate page number generation') +
':::' +
_('There are two ways to generate the page number information. Using the more accurate '
'generator will produce pages that correspond better to a printed book. '
'However, this method is slower and will slow down sending files '
'to the Kindle.'),
]
EXTRA_CUSTOMIZATION_DEFAULT = [
True,
False,
]
OPT_APNX = 0
OPT_APNX_ACCURATE = 1
def books(self, oncard=None, end_session=True):
bl = USBMS.books(self, oncard=oncard, end_session=end_session)
# Read collections information
@ -212,13 +234,17 @@ class KINDLE2(KINDLE):
'''
Hijacking this function to write the apnx file.
'''
if not filepath.lower().endswith('.mobi'):
opts = self.settings()
if not opts.extra_customization[self.OPT_APNX]:
return
if os.path.splitext(filepath.lower())[1] not in ('.azw', '.mobi', '.prc'):
return
apnx_path = '%s.apnx' % os.path.join(path, filename)
apnx_builder = APNXBuilder()
try:
apnx_builder.write_apnx(filepath, apnx_path)
apnx_builder.write_apnx(filepath, apnx_path, accurate=opts.extra_customization[self.OPT_APNX_ACCURATE])
except:
print 'Failed to generate APNX'
import traceback


@ -98,7 +98,6 @@ class KOBO(USBMS):
def update_booklist(prefix, path, title, authors, mime, date, ContentType, ImageID, readstatus, MimeType):
changed = False
# if path_to_ext(path) in self.FORMATS:
try:
lpath = path.partition(self.normalize_path(prefix))[2]
if lpath.startswith(os.sep):
@ -220,7 +219,7 @@ class KOBO(USBMS):
# 2) volume_shorcover
# 2) content
debug_print('delete_via_sql: ContentID: ', ContentID, 'ContentType: ', ContentType)
debug_print('delete_via_sql: ContentID: ', ContentID, 'ContentType: ', ContentType)
connection = sqlite.connect(self.normalize_path(self._main_prefix + '.kobo/KoboReader.sqlite'))
cursor = connection.cursor()
t = (ContentID,)
@ -532,7 +531,7 @@ class KOBO(USBMS):
if result is None:
datelastread = '1970-01-01T00:00:00'
else:
datelastread = result[0] if result[0] is not None else '1970-01-01T00:00:00'
datelastread = result[0] if result[0] is not None else '1970-01-01T00:00:00'
t = (datelastread,ContentID,)


@ -232,16 +232,37 @@ class Device(DeviceConfig, DevicePlugin):
time.sleep(5)
drives = {}
seen = set()
prod_pat = re.compile(r'PROD_(.+?)&')
dup_prod_id = False
def check_for_dups(pnp_id):
try:
match = prod_pat.search(pnp_id)
if match is not None:
prodid = match.group(1)
if prodid in seen:
return True
else:
seen.add(prodid)
except:
pass
return False
for drive, pnp_id in win_pnp_drives().items():
if self.windows_match_device(pnp_id, 'WINDOWS_CARD_A_MEM') and \
not drives.get('carda', False):
drives['carda'] = drive
dup_prod_id |= check_for_dups(pnp_id)
elif self.windows_match_device(pnp_id, 'WINDOWS_CARD_B_MEM') and \
not drives.get('cardb', False):
drives['cardb'] = drive
dup_prod_id |= check_for_dups(pnp_id)
elif self.windows_match_device(pnp_id, 'WINDOWS_MAIN_MEM') and \
not drives.get('main', False):
drives['main'] = drive
dup_prod_id |= check_for_dups(pnp_id)
if 'main' in drives.keys() and 'carda' in drives.keys() and \
'cardb' in drives.keys():
@ -263,7 +284,8 @@ class Device(DeviceConfig, DevicePlugin):
# Sort drives by their PNP drive numbers if the CARD and MAIN
# MEM strings are identical
if self.WINDOWS_MAIN_MEM in (self.WINDOWS_CARD_A_MEM,
if dup_prod_id or \
self.WINDOWS_MAIN_MEM in (self.WINDOWS_CARD_A_MEM,
self.WINDOWS_CARD_B_MEM) or \
self.WINDOWS_CARD_A_MEM == self.WINDOWS_CARD_B_MEM:
letters = sorted(drives.values(), cmp=drivecmp)
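
An illustration of the duplicate-product check with a made-up Windows PnP id:

import re
prod_pat = re.compile(r'PROD_(.+?)&')
pnp_id = 'USBSTOR\\DISK&VEN_ACME&PROD_EREADER&REV_0100\\12345&0'
print prod_pat.search(pnp_id).group(1)  # -> 'EREADER'
# Seeing the same product id on two drives sets dup_prod_id, which forces
# the PNP-drive-number sort above even when the MEM strings differ.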


@ -34,6 +34,10 @@ class DeviceConfig(object):
#: If None the default is used
SAVE_TEMPLATE = None
#: If True the user can add new formats to the driver
USER_CAN_ADD_NEW_FORMATS = True
@classmethod
def _default_save_template(cls):
from calibre.library.save_to_disk import config
@ -73,7 +77,7 @@ class DeviceConfig(object):
from calibre.gui2.device_drivers.configwidget import ConfigWidget
cw = ConfigWidget(cls.settings(), cls.FORMATS, cls.SUPPORTS_SUB_DIRS,
cls.MUST_READ_METADATA, cls.SUPPORTS_USE_AUTHOR_SORT,
cls.EXTRA_CUSTOMIZATION_MESSAGE)
cls.EXTRA_CUSTOMIZATION_MESSAGE, cls)
return cw
@classmethod


@ -93,9 +93,11 @@ class USBMS(CLI, Device):
for idx,b in enumerate(bl):
bl_cache[b.lpath] = idx
all_formats = set(self.settings().format_map) | set(self.FORMATS)
def update_booklist(filename, path, prefix):
changed = False
if path_to_ext(filename) in self.FORMATS:
if path_to_ext(filename) in all_formats:
try:
lpath = os.path.join(path, filename).partition(self.normalize_path(prefix))[2]
if lpath.startswith(os.sep):


@ -152,8 +152,17 @@ def check_ebook_format(stream, current_guess):
stream.seek(0)
return ans
def normalize(x):
if isinstance(x, unicode):
import unicodedata
x = unicodedata.normalize('NFKC', x)
return x
def calibre_cover(title, author_string, series_string=None,
output_format='jpg', title_size=46, author_size=36):
title = normalize(title)
author_string = normalize(author_string)
series_string = normalize(series_string)
from calibre.utils.magick.draw import create_cover_page, TextLine
lines = [TextLine(title, title_size), TextLine(author_string, author_size)]
if series_string:
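
What the NFKC normalization buys for cover text, in a two-line sketch (standard library behaviour):

import unicodedata
print unicodedata.normalize('NFKC', u'\ufb01nd')  # ligature fi -> u'find'
print unicodedata.normalize('NFKC', u'\u2460')    # circled one -> u'1'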


@ -402,8 +402,8 @@ OptionRecommendation(name='asciiize',
'with "Mikhail Gorbachiov". Also, note that in '
'cases where there are multiple representations of a character '
'(characters shared by Chinese and Japanese for instance) the '
'representation used by the largest number of people will be '
'used (Chinese in the previous example).')%\
'representation based on the current calibre interface language will be '
'used.')%\
u'\u041c\u0438\u0445\u0430\u0438\u043b '
u'\u0413\u043e\u0440\u0431\u0430\u0447\u0451\u0432'
)


@ -543,9 +543,9 @@ class HTMLPreProcessor(object):
html = XMLDECL_RE.sub('', html)
if getattr(self.extra_opts, 'asciiize', False):
from calibre.ebooks.unidecode.unidecoder import Unidecoder
unidecoder = Unidecoder()
html = unidecoder.decode(html)
from calibre.utils.localization import get_udc
unihandecoder = get_udc()
html = unihandecoder.decode(html)
if getattr(self.extra_opts, 'enable_heuristics', False):
from calibre.ebooks.conversion.utils import HeuristicProcessor
@ -557,10 +557,10 @@ class HTMLPreProcessor(object):
unsupported_unicode_chars = self.extra_opts.output_profile.unsupported_unicode_chars
if unsupported_unicode_chars:
from calibre.ebooks.unidecode.unidecoder import Unidecoder
unidecoder = Unidecoder()
from calibre.utils.localization import get_udc
unihandecoder = get_udc()
for char in unsupported_unicode_chars:
asciichar = unidecoder.decode(char)
asciichar = unihandecoder.decode(char)
html = html.replace(char, asciichar)
return html
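
A sketch of the replacement in action; get_udc() returns a transliterator keyed to the current interface language, so exact output can vary with locale:

from calibre.utils.localization import get_udc
print get_udc().decode(u'\u041c\u0438\u0445\u0430\u0438\u043b')
# -> roughly 'Mikhail', per the option text above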


@ -156,17 +156,17 @@ class HeuristicProcessor(object):
]
ITALICIZE_STYLE_PATS = [
r'(?msu)(?<=[\s>])_(?P<words>[^_]+)?_',
r'(?msu)(?<=[\s>])/(?P<words>[^/]+)?/',
r'(?msu)(?<=[\s>])~~(?P<words>[^~]+)?~~',
r'(?msu)(?<=[\s>])\*(?P<words>[^\*]+)?\*',
r'(?msu)(?<=[\s>])~(?P<words>[^~]+)?~',
r'(?msu)(?<=[\s>])_/(?P<words>[^/_]+)?/_',
r'(?msu)(?<=[\s>])_\*(?P<words>[^\*_]+)?\*_',
r'(?msu)(?<=[\s>])\*/(?P<words>[^/\*]+)?/\*',
r'(?msu)(?<=[\s>])_\*/(?P<words>[^\*_]+)?/\*_',
r'(?msu)(?<=[\s>])/:(?P<words>[^:/]+)?:/',
r'(?msu)(?<=[\s>])\|:(?P<words>[^:\|]+)?:\|',
r'(?msu)(?<=[\s>])_(?P<words>[^_]+)_',
r'(?msu)(?<=[\s>])/(?P<words>[^/]+)/',
r'(?msu)(?<=[\s>])~~(?P<words>[^~]+)~~',
r'(?msu)(?<=[\s>])\*(?P<words>[^\*]+)\*',
r'(?msu)(?<=[\s>])~(?P<words>[^~]+)~',
r'(?msu)(?<=[\s>])_/(?P<words>[^/_]+)/_',
r'(?msu)(?<=[\s>])_\*(?P<words>[^\*_]+)\*_',
r'(?msu)(?<=[\s>])\*/(?P<words>[^/\*]+)/\*',
r'(?msu)(?<=[\s>])_\*/(?P<words>[^\*_]+)/\*_',
r'(?msu)(?<=[\s>])/:(?P<words>[^:/]+):/',
r'(?msu)(?<=[\s>])\|:(?P<words>[^:\|]+):\|',
]
for word in ITALICIZE_WORDS:
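
Why the trailing '?' was dropped from every (?P<words>...) group, sketched with the underscore pattern: the optional group let an empty pair like '__' match with words=None (which the italicizing substitution then rendered as literal text), while the new form requires at least one character between the markers:

import re
pat = re.compile(r'(?msu)(?<=[\s>])_(?P<words>[^_]+)_')
print pat.sub(lambda m: '<i>%s</i>' % m.group('words'), u'a _word_ here')
# -> a <i>word</i> here   (a bare ' __ ' is now left alone)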


@ -271,6 +271,8 @@ def check_isbn13(isbn):
return None
def check_isbn(isbn):
if not isbn:
return None
isbn = re.sub(r'[^0-9X]', '', isbn.upper())
if len(isbn) == 10:
return check_isbn10(isbn)
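
With the new guard, callers can hand check_isbn empty input safely; a sketch (assuming, as elsewhere in calibre, that check_isbn10 returns the cleaned ISBN on a valid checksum):

print check_isbn(None)             # -> None (previously a TypeError)
print check_isbn('0-7432-7356-7')  # hyphens stripped -> '0743273567'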


@ -7,6 +7,7 @@ __docformat__ = 'restructuredtext en'
Fetch metadata using Amazon AWS
'''
import sys, re
from threading import RLock
from lxml import html
from lxml.html import soupparser
@ -17,6 +18,10 @@ from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.chardet import xml_to_unicode
from calibre.library.comments import sanitize_comments_html
asin_cache = {}
cover_url_cache = {}
cache_lock = RLock()
def find_asin(br, isbn):
q = 'http://www.amazon.com/s?field-keywords='+isbn
raw = br.open_novisit(q).read()
@ -29,6 +34,12 @@ def find_asin(br, isbn):
return revs[0]
def to_asin(br, isbn):
with cache_lock:
ans = asin_cache.get(isbn, None)
if ans:
return ans
if ans is False:
return None
if len(isbn) == 13:
try:
asin = find_asin(br, isbn)
@ -38,8 +49,11 @@ def to_asin(br, isbn):
asin = None
else:
asin = isbn
with cache_lock:
asin_cache[isbn] = asin if asin else False
return asin
def get_social_metadata(title, authors, publisher, isbn):
mi = Metadata(title, authors)
if not isbn:
@ -58,6 +72,68 @@ def get_social_metadata(title, authors, publisher, isbn):
return mi
return mi
def get_cover_url(isbn, br):
isbn = check_isbn(isbn)
if not isbn:
return None
with cache_lock:
ans = cover_url_cache.get(isbn, None)
if ans:
return ans
if ans is False:
return None
asin = to_asin(br, isbn)
if asin:
ans = _get_cover_url(br, asin)
if ans:
with cache_lock:
cover_url_cache[isbn] = ans
return ans
from calibre.ebooks.metadata.xisbn import xisbn
for i in xisbn.get_associated_isbns(isbn):
asin = to_asin(br, i)
if asin:
ans = _get_cover_url(br, asin)
if ans:
with cache_lock:
cover_url_cache[isbn] = ans
cover_url_cache[i] = ans
return ans
with cache_lock:
cover_url_cache[isbn] = False
return None
def _get_cover_url(br, asin):
q = 'http://amzn.com/'+asin
try:
raw = br.open_novisit(q).read()
except Exception, e:
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
return None
raise
if '<title>404 - ' in raw:
return None
raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0]
try:
root = soupparser.fromstring(raw)
except:
return False
imgs = root.xpath('//img[@id="prodImage" and @src]')
if imgs:
src = imgs[0].get('src')
parts = src.split('/')
if len(parts) > 3:
bn = parts[-1]
sparts = bn.split('_')
if len(sparts) > 2:
bn = sparts[0] + sparts[-1]
return ('/'.join(parts[:-1]))+'/'+bn
return None
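
An illustration of the src rewrite above with a made-up Amazon image URL:

src = 'http://ecx.images-amazon.com/images/I/51abcDEF._SL500_AA300_.jpg'
parts = src.split('/')
sparts = parts[-1].split('_')  # ['51abcDEF.', 'SL500', 'AA300', '.jpg']
print '/'.join(parts[:-1]) + '/' + sparts[0] + sparts[-1]
# -> .../images/I/51abcDEF..jpg, the unscaled full-size image
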
def get_metadata(br, asin, mi):
q = 'http://amzn.com/'+asin
try:
@ -111,18 +187,25 @@ def get_metadata(br, asin, mi):
def main(args=sys.argv):
# Test xisbn
print get_social_metadata('Learning Python', None, None, '8324616489')
print
import tempfile, os
tdir = tempfile.gettempdir()
br = browser()
for title, isbn in [
('Learning Python', '8324616489'), # Test xisbn
('Angels & Demons', '9781416580829'), # Test sophisticated comment formatting
# Random tests
('Star Trek: Destiny: Mere Mortals', '9781416551720'),
('The Great Gatsby', '0743273567'),
]:
cpath = os.path.join(tdir, title+'.jpg')
curl = get_cover_url(isbn, br)
if curl is None:
print 'No cover found for', title
else:
open(cpath, 'wb').write(br.open_novisit(curl).read())
print 'Cover for', title, 'saved to', cpath
# Test sophisticated comment formatting
print get_social_metadata('Angels & Demons', None, None, '9781416580829')
print
# Random tests
print get_social_metadata('Star Trek: Destiny: Mere Mortals', None, None, '9781416551720')
print
print get_social_metadata('The Great Gatsby', None, None, '0743273567')
print get_social_metadata(title, None, None, isbn)
return 0


@ -5,7 +5,7 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import traceback, socket, re, sys
import traceback, socket, sys
from functools import partial
from threading import Thread, Event
from Queue import Queue, Empty
@ -15,7 +15,6 @@ import mechanize
from calibre.customize import Plugin
from calibre import browser, prints
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.constants import preferred_encoding, DEBUG
class CoverDownload(Plugin):
@ -112,72 +111,38 @@ class OpenLibraryCovers(CoverDownload): # {{{
# }}}
class LibraryThingCovers(CoverDownload): # {{{
class AmazonCovers(CoverDownload): # {{{
name = 'librarything.com covers'
description = _('Download covers from librarything.com')
name = 'amazon.com covers'
description = _('Download covers from amazon.com')
author = 'Kovid Goyal'
LIBRARYTHING = 'http://www.librarything.com/isbn/'
def get_cover_url(self, isbn, br, timeout=5.):
try:
src = br.open_novisit('http://www.librarything.com/isbn/'+isbn,
timeout=timeout).read().decode('utf-8', 'replace')
except Exception, err:
if isinstance(getattr(err, 'args', [None])[0], socket.timeout):
err = Exception(_('LibraryThing.com timed out. Try again later.'))
raise err
else:
if '/wiki/index.php/HelpThing:Verify' in src:
raise Exception('LibraryThing is blocking calibre.')
s = BeautifulSoup(src)
url = s.find('td', attrs={'class':'left'})
if url is None:
if s.find('div', attrs={'class':'highloadwarning'}) is not None:
raise Exception(_('Could not fetch cover as server is experiencing high load. Please try again later.'))
raise Exception(_('ISBN: %s not found')%isbn)
url = url.find('img')
if url is None:
raise Exception(_('LibraryThing.com server error. Try again later.'))
url = re.sub(r'_S[XY]\d+', '', url['src'])
return url
def has_cover(self, mi, ans, timeout=5.):
if not mi.isbn or not self.site_customization:
if not mi.isbn:
return False
from calibre.ebooks.metadata.library_thing import get_browser, login
br = get_browser()
un, _, pw = self.site_customization.partition(':')
login(br, un, pw)
from calibre.ebooks.metadata.amazon import get_cover_url
br = browser()
try:
self.get_cover_url(mi.isbn, br, timeout=timeout)
get_cover_url(mi.isbn, br)
self.debug('cover for', mi.isbn, 'found')
ans.set()
except Exception, e:
self.debug(e)
def get_covers(self, mi, result_queue, abort, timeout=5.):
if not mi.isbn or not self.site_customization:
if not mi.isbn:
return
from calibre.ebooks.metadata.library_thing import get_browser, login
br = get_browser()
un, _, pw = self.site_customization.partition(':')
login(br, un, pw)
from calibre.ebooks.metadata.amazon import get_cover_url
br = browser()
try:
url = self.get_cover_url(mi.isbn, br, timeout=timeout)
url = get_cover_url(mi.isbn, br)
cover_data = br.open_novisit(url).read()
result_queue.put((True, cover_data, 'jpg', self.name))
except Exception, e:
result_queue.put((False, self.exception_to_string(e),
traceback.format_exc(), self.name))
def customization_help(self, gui=False):
ans = _('To use librarything.com you must sign up for a %sfree account%s '
'and enter your username and password separated by a : below.')
return '<p>'+ans%('<a href="http://www.librarything.com">', '</a>')
# }}}
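Cover download plugins communicate through a queue and an event: has_cover() sets the passed Event when a cover exists, and get_covers() puts (success, data, extension, plugin_name) tuples on the result queue. A minimal synchronous driver sketch, assuming a plugin instance like the one above:

from threading import Event
from Queue import Queue, Empty

def fetch_covers(plugin, mi, timeout=5.):
    results, abort = Queue(), Event()
    plugin.get_covers(mi, results, abort, timeout=timeout)
    covers = []
    while True:
        try:
            ok, data, fmt, name = results.get_nowait()
        except Empty:
            break
        if ok:
            covers.append((name, fmt, data))
    return covers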
def check_for_cover(mi, timeout=5.): # {{{

View File

@ -12,6 +12,7 @@ __docformat__ = 'restructuredtext en'
from struct import pack, unpack
from cStringIO import StringIO
from calibre.ebooks import normalize
from calibre.ebooks.mobi import MobiError
from calibre.ebooks.mobi.writer import rescale_image, MAX_THUMB_DIMEN
from calibre.ebooks.mobi.langcodes import iana2mobi
@ -311,6 +312,7 @@ class MetadataUpdater(object):
return StreamSlicer(self.stream, start, stop)
def update(self, mi):
mi.title = normalize(mi.title)
def update_exth_record(rec):
recs.append(rec)
if rec[0] in self.original_exth_records:
@ -331,12 +333,12 @@ class MetadataUpdater(object):
kindle_pdoc = None
if mi.author_sort and pas:
authors = mi.author_sort
update_exth_record((100, authors.encode(self.codec, 'replace')))
update_exth_record((100, normalize(authors).encode(self.codec, 'replace')))
elif mi.authors:
authors = ';'.join(mi.authors)
update_exth_record((100, authors.encode(self.codec, 'replace')))
update_exth_record((100, normalize(authors).encode(self.codec, 'replace')))
if mi.publisher:
update_exth_record((101, mi.publisher.encode(self.codec, 'replace')))
update_exth_record((101, normalize(mi.publisher).encode(self.codec, 'replace')))
if mi.comments:
# Strip user annotations
a_offset = mi.comments.find('<div class="user_annotations">')
@ -345,12 +347,12 @@ class MetadataUpdater(object):
mi.comments = mi.comments[:a_offset]
if ad_offset >= 0:
mi.comments = mi.comments[:ad_offset]
update_exth_record((103, mi.comments.encode(self.codec, 'replace')))
update_exth_record((103, normalize(mi.comments).encode(self.codec, 'replace')))
if mi.isbn:
update_exth_record((104, mi.isbn.encode(self.codec, 'replace')))
if mi.tags:
subjects = '; '.join(mi.tags)
update_exth_record((105, subjects.encode(self.codec, 'replace')))
update_exth_record((105, normalize(subjects).encode(self.codec, 'replace')))
if kindle_pdoc and kindle_pdoc in mi.tags:
update_exth_record((501, str('PDOC')))
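Wrapping every EXTH string in normalize() matters because decomposed unicode cannot be encoded into legacy single-byte MOBI codecs, while the composed form often can (calibre's normalize() is assumed here to perform NFC composition). For example:

import unicodedata

s = u'Caf' + u'e\u0301'        # 'e' followed by a combining acute accent
s.encode('cp1252', 'replace')  # -> 'Cafe?', the accent is lost
unicodedata.normalize('NFC', s).encode('cp1252', 'replace')  # -> 'Caf\xe9'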

View File

@ -367,6 +367,9 @@ class MobiMLizer(object):
istate.attrib['src'] = elem.attrib['src']
istate.attrib['align'] = 'baseline'
cssdict = style.cssdict()
valign = cssdict.get('vertical-align', None)
if valign in ('top', 'bottom', 'middle'):
istate.attrib['align'] = valign
for prop in ('width', 'height'):
if cssdict[prop] != 'auto':
value = style[prop]
@ -451,8 +454,11 @@ class MobiMLizer(object):
text = COLLAPSE.sub(' ', elem.text)
valign = style['vertical-align']
not_baseline = valign in ('super', 'sub', 'text-top',
'text-bottom')
vtag = 'sup' if valign in ('super', 'text-top') else 'sub'
'text-bottom') or (
isinstance(valign, (float, int)) and abs(valign) != 0)
issup = valign in ('super', 'text-top') or (
isinstance(valign, (float, int)) and valign > 0)
vtag = 'sup' if issup else 'sub'
if not_baseline and not ignore_valign and tag not in NOT_VTAGS and not isblock:
nroot = etree.Element(XHTML('html'), nsmap=MOBI_NSMAP)
vbstate = BlockState(etree.SubElement(nroot, XHTML('body')))
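The rewritten test treats any nonzero numeric vertical-align as off-baseline, not just the CSS keywords, and picks sup for positive offsets. A condensed restatement of the decision:

def choose_vtag(valign):
    # valign: CSS keyword string or a numeric offset from the baseline
    numeric = isinstance(valign, (float, int))
    not_baseline = valign in ('super', 'sub', 'text-top', 'text-bottom') \
            or (numeric and abs(valign) != 0)
    if not not_baseline:
        return None           # stays on the baseline, no sup/sub tag
    issup = valign in ('super', 'text-top') or (numeric and valign > 0)
    return 'sup' if issup else 'sub'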

View File

@ -14,8 +14,9 @@ import re
from struct import pack
import time
from urlparse import urldefrag
from cStringIO import StringIO
from calibre.ebooks import normalize
from calibre.ebooks.mobi.langcodes import iana2mobi
from calibre.ebooks.mobi.mobiml import MBP_NS
from calibre.ebooks.oeb.base import OEB_DOCS
@ -1365,7 +1366,7 @@ class MobiWriter(object):
self._text_length,
self._text_nrecords-1, RECORD_SIZE, 0, 0)) # 0 - 15 (0x0 - 0xf)
uid = random.randint(0, 0xffffffff)
title = unicode(metadata.title[0]).encode('utf-8')
title = normalize(unicode(metadata.title[0])).encode('utf-8')
# The MOBI Header
# 0x0 - 0x3
@ -1523,12 +1524,12 @@ class MobiWriter(object):
items = oeb.metadata[term]
if term == 'creator':
if self._prefer_author_sort:
creators = [unicode(c.file_as or c) for c in items]
creators = [normalize(unicode(c.file_as or c)) for c in items]
else:
creators = [unicode(c) for c in items]
creators = [normalize(unicode(c)) for c in items]
items = ['; '.join(creators)]
for item in items:
data = self.COLLAPSE_RE.sub(' ', unicode(item))
data = self.COLLAPSE_RE.sub(' ', normalize(unicode(item)))
if term == 'identifier':
if data.lower().startswith('urn:isbn:'):
data = data[9:]
@ -1542,7 +1543,7 @@ class MobiWriter(object):
nrecs += 1
if term == 'rights' :
try:
rights = unicode(oeb.metadata.rights[0]).encode('utf-8')
rights = normalize(unicode(oeb.metadata.rights[0])).encode('utf-8')
except:
rights = 'Unknown'
exth.write(pack('>II', EXTH_CODES['rights'], len(rights) + 8))

View File

@ -207,7 +207,14 @@ class CSSFlattener(object):
font_size = self.sbase if self.sbase is not None else \
self.context.source.fbase
if 'align' in node.attrib:
cssdict['text-align'] = node.attrib['align']
if tag != 'img':
cssdict['text-align'] = node.attrib['align']
else:
val = node.attrib['align']
if val in ('middle', 'bottom', 'top'):
cssdict['vertical-align'] = val
elif val in ('left', 'right'):
cssdict['text-align'] = val
del node.attrib['align']
if node.tag == XHTML('font'):
node.tag = XHTML('span')
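The branch above changes how a legacy align attribute is translated: on images, middle/bottom/top now become vertical-align (picked up by the MOBI output code), left/right remain text-align, and other tags keep the old behaviour. As a pure-function sketch:

def align_to_css(tag, val):
    # Mirrors the branch above; returns (css_property, value) or None.
    if tag != 'img':
        return ('text-align', val)
    if val in ('middle', 'bottom', 'top'):
        return ('vertical-align', val)
    if val in ('left', 'right'):
        return ('text-align', val)
    return None  # unrecognized img alignment is simply dropped

align_to_css('img', 'middle')  # ('vertical-align', 'middle')
align_to_css('p', 'center')    # ('text-align', 'center')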

View File

@ -56,7 +56,7 @@ def add_options(parser):
group = OptionGroup(parser, _('Merge Options:'), _('Options to control the transformation of pdf'))
parser.add_option_group(group)
add_option = group.add_option
for rec in OPTIONS:
option_recommendation_to_cli_option(add_option, rec)
@ -82,15 +82,15 @@ def main(args=sys.argv, name=''):
log = Log()
parser = option_parser(name)
add_options(parser)
opts, args = parser.parse_args(args)
args = args[1:]
if len(args) < 2:
print 'Error: Two or more PDF files are required.\n'
print_help(parser, log)
return 1
bad_pdfs = is_valid_pdfs(args)
if bad_pdfs != []:
for pdf in bad_pdfs:
@ -104,7 +104,7 @@ def main(args=sys.argv, name=''):
print 'Error: file `%s` is encrypted.' % pdf
if enc:
return 1
mi = metadata_from_formats([args[0]])
merge_files(args, opts.output, mi)

View File

@ -4,10 +4,9 @@ __license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import glob
import os
from calibre import _ent_pat, xml_entity_to_unicode
from calibre import _ent_pat, walk, xml_entity_to_unicode
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator
from calibre.ebooks.chardet import detect
@ -16,7 +15,6 @@ from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \
preserve_spaces, detect_paragraph_type, detect_formatting_type, \
normalize_line_endings, convert_textile, remove_indents, block_to_single_line, \
separate_hard_scene_breaks
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.zipfile import ZipFile
class TXTInput(InputFormatPlugin):
@ -28,20 +26,23 @@ class TXTInput(InputFormatPlugin):
options = set([
OptionRecommendation(name='paragraph_type', recommended_value='auto',
choices=['auto', 'block', 'single', 'print', 'unformatted'],
choices=['auto', 'block', 'single', 'print', 'unformatted', 'off'],
help=_('Paragraph structure.\n'
'choices are [\'auto\', \'block\', \'single\', \'print\', \'unformatted\']\n'
'choices are [\'auto\', \'block\', \'single\', \'print\', \'unformatted\', \'off\']\n'
'* auto: Try to auto detect paragraph type.\n'
'* block: Treat a blank line as a paragraph break.\n'
'* single: Assume every line is a paragraph.\n'
'* print: Assume every line starting with 2+ spaces or a tab '
'starts a paragraph.'
'* unformatted: Most lines have hard line breaks, few/no blank lines or indents.')),
'starts a paragraph.\n'
'* unformatted: Most lines have hard line breaks, few/no blank lines or indents. '
'Tries to determine structure and reformat to differentiate elements.\n'
'* off: Don\'t modify the paragraph structure. This is useful when combined with '
'Markdown or Textile formatting to ensure no formatting is lost.')),
OptionRecommendation(name='formatting_type', recommended_value='auto',
choices=['auto', 'none', 'heuristic', 'textile', 'markdown'],
choices=['auto', 'plain', 'heuristic', 'textile', 'markdown'],
help=_('Formatting used within the document.'
'* auto: Automatically decide which formatting processor to use.\n'
'* none: Do not process the document formatting. Everything is a '
'* plain: Do not process the document formatting. Everything is a '
'paragraph and no styling is applied.\n'
'* heuristic: Process using heuristics to determine formatting such '
'as chapter headings and italic text.\n'
@ -64,18 +65,17 @@ class TXTInput(InputFormatPlugin):
txt = ''
log.debug('Reading text from file...')
length = 0
# [(u'path', mime),]
# Extract content from zip archive.
if file_ext == 'txtz':
log.debug('De-compressing content to temporary directory...')
with TemporaryDirectory('_untxtz') as tdir:
zf = ZipFile(stream)
zf.extractall(tdir)
zf = ZipFile(stream)
zf.extractall('.')
txts = glob.glob(os.path.join(tdir, '*.txt'))
for t in txts:
with open(t, 'rb') as tf:
txt += tf.read()
for x in walk('.'):
if os.path.splitext(x)[1].lower() == '.txt':
with open(x, 'rb') as tf:
txt += tf.read() + '\n\n'
else:
txt = stream.read()
@ -134,7 +134,7 @@ class TXTInput(InputFormatPlugin):
preprocessor = HeuristicProcessor(options, log=getattr(self, 'log', None))
txt = preprocessor.punctuation_unwrap(length, txt, 'txt')
txt = separate_paragraphs_single_line(txt)
else:
elif options.paragraph_type == 'block':
txt = separate_hard_scene_breaks(txt)
txt = block_to_single_line(txt)
@ -178,7 +178,7 @@ class TXTInput(InputFormatPlugin):
setattr(options, opt.option.name, opt.recommended_value)
options.input_encoding = 'utf-8'
base = os.getcwdu()
if hasattr(stream, 'name'):
if file_ext != 'txtz' and hasattr(stream, 'name'):
base = os.path.dirname(stream.name)
fname = os.path.join(base, 'index.html')
c = 0
@ -190,16 +190,16 @@ class TXTInput(InputFormatPlugin):
htmlfile.write(html.encode('utf-8'))
odi = options.debug_pipeline
options.debug_pipeline = None
# Generate oeb from htl conversion.
# Generate oeb from html conversion.
oeb = html_input.convert(open(htmlfile.name, 'rb'), options, 'html', log,
{})
options.debug_pipeline = odi
os.remove(htmlfile.name)
# Set metadata from file.
from calibre.customize.ui import get_file_type_metadata
from calibre.ebooks.oeb.transforms.metadata import meta_info_to_oeb_metadata
mi = get_file_type_metadata(stream, file_ext)
meta_info_to_oeb_metadata(mi, oeb.metadata, log)
return oeb

View File

@ -126,7 +126,7 @@ def separate_hard_scene_breaks(txt):
return '\n%s\n' % line
else:
return line
txt = re.sub(u'(?miu)^[ \t-=~\/]+$', lambda mo: sep_break(mo.group()), txt)
txt = re.sub(u'(?miu)^[ \t-=~\/_]+$', lambda mo: sep_break(mo.group()), txt)
return txt
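Adding '_' to the character class makes a line of underscores count as a hard scene break, so it gets padded with blank lines like the other separator styles. For example:

import re

def sep_break(line):
    # simplified stand-in for the nested helper above
    return '\n%s\n' % line

txt = 'end of scene\n______\nnext scene'
txt = re.sub(u'(?miu)^[ \t-=~\/_]+$', lambda mo: sep_break(mo.group()), txt)
# txt is now 'end of scene\n\n______\n\nnext scene'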
def block_to_single_line(txt):

File diff suppressed because it is too large

View File

@ -0,0 +1,57 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
__docformat__ = 'restructuredtext en'
__all__ = ["Unihandecoder"]
'''
Decode unicode text to an ASCII representation of the text.
Translate unicode characters to ASCII.
Inspired by John Schember's unidecode library, which was created as part
of calibre.
Copyright(c) 2009, John Schember
Transliterate the string from unicode characters to ASCII for Chinese and other scripts.
'''
import unicodedata
class Unihandecoder(object):
preferred_encoding = None
decoder = None
def __init__(self, lang="zh", encoding='utf-8'):
self.preferred_encoding = encoding
lang = lang.lower()
if lang[:2] == u'ja':
from calibre.ebooks.unihandecode.jadecoder import Jadecoder
self.decoder = Jadecoder()
elif lang[:2] == u'kr' or lang == u'korean':
from calibre.ebooks.unihandecode.krdecoder import Krdecoder
self.decoder = Krdecoder()
elif lang[:2] == u'vn' or lang == u'vietnum':
from calibre.ebooks.unihandecode.vndecoder import Vndecoder
self.decoder = Vndecoder()
else: #zh and others
from calibre.ebooks.unihandecode.unidecoder import Unidecoder
self.decoder = Unidecoder()
def decode(self, text):
try:
unicode # python2
if not isinstance(text, unicode):
try:
text = unicode(text)
except:
try:
text = text.decode(self.preferred_encoding)
except:
text = text.decode('utf-8', 'replace')
except: # python3, str is unicode
pass
#at first unicode normalize it. (see Unicode standards)
ntext = unicodedata.normalize('NFKC', text)
return self.decoder.decode(ntext)
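Typical use of the class above: choose a decoder by language code, then transliterate. A hedged usage sketch (the exact romanization depends on the bundled dictionaries):

from calibre.ebooks.unihandecode import Unihandecoder

d = Unihandecoder(lang='ja')
d.decode(u'\u65e5\u672c\u8a9e')  # u'日本語' -> something like 'Nihongo'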

File diff suppressed because it is too large

View File

@ -0,0 +1,41 @@
# coding:utf8
__license__ = 'GPL 3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
__docformat__ = 'restructuredtext en'
'''
Decode unicode text to an ASCII representation of the text for Japanese.
Translate unicode string to ASCII roman string.
API is based on the python unidecode,
which is based on Ruby gem (http://rubyforge.org/projects/unidecode/)
and perl module Text::Unidecode
(http://search.cpan.org/~sburke/Text-Unidecode-0.04/).
This functionality is provided by the KAKASI Japanese processing engine.
Copyright (c) 2010 Hiroshi Miura
'''
import re
from calibre.ebooks.unihandecode.unidecoder import Unidecoder
from calibre.ebooks.unihandecode.unicodepoints import CODEPOINTS
from calibre.ebooks.unihandecode.jacodepoints import CODEPOINTS as JACODES
from calibre.ebooks.unihandecode.pykakasi.kakasi import kakasi
class Jadecoder(Unidecoder):
kakasi = None
codepoints = {}
def __init__(self):
self.codepoints = CODEPOINTS
self.codepoints.update(JACODES)
self.kakasi = kakasi()
def decode(self, text):
try:
result=self.kakasi.do(text)
return re.sub('[^\x00-\x7f]', lambda x: self.replace_point(x.group()),result)
except:
return re.sub('[^\x00-\x7f]', lambda x: self.replace_point(x.group()),text)

File diff suppressed because it is too large

View File

@ -0,0 +1,24 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
__docformat__ = 'restructuredtext en'
'''
Decode unicode text to an ASCII representation of the text in Korean.
Based on unidecoder.
'''
from calibre.ebooks.unihandecode.unidecoder import Unidecoder
from calibre.ebooks.unihandecode.krcodepoints import CODEPOINTS as HANCODES
from calibre.ebooks.unihandecode.unicodepoints import CODEPOINTS
class Krdecoder(Unidecoder):
codepoints = {}
def __init__(self):
self.codepoints = CODEPOINTS
self.codepoints.update(HANCODES)

View File

@ -0,0 +1,5 @@
from calibre.ebooks.unihandecode.pykakasi.kakasi import kakasi
kakasi
__all__ = ["pykakasi"]

View File

@ -0,0 +1,185 @@
# -*- coding: utf-8 -*-
# h2a.py
#
# Copyright 2011 Hiroshi Miura <miurahr@linux.com>
#
# Original copyright:
# * KAKASI (Kanji Kana Simple inversion program)
# * $Id: jj2.c,v 1.7 2001-04-12 05:57:34 rug Exp $
# * Copyright (C) 1992
# * Hironobu Takahashi (takahasi@tiny.or.jp)
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either versions 2, or (at your option)
# * any later version.
# *
# * This program is distributed in the hope that it will be useful
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with KAKASI, see the file COPYING. If not, write to the Free
# * Software Foundation Inc., 59 Temple Place - Suite 330, Boston, MA
# * 02111-1307, USA.
# */
class H2a (object):
H2a_table = {
u"\u3041":"a", u"\u3042":"a",
u"\u3043":"i", u"\u3044":"i",
u"\u3045":"u", u"\u3046":"u",
u"\u3046\u309b":"vu", u"\u3046\u309b\u3041":"va",
u"\u3046\u309b\u3043":"vi", u"\u3046\u309b\u3047":"ve",
u"\u3046\u309b\u3049":"vo",
u"\u3047":"e", u"\u3048":"e",
u"\u3049":"o", u"\u304a":"o",
u"\u304b":"ka", u"\u304c":"ga",
u"\u304d":"ki", u"\u304d\u3041":"kya",
u"\u304d\u3045":"kyu", u"\u304d\u3049":"kyo",
u"\u304e":"gi", u"\u3050\u3083":"gya",
u"\u304e\u3045":"gyu", u"\u304e\u3087":"gyo",
u"\u304f":"ku", u"\u3050":"gu",
u"\u3051":"ke", u"\u3052":"ge",
u"\u3053":"ko", u"\u3054":"go",
u"\u3055":"sa", u"\u3056":"za",
u"\u3057":"shi", u"\u3057\u3083":"sha",
u"\u3057\u3085":"shu", u"\u3057\u3087":"sho",
u"\u3058":"ji", u"\u3058\u3083":"ja",
u"\u3058\u3085":"ju", u"\u3058\u3087":"jo",
u"\u3059":"su", u"\u305a":"zu",
u"\u305b":"se", u"\u305c":"ze",
u"\u305d":"so", u"\u305e":"zo",
u"\u305f":"ta", u"\u3060":"da",
u"\u3061":"chi", u"\u3061\u3047":"che", u"\u3061\u3083":"cha",
u"\u3061\u3085":"chu", u"\u3061\u3087":"cho",
u"\u3062":"ji", u"\u3062\u3083":"ja",
u"\u3062\u3085":"ju", u"\u3062\u3087":"jo",
u"\u3063":"tsu",
u"\u3063\u3046\u309b":"vvu",
u"\u3063\u3046\u309b\u3041":"vva",
u"\u3063\u3046\u309b\u3043":"vvi",
u"\u3063\u3046\u309b\u3047":"vve",
u"\u3063\u3046\u309b\u3049":"vvo",
u"\u3063\u304b":"kka", u"\u3063\u304c":"gga",
u"\u3063\u304d":"kki", u"\u3063\u304d\u3083":"kkya",
u"\u3063\u304d\u3085":"kkyu", u"\u3063\u304d\u3087":"kkyo",
u"\u3063\u304e":"ggi", u"\u3063\u304e\u3083":"ggya",
u"\u3063\u304e\u3085":"ggyu", u"\u3063\u304e\u3087":"ggyo",
u"\u3063\u304f":"kku", u"\u3063\u3050":"ggu",
u"\u3063\u3051":"kke", u"\u3063\u3052":"gge",
u"\u3063\u3053":"kko", u"\u3063\u3054":"ggo",
u"\u3063\u3055":"ssa", u"\u3063\u3056":"zza",
u"\u3063\u3057":"sshi", u"\u3063\u3057\u3083":"ssha",
u"\u3063\u3057\u3085":"sshu", u"\u3063\u3057\u3087":"ssho",
u"\u3063\u3058":"jji", u"\u3063\u3058\u3083":"jja",
u"\u3063\u3058\u3085":"jju", u"\u3063\u3058\u3087":"jjo",
u"\u3063\u3059":"ssu", u"\u3063\u305a":"zzu",
u"\u3063\u305b":"sse", u"\u3063\u305e":"zze",
u"\u3063\u305d":"sso", u"\u3063\u305e":"zzo",
u"\u3063\u305f":"tta", u"\u3063\u3060":"dda",
u"\u3063\u3061":"tchi", u"\u3063\u3061\u3083":"tcha",
u"\u3063\u3061\u3085":"tchu", u"\u3063\u3061\u3087":"tcho",
u"\u3063\u3062":"jji", u"\u3063\u3062\u3083":"jjya",
u"\u3063\u3062\u3085":"jjyu", u"\u3063\u3062\u3087":"jjyo",
u"\u3063\u3064":"ttsu", u"\u3063\u3065":"zzu",
u"\u3063\u3066":"tte", u"\u3063\u3067":"dde",
u"\u3063\u3068":"tto", u"\u3063\u3069":"ddo",
u"\u3063\u306f":"hha", u"\u3063\u3070":"bba",
u"\u3063\u3071":"ppa",
u"\u3063\u3072":"hhi", u"\u3063\u3072\u3083":"hhya",
u"\u3063\u3072\u3085":"hhyu", u"\u3063\u3072\u3087":"hhyo",
u"\u3063\u3073":"bbi", u"\u3063\u3073\u3083":"bbya",
u"\u3063\u3073\u3085":"bbyu", u"\u3063\u3073\u3087":"bbyo",
u"\u3063\u3074":"ppi", u"\u3063\u3074\u3083":"ppya",
u"\u3063\u3074\u3085":"ppyu", u"\u3063\u3074\u3087":"ppyo",
u"\u3063\u3075":"ffu", u"\u3063\u3075\u3041":"ffa",
u"\u3063\u3075\u3043":"ffi", u"\u3063\u3075\u3047":"ffe",
u"\u3063\u3075\u3049":"ffo",
u"\u3063\u3076":"bbu", u"\u3063\u3077":"ppu",
u"\u3063\u3078":"hhe", u"\u3063\u3079":"bbe",
u"\u3063\u307a":"ppe",
u"\u3063\u307b":"hho", u"\u3063\u307c":"bbo",
u"\u3063\u307d":"ppo",
u"\u3063\u3084":"yya", u"\u3063\u3086":"yyu",
u"\u3063\u3088":"yyo",
u"\u3063\u3089":"rra", u"\u3063\u308a":"rri",
u"\u3063\u308a\u3083":"rrya", u"\u3063\u308a\u3085":"rryu",
u"\u3063\u308a\u3087":"rryo",
u"\u3063\u308b":"rru", u"\u3063\u308c":"rre",
u"\u3063\u308d":"rro",
u"\u3064":"tsu", u"\u3065":"zu",
u"\u3066":"te", u"\u3067":"de", u"\u3067\u3043":"di",
u"\u3068":"to", u"\u3069":"do",
u"\u306a":"na",
u"\u306b":"ni", u"\u306b\u3083":"nya",
u"\u306b\u3085":"nyu", u"\u306b\u3087":"nyo",
u"\u306c":"nu", u"\u306d":"ne", u"\u306e":"no",
u"\u306f":"ha", u"\u3070":"ba", u"\u3071":"pa",
u"\u3072":"hi", u"\u3072\u3083":"hya",
u"\u3072\u3085":"hyu", u"\u3072\u3087":"hyo",
u"\u3073":"bi", u"\u3073\u3083":"bya",
u"\u3073\u3085":"byu", u"\u3073\u3087":"byo",
u"\u3074":"pi", u"\u3074\u3083":"pya",
u"\u3074\u3085":"pyu", u"\u3074\u3087":"pyo",
u"\u3075":"fu", u"\u3075\u3041":"fa",
u"\u3075\u3043":"fi", u"\u3075\u3047":"fe",
u"\u3075\u3049":"fo",
u"\u3076":"bu", u"\u3077":"pu",
u"\u3078":"he", u"\u3079":"be", u"\u307a":"pe",
u"\u307b":"ho", u"\u307c":"bo", u"\u307d":"po",
u"\u307e":"ma",
u"\u307f":"mi", u"\u307f\u3083":"mya",
u"\u307f\u3085":"myu", u"\u307f\u3087":"myo",
u"\u3080":"mu", u"\u3081":"me", u"\u3082":"mo",
u"\u3083":"ya", u"\u3084":"ya",
u"\u3085":"yu", u"\u3086":"yu",
u"\u3087":"yo", u"\u3088":"yo",
u"\u3089":"ra",
u"\u308a":"ri", u"\u308a\u3083":"rya",
u"\u308a\u3085":"ryu", u"\u308a\u3087":"ryo",
u"\u308b":"ru", u"\u308c":"re", u"\u308d":"ro",
u"\u308e":"wa", u"\u308f":"wa",
u"\u3090":"i", u"\u3091":"e",
u"\u3092":"wo", u"\u3093":"n",
u"\u3093\u3042":"n'a", u"\u3093\u3044":"n'i",
u"\u3093\u3046":"n'u", u"\u3093\u3048":"n'e",
u"\u3093\u304a":"n'o",
}
# this class is Borg
_shared_state = {}
def __new__(cls, *p, **k):
self = object.__new__(cls, *p, **k)
self.__dict__ = cls._shared_state
return self
def isHiragana(self, char):
return ( 0x3040 < ord(char) and ord(char) < 0x3094)
def convert(self, text):
Hstr = ""
max_len = -1
r = min(4, len(text)+1)
for x in xrange(r):
if text[:x] in self.H2a_table:
if max_len < x:
max_len = x
Hstr = self.H2a_table[text[:x]]
return (Hstr, max_len)
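convert() is a greedy longest-prefix match against the table: it tries prefixes of up to three characters and keeps the longest hit, returning the romaji and the number of characters consumed (-1 when nothing matched). For example:

h2a = H2a()
h2a.convert(u'\u3057\u3083\u304b')  # 'しゃか' -> ('sha', 2); 'しゃ' beats 'し'
h2a.convert(u'\u304b')              # 'か' -> ('ka', 1)
h2a.convert(u'abc')                 # no match -> ('', -1)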

View File

@ -0,0 +1,564 @@
芦蘆
壱一
苅刈
舘館
曽曾
菟兎
島嶋
盃杯
冨富
峯峰
亘亙
弌一
乘乗
亂乱
豫予
亊事
弍二
亞亜
亰京
从従
仭仞
佛仏
來来
儘侭
伜倅
假仮
會会
做作
傳伝
僞偽
價価
儉倹
兒児
兔兎
竸競
兩両
囘回
册冊
冢塚
冩写
决決
冱冴
冰氷
况況
凉涼
處処
凾函
刄刃
刔抉
刧劫
剩剰
劍剣
劔剣
劒剣
剱剣
劑剤
辨弁
勞労
勳勲
勵励
勸勧
區区
卆卒
丗世
凖準
夘卯
卻却
卷巻
厠廁
厦廈
厮廝
厰廠
參参
雙双
咒呪
單単
噐器
營営
嚏嚔
嚴厳
囑嘱
囓齧
圀国
圈圏
國国
圍囲
圓円
團団
圖図
埀垂
埓埒
塲場
壞壊
墮堕
壓圧
壘塁
壥廛
壤壌
壯壮
壺壷
壹一
壻婿
壽寿
夂夊
夛多
梦夢
竒奇
奧奥
奬奨
侫佞
姙妊
嫻嫺
孃嬢
學学
斈学
寃冤
寇冦
寢寝
寫写
寶宝
寳宝
尅剋
將将
專専
對対
尓爾
尢尤
屆届
屬属
峽峡
嶌嶋
嵜崎
崙崘
嵳嵯
嶽岳
巛川
巵卮
帋紙
帶帯
幤幣
廐厩
廏厩
廣広
廚厨
廢廃
廳庁
廰庁
廸迪
弃棄
弉奘
彜彝
彈弾
彌弥
弯彎
徃往
徑径
從従
徠来
悳徳
恠怪
恆恒
悧俐
惡悪
惠恵
忰悴
惱悩
愼慎
愽博
慘惨
慚慙
憇憩
應応
懷懐
懴懺
戀恋
戞戛
戰戦
戲戯
拔抜
拏拿
擔担
拜拝
拂払
挾挟
搜捜
插挿
搖揺
攝摂
攪撹
據拠
擇択
擧拳
舉拳
抬擡
擴拡
攜携
攵攴
攷考
收収
效効
敕勅
敍叙
敘叙
數数
變変
斷断
旙旛
昜陽
晄晃
晉晋
晝昼
晰晢
暎映
曉暁
暸瞭
昿曠
曵曳
朖朗
朞期
霸覇
杤栃
杰傑
枩松
檜桧
條条
檮梼
梹檳
棊棋
棧桟
棕椶
楙茂
榮栄
槨椁
樂楽
權権
樞枢
樣様
樓楼
橢楕
檢検
櫻桜
鬱欝
盜盗
飮飲
歐嘔
歡歓
歸帰
殘残
殱殲
殼殻
毆殴
毓育
氣気
沒没
泪涙
濤涛
渕淵
渊淵
淨浄
淺浅
滿満
溂剌
溪渓
灌潅
滯滞
澁渋
澀渋
潛潜
濳潜
澂澄
澑溜
澤沢
濟済
濕湿
濱浜
濾滬
灣湾
烱炯
烟煙
熈煕
熏燻
燒焼
爐炉
爭争
爲為
爼俎
犁犂
犹猶
犲豺
狹狭
獎奨
默黙
獨独
獸獣
獵猟
獻献
珎珍
璢瑠
瑯琅
珱瓔
瓣弁
甞嘗
甼町
畄留
畍界
畊耕
畆畝
畧略
畫画
當当
畴疇
疊畳
疉畳
疂畳
癡痴
發発
皃猊
皈帰
皹皸
盖蓋
盡尽
蘯盪
眞真
眦眥
礦鉱
礪砺
碎砕
碯瑙
祕秘
祿禄
齋斎
禪禅
禮礼
禀稟
稱称
稻稲
稾稿
穗穂
穩穏
龝穐
穰穣
窗窓
竈竃
窰窯
竊窃
竝並
筺筐
笋筍
箟箘
筝箏
簔蓑
籠篭
籘籐
籖籤
粹粋
糺糾
絲糸
經経
總総
緜綿
縣県
縱縦
繪絵
繩縄
繼継
緕纃
續続
纖繊
纎繊
纜繿
缺欠
罐缶
罸罰
羃冪
羣群
羮羹
譱善
翆翠
翦剪
耻恥
聟婿
聨聯
聲声
聰聡
聽聴
肅粛
冐冒
脉脈
腦脳
腟膣
膓腸
膸髄
膽胆
臈臘
臟臓
臺台
與与
舊旧
舍舎
舖舗
舩船
艢檣
舮艫
艷艶
莖茎
莊荘
莵兎
菷帚
萠萌
蕚萼
蒂蔕
萬万
葢蓋
蘂蕊
蕋蕊
藪薮
藏蔵
藝芸
藥薬
蘓蘇
乕虎
號号
蠣蛎
蝨虱
蠅蝿
螢蛍
蟆蟇
蟲虫
蠏蟹
蟷螳
蟒蠎
蠶蚕
蠧蠹
蠻蛮
衂衄
衞衛
袵衽
裝装
襃褒
褝襌
覩睹
覺覚
覽覧
觀観
觧解
觸触
誡戒
謌歌
諡謚
謠謡
證証
譛譖
譯訳
譽誉
讀読
讓譲
讚賛
豐豊
貉狢
貍狸
貎猊
豼貔
貘獏
戝財
貭質
貳弐
貮弐
賤賎
賣売
贊賛
賍贓
赱走
踈疎
踴踊
躰体
軆体
軈軅
軣轟
輕軽
輙輒
輌輛
轉転
辭辞
辯弁
迯逃
逹達
逎遒
遞逓
遲遅
邊辺
邉辺
邨村
鄰隣
醉酔
醫医
釀醸
釋釈
釡釜
釼剣
銕鉄
錢銭
鎭鎮
鐵鉄
鐡鉄
鑒鑑
鑄鋳
鑛鉱
鈩鑪
鑚鑽
閇閉
濶闊
關関
阯址
陷陥
險険
隱隠
隸隷
襍雑
雜雑
靈霊
靜静
靱靭
韭韮
韲齏
韵韻
顏顔
顯顕
飃飄
餘余
餝飾
餠餅
騷騒
驅駆
驛駅
驗験
髓髄
體体
髮髪
鬪闘
鰺鯵
鰛鰮
鳬鳧
鳫鴈
鵄鴟
鵞鵝
鷄鶏
鷏鷆
鹽塩
麥麦
麸麩
麪麺
點点
黨党
皷鼓
鼡鼠
齊斉
齒歯
齡齢
龜亀
槇槙
遙遥
瑤瑶
凜凛
熙煕

View File

@ -0,0 +1,83 @@
# -*- coding: utf-8 -*-
# j2h.py
#
# Copyright 2011 Hiroshi Miura <miurahr@linux.com>
#
# Original Copyright:
# * KAKASI (Kanji Kana Simple inversion program)
# * $Id: jj2.c,v 1.7 2001-04-12 05:57:34 rug Exp $
# * Copyright (C) 1992
# * Hironobu Takahashi (takahasi@tiny.or.jp)
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either versions 2, or (at your option)
# * any later version.
# *
# * This program is distributed in the hope that it will be useful
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with KAKASI, see the file COPYING. If not, write to the Free
# * Software Foundation Inc., 59 Temple Place - Suite 330, Boston, MA
# * 02111-1307, USA.
# */
from calibre.ebooks.unihandecode.pykakasi.jisyo import jisyo
import re
class J2H (object):
kanwa = None
cl_table = [
"","aiueow", "aiueow", "aiueow", "aiueow", "aiueow", "aiueow", "aiueow",
"aiueow", "aiueow", "aiueow", "k", "g", "k", "g", "k", "g", "k", "g", "k",
"g", "s", "zj", "s", "zj", "s", "zj", "s", "zj", "s", "zj", "t", "d", "tc",
"d", "aiueokstchgzjfdbpw", "t", "d", "t", "d", "t", "d", "n", "n", "n", "n",
"n", "h", "b", "p", "h", "b", "p", "hf", "b", "p", "h", "b", "p", "h", "b",
"p", "m", "m", "m", "m", "m", "y", "y", "y", "y", "y", "y", "rl", "rl",
"rl", "rl", "rl", "wiueo", "wiueo", "wiueo", "wiueo", "w", "n", "v", "k",
"k", "", "", "", "", "", "", "", "", ""]
def __init__(self):
self.kanwa = jisyo()
def isKanji(self, c):
return ( 0x3400 <= ord(c) and ord(c) < 0xfa2e)
def isCletter(self, l, c):
if (ord(u"") <= ord(c) and ord(c) <= 0x309f) and ( l in self.cl_table[ord(c) - ord(u"")-1]):
return True
return False
def itaiji_conv(self, text):
r = []
for c in text:
if c in self.kanwa.itaijidict:
r.append(c)
for c in r:
text = re.sub(c, self.kanwa.itaijidict[c], text)
return text
def convert(self, text):
max_len = 0
Hstr = ""
table = self.kanwa.load_jisyo(text[0])
if table is None:
return ("", 0)
for (k,v) in table.iteritems():
length = len(k)
if len(text) >= length:
if text.startswith(k):
for (yomi, tail) in v:
if tail == '':
if max_len < length:
Hstr = yomi
max_len = length
elif max_len < length+1 and len(text) > length and self.isCletter(tail, text[length]):
Hstr=''.join([yomi,text[length]])
max_len = length+1
return (Hstr, max_len)

View File

@ -0,0 +1,53 @@
# -*- coding: utf-8 -*-
# jisyo.py
#
# Copyright 2011 Hiroshi Miura <miurahr@linux.com>
from cPickle import load
import anydbm,marshal
from zlib import decompress
import os
import calibre.utils.resources as resources
class jisyo (object):
kanwadict = None
itaijidict = None
kanadict = None
jisyo_table = {}
# this class is Borg
_shared_state = {}
def __new__(cls, *p, **k):
self = object.__new__(cls, *p, **k)
self.__dict__ = cls._shared_state
return self
def __init__(self):
if self.kanwadict is None:
dictpath = resources.get_path(os.path.join('localization','pykakasi','kanwadict2.db'))
self.kanwadict = anydbm.open(dictpath,'r')
if self.itaijidict is None:
itaijipath = resources.get_path(os.path.join('localization','pykakasi','itaijidict2.pickle'))
itaiji_pkl = open(itaijipath, 'rb')
self.itaijidict = load(itaiji_pkl)
if self.kanadict is None:
kanadictpath = resources.get_path(os.path.join('localization','pykakasi','kanadict2.pickle'))
kanadict_pkl = open(kanadictpath, 'rb')
self.kanadict = load(kanadict_pkl)
def load_jisyo(self, char):
try:#python2
key = "%04x"%ord(unicode(char))
except:#python3
key = "%04x"%ord(char)
try: #already exist?
table = self.jisyo_table[key]
except:
try:
table = self.jisyo_table[key] = marshal.loads(decompress(self.kanwadict[key]))
except:
return None
return table
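Like H2a above, jisyo is a Borg: every instance shares one __dict__, so the pickled dictionaries are loaded at most once no matter how many objects get constructed. The pattern in isolation:

class Borg(object):
    _shared_state = {}

    def __new__(cls, *p, **k):
        self = object.__new__(cls)
        self.__dict__ = cls._shared_state  # all instances share state
        return self

a, b = Borg(), Borg()
a.x = 1
assert b.x == 1 and a is not b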

View File

@ -0,0 +1,50 @@
# -*- coding: utf-8 -*-
# k2a.py
#
# Copyright 2011 Hiroshi Miura <miurahr@linux.com>
#
# Original copyright:
# * KAKASI (Kanji Kana Simple inversion program)
# * $Id: jj2.c,v 1.7 2001-04-12 05:57:34 rug Exp $
# * Copyright (C) 1992
# * Hironobu Takahashi (takahasi@tiny.or.jp)
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either versions 2, or (at your option)
# * any later version.
# *
# * This program is distributed in the hope that it will be useful
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with KAKASI, see the file COPYING. If not, write to the Free
# * Software Foundation Inc., 59 Temple Place - Suite 330, Boston, MA
# * 02111-1307, USA.
# */
from calibre.ebooks.unihandecode.pykakasi.jisyo import jisyo
class K2a (object):
kanwa = None
def __init__(self):
self.kanwa = jisyo()
def isKatakana(self, char):
return ( 0x30a0 < ord(char) and ord(char) < 0x30f7)
def convert(self, text):
Hstr = ""
max_len = -1
r = min(10, len(text)+1)
for x in xrange(r):
if text[:x] in self.kanwa.kanadict:
if max_len < x:
max_len = x
Hstr = self.kanwa.kanadict[text[:x]]
return (Hstr, max_len)

View File

@ -0,0 +1,101 @@
# -*- coding: utf-8 -*-
# kakasi.py
#
# Copyright 2011 Hiroshi Miura <miurahr@linux.com>
#
# Original Copyright:
# * KAKASI (Kanji Kana Simple inversion program)
# * $Id: jj2.c,v 1.7 2001-04-12 05:57:34 rug Exp $
# * Copyright (C) 1992
# * Hironobu Takahashi (takahasi@tiny.or.jp)
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either versions 2, or (at your option)
# * any later version.
# *
# * This program is distributed in the hope that it will be useful
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with KAKASI, see the file COPYING. If not, write to the Free
# * Software Foundation Inc., 59 Temple Place - Suite 330, Boston, MA
# * 02111-1307, USA.
# */
from calibre.ebooks.unihandecode.pykakasi.j2h import J2H
from calibre.ebooks.unihandecode.pykakasi.h2a import H2a
from calibre.ebooks.unihandecode.pykakasi.k2a import K2a
class kakasi(object):
j2h = None
h2a = None
k2a = None
def __init__(self):
self.j2h = J2H()
self.h2a = H2a()
self.k2a = K2a()
def do(self, text):
otext = ''
i = 0
while True:
if i >= len(text):
break
if self.j2h.isKanji(text[i]):
(t, l) = self.j2h.convert(text[i:])
if l <= 0:
otext = otext + text[i]
i = i + 1
continue
i = i + l
m = 0
tmptext = ""
while True:
if m >= len(t):
break
(s, n) = self.h2a.convert(t[m:])
if n <= 0:
break
m = m + n
tmptext = tmptext+s
if i >= len(text):
otext = otext + tmptext.capitalize()
else:
otext = otext + tmptext.capitalize() +' '
elif self.h2a.isHiragana(text[i]):
tmptext = ''
while True:
(t, l) = self.h2a.convert(text[i:])
tmptext = tmptext+t
i = i + l
if i >= len(text):
otext = otext + tmptext
break
elif not self.h2a.isHiragana(text[i]):
otext = otext + tmptext + ' '
break
elif self.k2a.isKatakana(text[i]):
tmptext = ''
while True:
(t, l) = self.k2a.convert(text[i:])
tmptext = tmptext+t
i = i + l
if i >= len(text):
otext = otext + tmptext
break
elif not self.k2a.isKatakana(text[i]):
otext = otext + tmptext + ' '
break
else:
otext = otext + text[i]
i += 1
return otext
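do() walks the string and dispatches runs of kanji, hiragana and katakana to the three converters, capitalizing each kanji word and separating segments with spaces. Hedged usage (the output is approximate and depends on the kanwa dictionary):

from calibre.ebooks.unihandecode.pykakasi.kakasi import kakasi

k = kakasi()
k.do(u'\u6f22\u5b57\u3068\u304b\u306a')  # u'漢字とかな' -> e.g. 'Kanji tokana'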

File diff suppressed because it is too large

View File

@ -0,0 +1,317 @@
;; Kana-Alphabet mapping dictionary
;;
;; To use this mapping table,
;; you should unicode normalize to NFKC form.
;;
;; basic mapping
;;
a ァ
a ア
ba バ
bba ッバ
bbe ッベ
bbi ッビ
bbo ッボ
bbu ッブ
bbya ッビャ
bbyo ッビョ
bbyu ッビュ
be ベ
bi ビ
bo ボ
bu ブ
bya ビャ
byo ビョ
byu ビュ
cha チャ
che チェ
chi チ
cho チョ
chu チュ
da ダ
dda ッダ
dde ッデ
ddo ッド
de デ
di ディ
do ド
e ェ
e エ
e ヱ
fa ファ
fe フェ
ffa ッファ
ffe ッフェ
ffi ッフィ
ffo ッフォ
ffu ッフ
fi フィ
fo フォ
fu フ
ga ガ
ge ゲ
gga ッガ
gge ッゲ
ggi ッギ
ggo ッゴ
ggu ッグ
ggya ッギャ
ggyo ッギョ
ggyu ッギュ
gi ギ
go ゴ
gu グ
gya グャ
gyo ギョ
gyu ギゥ
ha ハ
he ヘ
hha ッハ
hhe ッヘ
hhi ッヒ
hho ッホ
hhya ッヒャ
hhyo ッヒョ
hhyu ッヒュ
hi ヒ
ho ホ
hya ヒャ
hyo ヒョ
hyu ヒュ
i ィ
i イ
i ヰ
ja ジャ
ja ヂャ
ji ジ
ji ヂ
jja ッジャ
jji ッジ
jji ッヂ
jjo ッジョ
jju ッジュ
jjya ッヂャ
jjyo ッヂョ
jjyu ッヂュ
jo ジョ
jo ヂョ
ju ジュ
ju ヂュ
ka カ
ka ヵ
ke ケ
ke ヶ
ki キ
kka ッカ
kke ッケ
kki ッキ
kko ッコ
kku ック
kkya ッキャ
kkyo ッキョ
kkyu ッキュ
ko コ
ku ク
kya キァ
kyo キォ
kyu キゥ
ma マ
me メ
mi ミ
mo モ
mu ム
mya ミャ
myo ミョ
myu ミュ
n ン
n'a ンア
n'e ンエ
n'i ンイ
n'o ンオ
n'u ンウ
na ナ
ne ネ
ni ニ
no ノ
nu ヌ
nya ニャ
nyo ニョ
nyu ニュ
o ォ
o オ
pa パ
pe ペ
pi ピ
po ポ
ppa ッパ
ppe ッペ
ppi ッピ
ppo ッポ
ppu ップ
ppya ッピャ
ppyo ッピョ
ppyu ッピュ
pu プ
pya ピャ
pyo ピョ
pyu ピュ
ra ラ
re レ
ri リ
ro ロ
rra ッラ
rre ッレ
rri ッリ
rro ッロ
rru ッル
rrya ッリャ
rryo ッリョ
rryu ッリュ
ru ル
rya リャ
ryo リョ
ryu リュ
sa サ
se セ
sha シャ
shi シ
sho ショ
shu シュ
so ソ
ssa ッサ
sse ッセ
ssha ッシャ
sshi ッシ
ssho ッショ
sshu ッシュ
sso ッソ
ssu ッス
su ス
ta タ
tcha ッチャ
tchi ッチ
tcho ッチョ
tchu ッチュ
te テ
to ト
tsu ッ
tsu ツ
tta ッタ
tte ッテ
tto ット
ttsu ッツ
u ゥ
u ウ
va ヴァ
ve ヴェ
vi ヴィ
vo ヴォ
vu ヴ
vva ッヴァ
vve ッヴェ
vvi ッヴィ
vvo ッヴォ
vvu ッヴ
wa ヮ
wa ワ
wo ヲ
ya ャ
ya ヤ
yo ョ
yo ヨ
yu ュ
yu ユ
yya ッヤ
yyo ッヨ
yyu ッユ
za ザ
ze ゼ
zo ゾ
zu ズ
zu ヅ
zza ッザ
zzo ッゾ
zzu ッズ
zzu ッヅ
;;
;; extended characters
;;
;;
;; gairai terms
;;
all オール
algrism アルゴリズム
answer アンサー
base ベース
begineer ビギナー
connection コネクション
contents コンテンツ
creator クリエーター
comic コミック
comics コミックス
culture カルチャー
debug デバッグ
debugging デバッギング
design デザイン
digital デジタル
dillenma ジレンマ
directory ディレクトリ
disk ディスク
document ドキュメント
download ダウンロード
electric エレクトリック
facebook フェイスブック
firefox ファイアーフォックス
folder フォルダ
format フォーマット
forum フォーラム
fox フォックス
free フリー
gnome ノーム
gnu グヌー
gozilla ゴジラ
guide ガイド
harvard ハーバード
help ヘルプ
highlight ハイライト
japan ジャパン
journal ジャーナル
library ライブラリ
line ライン
love ラヴ
love ラブ
mail メール
main メイン
mystery ミステリ
mozilla モジラ
network ネットワーク
next ネクスト
new ニュー
news ニュース
native ネイティブ
online オンライン
open オープン
professional プロフェッショナル
profile プロファイル
programmer プログラマ
sample サンプル
series シリーズ
share シェア
social ソーシャル
society ソサエティ
software ソフトウエア
source ソース
street ストリート
system システム
tag タグ
text テキスト
thunderbird サンダーバード
training トレーニング
twitter ツイッター
unicode ユニコード
wall ウオール
wall ウォール
welcome ウェルカム
welcome ウエルカム
wikinomics ウィキノミクス
york ヨーク

File diff suppressed because it is too large

View File

@ -1,12 +1,17 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
__docformat__ = 'restructuredtext en'
'''
Decode unicode text to an ASCII representation of the text. Transliterate
unicode characters to ASCII.
Decode unicode text to an ASCII representation of the text in Chinese.
Transliterate unicode characters to ASCII based on chinese pronounce.
Derived from John Schember's unidecode library, which was created
as part of calibre.
Copyright(c) 2009, John Schember <john@nachtimwald.com>
Based on the ruby unidecode gem (http://rubyforge.org/projects/unidecode/) which
is based on the perl module Text::Unidecode
@ -55,29 +60,20 @@ it under the same terms as Perl itself.
'''
import re
from calibre.ebooks.unidecode.unicodepoints import CODEPOINTS
from calibre.constants import preferred_encoding
from calibre.ebooks.unihandecode.unicodepoints import CODEPOINTS
from calibre.ebooks.unihandecode.zhcodepoints import CODEPOINTS as HANCODES
class Unidecoder(object):
codepoints = {}
def __init__(self):
self.codepoints = CODEPOINTS
self.codepoints.update(HANCODES)
def decode(self, text):
'''
Transliterate the string from unicode characters to ASCII.
'''
# The keys for CODEPOINTS is unicode characters, we want to be sure the
# input text is unicode.
if not isinstance(text, unicode):
try:
text = unicode(text)
except:
try:
text = text.decode(preferred_encoding)
except:
text = text.decode('utf-8', 'replace')
# Replace characters larger than 127 with their ASCII equivalent.
return re.sub('[^\x00-\x7f]', lambda x: self.replace_point(x.group()),
text)
return re.sub('[^\x00-\x7f]',lambda x: self.replace_point(x.group()), text)
def replace_point(self, codepoint):
'''
@ -87,7 +83,7 @@ class Unidecoder(object):
# Split the unicode character xABCD into parts 0xAB and 0xCD.
# 0xAB represents the group within CODEPOINTS to query and 0xCD
# represents the position in the list of characters for the group.
return CODEPOINTS[self.code_group(codepoint)][self.grouped_point(
return self.codepoints[self.code_group(codepoint)][self.grouped_point(
codepoint)]
except:
return '?'
@ -97,12 +93,18 @@ class Unidecoder(object):
Find what group the character is a part of.
'''
# Code groups within CODEPOINTS take the form 'xAB'
return u'x%02x' % (ord(unicode(character)) >> 8)
try:#python2
return 'x%02x' % (ord(unicode(character)) >> 8)
except:
return 'x%02x' % (ord(character) >> 8)
def grouped_point(self, character):
'''
Return the location of the replacement character in the list for the
group the character is a part of.
'''
return ord(unicode(character)) & 255
try:#python2
return ord(unicode(character)) & 255
except:
return ord(character) & 255

File diff suppressed because it is too large

View File

@ -0,0 +1,23 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
__docformat__ = 'restructuredtext en'
'''
Decode unicode text to an ASCII representation of the text in Vietnamese.
'''
from calibre.ebooks.unihandecode.unidecoder import Unidecoder
from calibre.ebooks.unihandecode.vncodepoints import CODEPOINTS as HANCODES
from calibre.ebooks.unihandecode.unicodepoints import CODEPOINTS
class Vndecoder(Unidecoder):
codepoints = {}
def __init__(self):
self.codepoints = CODEPOINTS
self.codepoints.update(HANCODES)

File diff suppressed because it is too large

View File

@ -137,14 +137,18 @@ def _config():
help=_('Automatically download the cover, if available'))
c.add_opt('enforce_cpu_limit', default=True,
help=_('Limit max simultaneous jobs to number of CPUs'))
c.add_opt('tag_browser_hidden_categories', default=set(),
help=_('tag browser categories not to display'))
c.add_opt('gui_layout', choices=['wide', 'narrow'],
help=_('The layout of the user interface'), default='wide')
c.add_opt('show_avg_rating', default=True,
help=_('Show the average rating per item indication in the tag browser'))
c.add_opt('disable_animations', default=False,
help=_('Disable UI animations'))
# This option is no longer used. It remains for compatibility with upgrades
# so the value can be migrated
c.add_opt('tag_browser_hidden_categories', default=set(),
help=_('tag browser categories not to display'))
c.add_opt
return ConfigProxy(c)

View File

@ -204,7 +204,8 @@ class AddAction(InterfaceAction):
]
to_device = self.gui.stack.currentIndex() != 0
if to_device:
filters = [(_('Supported books'), self.gui.device_manager.device.FORMATS)]
fmts = self.gui.device_manager.device.settings().format_map
filters = [(_('Supported books'), fmts)]
books = choose_files(self.gui, 'add books dialog dir', 'Select books',
filters=filters)

View File

@ -271,11 +271,6 @@ class DeleteAction(InterfaceAction):
partial(self.library_ids_deleted, current_row=row))
# Device view is visible.
else:
if not confirm('<p>'+_('The selected books will be '
'<b>permanently deleted</b> '
'from your device. Are you sure?')
+'</p>', 'device_delete_books', self.gui):
return
if self.gui.stack.currentIndex() == 1:
view = self.gui.memory_view
elif self.gui.stack.currentIndex() == 2:
@ -283,8 +278,14 @@ class DeleteAction(InterfaceAction):
else:
view = self.gui.card_b_view
paths = view.model().paths(rows)
ids = view.model().indices(rows)
if not confirm('<p>'+_('The selected books will be '
'<b>permanently deleted</b> '
'from your device. Are you sure?')
+'</p>', 'device_delete_books', self.gui):
return
job = self.gui.remove_paths(paths)
self.delete_memory[job] = (paths, view.model())
view.model().mark_for_deletion(job, rows)
view.model().mark_for_deletion(job, ids, rows_are_ids=True)
self.gui.status_bar.show_message(_('Deleting books from device.'), 1000)

View File

@ -158,6 +158,8 @@ class MultiCompleteComboBox(EnComboBox):
# item that matches case insensitively
c = self.lineEdit().completer()
c.setCaseSensitivity(Qt.CaseSensitive)
self.dummy_model = CompleteModel(self)
c.setModel(self.dummy_model)
def update_items_cache(self, complete_items):
self.lineEdit().update_items_cache(complete_items)

View File

@ -551,7 +551,11 @@ class BulkBool(BulkBase, Bool):
def setup_ui(self, parent):
self.make_widgets(parent, QComboBox)
items = [_('Yes'), _('No'), _('Undefined')]
items = [_('Yes'), _('No')]
if tweaks['bool_custom_columns_are_tristate'] == 'no':
items.append('')
else:
items.append(_('Undefined'))
icons = [I('ok.png'), I('list_remove.png'), I('blank.png')]
self.main_widget.blockSignals(True)
for icon, text in zip(icons, items):
@ -560,7 +564,10 @@ class BulkBool(BulkBase, Bool):
def getter(self):
val = self.main_widget.currentIndex()
return {2: None, 1: False, 0: True}[val]
if tweaks['bool_custom_columns_are_tristate'] == 'no':
return {2: False, 1: False, 0: True}[val]
else:
return {2: None, 1: False, 0: True}[val]
def setter(self, val):
val = {None: 2, False: 1, True: 0}[val]
@ -576,6 +583,14 @@ class BulkBool(BulkBase, Bool):
val = False
self.db.set_custom_bulk(book_ids, val, num=self.col_id, notify=notify)
def a_c_checkbox_changed(self):
if not self.ignore_change_signals:
if tweaks['bool_custom_columns_are_tristate'] == 'no' and \
self.main_widget.currentIndex() == 2:
self.a_c_checkbox.setChecked(False)
else:
self.a_c_checkbox.setChecked(True)
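When the bool_custom_columns_are_tristate tweak is 'no', the third combo entry is blank and maps to False rather than None, so bulk edits can never write an 'undefined' value. The effective mapping, restated:

def bulk_bool_value(index, tristate):
    # index is the combo position: 0 = Yes, 1 = No, 2 = third entry
    if tristate:
        return {0: True, 1: False, 2: None}[index]
    return {0: True, 1: False, 2: False}[index]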
class BulkInt(BulkBase):
def setup_ui(self, parent):

View File

@ -1292,6 +1292,16 @@ class DeviceMixin(object): # {{{
to both speed up matching and to count matches.
'''
if not self.device_manager.is_device_connected:
return False
# It might be possible to get here without having initialized the
# library view. In this case, simply give up
try:
db = self.library_view.model().db
except:
return False
string_pat = re.compile('(?u)\W|[_]')
def clean_string(x):
x = x.lower() if x else ''
@ -1299,26 +1309,19 @@ class DeviceMixin(object): # {{{
update_metadata = prefs['manage_device_metadata'] == 'on_connect'
get_covers = False
if update_metadata and self.device_manager.is_device_connected:
if self.device_manager.device.WANTS_UPDATED_THUMBNAILS:
get_covers = True
# Force a reset if the caches are not initialized
if reset or not hasattr(self, 'db_book_title_cache'):
# Build a cache (map) of the library, so the search isn't On**2
db_book_title_cache = {}
db_book_uuid_cache = {}
# It might be possible to get here without having initialized the
# library view. In this case, simply give up
try:
db = self.library_view.model().db
except:
return False
get_covers = False
if update_metadata and self.device_manager.is_device_connected:
if self.device_manager.device.WANTS_UPDATED_THUMBNAILS:
get_covers = True
for id in db.data.iterallids():
mi = db.get_metadata(id, index_is_id=True, get_cover=get_covers)
title = clean_string(mi.title)
for id_ in db.data.iterallids():
title = clean_string(db.title(id_, index_is_id=True))
if title not in db_book_title_cache:
db_book_title_cache[title] = \
{'authors':{}, 'author_sort':{}, 'db_ids':{}}
@ -1326,14 +1329,14 @@ class DeviceMixin(object): # {{{
# and author, then remember the last one. That is OK, because as
# we can't tell the difference between the books, one is as good
# as another.
if mi.authors:
authors = clean_string(authors_to_string(mi.authors))
db_book_title_cache[title]['authors'][authors] = mi
if mi.author_sort:
aus = clean_string(mi.author_sort)
db_book_title_cache[title]['author_sort'][aus] = mi
db_book_title_cache[title]['db_ids'][mi.application_id] = mi
db_book_uuid_cache[mi.uuid] = mi
authors = clean_string(db.authors(id_, index_is_id=True))
if authors:
db_book_title_cache[title]['authors'][authors] = id_
if db.author_sort(id_, index_is_id=True):
aus = clean_string(db.author_sort(id_, index_is_id=True))
db_book_title_cache[title]['author_sort'][aus] = id_
db_book_title_cache[title]['db_ids'][id_] = id_
db_book_uuid_cache[db.uuid(id_, index_is_id=True)] = id_
self.db_book_title_cache = db_book_title_cache
self.db_book_uuid_cache = db_book_uuid_cache
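The rewritten loop caches only cleaned strings and database ids instead of full Metadata objects, deferring the expensive db.get_metadata() call until a book actually matches. Condensed, the cache builder looks roughly like this (field accessors as used above):

def build_caches(db, clean_string):
    title_cache, uuid_cache = {}, {}
    for id_ in db.data.iterallids():
        title = clean_string(db.title(id_, index_is_id=True))
        entry = title_cache.setdefault(title,
                {'authors': {}, 'author_sort': {}, 'db_ids': {}})
        authors = clean_string(db.authors(id_, index_is_id=True))
        if authors:
            entry['authors'][authors] = id_
        if db.author_sort(id_, index_is_id=True):
            aus = clean_string(db.author_sort(id_, index_is_id=True))
            entry['author_sort'][aus] = id_
        entry['db_ids'][id_] = id_
        uuid_cache[db.uuid(id_, index_is_id=True)] = id_
    return title_cache, uuid_cache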
@ -1341,19 +1344,22 @@ class DeviceMixin(object): # {{{
# in_library field. If the UUID matches a book in the library, then
# do not consider that book for other matching. In all cases set
# the application_id to the db_id of the matching book. This value
# will be used by books_on_device to indicate matches.
# will be used by books_on_device to indicate matches. While we are
# going by, update the metadata for a book if automatic management is on
for booklist in booklists:
for book in booklist:
book.in_library = None
if getattr(book, 'uuid', None) in self.db_book_uuid_cache:
id_ = db_book_uuid_cache[book.uuid]
if update_metadata:
book.smart_update(self.db_book_uuid_cache[book.uuid],
book.smart_update(db.get_metadata(id_,
index_is_id=True,
get_cover=get_covers),
replace_metadata=True)
book.in_library = 'UUID'
# ensure that the correct application_id is set
book.application_id = \
self.db_book_uuid_cache[book.uuid].application_id
book.application_id = id_
continue
# No UUID exact match. Try metadata matching.
book_title = clean_string(book.title)
@ -1363,21 +1369,25 @@ class DeviceMixin(object): # {{{
# will match if any of the db_id, author, or author_sort
# also match.
if getattr(book, 'application_id', None) in d['db_ids']:
# app_id already matches a db_id. No need to set it.
if update_metadata:
book.smart_update(d['db_ids'][book.application_id],
id_ = getattr(book, 'application_id', None)
book.smart_update(db.get_metadata(id_,
index_is_id=True,
get_cover=get_covers),
replace_metadata=True)
book.in_library = 'APP_ID'
# app_id already matches a db_id. No need to set it.
continue
# Sonys know their db_id independent of the application_id
# in the metadata cache. Check that as well.
if getattr(book, 'db_id', None) in d['db_ids']:
if update_metadata:
book.smart_update(d['db_ids'][book.db_id],
book.smart_update(db.get_metadata(book.db_id,
index_is_id=True,
get_cover=get_covers),
replace_metadata=True)
book.in_library = 'DB_ID'
book.application_id = \
d['db_ids'][book.db_id].application_id
book.application_id = book.db_id
continue
# We now know that the application_id is not right. Set it
# to None to prevent book_on_device from accidentally
@ -1389,19 +1399,23 @@ class DeviceMixin(object): # {{{
# either can appear as the author
book_authors = clean_string(authors_to_string(book.authors))
if book_authors in d['authors']:
id_ = d['authors'][book_authors]
if update_metadata:
book.smart_update(d['authors'][book_authors],
replace_metadata=True)
book.smart_update(db.get_metadata(id_,
index_is_id=True,
get_cover=get_covers),
replace_metadata=True)
book.in_library = 'AUTHOR'
book.application_id = \
d['authors'][book_authors].application_id
book.application_id = id_
elif book_authors in d['author_sort']:
id_ = d['author_sort'][book_authors]
if update_metadata:
book.smart_update(d['author_sort'][book_authors],
book.smart_update(db.get_metadata(id_,
index_is_id=True,
get_cover=get_covers),
replace_metadata=True)
book.in_library = 'AUTH_SORT'
book.application_id = \
d['author_sort'][book_authors].application_id
book.application_id = id_
else:
# Book definitely not matched. Clear its application ID
book.application_id = None

View File

@ -9,15 +9,16 @@ import textwrap
from PyQt4.Qt import QWidget, QListWidgetItem, Qt, QVariant, SIGNAL, \
QLabel, QLineEdit, QCheckBox
from calibre.gui2 import error_dialog
from calibre.gui2 import error_dialog, question_dialog
from calibre.gui2.device_drivers.configwidget_ui import Ui_ConfigWidget
from calibre.utils.formatter import validation_formatter
from calibre.ebooks import BOOK_EXTENSIONS
class ConfigWidget(QWidget, Ui_ConfigWidget):
def __init__(self, settings, all_formats, supports_subdirs,
must_read_metadata, supports_use_author_sort,
extra_customization_message):
extra_customization_message, device):
QWidget.__init__(self)
Ui_ConfigWidget.__init__(self)
@ -25,9 +26,15 @@ class ConfigWidget(QWidget, Ui_ConfigWidget):
self.settings = settings
all_formats = set(all_formats)
self.calibre_known_formats = device.FORMATS
self.device_name = device.get_gui_name()
if device.USER_CAN_ADD_NEW_FORMATS:
all_formats = set(all_formats) | set(BOOK_EXTENSIONS)
format_map = settings.format_map
disabled_formats = list(set(all_formats).difference(format_map))
for format in format_map + disabled_formats:
for format in format_map + list(sorted(disabled_formats)):
item = QListWidgetItem(format, self.columns)
item.setData(Qt.UserRole, QVariant(format))
item.setFlags(Qt.ItemIsEnabled|Qt.ItemIsUserCheckable|Qt.ItemIsSelectable)
@ -110,6 +117,18 @@ class ConfigWidget(QWidget, Ui_ConfigWidget):
return self.opt_use_author_sort.isChecked()
def validate(self):
formats = set(self.format_map())
extra = formats - set(self.calibre_known_formats)
if extra:
fmts = sorted([x.upper() for x in extra])
if not question_dialog(self, _('Unknown formats'),
_('You have enabled the <b>{0}</b> formats for'
' your {1}. The {1} may not support them.'
' If you send these formats to your {1} they '
'may not work. Are you sure?').format(
(', '.join(fmts)), self.device_name)):
return False
tmpl = unicode(self.opt_save_template.text())
try:
validation_formatter.validate(tmpl)

View File

@ -213,6 +213,8 @@ class CheckLibraryDialog(QDialog):
self.log = QTreeWidget(self)
self.log.itemChanged.connect(self.item_changed)
self.log.itemExpanded.connect(self.item_expanded_or_collapsed)
self.log.itemCollapsed.connect(self.item_expanded_or_collapsed)
self._layout.addWidget(self.log)
self.check_button = QPushButton(_('&Run the check again'))
@ -333,11 +335,15 @@ class CheckLibraryDialog(QDialog):
for check in CHECKS:
builder(t, checker, check)
t.setColumnWidth(0, 200)
t.setColumnWidth(1, 400)
t.resizeColumnToContents(0)
t.resizeColumnToContents(1)
self.delete_button.setEnabled(False)
self.text_results = '\n'.join(plaintext)
def item_expanded_or_collapsed(self, item):
self.log.resizeColumnToContents(0)
self.log.resizeColumnToContents(1)
def item_changed(self, item, column):
self.fix_button.setEnabled(False)
for it in self.top_level_items.values():

View File

@ -912,6 +912,7 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
def series_changed(self, *args):
self.write_series = True
self.autonumber_series.setEnabled(True)
def s_r_remove_query(self, *args):
if self.query_field.currentIndex() == 0:

View File

@ -303,6 +303,9 @@
<layout class="QHBoxLayout" name="HLayout_3">
<item>
<widget class="QCheckBox" name="autonumber_series">
<property name="enabled">
<bool>false</bool>
</property>
<property name="toolTip">
<string>If not checked, the series number for the books will be set to 1.
If checked, selected books will be automatically numbered, in the order
@ -1006,8 +1009,8 @@ not multiple and the destination field is multiple</string>
<rect>
<x>0</x>
<y>0</y>
<width>938</width>
<height>268</height>
<width>197</width>
<height>60</height>
</rect>
</property>
<layout class="QGridLayout" name="testgrid">

View File

@ -99,8 +99,8 @@ class TagListEditor(QDialog, Ui_TagListEditor):
return
self.available_tags.editItem(item)
def delete_tags(self, item=None):
deletes = self.available_tags.selectedItems() if item is None else [item]
def delete_tags(self):
deletes = self.available_tags.selectedItems()
if not deletes:
error_dialog(self, _('No items selected'),
_('You must select at least one item from the list.')).exec_()

View File

@ -120,11 +120,10 @@ class BooksModel(QAbstractTableModel): # {{{
def set_device_connected(self, is_connected):
self.device_connected = is_connected
self.refresh_ondevice()
def refresh_ondevice(self):
self.db.refresh_ondevice()
self.refresh() # does a resort()
self.resort()
self.research()
def set_book_on_device_func(self, func):
@ -685,7 +684,7 @@ class BooksModel(QAbstractTableModel): # {{{
self.dc[col] = functools.partial(bool_type, idx=idx)
self.dc_decorator[col] = functools.partial(
bool_type_decorator, idx=idx,
bool_cols_are_tristate=tweaks['bool_custom_columns_are_tristate'] == 'yes')
bool_cols_are_tristate=tweaks['bool_custom_columns_are_tristate'] != 'no')
elif datatype == 'rating':
self.dc[col] = functools.partial(rating_type, idx=idx)
elif datatype == 'series':
@ -826,7 +825,7 @@ class BooksModel(QAbstractTableModel): # {{{
return False
val = int(value.toInt()[0]) if column == 'rating' else \
value.toDate() if column in ('timestamp', 'pubdate') else \
unicode(value.toString())
unicode(value.toString()).strip()
id = self.db.id(row)
books_to_refresh = set([id])
if column == 'rating':

View File

@ -26,12 +26,19 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
r('limit_search_columns_to', prefs, setting=CommaSeparatedList)
fl = gui.library_view.model().db.field_metadata.get_search_terms()
self.opt_limit_search_columns_to.update_items_cache(fl)
self.clear_history_button.clicked.connect(self.clear_histories)
def refresh_gui(self, gui):
gui.search.search_as_you_type(config['search_as_you_type'])
gui.library_view.model().set_highlight_only(config['highlight_search_matches'])
gui.search.do_search()
def clear_histories(self, *args):
for key, val in config.defaults.iteritems():
if key.endswith('_search_history') and isinstance(val, list):
config[key] = []
self.gui.search.clear_history()
if __name__ == '__main__':
app = QApplication([])
test_widget('Interface', 'Search')

View File

@ -77,7 +77,7 @@
</layout>
</widget>
</item>
<item row="3" column="0">
<item row="4" column="0">
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
@ -90,13 +90,23 @@
</property>
</spacer>
</item>
<item row="3" column="0">
<widget class="QPushButton" name="clear_history_button">
<property name="toolTip">
<string>Clear search histories from all over calibre. Including the book list, e-book viewer, fetch news dialog, etc.</string>
</property>
<property name="text">
<string>Clear search &amp;histories</string>
</property>
</widget>
</item>
</layout>
</widget>
<customwidgets>
<customwidget>
<class>MultiCompleteLineEdit</class>
<extends>QLineEdit</extends>
<header>calibre/gui2.complete.h</header>
<header>calibre/gui2/complete.h</header>
</customwidget>
</customwidgets>
<resources/>

View File

@ -114,6 +114,9 @@ class SearchBox2(QComboBox): # {{{
def text(self):
return self.currentText()
def clear_history(self, *args):
QComboBox.clear(self)
def clear(self, emit_search=True):
self.normalize_state()
self.setEditText('')

View File

@ -116,7 +116,14 @@ class TagsView(QTreeView): # {{{
self.set_new_model(self._model.get_filter_categories_by())
def set_database(self, db, tag_match, sort_by):
self.hidden_categories = config['tag_browser_hidden_categories']
self.hidden_categories = db.prefs.get('tag_browser_hidden_categories', None)
# migrate from config to db prefs
if self.hidden_categories is None:
self.hidden_categories = config['tag_browser_hidden_categories']
db.prefs.set('tag_browser_hidden_categories', list(self.hidden_categories))
else:
self.hidden_categories = set(self.hidden_categories)
old = getattr(self, '_model', None)
if old is not None:
old.break_cycles()
@@ -234,7 +241,7 @@ class TagsView(QTreeView): # {{{
                 gprefs['tags_browser_partition_method'] = category
             elif action == 'defaults':
                 self.hidden_categories.clear()
-                config.set('tag_browser_hidden_categories', self.hidden_categories)
+                self.db.prefs.set('tag_browser_hidden_categories', list(self.hidden_categories))
             self.set_new_model()
         except:
             return
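
Both TagsView hunks move the hidden-categories setting from the global config into per-library db.prefs, with a one-time migration on first access. Note the list()/set() round-trip: the pref store persists JSON-friendly lists, while the in-memory value is a set for fast membership tests. A self-contained sketch of the idiom, using plain dicts in place of the real stores (hypothetical data):

    old_config = {'tag_browser_hidden_categories': ['news', 'rating']}
    prefs = {}  # stands in for the per-library db.prefs store

    hidden = prefs.get('tag_browser_hidden_categories', None)
    if hidden is None:
        # first run after the upgrade: seed the new store from the old config
        hidden = set(old_config['tag_browser_hidden_categories'])
        prefs['tag_browser_hidden_categories'] = list(hidden)  # JSON-serializable
    else:
        hidden = set(hidden)  # persisted as a list, used in memory as a set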

View File

@@ -17,16 +17,16 @@ from calibre.gui2.viewer.bookmarkmanager import BookmarkManager
 from calibre.gui2.widgets import ProgressIndicator
 from calibre.gui2.main_window import MainWindow
 from calibre.gui2 import Application, ORG_NAME, APP_UID, choose_files, \
-    info_dialog, error_dialog, open_url, available_height
+    info_dialog, error_dialog, open_url, available_height, gprefs
 from calibre.ebooks.oeb.iterator import EbookIterator
 from calibre.ebooks import DRMError
-from calibre.constants import islinux, isfreebsd, isosx
+from calibre.constants import islinux, isfreebsd, isosx, filesystem_encoding
 from calibre.utils.config import Config, StringConfig, dynamic
 from calibre.gui2.search_box import SearchBox2
 from calibre.ebooks.metadata import MetaInformation
 from calibre.customize.ui import available_input_formats
 from calibre.gui2.viewer.dictionary import Lookup
-from calibre import as_unicode
+from calibre import as_unicode, force_unicode, isbytestring
 
 
 class TOCItem(QStandardItem):
@@ -160,6 +160,12 @@ class HelpfulLineEdit(QLineEdit):
         self.setPalette(self.gray)
         self.setText(self.HELP_TEXT)
 
+class RecentAction(QAction):
+
+    def __init__(self, path, parent):
+        self.path = path
+        QAction.__init__(self, os.path.basename(path), parent)
+
 class EbookViewer(MainWindow, Ui_EbookViewer):
 
     STATE_VERSION = 1
@@ -284,8 +290,26 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
         ca = self.view.copy_action
         ca.setShortcut(QKeySequence.Copy)
         self.addAction(ca)
+        self.open_history_menu = QMenu()
+        self.build_recent_menu()
+        self.action_open_ebook.setMenu(self.open_history_menu)
+        self.open_history_menu.triggered[QAction].connect(self.open_recent)
+        w = self.tool_bar.widgetForAction(self.action_open_ebook)
+        w.setPopupMode(QToolButton.MenuButtonPopup)
+
         self.restore_state()
 
+    def build_recent_menu(self):
+        m = self.open_history_menu
+        m.clear()
+        count = 0
+        for path in gprefs.get('viewer_open_history', []):
+            if count > 9:
+                break
+            if os.path.exists(path):
+                m.addAction(RecentAction(path, m))
+                count += 1
+
     def closeEvent(self, e):
         self.save_state()
         return MainWindow.closeEvent(self, e)
@@ -425,6 +449,9 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
         if files:
             self.load_ebook(files[0])
 
+    def open_recent(self, action):
+        self.load_ebook(action.path)
+
     def font_size_larger(self, checked):
         frac = self.view.magnify_fonts()
         self.action_font_size_larger.setEnabled(self.view.multiplier() < 3)
@@ -647,6 +674,17 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
             self.action_table_of_contents.setChecked(True)
         else:
             self.action_table_of_contents.setChecked(False)
+        if isbytestring(pathtoebook):
+            pathtoebook = force_unicode(pathtoebook, filesystem_encoding)
+        vh = gprefs.get('viewer_open_history', [])
+        try:
+            vh.remove(pathtoebook)
+        except:
+            pass
+        vh.insert(0, pathtoebook)
+        gprefs.set('viewer_open_history', vh[:50])
+        self.build_recent_menu()
+
         self.action_table_of_contents.setDisabled(not self.iterator.toc)
         self.current_book_has_toc = bool(self.iterator.toc)
         self.current_title = title
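
Taken together, the viewer hunks add a recent-files menu: load_ebook() normalizes the path to unicode, maintains a capped most-recently-used list in gprefs, and build_recent_menu() shows at most the ten most recent paths that still exist on disk. A standalone sketch of the MRU update, assuming prefs behaves like a persistent dict (the helper name is hypothetical):

    def remember(path, prefs, keep=50):
        history = prefs.get('viewer_open_history', [])
        try:
            history.remove(path)  # de-duplicate: an existing entry moves to the front
        except ValueError:
            pass
        history.insert(0, path)
        prefs['viewer_open_history'] = history[:keep]  # cap what is persisted

    prefs = {}
    remember(u'/tmp/a.epub', prefs)
    remember(u'/tmp/b.epub', prefs)
    remember(u'/tmp/a.epub', prefs)
    print prefs['viewer_open_history']  # [u'/tmp/a.epub', u'/tmp/b.epub']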

View File

@@ -528,7 +528,7 @@ class ResultCache(SearchQueryParser): # {{{
                 location[i] = db_col[loc]
 
         # get the tweak here so that the string lookup and compare aren't in the loop
-        bools_are_tristate = tweaks['bool_custom_columns_are_tristate'] == 'yes'
+        bools_are_tristate = tweaks['bool_custom_columns_are_tristate'] != 'no'
 
         for loc in location: # location is now an array of field indices
             if loc == db_col['authors']:
@@ -812,7 +812,10 @@ class SortKeyGenerator(object):
                     val = self.string_sort_key(val)
 
                 elif dt == 'bool':
-                    val = {True: 1, False: 2, None: 3}.get(val, 3)
+                    if tweaks['bool_custom_columns_are_tristate'] == 'no':
+                        val = {True: 1, False: 2, None: 2}.get(val, 2)
+                    else:
+                        val = {True: 1, False: 2, None: 3}.get(val, 3)
 
                 yield val
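
With tristate disabled, the sort key for an unset bool (None) collapses onto the key for False, so "unset" and "no" sort together instead of "unset" always sorting last. A small standalone illustration of the two mappings:

    tristate = {True: 1, False: 2, None: 3}
    binary = {True: 1, False: 2, None: 2}

    vals = [None, True, False]
    print sorted(vals, key=tristate.get)  # [True, False, None] - unset sorts last
    print sorted(vals, key=binary.get)    # [True, None, False] - unset ties with False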

View File

@@ -12,6 +12,8 @@ def clean_ascii_chars(txt, charlist=None):
     Remove ASCII control chars: 0 to 8 and 11, 12, 14-31 by default
     This is all control chars except \\t,\\n and \\r
     '''
+    if not txt:
+        return ''
     global _ascii_pat
     if _ascii_pat is None:
         chars = list(range(8)) + [0x0B, 0x0C] + list(range(0x0E, 0x1F))
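
The added guard short-circuits empty input: passing None previously fell through to the regex substitution and raised a TypeError. A standalone sketch of the guarded behaviour (the character class here approximates the pattern built above; it is not the exact calibre pattern):

    import re
    pat = re.compile(u'[\x00-\x08\x0b\x0c\x0e-\x1f]')

    def clean(txt):
        if not txt:
            return ''              # None and '' return early
        return pat.sub('', txt)

    print repr(clean(None))          # '' rather than a TypeError
    print repr(clean(u'a\x00b\tc'))  # u'ab\tc' - tab survives, NUL is stripped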

View File

@@ -6,12 +6,12 @@ meaning as possible.
 import os
 from math import ceil
 
-from calibre.ebooks.unidecode.unidecoder import Unidecoder
 from calibre import sanitize_file_name
 from calibre.constants import preferred_encoding, iswindows
+from calibre.utils.localization import get_udc
 
-udc = Unidecoder()
 def ascii_text(orig):
+    udc = get_udc()
     try:
         ascii = udc.decode(orig)
     except:
View File

@@ -169,3 +169,13 @@ def set_qt_translator(translator):
             return translator.load(p)
     return False
 
+
+_udc = None
+
+def get_udc():
+    global _udc
+    if _udc is None:
+        from calibre.ebooks.unihandecode import Unihandecoder
+        _udc = Unihandecoder(lang=get_lang())
+    return _udc
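
These last two hunks replace the module-level Unidecoder instance in filenames.py with a lazily built, language-aware Unihandecoder shared through get_udc(): the import happens inside the function, so the transliteration tables load on first use rather than at program startup, and every caller shares one instance. The same lazy-singleton idiom in a self-contained generic sketch (the class here is a hypothetical stand-in; calibre additionally defers the import of Unihandecoder to the first call):

    _instance = None

    class HeavyObject(object):      # stand-in for an expensive-to-build object
        def __init__(self):
            print 'loading large tables...'

    def get_instance():
        global _instance
        if _instance is None:
            _instance = HeavyObject()  # built once, on first call
        return _instance

    print get_instance() is get_instance()  # True: constructed only once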