merge from trunk

This commit is contained in:
ldolse 2011-02-16 09:37:24 +08:00
commit 3d20aadbd3
75 changed files with 148407 additions and 4075 deletions

View File

@ -193,6 +193,33 @@ License: GPL-3
The full text of the GPL is distributed as in The full text of the GPL is distributed as in
/usr/share/common-licenses/GPL-3 on Debian systems. /usr/share/common-licenses/GPL-3 on Debian systems.
Files: src/calibre/ebooks/unihandecode/pykakasi/*
Copyright: 2011, Hiroshi Miura <miurahr@linux.com>
Copyright: 1992, Hironobu Takahashi
License: GPL-2+
The full text of the GPL is distributed as in
/usr/share/common-licenses/GPL on Debian systems.
Files: resources/kanwadict2.db
Files: resources/itaijidict2.pickle
Copyright: 2011, Hiroshi Miura <miurahr@linux.com>
Copyright: 1992 1993 1994, Hironobu Takahashi (takahasi@tiny.or.jp),
Copyright: 1992 1993 1994, Masahiko Sato (masahiko@sato.riec.tohoku.ac.jp),
Copyright: 1992 1993 1994, Yukiyoshi Kameyama, Miki Inooka, Akihiko Sasaki, Dai Ando, Junichi Okukawa,
Copyright: 1992 1993 1994, Katsushi Sato and Nobuhiro Yamagishi
License: GPL-2+
The full text of the GPL is distributed as in
/usr/share/common-licenses/GPL on Debian systems.
Files: src/calibre/ebooks/unihandecode/*
Copyright: 2010-2011, Hiroshi Miura <miurahr@linux.com>
Copyright: 2009, John Schember
Copyright: 2007, Russell Norris
Copyright: 2001, Sean M. Burke
License: GPL-3, Perl
The full text of the GPL is distributed as in
/usr/share/common-licenses/GPL-3 on Debian systems.
Files: src/encutils/__init__.py Files: src/encutils/__init__.py
Copyright: 2005-2008: Christof Hoeke Copyright: 2005-2008: Christof Hoeke
License: LGPL-3+, CC-BY-3.0 License: LGPL-3+, CC-BY-3.0

View File

@ -13,12 +13,12 @@
id="Layer_1" id="Layer_1"
x="0px" x="0px"
y="0px" y="0px"
width="134.77701" width="200"
height="199.99901" height="200"
viewBox="0 0 134.777 199.999" viewBox="0 0 199.99999 199.99999"
enable-background="new 0 0 595.28 841.89" enable-background="new 0 0 595.28 841.89"
xml:space="preserve" xml:space="preserve"
inkscape:version="0.47 r22583" inkscape:version="0.48.0 r9654"
sodipodi:docname="news.svg"><metadata sodipodi:docname="news.svg"><metadata
id="metadata26"><rdf:RDF><cc:Work id="metadata26"><rdf:RDF><cc:Work
rdf:about=""><dc:format>image/svg+xml</dc:format><dc:type rdf:about=""><dc:format>image/svg+xml</dc:format><dc:type
@ -38,22 +38,22 @@
guidetolerance="10" guidetolerance="10"
inkscape:pageopacity="0" inkscape:pageopacity="0"
inkscape:pageshadow="2" inkscape:pageshadow="2"
inkscape:window-width="640" inkscape:window-width="1680"
inkscape:window-height="489" inkscape:window-height="997"
id="namedview22" id="namedview22"
showgrid="false" showgrid="false"
inkscape:zoom="0.28032165" inkscape:zoom="0.28032165"
inkscape:cx="67.389001" inkscape:cx="67.389001"
inkscape:cy="99.722002" inkscape:cy="99.722002"
inkscape:window-x="0" inkscape:window-x="-4"
inkscape:window-y="41" inkscape:window-y="30"
inkscape:window-maximized="0" inkscape:window-maximized="1"
inkscape:current-layer="Layer_1" /> inkscape:current-layer="Layer_1" />
<g <g
id="g3" id="g3"
transform="translate(-230.25101,-320.668)"> transform="translate(-194.57771,-320.66701)">
<polygon <polygon
points="360.241,366.109 345.29,359.678 345.29,343.405 329.945,343.405 324.265,329.15 309.147,335.175 297.64,323.667 286.79,334.517 272.693,328.454 266.263,343.405 249.988,343.405 249.988,358.749 235.734,364.429 241.759,379.548 230.251,391.056 241.101,401.906 235.039,416.002 249.988,422.432 249.988,438.706 265.333,438.706 271.013,452.961 277.817,450.25 277.817,475.111 252.085,475.111 297.64,520.667 343.193,475.111 317.463,475.111 317.463,451.453 322.585,453.656 329.016,438.706 345.29,438.706 345.29,423.362 359.546,417.682 353.521,402.563 365.028,391.056 354.178,380.205 " points="286.79,334.517 272.693,328.454 266.263,343.405 249.988,343.405 249.988,358.749 235.734,364.429 241.759,379.548 230.251,391.056 241.101,401.906 235.039,416.002 249.988,422.432 249.988,438.706 265.333,438.706 271.013,452.961 277.817,450.25 277.817,475.111 252.085,475.111 297.64,520.667 343.193,475.111 317.463,475.111 317.463,451.453 322.585,453.656 329.016,438.706 345.29,438.706 345.29,423.362 359.546,417.682 353.521,402.563 365.028,391.056 354.178,380.205 360.241,366.109 345.29,359.678 345.29,343.405 329.945,343.405 324.265,329.15 309.147,335.175 297.64,323.667 "
id="polygon5" id="polygon5"
style="fill:#ffffff" /> style="fill:#ffffff" />
<linearGradient <linearGradient
@ -73,7 +73,7 @@
id="stop10" /> id="stop10" />
</linearGradient> </linearGradient>
<polygon <polygon
points="360.241,363.11 345.29,356.679 345.29,340.406 329.945,340.406 324.265,326.151 309.147,332.176 297.64,320.668 286.79,331.518 272.693,325.455 266.263,340.406 249.988,340.406 249.988,355.75 235.734,361.43 241.759,376.549 230.251,388.057 241.101,398.907 235.039,413.003 249.988,419.433 249.988,435.707 265.333,435.707 271.013,449.962 277.817,447.251 277.817,472.112 252.085,472.112 297.64,517.668 343.193,472.112 317.463,472.112 317.463,448.454 322.585,450.657 329.016,435.707 345.29,435.707 345.29,420.363 359.546,414.683 353.521,399.564 365.028,388.057 354.178,377.206 " points="286.79,331.518 272.693,325.455 266.263,340.406 249.988,340.406 249.988,355.75 235.734,361.43 241.759,376.549 230.251,388.057 241.101,398.907 235.039,413.003 249.988,419.433 249.988,435.707 265.333,435.707 271.013,449.962 277.817,447.251 277.817,472.112 252.085,472.112 297.64,517.668 343.193,472.112 317.463,472.112 317.463,448.454 322.585,450.657 329.016,435.707 345.29,435.707 345.29,420.363 359.546,414.683 353.521,399.564 365.028,388.057 354.178,377.206 360.241,363.11 345.29,356.679 345.29,340.406 329.945,340.406 324.265,326.151 309.147,332.176 297.64,320.668 "
id="polygon12" id="polygon12"
style="fill:url(#SVGID_1_)" /> style="fill:url(#SVGID_1_)" />
<g <g
@ -81,14 +81,16 @@
<path <path
d="m 273.311,419.168 v -56.752 h 17.935 l 9.01,17.43 c 3.115,5.641 6.399,13.22 8.926,19.873 h 0.252 c -0.842,-7.494 -1.178,-15.41 -1.178,-23.83 v -13.472 h 13.893 v 56.752 H 306.15 l -9.684,-18.861 c -3.116,-5.978 -6.82,-13.641 -9.515,-20.461 h -0.336 c 0.42,7.663 0.589,16.167 0.589,25.345 v 13.978 h -13.893 z" d="m 273.311,419.168 v -56.752 h 17.935 l 9.01,17.43 c 3.115,5.641 6.399,13.22 8.926,19.873 h 0.252 c -0.842,-7.494 -1.178,-15.41 -1.178,-23.83 v -13.472 h 13.893 v 56.752 H 306.15 l -9.684,-18.861 c -3.116,-5.978 -6.82,-13.641 -9.515,-20.461 h -0.336 c 0.42,7.663 0.589,16.167 0.589,25.345 v 13.978 h -13.893 z"
id="path16" id="path16"
style="fill:#993720" /> style="fill:#993720"
inkscape:connector-curvature="0" />
</g> </g>
<g <g
id="g18"> id="g18">
<path <path
d="m 273.311,416.873 v -56.752 h 17.935 l 9.01,17.43 c 3.115,5.641 6.399,13.22 8.926,19.873 h 0.252 c -0.842,-7.494 -1.178,-15.41 -1.178,-23.83 v -13.472 h 13.893 v 56.752 H 306.15 l -9.684,-18.861 c -3.116,-5.978 -6.82,-13.641 -9.515,-20.461 h -0.336 c 0.42,7.663 0.589,16.167 0.589,25.345 v 13.978 h -13.893 z" d="m 273.311,416.873 v -56.752 h 17.935 l 9.01,17.43 c 3.115,5.641 6.399,13.22 8.926,19.873 h 0.252 c -0.842,-7.494 -1.178,-15.41 -1.178,-23.83 v -13.472 h 13.893 v 56.752 H 306.15 l -9.684,-18.861 c -3.116,-5.978 -6.82,-13.641 -9.515,-20.461 h -0.336 c 0.42,7.663 0.589,16.167 0.589,25.345 v 13.978 h -13.893 z"
id="path20" id="path20"
style="fill:#f0efef" /> style="fill:#f0efef"
inkscape:connector-curvature="0" />
</g> </g>
</g> </g>
</svg> </svg>


View File

@ -105,7 +105,7 @@ bool_custom_columns_are_tristate = 'yes'
# title within authors. # title within authors.
sort_columns_at_startup = None sort_columns_at_startup = None
#; Control how dates are displayed #: Control how dates are displayed
# Format to be used for publication date and the timestamp (date). # Format to be used for publication date and the timestamp (date).
# A string controlling how the publication date is displayed in the GUI # A string controlling how the publication date is displayed in the GUI
# d the day as number without a leading zero (1 to 31) # d the day as number without a leading zero (1 to 31)
@ -245,19 +245,6 @@ sony_collection_name_template='{value}{category:| (|)}'
sony_collection_sorting_rules = [] sony_collection_sorting_rules = []
#: Create search terms to apply a query across several built-in search terms.
# Syntax: {'new term':['existing term 1', 'term 2', ...], 'new':['old'...] ...}
# Example: create the term 'myseries' that when used as myseries:foo would
# search all of the search categories 'series', '#myseries', and '#myseries2':
# grouped_search_terms={'myseries':['series','#myseries', '#myseries2']}
# Example: two search terms 'a' and 'b' both that search 'tags' and '#mytags':
# grouped_search_terms={'a':['tags','#mytags'], 'b':['tags','#mytags']}
# Note: You cannot create a search term that is a duplicate of an existing term.
# Such duplicates will be silently ignored. Also note that search terms ignore
# case. 'MySearch' and 'mysearch' are the same term.
grouped_search_terms = {}
#: Control how tags are applied when copying books to another library #: Control how tags are applied when copying books to another library
# Set this to True to ensure that tags in 'Tags to add when adding # Set this to True to ensure that tags in 'Tags to add when adding
# a book' are added when copying books to another library # a book' are added when copying books to another library

Binary file not shown.


Binary file not shown.


View File

@ -0,0 +1,50 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
adevarul.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Adevarul(BasicNewsRecipe):
title = u'Adev\u0103rul'
language = 'ro'
__author__ = u'Silviu Cotoar\u0103'
description = u'\u0218tiri din Rom\u00e2nia'
publisher = 'Adevarul'
category = 'Ziare,Stiri,Romania'
oldest_article = 5
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'utf-8'
remove_javascript = True
cover_url = 'http://upload.wikimedia.org/wikipedia/en/d/d6/Logo_noul_adevarul.png'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [ dict(name='div', attrs={'class':'article_header'})
,dict(name='div', attrs={'class':'bd'})
]
remove_tags = [ dict(name='div', attrs={'class':'bb-wg-article_related_attachements'})
,dict(name='div', attrs={'class':'bb-md bb-md-article_comments'})
,dict(name='form', attrs={'id':'bb-comment-create-form'})
]
remove_tags_after = [ dict(name='form', attrs={'id':'bb-comment-create-form'}) ]
feeds = [ (u'\u0218tiri', u'http://www.adevarul.ro/rss/latest') ]
def preprocess_html(self, soup):
return self.adeify_images(soup)
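A recipe like this can be test-built from the command line before it is added to calibre; assuming it is saved as adevarul.recipe, something like:

    ebook-convert adevarul.recipe adevarul.epub --test

The --test flag fetches only a couple of articles per feed, which keeps the edit/run cycle short.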

resources/recipes/aprospect.recipe Executable file → Normal file
View File

View File

@ -0,0 +1,44 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
capital.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Capital(BasicNewsRecipe):
title = 'Capital'
__author__ = u'Silviu Cotoar\u0103'
description = u'\u0218tiri din Rom\u00e2nia'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri,Romania'
encoding = 'utf-8'
remove_javascript = True
publisher = 'Capital'
cover_url = 'http://www.mediapress.ro/imagini/sigla-capital-s16.gif'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [ dict(name='div', attrs={'class':'single one_article'})
]
remove_tags = [ dict(name='div', attrs={'class':'single_details'})
, dict(name='div', attrs={'class':'tx-addoceansbanners-pi1'})
]
feeds = [(u'\u0218tiri', u'http://www.capital.ro/rss.html') ]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -0,0 +1,53 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
catavencu.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Catavencu(BasicNewsRecipe):
title = u'Academia Ca\u0163avencu'
__author__ = u'Silviu Cotoar\u0103'
description = 'Tagma cum laude'
publisher = 'Catavencu'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare'
encoding = 'utf-8'
cover_url = 'http://upload.wikimedia.org/wikipedia/en/1/1e/Academia_Catavencu.jpg'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='ul', attrs={'class':'articles'})
]
remove_tags = [
dict(name='div', attrs={'class':['tools']})
, dict(name='div', attrs={'class':['share']})
, dict(name='div', attrs={'class':['category']})
, dict(name='div', attrs={'id':['comments']})
]
remove_tags_after = [
dict(name='div', attrs={'id':'comments'})
]
feeds = [
(u'Feeds', u'http://catavencu.ro/feed/rss')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -1,5 +1,5 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
''' '''
standaard.be standaard.be
''' '''
@ -9,14 +9,15 @@ from calibre.web.feeds.news import BasicNewsRecipe
class DeStandaard(BasicNewsRecipe): class DeStandaard(BasicNewsRecipe):
title = u'De Standaard' title = u'De Standaard'
__author__ = u'Darko Miletic' __author__ = u'Darko Miletic'
language = 'nl_BE' language = 'nl_BE'
description = u'News from Belgium in Dutch' description = u'News from Belgium in Dutch'
oldest_article = 7 oldest_article = 7
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
encoding = 'utf-8' encoding = 'utf-8'
masthead_url = 'http://www.standaard.be/extra/css/images/masthead/logo_340x45.png'
publication_type = 'newspaper'
keep_only_tags = [dict(name='div' , attrs={'id':['intro','continued']})] keep_only_tags = [dict(name='div' , attrs={'id':['intro','continued']})]
@ -27,4 +28,4 @@ class DeStandaard(BasicNewsRecipe):
return article.get('guid', None) return article.get('guid', None)
def print_version(self, url): def print_version(self, url):
return url.replace('/Detail.aspx?','/PrintArtikel.aspx?') return url.replace('/artikel/detail.aspx?','/Artikel/PrintArtikel.aspx?')

View File

@ -0,0 +1,47 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
gandul.info
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Gandul(BasicNewsRecipe):
title = u'G\u00E2ndul'
__author__ = u'Silviu Cotoar\u0103'
publisher = 'Gandul'
description = 'Cotidian Online'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri,Romania'
encoding = 'utf-8'
cover_url = 'http://storage0.dms.mpinteractiv.ro/media/1/1/1706/1064063/1/logo.jpg?width=400'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='div', attrs={'class':'article'})
]
remove_tags = [
dict(name='a', attrs={'class':'photo'})
, dict(name='div', attrs={'class':'ad'})
]
feeds = [
(u'\u0218tiri', u'http://www.gandul.info/rss-stiri-prima-pagina.xml')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -0,0 +1,46 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
hotnews.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Hotnews(BasicNewsRecipe):
title = 'Hotnews'
__author__ = u'Silviu Cotoar\u0103'
description = u'\u0218tiri din Rom\u00e2nia'
publisher = 'Hotnews'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri,Romania'
encoding = 'utf-8'
cover_url = 'http://www.hotnews.ro/images/new/logo.gif'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='h1', attrs={'class':'title'})
,dict(name='div', attrs={'id':'articleContent'})
]
feeds = [ (u'\u0218tiri', u'http://www.hotnews.ro/rss/actualitate')
,(u'English', u'http://www.hotnews.ro/rss/english')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -0,0 +1,54 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
jurnalul.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class JurnalulNational(BasicNewsRecipe):
title = u'Jurnalul Na\u0163ional'
__author__ = u'Silviu Cotoar\u0103'
description = u'\u0218tiri din Rom\u00e2nia'
publisher = 'Jurnalul National'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri,Romania'
encoding = 'utf-8'
cover_url = 'http://www.jurnalul.ro/images/sigla.png'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='h1', attrs={'class':'h3 art_title'})
,dict(name='div', attrs={'class':'only_text'})
]
feeds = [
(u'\u0218tiri', u'http://www.jurnalul.ro/rss/stiri-3028.html')
,(u'Special', u'http://www.jurnalul.ro/rss/special-3001.html')
,(u'Sport', u'http://www.jurnalul.ro/rss/sport-3035.html')
,(u'Bani Afaceri', u'http://www.jurnalul.ro/rss/bani-afaceri-3006.html')
,(u'Viata Sanatoasa', u'http://www.jurnalul.ro/rss/viata-sanatoasa-3010.html')
,(u'Stiinta Tehnica', u'http://www.jurnalul.ro/rss/stiinta-tehnica-3019.html')
,(u'Timp Liber', u'http://www.jurnalul.ro/rss/timp-liber-3022.html')
,(u'Fun', u'http://www.jurnalul.ro/rss/fun-3038.html')
,(u'Acum 20 de ani', u'http://www.jurnalul.ro/rss/acum-20-de-ani-3073.html')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -8,7 +8,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class Lifehacker(BasicNewsRecipe): class Lifehacker(BasicNewsRecipe):
title = 'Lifehacker' title = 'Lifehacker'
__author__ = 'NA' __author__ = 'Kovid Goyal'
description = "Computers make us more productive. Yeah, right. Lifehacker recommends the software downloads and web sites that actually save time. Don't live to geek; geek to live." description = "Computers make us more productive. Yeah, right. Lifehacker recommends the software downloads and web sites that actually save time. Don't live to geek; geek to live."
publisher = 'lifehacker.com' publisher = 'lifehacker.com'
category = 'news, IT, Internet, gadgets, tips and tricks, howto, diy' category = 'news, IT, Internet, gadgets, tips and tricks, howto, diy'
@ -32,14 +32,20 @@ class Lifehacker(BasicNewsRecipe):
, 'language' : language , 'language' : language
} }
remove_attributes = ['width','height'] remove_attributes = ['width', 'height', 'style']
keep_only_tags = [dict(attrs={'class':'content permalink'})]
remove_tags_before = dict(name='h1') remove_tags_before = dict(name='h1')
remove_tags = [dict(attrs={'class':'contactinfo'})] keep_only_tags = [dict(id='container')]
remove_tags_after = dict(attrs={'class':'contactinfo'}) remove_tags_after = dict(attrs={'class':'post-body'})
remove_tags = [
dict(id="sharemenu"),
{'class': 'related'},
]
feeds = [(u'Articles', u'http://feeds.gawker.com/lifehacker/full')] feeds = [(u'Articles', u'http://feeds.gawker.com/lifehacker/full')]
def preprocess_html(self, soup): def preprocess_html(self, soup):
return self.adeify_images(soup) return self.adeify_images(soup)
def print_version(self, url):
return url.replace('#!', '?_escaped_fragment_=')
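Gawker sites served articles behind hashbang (#!) URLs at the time; the print_version() above rewrites them into Google's AJAX-crawling form, for which the server returns static HTML. Illustratively (the article id is made up):

    http://lifehacker.com/#!5753395/some-post
      -> http://lifehacker.com/?_escaped_fragment_=5753395/some-post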

View File

@ -0,0 +1,52 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
mediafax.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Mediafax(BasicNewsRecipe):
title = 'Mediafax'
__author__ = u'Silviu Cotoar\u0103'
description = u'\u0218tiri din Rom\u00e2nia'
publisher = 'Mediafax'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri,Romania'
encoding = 'utf-8'
cover_url = 'http://storage0.dms.mpinteractiv.ro/media/1/1/1706/4134575/2/logo-mediafax-mass-media-news.jpg?width=400'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='div', attrs={'class':'news tabs-container'})
]
remove_tags = [
dict(name='ul', attrs={'class':['CategoryNews']})
,dict(name='div', attrs={'class':['read']})
]
remove_tags_after = [ dict(name='div', attrs={'class':'cmsItemViews'}) ]
feeds = [
(u'Actualitate', u'http://www.mediafax.ro/rss/')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -0,0 +1,54 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
money.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class MoneyRo(BasicNewsRecipe):
title = 'Money Ro'
__author__ = u'Silviu Cotoar\u0103'
description = u'\u0218tiri din Rom\u00e2nia'
publisher = 'MoneyRo'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri,Romania'
encoding = 'utf-8'
remove_javascript = True
cover_url = 'http://assets.moneyweb.ro/images/logo_money.jpg'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [ dict(name='div', attrs={'id':'titluArticol'})
, dict(name='img', attrs={'id':'objImage'})
, dict(name='div', attrs={'class':'leftColumnArticle'})
]
remove_tags_after = [ dict(name='div', attrs={'id':'articleTags'}) ]
remove_tags = [ dict(name='div', attrs={'id':'ads'})
, dict(name='div', attrs={'id':'aus'})
, dict(name='div', attrs={'id':'bb-comment-create-form'})
, dict(name='div', attrs={'id':'articleTags'})
, dict(name='div', attrs={'class':'breadcrumb'})
]
feeds = [(u'\u0218tiri', u'http://moneyro.feedsportal.com/c/32533/fe.ed/rss.money.ro/stiri.xml') ]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -38,7 +38,7 @@ class Pagina12(BasicNewsRecipe):
keep_only_tags = [dict(attrs={'class':'uitstekendekeus'})] keep_only_tags = [dict(attrs={'class':'uitstekendekeus'})]
remove_tags = [ remove_tags = [
dict(name=['meta','base','link','object','embed']) dict(name=['meta','base','link','object','embed'])
,dict(attrs={'class':['reclamespace','tags-and-sharing']}) ,dict(attrs={'class':['reclamespace','tags-and-sharing','sharing-is-caring']})
] ]
remove_attributes=['lang'] remove_attributes=['lang']

View File

@ -668,7 +668,7 @@ class NYTimes(BasicNewsRecipe):
try: try:
#remove "Related content" bar #remove "Related content" bar
runAroundsFound = soup.findAll('div',{'class':['articleInline runaroundLeft','articleInline doubleRule runaroundLeft','articleInline runaroundLeft firstArticleInline','articleInline runaroundLeft ']}) runAroundsFound = soup.findAll('div',{'class':['articleInline runaroundLeft','articleInline doubleRule runaroundLeft','articleInline runaroundLeft firstArticleInline','articleInline runaroundLeft ','articleInline runaroundLeft lastArticleInline']})
if runAroundsFound: if runAroundsFound:
for runAround in runAroundsFound: for runAround in runAroundsFound:
#find all section headers #find all section headers

View File

@ -0,0 +1,49 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
prosport.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Prosport(BasicNewsRecipe):
title = 'Prosport'
__author__ = u'Silviu Cotoar\u0103'
publisher = 'Prosport'
description = u'\u0218tiri Sportive din Rom\u00e2nia'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri,Romania,Sport'
encoding = 'utf-8'
cover_url = 'http://storage0.dms.mpinteractiv.ro/media/401/581/7946/3688311/1/logo-pro.jpg?width=610'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='h1', attrs={'class':'a-title'})
,dict(name='div', attrs={'class':'a-entry'})
]
remove_tags = [ dict(name='div', attrs={'class':'utils'})
,dict(name='div', attrs={'class':'g-slide'})
]
feeds = [ (u'\u0218tiri', u'http://www.prosport.ro/rss.xml')]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -0,0 +1,45 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
realitatea.net
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Realitatea(BasicNewsRecipe):
title = 'Realitatea'
__author__ = u'Silviu Cotoar\u0103'
publisher = 'Realitatea'
description = u'\u0218tiri din Rom\u00e2nia'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri,Romania'
encoding = 'utf-8'
cover_url = 'http://assets.realitatea.ro/images/logo.jpg'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='div', attrs={'class':'articleTitle '})
,dict(name='div', attrs={'class':'articleBody'})
]
remove_tags = [ dict(name='div', attrs={'id':'aus'}) ]
feeds = [ (u'\u0218tiri', u'http://realitatea.feedsportal.com/c/32533/fe.ed/rss.realitatea.net/stiri.xml') ]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -1,5 +1,5 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
''' '''
smh.com.au smh.com.au
''' '''
@ -22,7 +22,11 @@ class Smh_au(BasicNewsRecipe):
remove_empty_feeds = True remove_empty_feeds = True
masthead_url = 'http://images.smh.com.au/2010/02/02/1087188/smh-620.jpg' masthead_url = 'http://images.smh.com.au/2010/02/02/1087188/smh-620.jpg'
publication_type = 'newspaper' publication_type = 'newspaper'
extra_css = ' h1{font-family: Georgia,"Times New Roman",Times,serif } body{font-family: Arial,Helvetica,sans-serif} .cT-imageLandscape{font-size: x-small} ' extra_css = """
h1{font-family: Georgia,"Times New Roman",Times,serif }
body{font-family: Arial,Helvetica,sans-serif}
.cT-imageLandscape,.cT-imagePortrait{font-size: x-small}
"""
conversion_options = { conversion_options = {
'comment' : description 'comment' : description
@ -38,7 +42,11 @@ class Smh_au(BasicNewsRecipe):
] ]
remove_tags_after = [dict(name='div',attrs={'class':'articleBody'})] remove_tags_after = [dict(name='div',attrs={'class':'articleBody'})]
keep_only_tags = [dict(name='div',attrs={'id':'content'})] keep_only_tags = [dict(name='div',attrs={'id':'content'})]
remove_attributes = ['width','height'] remove_tags = [
dict(attrs={'class':'hidden'}),
dict(name=['link','meta','base','embed','object','iframe'])
]
remove_attributes = ['width','height','lang']
def parse_index(self): def parse_index(self):
articles = [] articles = []
@ -66,3 +74,14 @@ class Smh_au(BasicNewsRecipe):
,'description':description ,'description':description
}) })
return [(self.tag_to_string(soup.find('title')), articles)] return [(self.tag_to_string(soup.find('title')), articles)]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll('body'):
item.name = 'div'
for item in soup.findAll('img'):
if not item.has_key('alt'):
item['alt'] = 'image'
return soup

View File

@ -0,0 +1,46 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
standard.money.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class StandardMoneyRo(BasicNewsRecipe):
title = 'Standard Money Ro'
__author__ = u'Silviu Cotoar\u0103'
publisher = 'Standard Money'
description = 'Portal de Business'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri,Romania'
encoding = 'utf-8'
cover_url = 'http://assets.standard.ro/wp-content/themes/standard/images/standard-logo.gif'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='h1', attrs={'class':'post-title'})
, dict(name='div', attrs={'class':'content_post'})
]
feeds = [
(u'Actualitate', u'http://standard.money.ro/feed')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -0,0 +1,45 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
zf.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class ZiarulFinanciar(BasicNewsRecipe):
title = 'Ziarul Financiar'
__author__ = u'Silviu Cotoar\u0103'
description = u'\u0218tiri din Business'
publisher = 'Ziarul Financiar'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri,Romania'
encoding = 'utf-8'
cover_url = 'http://storage0.dms.mpinteractiv.ro/media/1/1/1706/7462721/1/ziarul-financiar-big.jpg?width=400'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='div', attrs={'class':'article'})
]
feeds = [
(u'\u0218tiri', u'http://www.zf.ro/rss/zf-24/')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -6,9 +6,10 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import os, cPickle import os, cPickle, re, anydbm, shutil
from zlib import compress
from setup import Command, basenames from setup import Command, basenames, __appname__
def get_opts_from_parser(parser): def get_opts_from_parser(parser):
def do_opt(opt): def do_opt(opt):
@ -26,6 +27,9 @@ class Resources(Command):
description = 'Compile various needed calibre resources' description = 'Compile various needed calibre resources'
KAKASI_PATH = os.path.join(Command.SRC, __appname__,
'ebooks', 'unihandecode', 'pykakasi')
def run(self, opts): def run(self, opts):
scripts = {} scripts = {}
for x in ('console', 'gui'): for x in ('console', 'gui'):
@ -101,11 +105,107 @@ class Resources(Command):
import json import json
json.dump(function_dict, open(dest, 'wb'), indent=4) json.dump(function_dict, open(dest, 'wb'), indent=4)
self.run_kakasi(opts)
def run_kakasi(self, opts):
self.records = {}
src = self.j(self.KAKASI_PATH, 'kakasidict.utf8')
dest = self.j(self.RESOURCES, 'localization',
'pykakasi','kanwadict2.db')
base = os.path.dirname(dest)
if not os.path.exists(base):
os.makedirs(base)
if self.newer(dest, src):
self.info('\tGenerating Kanwadict')
for line in open(src, "r"):
self.parsekdict(line)
self.kanwaout(dest)
src = self.j(self.KAKASI_PATH, 'itaijidict.utf8')
dest = self.j(self.RESOURCES, 'localization',
'pykakasi','itaijidict2.pickle')
if self.newer(dest, src):
self.info('\tGenerating Itaijidict')
self.mkitaiji(src, dest)
src = self.j(self.KAKASI_PATH, 'kanadict.utf8')
dest = self.j(self.RESOURCES, 'localization',
'pykakasi','kanadict2.pickle')
if self.newer(dest, src):
self.info('\tGenerating kanadict')
self.mkkanadict(src, dest)
return
def mkitaiji(self, src, dst):
dic = {}
for line in open(src, "r"):
line = line.decode("utf-8").strip()
if line.startswith(';;'): # skip comment
continue
if re.match(r"^$",line):
continue
pair = re.sub(r'\\u([0-9a-fA-F]{4})', lambda x:unichr(int(x.group(1),16)), line)
dic[pair[0]] = pair[1]
cPickle.dump(dic, open(dst, 'w'), protocol=-1) #pickle
def mkkanadict(self, src, dst):
dic = {}
for line in open(src, "r"):
line = line.decode("utf-8").strip()
if line.startswith(';;'): # skip comment
continue
if re.match(r"^$",line):
continue
(alpha, kana) = line.split(' ')
dic[kana] = alpha
cPickle.dump(dic, open(dst, 'w'), protocol=-1) #pickle
def parsekdict(self, line):
line = line.decode("utf-8").strip()
if line.startswith(';;'): # skip comment
return
(yomi, kanji) = line.split(' ')
if ord(yomi[-1:]) <= ord('z'):
tail = yomi[-1:]
yomi = yomi[:-1]
else:
tail = ''
self.updaterec(kanji, yomi, tail)
def updaterec(self, kanji, yomi, tail):
key = "%04x"%ord(kanji[0])
if key in self.records:
if kanji in self.records[key]:
rec = self.records[key][kanji]
rec.append((yomi,tail))
self.records[key].update( {kanji: rec} )
else:
self.records[key][kanji]=[(yomi, tail)]
else:
self.records[key] = {}
self.records[key][kanji]=[(yomi, tail)]
def kanwaout(self, out):
dic = anydbm.open(out, 'c')
for (k, v) in self.records.iteritems():
dic[k] = compress(cPickle.dumps(v, -1))
dic.close()
def clean(self): def clean(self):
for x in ('scripts', 'recipes', 'ebook-convert-complete'): for x in ('scripts', 'recipes', 'ebook-convert-complete'):
x = self.j(self.RESOURCES, x+'.pickle') x = self.j(self.RESOURCES, x+'.pickle')
if os.path.exists(x): if os.path.exists(x):
os.remove(x) os.remove(x)
kakasi = self.j(self.RESOURCES, 'localization', 'pykakasi')
if os.path.exists(kakasi):
shutil.rmtree(kakasi)
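A note on the kakasi build above: kanwaout() keys each record set by the zero-padded hex codepoint of the record's first kanji and stores the value as a zlib-compressed cPickle dump. A reader would presumably invert that transformation; a minimal sketch (Python 2, matching the modules used above; the database path argument is illustrative):

    import anydbm, cPickle
    from zlib import decompress

    def kanwa_lookup(db_path, kanji):
        # Key mirrors updaterec(): "%04x" of the first character.
        key = "%04x" % ord(kanji[0])
        dic = anydbm.open(db_path, 'r')
        try:
            # Values were written as compress(cPickle.dumps(records, -1)).
            records = cPickle.loads(decompress(dic[key]))
        finally:
            dic.close()
        # records maps kanji -> [(yomi, tail), ...]
        return records.get(kanji, [])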

View File

@ -2,11 +2,14 @@ import os.path
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import textwrap, os, glob, functools import textwrap, os, glob, functools, re
from calibre import guess_type
from calibre.customize import FileTypePlugin, MetadataReaderPlugin, \ from calibre.customize import FileTypePlugin, MetadataReaderPlugin, \
MetadataWriterPlugin, PreferencesPlugin, InterfaceActionBase MetadataWriterPlugin, PreferencesPlugin, InterfaceActionBase
from calibre.constants import numeric_version from calibre.constants import numeric_version
from calibre.ebooks.metadata.archive import ArchiveExtract, get_cbz_metadata from calibre.ebooks.metadata.archive import ArchiveExtract, get_cbz_metadata
from calibre.ebooks.metadata.opf2 import metadata_to_opf
from calibre.ebooks.oeb.base import OEB_IMAGES
# To archive plugins {{{ # To archive plugins {{{
class HTML2ZIP(FileTypePlugin): class HTML2ZIP(FileTypePlugin):
@ -82,6 +85,77 @@ class PML2PMLZ(FileTypePlugin):
return of.name return of.name
class TXT2TXTZ(FileTypePlugin):
name = 'TXT to TXTZ'
author = 'John Schember'
description = _('Create a TXTZ archive when a TXT file is imported '
'containing Markdown or Textile references to images. The referenced '
'images as well as the TXT file are added to the archive.')
version = numeric_version
file_types = set(['txt'])
supported_platforms = ['windows', 'osx', 'linux']
on_import = True
def _get_image_references(self, txt, base_dir):
images = []
# Textile
for m in re.finditer(ur'(?mu)(?:[\[{])?\!(?:\. )?(?P<path>[^\s(!]+)\s?(?:\(([^\)]+)\))?\!(?::(\S+))?(?:[\]}]|(?=\s|$))', txt):
path = m.group('path')
if path and not os.path.isabs(path) and guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)):
images.append(path)
# Markdown inline
for m in re.finditer(ur'(?mu)\!\[([^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*)\]\s*\((?P<path>[^\)]*)\)', txt):
path = m.group('path')
if path and not os.path.isabs(path) and guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)):
images.append(path)
# Markdown reference
refs = {}
for m in re.finditer(ur'(?mu)^(\ ?\ ?\ ?)\[(?P<id>[^\]]*)\]:\s*(?P<path>[^\s]*)$', txt):
if m.group('id') and m.group('path'):
refs[m.group('id')] = m.group('path')
for m in re.finditer(ur'(?mu)\!\[([^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*)\]\s*\[(?P<id>[^\]]*)\]', txt):
path = refs.get(m.group('id'), None)
if path and not os.path.isabs(path) and guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)):
images.append(path)
# Remove duplicates
return list(set(images))
def run(self, path_to_ebook):
with open(path_to_ebook, 'rb') as ebf:
txt = ebf.read()
base_dir = os.path.dirname(path_to_ebook)
images = self._get_image_references(txt, base_dir)
if images:
# Create TXTZ and put file plus images inside of it.
import zipfile
of = self.temporary_file('_plugin_txt2txtz.txtz')
txtz = zipfile.ZipFile(of.name, 'w')
# Add selected TXT file to archive.
txtz.write(path_to_ebook, os.path.basename(path_to_ebook), zipfile.ZIP_DEFLATED)
# metadata.opf
if os.path.exists(os.path.join(base_dir, 'metadata.opf')):
txtz.write(os.path.join(base_dir, 'metadata.opf'), 'metadata.opf', zipfile.ZIP_DEFLATED)
else:
from calibre.ebooks.metadata.txt import get_metadata
with open(path_to_ebook, 'rb') as ebf:
mi = get_metadata(ebf)
opf = metadata_to_opf(mi)
txtz.writestr('metadata.opf', opf, zipfile.ZIP_DEFLATED)
# images
for image in images:
txtz.write(os.path.join(base_dir, image), image)
txtz.close()
return of.name
else:
# No images so just import the TXT file.
return path_to_ebook
# }}} # }}}
# Metadata reader plugins {{{ # Metadata reader plugins {{{
@ -511,14 +585,14 @@ from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \
from calibre.ebooks.metadata.douban import DoubanBooks from calibre.ebooks.metadata.douban import DoubanBooks
from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers
from calibre.ebooks.metadata.covers import OpenLibraryCovers, \ from calibre.ebooks.metadata.covers import OpenLibraryCovers, \
LibraryThingCovers, DoubanCovers AmazonCovers, DoubanCovers
from calibre.library.catalog import CSV_XML, EPUB_MOBI, BIBTEX from calibre.library.catalog import CSV_XML, EPUB_MOBI, BIBTEX
from calibre.ebooks.epub.fix.unmanifested import Unmanifested from calibre.ebooks.epub.fix.unmanifested import Unmanifested
from calibre.ebooks.epub.fix.epubcheck import Epubcheck from calibre.ebooks.epub.fix.epubcheck import Epubcheck
plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon, plugins = [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon,
KentDistrictLibrary, DoubanBooks, NiceBooks, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested, KentDistrictLibrary, DoubanBooks, NiceBooks, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
Epubcheck, OpenLibraryCovers, LibraryThingCovers, DoubanCovers, Epubcheck, OpenLibraryCovers, AmazonCovers, DoubanCovers,
NiceBooksCovers] NiceBooksCovers]
plugins += [ plugins += [
ComicInput, ComicInput,
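For reference, the three image syntaxes that TXT2TXTZ._get_image_references() above detects, shown on sample text (file names are illustrative; only relative paths to existing image files count):

    !images/cover.jpg!                    Textile inline image
    ![The cover](images/cover.jpg)        Markdown inline image
    ![The cover][cover]                   Markdown reference image
    [cover]: images/cover.jpg             the matching reference definition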

View File

@ -24,6 +24,7 @@ from calibre.utils.logging import Log
from calibre.utils.zipfile import ZipFile from calibre.utils.zipfile import ZipFile
from PIL import Image as PILImage from PIL import Image as PILImage
from lxml import etree
if isosx: if isosx:
try: try:
@ -2515,23 +2516,23 @@ class ITUNES(DriverBase):
fnames = zf_opf.namelist() fnames = zf_opf.namelist()
opf = [x for x in fnames if '.opf' in x][0] opf = [x for x in fnames if '.opf' in x][0]
if opf: if opf:
opf_raw = cStringIO.StringIO(zf_opf.read(opf)) opf_tree = etree.fromstring(zf_opf.read(opf))
soup = BeautifulSoup(opf_raw.getvalue()) ns_map = opf_tree.nsmap.keys()
opf_raw.close() for item in ns_map:
ns = opf_tree.nsmap[item]
# Touch existing calibre timestamp md_el = opf_tree.find(".//{%s}metadata" % ns)
md = soup.find('metadata') if md_el is not None:
if md: ts = md_el.find('.//{%s}meta[@name="calibre:timestamp"]' % ns)
ts = md.find('meta',attrs={'name':'calibre:timestamp'}) if ts is not None:
if ts: timestamp = ts.get('content')
timestamp = ts['content'] old_ts = parse_date(timestamp)
old_ts = parse_date(timestamp) metadata.timestamp = datetime.datetime(old_ts.year, old_ts.month, old_ts.day, old_ts.hour,
metadata.timestamp = datetime.datetime(old_ts.year, old_ts.month, old_ts.day, old_ts.hour, old_ts.minute, old_ts.second, old_ts.microsecond+1, old_ts.tzinfo)
old_ts.minute, old_ts.second, old_ts.microsecond+1, old_ts.tzinfo) else:
else: metadata.timestamp = now()
metadata.timestamp = now() if DEBUG:
if DEBUG: self.log.info(" add timestamp: %s" % metadata.timestamp)
self.log.info(" add timestamp: %s" % metadata.timestamp) break
else: else:
metadata.timestamp = now() metadata.timestamp = now()
if DEBUG: if DEBUG:
@ -2839,7 +2840,7 @@ class ITUNES(DriverBase):
def _xform_metadata_via_plugboard(self, book, format): def _xform_metadata_via_plugboard(self, book, format):
''' Transform book metadata from plugboard templates ''' ''' Transform book metadata from plugboard templates '''
if DEBUG: if DEBUG:
self.log.info(" ITUNES._update_metadata_from_plugboard()") self.log.info(" ITUNES._xform_metadata_via_plugboard()")
if self.plugboard_func: if self.plugboard_func:
pb = self.plugboard_func(self.DEVICE_PLUGBOARD_NAME, format, self.plugboards) pb = self.plugboard_func(self.DEVICE_PLUGBOARD_NAME, format, self.plugboards)
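The lxml lookup above targets the timestamp that calibre writes into OPF metadata. A minimal, self-contained illustration (the namespace is the usual OPF 2.0 one; the timestamp value is made up):

    from lxml import etree

    OPF_NS = 'http://www.idpf.org/2007/opf'
    raw = '''<package xmlns="%s">
      <metadata>
        <meta name="calibre:timestamp" content="2011-02-16T09:37:24+08:00"/>
      </metadata>
    </package>''' % OPF_NS

    tree = etree.fromstring(raw)
    md = tree.find('.//{%s}metadata' % OPF_NS)
    ts = md.find('.//{%s}meta[@name="calibre:timestamp"]' % OPF_NS)
    timestamp = ts.get('content') if ts is not None else None
    # timestamp -> '2011-02-16T09:37:24+08:00'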

View File

@ -11,44 +11,42 @@ Generates and writes an APNX page mapping file.
import struct import struct
import uuid import uuid
from calibre.ebooks.mobi.reader import MobiReader
from calibre.ebooks.pdb.header import PdbHeaderReader from calibre.ebooks.pdb.header import PdbHeaderReader
from calibre.utils.logging import default_log
class APNXBuilder(object): class APNXBuilder(object):
''' '''
2300 characters of uncompressed text per page. This is Create an APNX file using a pseudo page mapping.
not meant to map 1 to 1 to a print book but to be a
close enough measure.
A test book was chosen and the characters were counted
on one page. This number was round to 2240 then 60
characters of markup were added to the total giving
2300.
Uncompressed text length is used because it's easily
accessible in MOBI files (part of the header). Also,
It's faster to work off of the length then to
decompress and parse the actual text.
A better but much more resource intensive and slower
method to calculate the page length would be to parse
the uncompressed text. For each paragraph we would
want to find how many lines it would occupy in a paper
back book. 70 characters per line and 32 lines per page.
So divide the number of characters (minus markup) in
each paragraph by 70. If there are less than 70
characters in the paragraph then it is 1 line. Then,
count every 32 lines and mark that location as a page.
''' '''
def write_apnx(self, mobi_file_path, apnx_path): def write_apnx(self, mobi_file_path, apnx_path, accurate=True):
# Check that this is really a MOBI file.
with open(mobi_file_path, 'rb') as mf: with open(mobi_file_path, 'rb') as mf:
phead = PdbHeaderReader(mf) ident = PdbHeaderReader(mf).identity()
r0 = phead.section_data(0) if ident != 'BOOKMOBI':
text_length = struct.unpack('>I', r0[4:8])[0] raise Exception(_('Not a valid MOBI file. Reports identity of %s') % ident)
pages = self.get_pages(text_length) # Get the pages depending on the chosen parser
pages = []
if accurate:
try:
pages = self.get_pages_accurate(mobi_file_path)
except:
# Fall back to the fast parser if we can't
# use the accurate one. Typically this is
# due to the file having DRM.
pages = self.get_pages_fast(mobi_file_path)
else:
pages = self.get_pages_fast(mobi_file_path)
if not pages:
raise Exception(_('Could not generate page mapping.'))
# Generate the APNX file from the page mapping.
apnx = self.generate_apnx(pages) apnx = self.generate_apnx(pages)
# Write the APNX.
with open(apnx_path, 'wb') as apnxf: with open(apnx_path, 'wb') as apnxf:
apnxf.write(apnx) apnxf.write(apnx)
@ -73,18 +71,126 @@ class APNXBuilder(object):
apnx += struct.pack('>H', 32) apnx += struct.pack('>H', 32)
apnx += page_header apnx += page_header
# write page values to apnx # Write page values to APNX.
for page in pages: for page in pages:
apnx += struct.pack('>L', page) apnx += struct.pack('>I', page)
return apnx return apnx
def get_pages(self, text_length): def get_pages_fast(self, mobi_file_path):
'''
2300 characters of uncompressed text per page. This is
not meant to map 1 to 1 to a print book but to be a
close enough measure.
A test book was chosen and the characters were counted
on one page. This number was rounded to 2240, then 60
characters of markup were added to the total giving
2300.
Uncompressed text length is used because it's easily
accessible in MOBI files (part of the header). Also,
it's faster to work from the length than to
decompress and parse the actual text.
'''
text_length = 0
pages = [] pages = []
count = 0 count = 0
with open(mobi_file_path, 'rb') as mf:
phead = PdbHeaderReader(mf)
r0 = phead.section_data(0)
text_length = struct.unpack('>I', r0[4:8])[0]
while count < text_length: while count < text_length:
pages.append(count) pages.append(count)
count += 2300 count += 2300
return pages return pages
def get_pages_accurate(self, mobi_file_path):
'''
A more accurate but much more resource intensive and slower
method to calculate the page length.
Parses the uncompressed text. In an average paperback book
there are 32 lines per page and a maximum of 70 characters
per line.
Each paragraph starts a new line and every 70 characters
(minus markup) in a paragraph starts a new line. The
position after every 32 lines will be marked as a new
page.
This could be made more accurate by accounting for
<div class="mbp_pagebreak" /> as a new page marker,
and <br> elements as empty lines.
'''
pages = []
# Get the MOBI html.
mr = MobiReader(mobi_file_path, default_log)
if mr.book_header.encryption_type != 0:
# DRMed book
return self.get_pages_fast(mobi_file_path)
mr.extract_text()
# States
in_tag = False
in_p = False
check_p = False
closing = False
p_char_count = 0
# Get positions of every line
# A line is either a paragraph starting
# or every 70 characters in a paragraph.
lines = []
pos = -1
# We want this to be as fast as possible so we
# are going to do one pass across the text. re
# and string functions will parse the text each
# time they are called.
#
# We can use .lower() here because we are
# not modifying the text; the case doesn't
# matter, just the absolute character and
# the position within the stream.
for c in mr.mobi_html.lower():
pos += 1
# Check if we are starting or stopping a p tag.
if check_p:
if c == '/':
closing = True
continue
elif c == 'p':
if closing:
in_p = False
else:
in_p = True
lines.append(pos - 2)
check_p = False
closing = False
continue
if c == '<':
in_tag = True
check_p = True
continue
elif c == '>':
in_tag = False
check_p = False
continue
if in_p and not in_tag:
p_char_count += 1
if p_char_count == 70:
lines.append(pos)
p_char_count = 0
# Every 32 lines is a new page
for i in xrange(0, len(lines), 32):
pages.append(lines[i])
return pages
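To put the two generators in context: the fast path emits one page marker per 2300 characters of uncompressed text, so a 690,000-character book maps to 300 pages; the accurate path re-derives pages from 70-character lines, 32 lines to a page. A usage sketch mirroring the Kindle driver below (the module path and file names are assumptions):

    from calibre.devices.kindle.apnx import APNXBuilder  # path assumed

    builder = APNXBuilder()
    # Fast: page count from the uncompressed text length in the MOBI header.
    builder.write_apnx('book.mobi', 'book.mobi.apnx', accurate=False)
    # Accurate: parses the text; silently falls back to fast for DRMed files.
    builder.write_apnx('book.mobi', 'book.mobi.apnx', accurate=True)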

View File

@ -176,6 +176,28 @@ class KINDLE2(KINDLE):
PRODUCT_ID = [0x0002, 0x0004] PRODUCT_ID = [0x0002, 0x0004]
BCD = [0x0100] BCD = [0x0100]
EXTRA_CUSTOMIZATION_MESSAGE = [
_('Send page number information when sending books') +
':::' +
_('The Kindle 3 and newer versions can use page number information '
'in MOBI files. With this option, calibre will calculate and send'
' this information to the Kindle when uploading MOBI files by'
' USB. Note that the page numbers do not correspond to any paper'
' book.'),
_('Use slower but more accurate page number generation') +
':::' +
_('There are two ways to generate the page number information. Using the more accurate '
'generator will produce pages that correspond better to a printed book. '
'However, this method is slower and will slow down sending files '
'to the Kindle.'),
]
EXTRA_CUSTOMIZATION_DEFAULT = [
True,
False,
]
OPT_APNX = 0
OPT_APNX_ACCURATE = 1
def books(self, oncard=None, end_session=True): def books(self, oncard=None, end_session=True):
bl = USBMS.books(self, oncard=oncard, end_session=end_session) bl = USBMS.books(self, oncard=oncard, end_session=end_session)
# Read collections information # Read collections information
@ -212,13 +234,17 @@ class KINDLE2(KINDLE):
''' '''
Hijacking this function to write the apnx file. Hijacking this function to write the apnx file.
''' '''
if not filepath.lower().endswith('.mobi'): opts = self.settings()
if not opts.extra_customization[self.OPT_APNX]:
return
if os.path.splitext(filepath.lower())[1] not in ('.azw', '.mobi', '.prc'):
return return
apnx_path = '%s.apnx' % os.path.join(path, filename) apnx_path = '%s.apnx' % os.path.join(path, filename)
apnx_builder = APNXBuilder() apnx_builder = APNXBuilder()
try: try:
apnx_builder.write_apnx(filepath, apnx_path) apnx_builder.write_apnx(filepath, apnx_path, accurate=opts.extra_customization[self.OPT_APNX_ACCURATE])
except: except:
print 'Failed to generate APNX' print 'Failed to generate APNX'
import traceback import traceback

View File

@ -152,8 +152,17 @@ def check_ebook_format(stream, current_guess):
stream.seek(0) stream.seek(0)
return ans return ans
def normalize(x):
if isinstance(x, unicode):
import unicodedata
x = unicodedata.normalize('NFKC', x)
return x
def calibre_cover(title, author_string, series_string=None, def calibre_cover(title, author_string, series_string=None,
output_format='jpg', title_size=46, author_size=36): output_format='jpg', title_size=46, author_size=36):
title = normalize(title)
author_string = normalize(author_string)
series_string = normalize(series_string)
from calibre.utils.magick.draw import create_cover_page, TextLine from calibre.utils.magick.draw import create_cover_page, TextLine
lines = [TextLine(title, title_size), TextLine(author_string, author_size)] lines = [TextLine(title, title_size), TextLine(author_string, author_size)]
if series_string: if series_string:
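NFKC normalization folds compatibility characters into their plain equivalents before the cover text is rendered, which avoids glyphs the cover font may not carry. Two typical foldings:

    import unicodedata

    unicodedata.normalize('NFKC', u'\ufb01')  # LATIN SMALL LIGATURE FI -> u'fi'
    unicodedata.normalize('NFKC', u'\uff21')  # FULLWIDTH LATIN CAPITAL A -> u'A'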

View File

@ -402,8 +402,8 @@ OptionRecommendation(name='asciiize',
'with "Mikhail Gorbachiov". Also, note that in ' 'with "Mikhail Gorbachiov". Also, note that in '
'cases where there are multiple representations of a character ' 'cases where there are multiple representations of a character '
'(characters shared by Chinese and Japanese for instance) the ' '(characters shared by Chinese and Japanese for instance) the '
'representation used by the largest number of people will be ' 'representation based on the current calibre interface language will be '
'used (Chinese in the previous example).')%\ 'used.')%\
u'\u041c\u0438\u0445\u0430\u0438\u043b ' u'\u041c\u0438\u0445\u0430\u0438\u043b '
u'\u0413\u043e\u0440\u0431\u0430\u0447\u0451\u0432' u'\u0413\u043e\u0440\u0431\u0430\u0447\u0451\u0432'
) )

View File

@ -543,9 +543,9 @@ class HTMLPreProcessor(object):
html = XMLDECL_RE.sub('', html) html = XMLDECL_RE.sub('', html)
if getattr(self.extra_opts, 'asciiize', False): if getattr(self.extra_opts, 'asciiize', False):
from calibre.ebooks.unidecode.unidecoder import Unidecoder from calibre.utils.localization import get_udc
unidecoder = Unidecoder() unihandecoder = get_udc()
html = unidecoder.decode(html) html = unihandecoder.decode(html)
if getattr(self.extra_opts, 'enable_heuristics', False): if getattr(self.extra_opts, 'enable_heuristics', False):
from calibre.ebooks.conversion.utils import HeuristicProcessor from calibre.ebooks.conversion.utils import HeuristicProcessor
@ -557,10 +557,10 @@ class HTMLPreProcessor(object):
unsupported_unicode_chars = self.extra_opts.output_profile.unsupported_unicode_chars unsupported_unicode_chars = self.extra_opts.output_profile.unsupported_unicode_chars
if unsupported_unicode_chars: if unsupported_unicode_chars:
from calibre.ebooks.unidecode.unidecoder import Unidecoder from calibre.utils.localization import get_udc
unidecoder = Unidecoder() unihandecoder = get_udc()
for char in unsupported_unicode_chars: for char in unsupported_unicode_chars:
asciichar = unidecoder.decode(char) asciichar = unihandecoder.decode(char)
html = html.replace(char, asciichar) html = html.replace(char, asciichar)
return html return html
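The object returned by get_udc() is used exactly like the old Unidecoder: it exposes decode(), which transliterates Unicode text to ASCII. Reusing the example from the asciiize option help above:

    from calibre.utils.localization import get_udc

    udc = get_udc()
    # Expected to yield 'Mikhail Gorbachiov' per the option help above.
    ascii_text = udc.decode(u'\u041c\u0438\u0445\u0430\u0438\u043b '
                            u'\u0413\u043e\u0440\u0431\u0430\u0447\u0451\u0432')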

View File

@ -271,6 +271,8 @@ def check_isbn13(isbn):
return None return None
def check_isbn(isbn): def check_isbn(isbn):
if not isbn:
return None
isbn = re.sub(r'[^0-9X]', '', isbn.upper()) isbn = re.sub(r'[^0-9X]', '', isbn.upper())
if len(isbn) == 10: if len(isbn) == 10:
return check_isbn10(isbn) return check_isbn10(isbn)
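check_isbn10() itself is outside this hunk; for reference, the standard ISBN-10 rule it implements weights the ten digits 10 down to 1 and requires the weighted sum to be divisible by 11, with 'X' standing for 10. A sketch of that rule (not calibre's code):

    def isbn10_valid(isbn):
        # isbn is assumed already normalized to ten characters of [0-9X].
        if len(isbn) != 10:
            return False
        total = sum((10 - i) * (10 if d == 'X' else int(d))
                    for i, d in enumerate(isbn))
        return total % 11 == 0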

View File

@ -7,6 +7,7 @@ __docformat__ = 'restructuredtext en'
Fetch metadata using Amazon AWS Fetch metadata using Amazon AWS
''' '''
import sys, re import sys, re
from threading import RLock
from lxml import html from lxml import html
from lxml.html import soupparser from lxml.html import soupparser
@ -17,6 +18,10 @@ from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.chardet import xml_to_unicode
from calibre.library.comments import sanitize_comments_html from calibre.library.comments import sanitize_comments_html
asin_cache = {}
cover_url_cache = {}
cache_lock = RLock()
def find_asin(br, isbn): def find_asin(br, isbn):
q = 'http://www.amazon.com/s?field-keywords='+isbn q = 'http://www.amazon.com/s?field-keywords='+isbn
raw = br.open_novisit(q).read() raw = br.open_novisit(q).read()
@ -29,6 +34,12 @@ def find_asin(br, isbn):
return revs[0] return revs[0]
def to_asin(br, isbn): def to_asin(br, isbn):
with cache_lock:
ans = asin_cache.get(isbn, None)
if ans:
return ans
if ans is False:
return None
if len(isbn) == 13: if len(isbn) == 13:
try: try:
asin = find_asin(br, isbn) asin = find_asin(br, isbn)
@ -38,8 +49,11 @@ def to_asin(br, isbn):
asin = None asin = None
else: else:
asin = isbn asin = isbn
with cache_lock:
asin_cache[isbn] = asin if asin else False
return asin return asin
def get_social_metadata(title, authors, publisher, isbn): def get_social_metadata(title, authors, publisher, isbn):
mi = Metadata(title, authors) mi = Metadata(title, authors)
if not isbn: if not isbn:
@ -58,6 +72,68 @@ def get_social_metadata(title, authors, publisher, isbn):
return mi return mi
return mi return mi
def get_cover_url(isbn, br):
isbn = check_isbn(isbn)
if not isbn:
return None
with cache_lock:
ans = cover_url_cache.get(isbn, None)
if ans:
return ans
if ans is False:
return None
asin = to_asin(br, isbn)
if asin:
ans = _get_cover_url(br, asin)
if ans:
with cache_lock:
cover_url_cache[isbn] = ans
return ans
from calibre.ebooks.metadata.xisbn import xisbn
for i in xisbn.get_associated_isbns(isbn):
asin = to_asin(br, i)
if asin:
ans = _get_cover_url(br, asin)
if ans:
with cache_lock:
cover_url_cache[isbn] = ans
cover_url_cache[i] = ans
return ans
with cache_lock:
cover_url_cache[isbn] = False
return None
def _get_cover_url(br, asin):
q = 'http://amzn.com/'+asin
try:
raw = br.open_novisit(q).read()
except Exception, e:
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
return None
raise
if '<title>404 - ' in raw:
return None
raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0]
try:
root = soupparser.fromstring(raw)
except:
return False
imgs = root.xpath('//img[@id="prodImage" and @src]')
if imgs:
src = imgs[0].get('src')
parts = src.split('/')
if len(parts) > 3:
bn = parts[-1]
sparts = bn.split('_')
if len(sparts) > 2:
bn = sparts[0] + sparts[-1]
return ('/'.join(parts[:-1]))+'/'+bn
return None
def get_metadata(br, asin, mi): def get_metadata(br, asin, mi):
q = 'http://amzn.com/'+asin q = 'http://amzn.com/'+asin
try: try:
@ -111,18 +187,25 @@ def get_metadata(br, asin, mi):
def main(args=sys.argv): def main(args=sys.argv):
# Test xisbn import tempfile, os
print get_social_metadata('Learning Python', None, None, '8324616489') tdir = tempfile.gettempdir()
print br = browser()
for title, isbn in [
('Learning Python', '8324616489'), # Test xisbn
('Angels & Demons', '9781416580829'), # Test sophisticated comment formatting
# Random tests
('Star Trek: Destiny: Mere Mortals', '9781416551720'),
('The Great Gatsby', '0743273567'),
]:
cpath = os.path.join(tdir, title+'.jpg')
curl = get_cover_url(isbn, br)
if curl is None:
print 'No cover found for', title
else:
open(cpath, 'wb').write(br.open_novisit(curl).read())
print 'Cover for', title, 'saved to', cpath
# Test sophisticated comment formatting print get_social_metadata(title, None, None, isbn)
print get_social_metadata('Angels & Demons', None, None, '9781416580829')
print
# Random tests
print get_social_metadata('Star Trek: Destiny: Mere Mortals', None, None, '9781416551720')
print
print get_social_metadata('The Great Gatsby', None, None, '0743273567')
return 0 return 0
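Both module-level caches above follow the same tri-state convention: a missing key means never looked up, a truthy value is a positive hit, and False records a failed lookup so the network round-trip is not repeated. The pattern in isolation (names are generic):

    from threading import RLock

    _cache = {}
    _lock = RLock()

    def cached_lookup(key, compute):
        with _lock:
            ans = _cache.get(key, None)
        if ans:
            return ans
        if ans is False:
            return None           # known negative: skip the lookup
        ans = compute(key)        # may legitimately return None/False
        with _lock:
            _cache[key] = ans if ans else False
        return ans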

View File

@ -5,7 +5,7 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import traceback, socket, re, sys import traceback, socket, sys
from functools import partial from functools import partial
from threading import Thread, Event from threading import Thread, Event
from Queue import Queue, Empty from Queue import Queue, Empty
@ -15,7 +15,6 @@ import mechanize
from calibre.customize import Plugin from calibre.customize import Plugin
from calibre import browser, prints from calibre import browser, prints
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.constants import preferred_encoding, DEBUG from calibre.constants import preferred_encoding, DEBUG
class CoverDownload(Plugin): class CoverDownload(Plugin):
@ -112,73 +111,38 @@ class OpenLibraryCovers(CoverDownload): # {{{
# }}} # }}}
class LibraryThingCovers(CoverDownload): # {{{ class AmazonCovers(CoverDownload): # {{{
name = 'librarything.com covers' name = 'amazon.com covers'
description = _('Download covers from librarything.com') description = _('Download covers from amazon.com')
author = 'Kovid Goyal' author = 'Kovid Goyal'
LIBRARYTHING = 'http://www.librarything.com/isbn/'
def get_cover_url(self, isbn, br, timeout=5.):
try:
src = br.open_novisit('http://www.librarything.com/isbn/'+isbn,
timeout=timeout).read().decode('utf-8', 'replace')
except Exception, err:
if isinstance(getattr(err, 'args', [None])[0], socket.timeout):
err = Exception(_('LibraryThing.com timed out. Try again later.'))
raise err
else:
if '/wiki/index.php/HelpThing:Verify' in src:
raise Exception('LibraryThing is blocking calibre.')
s = BeautifulSoup(src)
url = s.find('td', attrs={'class':'left'})
if url is None:
if s.find('div', attrs={'class':'highloadwarning'}) is not None:
raise Exception(_('Could not fetch cover as server is experiencing high load. Please try again later.'))
raise Exception(_('ISBN: %s not found')%isbn)
url = url.find('img')
if url is None:
raise Exception(_('LibraryThing.com server error. Try again later.'))
url = re.sub(r'_S[XY]\d+', '', url['src'])
return url
def has_cover(self, mi, ans, timeout=5.): def has_cover(self, mi, ans, timeout=5.):
return False if not mi.isbn:
if not mi.isbn or not self.site_customization:
return False return False
from calibre.ebooks.metadata.library_thing import get_browser, login from calibre.ebooks.metadata.amazon import get_cover_url
br = get_browser() br = browser()
un, _, pw = self.site_customization.partition(':')
login(br, un, pw)
try: try:
self.get_cover_url(mi.isbn, br, timeout=timeout) get_cover_url(mi.isbn, br)
self.debug('cover for', mi.isbn, 'found') self.debug('cover for', mi.isbn, 'found')
ans.set() ans.set()
except Exception, e: except Exception, e:
self.debug(e) self.debug(e)
def get_covers(self, mi, result_queue, abort, timeout=5.): def get_covers(self, mi, result_queue, abort, timeout=5.):
if not mi.isbn or not self.site_customization: if not mi.isbn:
return return
from calibre.ebooks.metadata.library_thing import get_browser, login from calibre.ebooks.metadata.amazon import get_cover_url
br = get_browser() br = browser()
un, _, pw = self.site_customization.partition(':')
login(br, un, pw)
try: try:
url = self.get_cover_url(mi.isbn, br, timeout=timeout) url = get_cover_url(mi.isbn, br)
cover_data = br.open_novisit(url).read() cover_data = br.open_novisit(url).read()
result_queue.put((True, cover_data, 'jpg', self.name)) result_queue.put((True, cover_data, 'jpg', self.name))
except Exception, e: except Exception, e:
result_queue.put((False, self.exception_to_string(e), result_queue.put((False, self.exception_to_string(e),
traceback.format_exc(), self.name)) traceback.format_exc(), self.name))
def customization_help(self, gui=False):
ans = _('To use librarything.com you must sign up for a %sfree account%s '
'and enter your username and password separated by a : below.')
return '<p>'+ans%('<a href="http://www.librarything.com">', '</a>')
# }}} # }}}
def check_for_cover(mi, timeout=5.): # {{{ def check_for_cover(mi, timeout=5.): # {{{

View File

@ -12,6 +12,7 @@ __docformat__ = 'restructuredtext en'
from struct import pack, unpack from struct import pack, unpack
from cStringIO import StringIO from cStringIO import StringIO
from calibre.ebooks import normalize
from calibre.ebooks.mobi import MobiError from calibre.ebooks.mobi import MobiError
from calibre.ebooks.mobi.writer import rescale_image, MAX_THUMB_DIMEN from calibre.ebooks.mobi.writer import rescale_image, MAX_THUMB_DIMEN
from calibre.ebooks.mobi.langcodes import iana2mobi from calibre.ebooks.mobi.langcodes import iana2mobi
@ -311,6 +312,7 @@ class MetadataUpdater(object):
return StreamSlicer(self.stream, start, stop) return StreamSlicer(self.stream, start, stop)
def update(self, mi): def update(self, mi):
mi.title = normalize(mi.title)
def update_exth_record(rec): def update_exth_record(rec):
recs.append(rec) recs.append(rec)
if rec[0] in self.original_exth_records: if rec[0] in self.original_exth_records:
@ -331,12 +333,12 @@ class MetadataUpdater(object):
kindle_pdoc = None kindle_pdoc = None
if mi.author_sort and pas: if mi.author_sort and pas:
authors = mi.author_sort authors = mi.author_sort
update_exth_record((100, authors.encode(self.codec, 'replace'))) update_exth_record((100, normalize(authors).encode(self.codec, 'replace')))
elif mi.authors: elif mi.authors:
authors = ';'.join(mi.authors) authors = ';'.join(mi.authors)
update_exth_record((100, authors.encode(self.codec, 'replace'))) update_exth_record((100, normalize(authors).encode(self.codec, 'replace')))
if mi.publisher: if mi.publisher:
update_exth_record((101, mi.publisher.encode(self.codec, 'replace'))) update_exth_record((101, normalize(mi.publisher).encode(self.codec, 'replace')))
if mi.comments: if mi.comments:
# Strip user annotations # Strip user annotations
a_offset = mi.comments.find('<div class="user_annotations">') a_offset = mi.comments.find('<div class="user_annotations">')
@ -345,12 +347,12 @@ class MetadataUpdater(object):
mi.comments = mi.comments[:a_offset] mi.comments = mi.comments[:a_offset]
if ad_offset >= 0: if ad_offset >= 0:
mi.comments = mi.comments[:ad_offset] mi.comments = mi.comments[:ad_offset]
update_exth_record((103, mi.comments.encode(self.codec, 'replace'))) update_exth_record((103, normalize(mi.comments).encode(self.codec, 'replace')))
if mi.isbn: if mi.isbn:
update_exth_record((104, mi.isbn.encode(self.codec, 'replace'))) update_exth_record((104, mi.isbn.encode(self.codec, 'replace')))
if mi.tags: if mi.tags:
subjects = '; '.join(mi.tags) subjects = '; '.join(mi.tags)
update_exth_record((105, subjects.encode(self.codec, 'replace'))) update_exth_record((105, normalize(subjects).encode(self.codec, 'replace')))
if kindle_pdoc and kindle_pdoc in mi.tags: if kindle_pdoc and kindle_pdoc in mi.tags:
update_exth_record((501, str('PDOC'))) update_exth_record((501, str('PDOC')))

View File

@ -367,6 +367,9 @@ class MobiMLizer(object):
istate.attrib['src'] = elem.attrib['src'] istate.attrib['src'] = elem.attrib['src']
istate.attrib['align'] = 'baseline' istate.attrib['align'] = 'baseline'
cssdict = style.cssdict() cssdict = style.cssdict()
valign = cssdict.get('vertical-align', None)
if valign in ('top', 'bottom', 'middle'):
istate.attrib['align'] = valign
for prop in ('width', 'height'): for prop in ('width', 'height'):
if cssdict[prop] != 'auto': if cssdict[prop] != 'auto':
value = style[prop] value = style[prop]
@ -451,8 +454,11 @@ class MobiMLizer(object):
text = COLLAPSE.sub(' ', elem.text) text = COLLAPSE.sub(' ', elem.text)
valign = style['vertical-align'] valign = style['vertical-align']
not_baseline = valign in ('super', 'sub', 'text-top', not_baseline = valign in ('super', 'sub', 'text-top',
'text-bottom') 'text-bottom') or (
vtag = 'sup' if valign in ('super', 'text-top') else 'sub' isinstance(valign, (float, int)) and abs(valign) != 0)
issup = valign in ('super', 'text-top') or (
isinstance(valign, (float, int)) and valign > 0)
vtag = 'sup' if issup else 'sub'
if not_baseline and not ignore_valign and tag not in NOT_VTAGS and not isblock: if not_baseline and not ignore_valign and tag not in NOT_VTAGS and not isblock:
nroot = etree.Element(XHTML('html'), nsmap=MOBI_NSMAP) nroot = etree.Element(XHTML('html'), nsmap=MOBI_NSMAP)
vbstate = BlockState(etree.SubElement(nroot, XHTML('body'))) vbstate = BlockState(etree.SubElement(nroot, XHTML('body')))

View File

@ -14,8 +14,9 @@ import re
from struct import pack from struct import pack
import time import time
from urlparse import urldefrag from urlparse import urldefrag
from cStringIO import StringIO from cStringIO import StringIO
from calibre.ebooks import normalize
from calibre.ebooks.mobi.langcodes import iana2mobi from calibre.ebooks.mobi.langcodes import iana2mobi
from calibre.ebooks.mobi.mobiml import MBP_NS from calibre.ebooks.mobi.mobiml import MBP_NS
from calibre.ebooks.oeb.base import OEB_DOCS from calibre.ebooks.oeb.base import OEB_DOCS
@ -1365,7 +1366,7 @@ class MobiWriter(object):
self._text_length, self._text_length,
self._text_nrecords-1, RECORD_SIZE, 0, 0)) # 0 - 15 (0x0 - 0xf) self._text_nrecords-1, RECORD_SIZE, 0, 0)) # 0 - 15 (0x0 - 0xf)
uid = random.randint(0, 0xffffffff) uid = random.randint(0, 0xffffffff)
title = unicode(metadata.title[0]).encode('utf-8') title = normalize(unicode(metadata.title[0])).encode('utf-8')
# The MOBI Header # The MOBI Header
# 0x0 - 0x3 # 0x0 - 0x3
@ -1523,12 +1524,12 @@ class MobiWriter(object):
items = oeb.metadata[term] items = oeb.metadata[term]
if term == 'creator': if term == 'creator':
if self._prefer_author_sort: if self._prefer_author_sort:
creators = [unicode(c.file_as or c) for c in items] creators = [normalize(unicode(c.file_as or c)) for c in items]
else: else:
creators = [unicode(c) for c in items] creators = [normalize(unicode(c)) for c in items]
items = ['; '.join(creators)] items = ['; '.join(creators)]
for item in items: for item in items:
data = self.COLLAPSE_RE.sub(' ', unicode(item)) data = self.COLLAPSE_RE.sub(' ', normalize(unicode(item)))
if term == 'identifier': if term == 'identifier':
if data.lower().startswith('urn:isbn:'): if data.lower().startswith('urn:isbn:'):
data = data[9:] data = data[9:]
@ -1542,7 +1543,7 @@ class MobiWriter(object):
nrecs += 1 nrecs += 1
if term == 'rights' : if term == 'rights' :
try: try:
rights = unicode(oeb.metadata.rights[0]).encode('utf-8') rights = normalize(unicode(oeb.metadata.rights[0])).encode('utf-8')
except: except:
rights = 'Unknown' rights = 'Unknown'
exth.write(pack('>II', EXTH_CODES['rights'], len(rights) + 8)) exth.write(pack('>II', EXTH_CODES['rights'], len(rights) + 8))

View File

@ -207,7 +207,14 @@ class CSSFlattener(object):
font_size = self.sbase if self.sbase is not None else \ font_size = self.sbase if self.sbase is not None else \
self.context.source.fbase self.context.source.fbase
if 'align' in node.attrib: if 'align' in node.attrib:
cssdict['text-align'] = node.attrib['align'] if tag != 'img':
cssdict['text-align'] = node.attrib['align']
else:
val = node.attrib['align']
if val in ('middle', 'bottom', 'top'):
cssdict['vertical-align'] = val
elif val in ('left', 'right'):
cssdict['text-align'] = val
del node.attrib['align'] del node.attrib['align']
if node.tag == XHTML('font'): if node.tag == XHTML('font'):
node.tag = XHTML('span') node.tag = XHTML('span')

View File

@ -4,10 +4,9 @@ __license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>' __copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import glob
import os import os
from calibre import _ent_pat, xml_entity_to_unicode from calibre import _ent_pat, walk, xml_entity_to_unicode
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator
from calibre.ebooks.chardet import detect from calibre.ebooks.chardet import detect
@ -16,7 +15,6 @@ from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \
preserve_spaces, detect_paragraph_type, detect_formatting_type, \ preserve_spaces, detect_paragraph_type, detect_formatting_type, \
normalize_line_endings, convert_textile, remove_indents, block_to_single_line, \ normalize_line_endings, convert_textile, remove_indents, block_to_single_line, \
separate_hard_scene_breaks separate_hard_scene_breaks
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.zipfile import ZipFile from calibre.utils.zipfile import ZipFile
class TXTInput(InputFormatPlugin): class TXTInput(InputFormatPlugin):
@ -28,20 +26,23 @@ class TXTInput(InputFormatPlugin):
options = set([ options = set([
OptionRecommendation(name='paragraph_type', recommended_value='auto', OptionRecommendation(name='paragraph_type', recommended_value='auto',
choices=['auto', 'block', 'single', 'print', 'unformatted'], choices=['auto', 'block', 'single', 'print', 'unformatted', 'off'],
help=_('Paragraph structure.\n' help=_('Paragraph structure.\n'
'choices are [\'auto\', \'block\', \'single\', \'print\', \'unformatted\']\n' 'choices are [\'auto\', \'block\', \'single\', \'print\', \'unformatted\', \'off\']\n'
'* auto: Try to auto detect paragraph type.\n' '* auto: Try to auto detect paragraph type.\n'
'* block: Treat a blank line as a paragraph break.\n' '* block: Treat a blank line as a paragraph break.\n'
'* single: Assume every line is a paragraph.\n' '* single: Assume every line is a paragraph.\n'
'* print: Assume every line starting with 2+ spaces or a tab ' '* print: Assume every line starting with 2+ spaces or a tab '
'starts a paragraph.' 'starts a paragraph.\n'
'* unformatted: Most lines have hard line breaks, few/no blank lines or indents.')), '* unformatted: Most lines have hard line breaks, few/no blank lines or indents. '
'Tries to determine structure and reformat the differentiated elements.\n'
'* off: Don\'t modify the paragraph structure. This is useful when combined with '
'Markdown or Textile formatting to ensure no formatting is lost.')),
OptionRecommendation(name='formatting_type', recommended_value='auto', OptionRecommendation(name='formatting_type', recommended_value='auto',
choices=['auto', 'none', 'heuristic', 'textile', 'markdown'], choices=['auto', 'plain', 'heuristic', 'textile', 'markdown'],
help=_('Formatting used within the document.' help=_('Formatting used within the document.'
'* auto: Automatically decide which formatting processor to use.\n' '* auto: Automatically decide which formatting processor to use.\n'
'* none: Do not process the document formatting. Everything is a ' '* plain: Do not process the document formatting. Everything is a '
'paragraph and no styling is applied.\n' 'paragraph and no styling is applied.\n'
'* heuristic: Process using heuristics to determine formatting such ' '* heuristic: Process using heuristics to determine formatting such '
'as chapter headings and italic text.\n' 'as chapter headings and italic text.\n'
@ -64,18 +65,17 @@ class TXTInput(InputFormatPlugin):
txt = '' txt = ''
log.debug('Reading text from file...') log.debug('Reading text from file...')
length = 0 length = 0
# [(u'path', mime),]
# Extract content from zip archive. # Extract content from zip archive.
if file_ext == 'txtz': if file_ext == 'txtz':
log.debug('De-compressing content to temporary directory...') zf = ZipFile(stream)
with TemporaryDirectory('_untxtz') as tdir: zf.extractall('.')
zf = ZipFile(stream)
zf.extractall(tdir)
txts = glob.glob(os.path.join(tdir, '*.txt')) for x in walk('.'):
for t in txts: if os.path.splitext(x)[1].lower() == '.txt':
with open(t, 'rb') as tf: with open(x, 'rb') as tf:
txt += tf.read() txt += tf.read() + '\n\n'
else: else:
txt = stream.read() txt = stream.read()
@ -134,7 +134,7 @@ class TXTInput(InputFormatPlugin):
preprocessor = HeuristicProcessor(options, log=getattr(self, 'log', None)) preprocessor = HeuristicProcessor(options, log=getattr(self, 'log', None))
txt = preprocessor.punctuation_unwrap(length, txt, 'txt') txt = preprocessor.punctuation_unwrap(length, txt, 'txt')
txt = separate_paragraphs_single_line(txt) txt = separate_paragraphs_single_line(txt)
else: elif options.paragraph_type == 'block':
txt = separate_hard_scene_breaks(txt) txt = separate_hard_scene_breaks(txt)
txt = block_to_single_line(txt) txt = block_to_single_line(txt)
@ -178,7 +178,7 @@ class TXTInput(InputFormatPlugin):
setattr(options, opt.option.name, opt.recommended_value) setattr(options, opt.option.name, opt.recommended_value)
options.input_encoding = 'utf-8' options.input_encoding = 'utf-8'
base = os.getcwdu() base = os.getcwdu()
if hasattr(stream, 'name'): if file_ext != 'txtz' and hasattr(stream, 'name'):
base = os.path.dirname(stream.name) base = os.path.dirname(stream.name)
fname = os.path.join(base, 'index.html') fname = os.path.join(base, 'index.html')
c = 0 c = 0
@ -190,7 +190,7 @@ class TXTInput(InputFormatPlugin):
htmlfile.write(html.encode('utf-8')) htmlfile.write(html.encode('utf-8'))
odi = options.debug_pipeline odi = options.debug_pipeline
options.debug_pipeline = None options.debug_pipeline = None
# Generate oeb from htl conversion. # Generate oeb from html conversion.
oeb = html_input.convert(open(htmlfile.name, 'rb'), options, 'html', log, oeb = html_input.convert(open(htmlfile.name, 'rb'), options, 'html', log,
{}) {})
options.debug_pipeline = odi options.debug_pipeline = odi

View File

@ -126,7 +126,7 @@ def separate_hard_scene_breaks(txt):
return '\n%s\n' % line return '\n%s\n' % line
else: else:
return line return line
txt = re.sub(u'(?miu)^[ \t-=~\/]+$', lambda mo: sep_break(mo.group()), txt) txt = re.sub(u'(?miu)^[ \t-=~\/_]+$', lambda mo: sep_break(mo.group()), txt)
return txt return txt
def block_to_single_line(txt): def block_to_single_line(txt):

File diff suppressed because it is too large

View File

@ -0,0 +1,57 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
__docformat__ = 'restructuredtext en'
__all__ = ["Unihandecoder"]
'''
Decode unicode text to an ASCII representation of the text.
Translate unicode characters to ASCII.
Inspired by John Schember's unidecode library, which was created as part
of calibre.
Copyright(c) 2009, John Schember
Transliterate the string from unicode characters to ASCII, for Chinese and other languages.
'''
import unicodedata
class Unihandecoder(object):
preferred_encoding = None
decoder = None
def __init__(self, lang="zh", encoding='utf-8'):
self.preferred_encoding = encoding
lang = lang.lower()
if lang[:2] == u'ja':
from calibre.ebooks.unihandecode.jadecoder import Jadecoder
self.decoder = Jadecoder()
elif lang[:2] == u'kr' or lang == u'korean':
from calibre.ebooks.unihandecode.krdecoder import Krdecoder
self.decoder = Krdecoder()
elif lang[:2] == u'vn' or lang == u'vietnum':
from calibre.ebooks.unihandecode.vndecoder import Vndecoder
self.decoder = Vndecoder()
else: #zh and others
from calibre.ebooks.unihandecode.unidecoder import Unidecoder
self.decoder = Unidecoder()
def decode(self, text):
try:
unicode # python2
if not isinstance(text, unicode):
try:
text = unicode(text)
except:
try:
text = text.decode(self.preferred_encoding)
except:
text = text.decode('utf-8', 'replace')
except: # python3, str is unicode
pass
# First, unicode normalize the text using NFKC (see the Unicode standard).
ntext = unicodedata.normalize('NFKC', text)
return self.decoder.decode(ntext)
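A usage sketch of the dispatcher above; the outputs in the comments are illustrative, not verified against the bundled dictionaries:

from calibre.ebooks.unihandecode import Unihandecoder

d = Unihandecoder(lang='ja')      # a 'ja*' language selects the kakasi-backed Jadecoder
print d.decode(u'\u6f22\u5b57')   # kanji in, e.g. 'Kanji' out
d = Unihandecoder(lang='zh')      # anything unrecognized falls back to Unidecoder
print d.decode(u'\u4e2d\u6587')   # codepoint-table ASCII out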

File diff suppressed because it is too large

View File

@ -0,0 +1,41 @@
# coding:utf8
__license__ = 'GPL 3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
__docformat__ = 'restructuredtext en'
'''
Decode unicode text to an ASCII representation of the text for Japanese.
Translate a unicode string to an ASCII roman string.
The API is based on the python unidecode module,
which is based on the Ruby gem (http://rubyforge.org/projects/unidecode/)
and the perl module Text::Unidecode
(http://search.cpan.org/~sburke/Text-Unidecode-0.04/).
This functionality is provided by the KAKASI Japanese processing engine.
Copyright (c) 2010 Hiroshi Miura
'''
import re
from calibre.ebooks.unihandecode.unidecoder import Unidecoder
from calibre.ebooks.unihandecode.unicodepoints import CODEPOINTS
from calibre.ebooks.unihandecode.jacodepoints import CODEPOINTS as JACODES
from calibre.ebooks.unihandecode.pykakasi.kakasi import kakasi
class Jadecoder(Unidecoder):
kakasi = None
codepoints = {}
def __init__(self):
self.codepoints = CODEPOINTS
self.codepoints.update(JACODES)
self.kakasi = kakasi()
def decode(self, text):
try:
result=self.kakasi.do(text)
return re.sub('[^\x00-\x7f]', lambda x: self.replace_point(x.group()),result)
except:
return re.sub('[^\x00-\x7f]', lambda x: self.replace_point(x.group()),text)

File diff suppressed because it is too large

View File

@ -0,0 +1,24 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
__docformat__ = 'restructuredtext en'
'''
Decode unicode text to an ASCII representation of the text in Korean.
Based on unidecoder.
'''
from calibre.ebooks.unihandecode.unidecoder import Unidecoder
from calibre.ebooks.unihandecode.krcodepoints import CODEPOINTS as HANCODES
from calibre.ebooks.unihandecode.unicodepoints import CODEPOINTS
class Krdecoder(Unidecoder):
codepoints = {}
def __init__(self):
self.codepoints = CODEPOINTS
self.codepoints.update(HANCODES)

View File

@ -0,0 +1,5 @@
from calibre.ebooks.unihandecode.pykakasi.kakasi import kakasi
kakasi
__all__ = ["pykakasi"]

View File

@ -0,0 +1,185 @@
# -*- coding: utf-8 -*-
# h2a.py
#
# Copyright 2011 Hiroshi Miura <miurahr@linux.com>
#
# Original copyright:
# * KAKASI (Kanji Kana Simple inversion program)
# * $Id: jj2.c,v 1.7 2001-04-12 05:57:34 rug Exp $
# * Copyright (C) 1992
# * Hironobu Takahashi (takahasi@tiny.or.jp)
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either versions 2, or (at your option)
# * any later version.
# *
# * This program is distributed in the hope that it will be useful
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with KAKASI, see the file COPYING. If not, write to the Free
# * Software Foundation Inc., 59 Temple Place - Suite 330, Boston, MA
# * 02111-1307, USA.
# */
class H2a (object):
H2a_table = {
u"\u3041":"a", u"\u3042":"a",
u"\u3043":"i", u"\u3044":"i",
u"\u3045":"u", u"\u3046":"u",
u"\u3046\u309b":"vu", u"\u3046\u309b\u3041":"va",
u"\u3046\u309b\u3043":"vi", u"\u3046\u309b\u3047":"ve",
u"\u3046\u309b\u3049":"vo",
u"\u3047":"e", u"\u3048":"e",
u"\u3049":"o", u"\u304a":"o",
u"\u304b":"ka", u"\u304c":"ga",
u"\u304d":"ki", u"\u304d\u3041":"kya",
u"\u304d\u3045":"kyu", u"\u304d\u3049":"kyo",
u"\u304e":"gi", u"\u3050\u3083":"gya",
u"\u304e\u3045":"gyu", u"\u304e\u3087":"gyo",
u"\u304f":"ku", u"\u3050":"gu",
u"\u3051":"ke", u"\u3052":"ge",
u"\u3053":"ko", u"\u3054":"go",
u"\u3055":"sa", u"\u3056":"za",
u"\u3057":"shi", u"\u3057\u3083":"sha",
u"\u3057\u3085":"shu", u"\u3057\u3087":"sho",
u"\u3058":"ji", u"\u3058\u3083":"ja",
u"\u3058\u3085":"ju", u"\u3058\u3087":"jo",
u"\u3059":"su", u"\u305a":"zu",
u"\u305b":"se", u"\u305c":"ze",
u"\u305d":"so", u"\u305e":"zo",
u"\u305f":"ta", u"\u3060":"da",
u"\u3061":"chi", u"\u3061\u3047":"che", u"\u3061\u3083":"cha",
u"\u3061\u3085":"chu", u"\u3061\u3087":"cho",
u"\u3062":"ji", u"\u3062\u3083":"ja",
u"\u3062\u3085":"ju", u"\u3062\u3087":"jo",
u"\u3063":"tsu",
u"\u3063\u3046\u309b":"vvu",
u"\u3063\u3046\u309b\u3041":"vva",
u"\u3063\u3046\u309b\u3043":"vvi",
u"\u3063\u3046\u309b\u3047":"vve",
u"\u3063\u3046\u309b\u3049":"vvo",
u"\u3063\u304b":"kka", u"\u3063\u304c":"gga",
u"\u3063\u304d":"kki", u"\u3063\u304d\u3083":"kkya",
u"\u3063\u304d\u3085":"kkyu", u"\u3063\u304d\u3087":"kkyo",
u"\u3063\u304e":"ggi", u"\u3063\u304e\u3083":"ggya",
u"\u3063\u304e\u3085":"ggyu", u"\u3063\u304e\u3087":"ggyo",
u"\u3063\u304f":"kku", u"\u3063\u3050":"ggu",
u"\u3063\u3051":"kke", u"\u3063\u3052":"gge",
u"\u3063\u3053":"kko", u"\u3063\u3054":"ggo",
u"\u3063\u3055":"ssa", u"\u3063\u3056":"zza",
u"\u3063\u3057":"sshi", u"\u3063\u3057\u3083":"ssha",
u"\u3063\u3057\u3085":"sshu", u"\u3063\u3057\u3087":"ssho",
u"\u3063\u3058":"jji", u"\u3063\u3058\u3083":"jja",
u"\u3063\u3058\u3085":"jju", u"\u3063\u3058\u3087":"jjo",
u"\u3063\u3059":"ssu", u"\u3063\u305a":"zzu",
u"\u3063\u305b":"sse", u"\u3063\u305e":"zze",
u"\u3063\u305d":"sso", u"\u3063\u305e":"zzo",
u"\u3063\u305f":"tta", u"\u3063\u3060":"dda",
u"\u3063\u3061":"tchi", u"\u3063\u3061\u3083":"tcha",
u"\u3063\u3061\u3085":"tchu", u"\u3063\u3061\u3087":"tcho",
u"\u3063\u3062":"jji", u"\u3063\u3062\u3083":"jjya",
u"\u3063\u3062\u3085":"jjyu", u"\u3063\u3062\u3087":"jjyo",
u"\u3063\u3064":"ttsu", u"\u3063\u3065":"zzu",
u"\u3063\u3066":"tte", u"\u3063\u3067":"dde",
u"\u3063\u3068":"tto", u"\u3063\u3069":"ddo",
u"\u3063\u306f":"hha", u"\u3063\u3070":"bba",
u"\u3063\u3071":"ppa",
u"\u3063\u3072":"hhi", u"\u3063\u3072\u3083":"hhya",
u"\u3063\u3072\u3085":"hhyu", u"\u3063\u3072\u3087":"hhyo",
u"\u3063\u3073":"bbi", u"\u3063\u3073\u3083":"bbya",
u"\u3063\u3073\u3085":"bbyu", u"\u3063\u3073\u3087":"bbyo",
u"\u3063\u3074":"ppi", u"\u3063\u3074\u3083":"ppya",
u"\u3063\u3074\u3085":"ppyu", u"\u3063\u3074\u3087":"ppyo",
u"\u3063\u3075":"ffu", u"\u3063\u3075\u3041":"ffa",
u"\u3063\u3075\u3043":"ffi", u"\u3063\u3075\u3047":"ffe",
u"\u3063\u3075\u3049":"ffo",
u"\u3063\u3076":"bbu", u"\u3063\u3077":"ppu",
u"\u3063\u3078":"hhe", u"\u3063\u3079":"bbe",
u"\u3063\u307a":"ppe",
u"\u3063\u307b":"hho", u"\u3063\u307c":"bbo",
u"\u3063\u307d":"ppo",
u"\u3063\u3084":"yya", u"\u3063\u3086":"yyu",
u"\u3063\u3088":"yyo",
u"\u3063\u3089":"rra", u"\u3063\u308a":"rri",
u"\u3063\u308a\u3083":"rrya", u"\u3063\u308a\u3085":"rryu",
u"\u3063\u308a\u3087":"rryo",
u"\u3063\u308b":"rru", u"\u3063\u308c":"rre",
u"\u3063\u308d":"rro",
u"\u3064":"tsu", u"\u3065":"zu",
u"\u3066":"te", u"\u3067":"de", u"\u3067\u3043":"di",
u"\u3068":"to", u"\u3069":"do",
u"\u306a":"na",
u"\u306b":"ni", u"\u306b\u3083":"nya",
u"\u306b\u3085":"nyu", u"\u306b\u3087":"nyo",
u"\u306c":"nu", u"\u306d":"ne", u"\u306e":"no",
u"\u306f":"ha", u"\u3070":"ba", u"\u3071":"pa",
u"\u3072":"hi", u"\u3072\u3083":"hya",
u"\u3072\u3085":"hyu", u"\u3072\u3087":"hyo",
u"\u3073":"bi", u"\u3073\u3083":"bya",
u"\u3073\u3085":"byu", u"\u3073\u3087":"byo",
u"\u3074":"pi", u"\u3074\u3083":"pya",
u"\u3074\u3085":"pyu", u"\u3074\u3087":"pyo",
u"\u3075":"fu", u"\u3075\u3041":"fa",
u"\u3075\u3043":"fi", u"\u3075\u3047":"fe",
u"\u3075\u3049":"fo",
u"\u3076":"bu", u"\u3077":"pu",
u"\u3078":"he", u"\u3079":"be", u"\u307a":"pe",
u"\u307b":"ho", u"\u307c":"bo", u"\u307d":"po",
u"\u307e":"ma",
u"\u307f":"mi", u"\u307f\u3083":"mya",
u"\u307f\u3085":"myu", u"\u307f\u3087":"myo",
u"\u3080":"mu", u"\u3081":"me", u"\u3082":"mo",
u"\u3083":"ya", u"\u3084":"ya",
u"\u3085":"yu", u"\u3086":"yu",
u"\u3087":"yo", u"\u3088":"yo",
u"\u3089":"ra",
u"\u308a":"ri", u"\u308a\u3083":"rya",
u"\u308a\u3085":"ryu", u"\u308a\u3087":"ryo",
u"\u308b":"ru", u"\u308c":"re", u"\u308d":"ro",
u"\u308e":"wa", u"\u308f":"wa",
u"\u3090":"i", u"\u3091":"e",
u"\u3092":"wo", u"\u3093":"n",
u"\u3093\u3042":"n'a", u"\u3093\u3044":"n'i",
u"\u3093\u3046":"n'u", u"\u3093\u3048":"n'e",
u"\u3093\u304a":"n'o",
}
# this class is Borg
_shared_state = {}
def __new__(cls, *p, **k):
self = object.__new__(cls, *p, **k)
self.__dict__ = cls._shared_state
return self
def isHiragana(self, char):
return ( 0x3040 < ord(char) and ord(char) < 0x3094)
def convert(self, text):
Hstr = ""
max_len = -1
r = min(4, len(text)+1)
for x in xrange(r):
if text[:x] in self.H2a_table:
if max_len < x:
max_len = x
Hstr = self.H2a_table[text[:x]]
return (Hstr, max_len)
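convert() tests the prefixes text[:1] through text[:3] against the table and keeps the longest hit, returning the romanization together with the number of kana consumed so the caller can advance its cursor. A short sketch, with values taken from the table above:

h2a = H2a()
h2a.convert(u'\u3042\u304b')        # ('a', 1): only the first kana matches
h2a.convert(u'\u3057\u3083\u304b')  # ('sha', 2): the two-kana digraph beats 'shi'
h2a.convert(u'abc')                 # ('', -1): no match, the caller copies a char through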

View File

@ -0,0 +1,564 @@
芦蘆
壱一
苅刈
舘館
曽曾
菟兎
島嶋
盃杯
冨富
峯峰
亘亙
弌一
乘乗
亂乱
豫予
亊事
弍二
亞亜
亰京
从従
仭仞
佛仏
來来
儘侭
伜倅
假仮
會会
做作
傳伝
僞偽
價価
儉倹
兒児
兔兎
竸競
兩両
囘回
册冊
冢塚
冩写
决決
冱冴
冰氷
况況
凉涼
處処
凾函
刄刃
刔抉
刧劫
剩剰
劍剣
劔剣
劒剣
剱剣
劑剤
辨弁
勞労
勳勲
勵励
勸勧
區区
卆卒
丗世
凖準
夘卯
卻却
卷巻
厠廁
厦廈
厮廝
厰廠
參参
雙双
咒呪
單単
噐器
營営
嚏嚔
嚴厳
囑嘱
囓齧
圀国
圈圏
國国
圍囲
圓円
團団
圖図
埀垂
埓埒
塲場
壞壊
墮堕
壓圧
壘塁
壥廛
壤壌
壯壮
壺壷
壹一
壻婿
壽寿
夂夊
夛多
梦夢
竒奇
奧奥
奬奨
侫佞
姙妊
嫻嫺
孃嬢
學学
斈学
寃冤
寇冦
寢寝
寫写
寶宝
寳宝
尅剋
將将
專専
對対
尓爾
尢尤
屆届
屬属
峽峡
嶌嶋
嵜崎
崙崘
嵳嵯
嶽岳
巛川
巵卮
帋紙
帶帯
幤幣
廐厩
廏厩
廣広
廚厨
廢廃
廳庁
廰庁
廸迪
弃棄
弉奘
彜彝
彈弾
彌弥
弯彎
徃往
徑径
從従
徠来
悳徳
恠怪
恆恒
悧俐
惡悪
惠恵
忰悴
惱悩
愼慎
愽博
慘惨
慚慙
憇憩
應応
懷懐
懴懺
戀恋
戞戛
戰戦
戲戯
拔抜
拏拿
擔担
拜拝
拂払
挾挟
搜捜
插挿
搖揺
攝摂
攪撹
據拠
擇択
擧拳
舉拳
抬擡
擴拡
攜携
攵攴
攷考
收収
效効
敕勅
敍叙
敘叙
數数
變変
斷断
旙旛
昜陽
晄晃
晉晋
晝昼
晰晢
暎映
曉暁
暸瞭
昿曠
曵曳
朖朗
朞期
霸覇
杤栃
杰傑
枩松
檜桧
條条
檮梼
梹檳
棊棋
棧桟
棕椶
楙茂
榮栄
槨椁
樂楽
權権
樞枢
樣様
樓楼
橢楕
檢検
櫻桜
鬱欝
盜盗
飮飲
歐嘔
歡歓
歸帰
殘残
殱殲
殼殻
毆殴
毓育
氣気
沒没
泪涙
濤涛
渕淵
渊淵
淨浄
淺浅
滿満
溂剌
溪渓
灌潅
滯滞
澁渋
澀渋
潛潜
濳潜
澂澄
澑溜
澤沢
濟済
濕湿
濱浜
濾滬
灣湾
烱炯
烟煙
熈煕
熏燻
燒焼
爐炉
爭争
爲為
爼俎
犁犂
犹猶
犲豺
狹狭
獎奨
默黙
獨独
獸獣
獵猟
獻献
珎珍
璢瑠
瑯琅
珱瓔
瓣弁
甞嘗
甼町
畄留
畍界
畊耕
畆畝
畧略
畫画
當当
畴疇
疊畳
疉畳
疂畳
癡痴
發発
皃猊
皈帰
皹皸
盖蓋
盡尽
蘯盪
眞真
眦眥
礦鉱
礪砺
碎砕
碯瑙
祕秘
祿禄
齋斎
禪禅
禮礼
禀稟
稱称
稻稲
稾稿
穗穂
穩穏
龝穐
穰穣
窗窓
竈竃
窰窯
竊窃
竝並
筺筐
笋筍
箟箘
筝箏
簔蓑
籠篭
籘籐
籖籤
粹粋
糺糾
絲糸
經経
總総
緜綿
縣県
縱縦
繪絵
繩縄
繼継
緕纃
續続
纖繊
纎繊
纜繿
缺欠
罐缶
罸罰
羃冪
羣群
羮羹
譱善
翆翠
翦剪
耻恥
聟婿
聨聯
聲声
聰聡
聽聴
肅粛
冐冒
脉脈
腦脳
腟膣
膓腸
膸髄
膽胆
臈臘
臟臓
臺台
與与
舊旧
舍舎
舖舗
舩船
艢檣
舮艫
艷艶
莖茎
莊荘
莵兎
菷帚
萠萌
蕚萼
蒂蔕
萬万
葢蓋
蘂蕊
蕋蕊
藪薮
藏蔵
藝芸
藥薬
蘓蘇
乕虎
號号
蠣蛎
蝨虱
蠅蝿
螢蛍
蟆蟇
蟲虫
蠏蟹
蟷螳
蟒蠎
蠶蚕
蠧蠹
蠻蛮
衂衄
衞衛
袵衽
裝装
襃褒
褝襌
覩睹
覺覚
覽覧
觀観
觧解
觸触
誡戒
謌歌
諡謚
謠謡
證証
譛譖
譯訳
譽誉
讀読
讓譲
讚賛
豐豊
貉狢
貍狸
貎猊
豼貔
貘獏
戝財
貭質
貳弐
貮弐
賤賎
賣売
贊賛
賍贓
赱走
踈疎
踴踊
躰体
軆体
軈軅
軣轟
輕軽
輙輒
輌輛
轉転
辭辞
辯弁
迯逃
逹達
逎遒
遞逓
遲遅
邊辺
邉辺
邨村
鄰隣
醉酔
醫医
釀醸
釋釈
釡釜
釼剣
銕鉄
錢銭
鎭鎮
鐵鉄
鐡鉄
鑒鑑
鑄鋳
鑛鉱
鈩鑪
鑚鑽
閇閉
濶闊
關関
阯址
陷陥
險険
隱隠
隸隷
襍雑
雜雑
靈霊
靜静
靱靭
韭韮
韲齏
韵韻
顏顔
顯顕
飃飄
餘余
餝飾
餠餅
騷騒
驅駆
驛駅
驗験
髓髄
體体
髮髪
鬪闘
鰺鯵
鰛鰮
鳬鳧
鳫鴈
鵄鴟
鵞鵝
鷄鶏
鷏鷆
鹽塩
麥麦
麸麩
麪麺
點点
黨党
皷鼓
鼡鼠
齊斉
齒歯
齡齢
龜亀
槇槙
遙遥
瑤瑶
凜凛
熙煕

View File

@ -0,0 +1,83 @@
# -*- coding: utf-8 -*-
# j2h.py
#
# Copyright 2011 Hiroshi Miura <miurahr@linux.com>
#
# Original Copyright:
# * KAKASI (Kanji Kana Simple inversion program)
# * $Id: jj2.c,v 1.7 2001-04-12 05:57:34 rug Exp $
# * Copyright (C) 1992
# * Hironobu Takahashi (takahasi@tiny.or.jp)
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either versions 2, or (at your option)
# * any later version.
# *
# * This program is distributed in the hope that it will be useful
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with KAKASI, see the file COPYING. If not, write to the Free
# * Software Foundation Inc., 59 Temple Place - Suite 330, Boston, MA
# * 02111-1307, USA.
# */
from calibre.ebooks.unihandecode.pykakasi.jisyo import jisyo
import re
class J2H (object):
kanwa = None
cl_table = [
"","aiueow", "aiueow", "aiueow", "aiueow", "aiueow", "aiueow", "aiueow",
"aiueow", "aiueow", "aiueow", "k", "g", "k", "g", "k", "g", "k", "g", "k",
"g", "s", "zj", "s", "zj", "s", "zj", "s", "zj", "s", "zj", "t", "d", "tc",
"d", "aiueokstchgzjfdbpw", "t", "d", "t", "d", "t", "d", "n", "n", "n", "n",
"n", "h", "b", "p", "h", "b", "p", "hf", "b", "p", "h", "b", "p", "h", "b",
"p", "m", "m", "m", "m", "m", "y", "y", "y", "y", "y", "y", "rl", "rl",
"rl", "rl", "rl", "wiueo", "wiueo", "wiueo", "wiueo", "w", "n", "v", "k",
"k", "", "", "", "", "", "", "", "", ""]
def __init__(self):
self.kanwa = jisyo()
def isKanji(self, c):
return ( 0x3400 <= ord(c) and ord(c) < 0xfa2e)
def isCletter(self, l, c):
if (ord(u"") <= ord(c) and ord(c) <= 0x309f) and ( l in self.cl_table[ord(c) - ord(u"")-1]):
return True
return False
def itaiji_conv(self, text):
r = []
for c in text:
if c in self.kanwa.itaijidict:
r.append(c)
for c in r:
text = re.sub(c, self.kanwa.itaijidict[c], text)
return text
def convert(self, text):
max_len = 0
Hstr = ""
table = self.kanwa.load_jisyo(text[0])
if table is None:
return ("", 0)
for (k,v) in table.iteritems():
length = len(k)
if len(text) >= length:
if text.startswith(k):
for (yomi, tail) in v:
if tail == '':
if max_len < length:
Hstr = yomi
max_len = length
elif max_len < length+1 and len(text) > length and self.isCletter(tail, text[length]):
Hstr=''.join([yomi,text[length]])
max_len = length+1
return (Hstr, max_len)
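convert() loads the dictionary bucket for the leading kanji and scans it for the longest key that prefixes the text; when an entry carries a tail letter, isCletter() lets the match also claim the following hiragana if it belongs to that consonant class (okurigana handling). A sketch of the contract, with an assumed dictionary entry, since kanwadict2.db ships as an opaque resource:

# Assume the bucket for u'\u66f8' holds the entry (u'\u304b', 'k'): yomi 'ka'
# whose tail class 'k' accepts okurigana such as u'\u304f' (ku).
j2h = J2H()
hira, consumed = j2h.convert(u'\u66f8\u304f')  # (u'\u304b\u304f', 2) under that entry
if consumed == 0:
    pass  # leading char not in the dictionary; the caller copies it through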

View File

@ -0,0 +1,53 @@
# -*- coding: utf-8 -*-
# jisyo.py
#
# Copyright 2011 Hiroshi Miura <miurahr@linux.com>
from cPickle import load
import anydbm,marshal
from zlib import decompress
import os
import calibre.utils.resources as resources
class jisyo (object):
kanwadict = None
itaijidict = None
kanadict = None
jisyo_table = {}
# this class is Borg
_shared_state = {}
def __new__(cls, *p, **k):
self = object.__new__(cls, *p, **k)
self.__dict__ = cls._shared_state
return self
def __init__(self):
if self.kanwadict is None:
dictpath = resources.get_path(os.path.join('localization','pykakasi','kanwadict2.db'))
self.kanwadict = anydbm.open(dictpath,'r')
if self.itaijidict is None:
itaijipath = resources.get_path(os.path.join('localization','pykakasi','itaijidict2.pickle'))
itaiji_pkl = open(itaijipath, 'rb')
self.itaijidict = load(itaiji_pkl)
if self.kanadict is None:
kanadictpath = resources.get_path(os.path.join('localization','pykakasi','kanadict2.pickle'))
kanadict_pkl = open(kanadictpath, 'rb')
self.kanadict = load(kanadict_pkl)
def load_jisyo(self, char):
try:#python2
key = "%04x"%ord(unicode(char))
except:#python3
key = "%04x"%ord(char)
try: #already exist?
table = self.jisyo_table[key]
except:
try:
table = self.jisyo_table[key] = marshal.loads(decompress(self.kanwadict[key]))
except:
return None
return table
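jisyo, like H2a, is a Borg: every instance shares a single __dict__, so the pickles and the anydbm handle are loaded once per process no matter how many decoders get constructed. The pattern in isolation, as a minimal sketch:

class Borg(object):
    _shared_state = {}
    def __new__(cls, *p, **k):
        self = object.__new__(cls)
        self.__dict__ = cls._shared_state  # every instance aliases one dict
        return self

a, b = Borg(), Borg()
a.loaded = True
assert b.loaded and a is not b  # shared state, distinct identities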

View File

@ -0,0 +1,50 @@
# -*- coding: utf-8 -*-
# k2a.py
#
# Copyright 2011 Hiroshi Miura <miurahr@linux.com>
#
# Original copyright:
# * KAKASI (Kanji Kana Simple inversion program)
# * $Id: jj2.c,v 1.7 2001-04-12 05:57:34 rug Exp $
# * Copyright (C) 1992
# * Hironobu Takahashi (takahasi@tiny.or.jp)
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either versions 2, or (at your option)
# * any later version.
# *
# * This program is distributed in the hope that it will be useful
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with KAKASI, see the file COPYING. If not, write to the Free
# * Software Foundation Inc., 59 Temple Place - Suite 330, Boston, MA
# * 02111-1307, USA.
# */
from calibre.ebooks.unihandecode.pykakasi.jisyo import jisyo
class K2a (object):
kanwa = None
def __init__(self):
self.kanwa = jisyo()
def isKatakana(self, char):
return ( 0x30a0 < ord(char) and ord(char) < 0x30f7)
def convert(self, text):
Hstr = ""
max_len = -1
r = min(10, len(text)+1)
for x in xrange(r):
if text[:x] in self.kanwa.kanadict:
if max_len < x:
max_len = x
Hstr = self.kanwa.kanadict[text[:x]]
return (Hstr, max_len)

View File

@ -0,0 +1,101 @@
# -*- coding: utf-8 -*-
# kakasi.py
#
# Copyright 2011 Hiroshi Miura <miurahr@linux.com>
#
# Original Copyright:
# * KAKASI (Kanji Kana Simple inversion program)
# * $Id: jj2.c,v 1.7 2001-04-12 05:57:34 rug Exp $
# * Copyright (C) 1992
# * Hironobu Takahashi (takahasi@tiny.or.jp)
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either versions 2, or (at your option)
# * any later version.
# *
# * This program is distributed in the hope that it will be useful
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with KAKASI, see the file COPYING. If not, write to the Free
# * Software Foundation Inc., 59 Temple Place - Suite 330, Boston, MA
# * 02111-1307, USA.
# */
from calibre.ebooks.unihandecode.pykakasi.j2h import J2H
from calibre.ebooks.unihandecode.pykakasi.h2a import H2a
from calibre.ebooks.unihandecode.pykakasi.k2a import K2a
class kakasi(object):
j2h = None
h2a = None
k2a = None
def __init__(self):
self.j2h = J2H()
self.h2a = H2a()
self.k2a = K2a()
def do(self, text):
otext = ''
i = 0
while True:
if i >= len(text):
break
if self.j2h.isKanji(text[i]):
(t, l) = self.j2h.convert(text[i:])
if l <= 0:
otext = otext + text[i]
i = i + 1
continue
i = i + l
m = 0
tmptext = ""
while True:
if m >= len(t):
break
(s, n) = self.h2a.convert(t[m:])
if n <= 0:
break
m = m + n
tmptext = tmptext+s
if i >= len(text):
otext = otext + tmptext.capitalize()
else:
otext = otext + tmptext.capitalize() +' '
elif self.h2a.isHiragana(text[i]):
tmptext = ''
while True:
(t, l) = self.h2a.convert(text[i:])
tmptext = tmptext+t
i = i + l
if i >= len(text):
otext = otext + tmptext
break
elif not self.h2a.isHiragana(text[i]):
otext = otext + tmptext + ' '
break
elif self.k2a.isKatakana(text[i]):
tmptext = ''
while True:
(t, l) = self.k2a.convert(text[i:])
tmptext = tmptext+t
i = i + l
if i >= len(text):
otext = otext + tmptext
break
elif not self.k2a.isKatakana(text[i]):
otext = otext + tmptext + ' '
break
else:
otext = otext + text[i]
i += 1
return otext
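do() makes a single left-to-right pass: kanji runs go through J2H and then H2a (each converted word capitalized and space-separated), hiragana and katakana runs go through H2a and K2a, and anything else is copied through unchanged. A usage sketch; the romanization in the comment depends on the bundled dictionary and is illustrative:

from calibre.ebooks.unihandecode.pykakasi.kakasi import kakasi

k = kakasi()
print k.do(u'\u6771\u4eac 123')  # e.g. 'Toukyou 123': the kanji run romanized, ASCII untouched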

File diff suppressed because it is too large

View File

@ -0,0 +1,317 @@
;; Kana-Alphabet mapping dictionary
;;
;; To use this mapping table,
;; you should first normalize the text to Unicode NFKC form.
;;
;; basic mapping
;;
a ァ
a ア
ba バ
bba ッバ
bbe ッベ
bbi ッビ
bbo ッボ
bbu ッブ
bbya ッビャ
bbyo ッビョ
bbyu ッビュ
be ベ
bi ビ
bo ボ
bu ブ
bya ビャ
byo ビョ
byu ビュ
cha チャ
che チェ
chi チ
cho チョ
chu チュ
da ダ
dda ッダ
dde ッデ
ddo ッド
de デ
di ディ
do ド
e ェ
e エ
e ヱ
fa ファ
fe フェ
ffa ッファ
ffe ッフェ
ffi ッフィ
ffo ッフォ
ffu ッフ
fi フィ
fo フォ
fu フ
ga ガ
ge ゲ
gga ッガ
gge ッゲ
ggi ッギ
ggo ッゴ
ggu ッグ
ggya ッギャ
ggyo ッギョ
ggyu ッギュ
gi ギ
go ゴ
gu グ
gya グャ
gyo ギョ
gyu ギゥ
ha ハ
he ヘ
hha ッハ
hhe ッヘ
hhi ッヒ
hho ッホ
hhya ッヒャ
hhyo ッヒョ
hhyu ッヒュ
hi ヒ
ho ホ
hya ヒャ
hyo ヒョ
hyu ヒュ
i ィ
i イ
i ヰ
ja ジャ
ja ヂャ
ji ジ
ji ヂ
jja ッジャ
jji ッジ
jji ッヂ
jjo ッジョ
jju ッジュ
jjya ッヂャ
jjyo ッヂョ
jjyu ッヂュ
jo ジョ
jo ヂョ
ju ジュ
ju ヂュ
ka カ
ka ヵ
ke ケ
ke ヶ
ki キ
kka ッカ
kke ッケ
kki ッキ
kko ッコ
kku ック
kkya ッキャ
kkyo ッキョ
kkyu ッキュ
ko コ
ku ク
kya キァ
kyo キォ
kyu キゥ
ma マ
me メ
mi ミ
mo モ
mu ム
mya ミャ
myo ミョ
myu ミュ
n ン
n'a ンア
n'e ンエ
n'i ンイ
n'o ンオ
n'u ンウ
na ナ
ne ネ
ni ニ
no ノ
nu ヌ
nya ニャ
nyo ニョ
nyu ニュ
o ォ
o オ
pa パ
pe ペ
pi ピ
po ポ
ppa ッパ
ppe ッペ
ppi ッピ
ppo ッポ
ppu ップ
ppya ッピャ
ppyo ッピョ
ppyu ッピュ
pu プ
pya ピャ
pyo ピョ
pyu ピュ
ra ラ
re レ
ri リ
ro ロ
rra ッラ
rre ッレ
rri ッリ
rro ッロ
rru ッル
rrya ッリャ
rryo ッリョ
rryu ッリュ
ru ル
rya リャ
ryo リョ
ryu リュ
sa サ
se セ
sha シャ
shi シ
sho ショ
shu シュ
so ソ
ssa ッサ
sse ッセ
ssha ッシャ
sshi ッシ
ssho ッショ
sshu ッシュ
sso ッソ
ssu ッス
su ス
ta タ
tcha ッチャ
tchi ッチ
tcho ッチョ
tchu ッチュ
te テ
to ト
tsu ッ
tsu ツ
tta ッタ
tte ッテ
tto ット
ttsu ッツ
u ゥ
u ウ
va ヴァ
ve ヴェ
vi ヴィ
vo ヴォ
vu ヴ
vva ッヴァ
vve ッヴェ
vvi ッヴィ
vvo ッヴォ
vvu ッヴ
wa ヮ
wa ワ
wo ヲ
ya ャ
ya ヤ
yo ョ
yo ヨ
yu ュ
yu ユ
yya ッヤ
yyo ッヨ
yyu ッユ
za ザ
ze ゼ
zo ゾ
zu ズ
zu ヅ
zza ッザ
zzo ッゾ
zzu ッズ
zzu ッヅ
;;
;; extended characters
;;
;;
;; gairai terms
;;
all オール
algrism アルゴリズム
answer アンサー
base ベース
begineer ビギナー
connection コネクション
contents コンテンツ
creator クリエーター
comic コミック
comics コミックス
culture カルチャー
debug デバッグ
debugging デバッギング
design デザイン
digital デジタル
dillenma ジレンマ
directory ディレクトリ
disk ディスク
document ドキュメント
download ダウンロード
electric エレクトリック
facebook フェイスブック
firefox ファイアーフォックス
folder フォルダ
format フォーマット
forum フォーラム
fox フォックス
free フリー
gnome ノーム
gnu グヌー
gozilla ゴジラ
guide ガイド
harvard ハーバード
help ヘルプ
highlight ハイライト
japan ジャパン
journal ジャーナル
library ライブラリ
line ライン
love ラヴ
love ラブ
mail メール
main メイン
mystery ミステリ
mozilla モジラ
network ネットワーク
next ネクスト
new ニュー
news ニュース
native ネイティブ
online オンライン
open オープン
professional プロフェッショナル
profile プロファイル
programmer プログラマ
sample サンプル
series シリーズ
share シェア
social ソーシャル
society ソサエティ
software ソフトウエア
source ソース
street ストリート
system システム
tag タグ
text テキスト
thunderbird サンダーバード
training トレーニング
twitter ツイッター
unicode ユニコード
wall ウオール
wall ウォール
welcome ウェルカム
welcome ウエルカム
wikinomics ウィキノミクス
york ヨーク

File diff suppressed because it is too large

View File

@ -1,12 +1,17 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
__license__ = 'GPL 3' __license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>' __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
''' '''
Decode unicode text to an ASCII representation of the text. Transliterate Decode unicode text to an ASCII representation of the text in Chinese.
unicode characters to ASCII. Transliterate unicode characters to ASCII based on Chinese pronunciation.
Derived from John Schember's unidecode library, which was created
as part of calibre.
Copyright(c) 2009, John Schember <john@nachtimwald.com>
Based on the ruby unidecode gem (http://rubyforge.org/projects/unidecode/) which Based on the ruby unidecode gem (http://rubyforge.org/projects/unidecode/) which
is based on the perl module Text::Unidecode is based on the perl module Text::Unidecode
@ -55,29 +60,20 @@ it under the same terms as Perl itself.
''' '''
import re import re
from calibre.ebooks.unihandecode.unicodepoints import CODEPOINTS
from calibre.ebooks.unidecode.unicodepoints import CODEPOINTS from calibre.ebooks.unihandecode.zhcodepoints import CODEPOINTS as HANCODES
from calibre.constants import preferred_encoding
class Unidecoder(object): class Unidecoder(object):
codepoints = {}
def __init__(self):
self.codepoints = CODEPOINTS
self.codepoints.update(HANCODES)
def decode(self, text): def decode(self, text):
'''
Tranliterate the string from unicode characters to ASCII.
'''
# The keys for CODEPOINTS is unicode characters, we want to be sure the
# input text is unicode.
if not isinstance(text, unicode):
try:
text = unicode(text)
except:
try:
text = text.decode(preferred_encoding)
except:
text = text.decode('utf-8', 'replace')
# Replace characters larger than 127 with their ASCII equivalent. # Replace characters larger than 127 with their ASCII equivalent.
return re.sub('[^\x00-\x7f]', lambda x: self.replace_point(x.group()), return re.sub('[^\x00-\x7f]',lambda x: self.replace_point(x.group()), text)
text)
def replace_point(self, codepoint): def replace_point(self, codepoint):
''' '''
@ -87,7 +83,7 @@ class Unidecoder(object):
# Split the unicode character xABCD into parts 0xAB and 0xCD. # Split the unicode character xABCD into parts 0xAB and 0xCD.
# 0xAB represents the group within CODEPOINTS to query and 0xCD # 0xAB represents the group within CODEPOINTS to query and 0xCD
# represents the position in the list of characters for the group. # represents the position in the list of characters for the group.
return CODEPOINTS[self.code_group(codepoint)][self.grouped_point( return self.codepoints[self.code_group(codepoint)][self.grouped_point(
codepoint)] codepoint)]
except: except:
return '?' return '?'
@ -97,12 +93,18 @@ class Unidecoder(object):
Find what group character is a part of. Find what group character is a part of.
''' '''
# Code groups withing CODEPOINTS take the form 'xAB' # Code groups withing CODEPOINTS take the form 'xAB'
return u'x%02x' % (ord(unicode(character)) >> 8) try:#python2
return 'x%02x' % (ord(unicode(character)) >> 8)
except:
return 'x%02x' % (ord(character) >> 8)
def grouped_point(self, character): def grouped_point(self, character):
''' '''
Return the location the replacement character is in the list for a Return the location the replacement character is in the list for a
the group character is a part of. the group character is a part of.
''' '''
return ord(unicode(character)) & 255 try:#python2
return ord(unicode(character)) & 255
except:
return ord(character) & 255
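replace_point() addresses the table by high byte: the bucket key is 'x' plus the top byte of the codepoint, and the low byte indexes into that bucket's list of replacements. A worked example:

ch = u'\u4e2d'
group = 'x%02x' % (ord(ch) >> 8)  # 'x4e': which bucket of self.codepoints to use
offset = ord(ch) & 255            # 0x2d == 45: position inside that bucket
# replace_point() returns self.codepoints['x4e'][45], or '?' on any failure.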

File diff suppressed because it is too large

View File

@ -0,0 +1,23 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
__docformat__ = 'restructuredtext en'
'''
Decode unicode text to an ASCII representation of the text in Vietnamese.
'''
from calibre.ebooks.unihandecode.unidecoder import Unidecoder
from calibre.ebooks.unihandecode.vncodepoints import CODEPOINTS as HANCODES
from calibre.ebooks.unihandecode.unicodepoints import CODEPOINTS
class Vndecoder(Unidecoder):
codepoints = {}
def __init__(self):
self.codepoints = CODEPOINTS
self.codepoints.update(HANCODES)

File diff suppressed because it is too large

View File

@ -271,11 +271,6 @@ class DeleteAction(InterfaceAction):
partial(self.library_ids_deleted, current_row=row)) partial(self.library_ids_deleted, current_row=row))
# Device view is visible. # Device view is visible.
else: else:
if not confirm('<p>'+_('The selected books will be '
'<b>permanently deleted</b> '
'from your device. Are you sure?')
+'</p>', 'device_delete_books', self.gui):
return
if self.gui.stack.currentIndex() == 1: if self.gui.stack.currentIndex() == 1:
view = self.gui.memory_view view = self.gui.memory_view
elif self.gui.stack.currentIndex() == 2: elif self.gui.stack.currentIndex() == 2:
@ -283,8 +278,14 @@ class DeleteAction(InterfaceAction):
else: else:
view = self.gui.card_b_view view = self.gui.card_b_view
paths = view.model().paths(rows) paths = view.model().paths(rows)
ids = view.model().indices(rows)
if not confirm('<p>'+_('The selected books will be '
'<b>permanently deleted</b> '
'from your device. Are you sure?')
+'</p>', 'device_delete_books', self.gui):
return
job = self.gui.remove_paths(paths) job = self.gui.remove_paths(paths)
self.delete_memory[job] = (paths, view.model()) self.delete_memory[job] = (paths, view.model())
view.model().mark_for_deletion(job, rows) view.model().mark_for_deletion(job, ids, rows_are_ids=True)
self.gui.status_bar.show_message(_('Deleting books from device.'), 1000) self.gui.status_bar.show_message(_('Deleting books from device.'), 1000)

View File

@ -213,6 +213,8 @@ class CheckLibraryDialog(QDialog):
self.log = QTreeWidget(self) self.log = QTreeWidget(self)
self.log.itemChanged.connect(self.item_changed) self.log.itemChanged.connect(self.item_changed)
self.log.itemExpanded.connect(self.item_expanded_or_collapsed)
self.log.itemCollapsed.connect(self.item_expanded_or_collapsed)
self._layout.addWidget(self.log) self._layout.addWidget(self.log)
self.check_button = QPushButton(_('&Run the check again')) self.check_button = QPushButton(_('&Run the check again'))
@ -333,11 +335,15 @@ class CheckLibraryDialog(QDialog):
for check in CHECKS: for check in CHECKS:
builder(t, checker, check) builder(t, checker, check)
t.setColumnWidth(0, 200) t.resizeColumnToContents(0)
t.setColumnWidth(1, 400) t.resizeColumnToContents(1)
self.delete_button.setEnabled(False) self.delete_button.setEnabled(False)
self.text_results = '\n'.join(plaintext) self.text_results = '\n'.join(plaintext)
def item_expanded_or_collapsed(self, item):
self.log.resizeColumnToContents(0)
self.log.resizeColumnToContents(1)
def item_changed(self, item, column): def item_changed(self, item, column):
self.fix_button.setEnabled(False) self.fix_button.setEnabled(False)
for it in self.top_level_items.values(): for it in self.top_level_items.values():

View File

@ -120,11 +120,10 @@ class BooksModel(QAbstractTableModel): # {{{
def set_device_connected(self, is_connected): def set_device_connected(self, is_connected):
self.device_connected = is_connected self.device_connected = is_connected
self.refresh_ondevice()
def refresh_ondevice(self): def refresh_ondevice(self):
self.db.refresh_ondevice() self.db.refresh_ondevice()
self.refresh() # does a resort() self.resort()
self.research() self.research()
def set_book_on_device_func(self, func): def set_book_on_device_func(self, func):
@ -826,7 +825,7 @@ class BooksModel(QAbstractTableModel): # {{{
return False return False
val = int(value.toInt()[0]) if column == 'rating' else \ val = int(value.toInt()[0]) if column == 'rating' else \
value.toDate() if column in ('timestamp', 'pubdate') else \ value.toDate() if column in ('timestamp', 'pubdate') else \
unicode(value.toString()) unicode(value.toString()).strip()
id = self.db.id(row) id = self.db.id(row)
books_to_refresh = set([id]) books_to_refresh = set([id])
if column == 'rating': if column == 'rating':

View File

@ -551,8 +551,10 @@ class BooksView(QTableView): # {{{
return mods & Qt.ControlModifier or mods & Qt.ShiftModifier return mods & Qt.ControlModifier or mods & Qt.ShiftModifier
def mousePressEvent(self, event): def mousePressEvent(self, event):
if event.button() == Qt.LeftButton and not self.event_has_mods(): ep = event.pos()
self.drag_start_pos = event.pos() if self.indexAt(ep) in self.selectionModel().selectedIndexes() and \
event.button() == Qt.LeftButton and not self.event_has_mods():
self.drag_start_pos = ep
return QTableView.mousePressEvent(self, event) return QTableView.mousePressEvent(self, event)
def mouseMoveEvent(self, event): def mouseMoveEvent(self, event):

View File

@ -10,13 +10,15 @@ from PyQt4.Qt import QApplication
from calibre.gui2.preferences import ConfigWidgetBase, test_widget, \ from calibre.gui2.preferences import ConfigWidgetBase, test_widget, \
CommaSeparatedList CommaSeparatedList
from calibre.gui2.preferences.search_ui import Ui_Form from calibre.gui2.preferences.search_ui import Ui_Form
from calibre.gui2 import config from calibre.gui2 import config, error_dialog
from calibre.utils.config import prefs from calibre.utils.config import prefs
class ConfigWidget(ConfigWidgetBase, Ui_Form): class ConfigWidget(ConfigWidgetBase, Ui_Form):
def genesis(self, gui): def genesis(self, gui):
self.gui = gui self.gui = gui
db = gui.library_view.model().db
self.db = db
r = self.register r = self.register
@ -24,11 +26,153 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
r('highlight_search_matches', config) r('highlight_search_matches', config)
r('limit_search_columns', prefs) r('limit_search_columns', prefs)
r('limit_search_columns_to', prefs, setting=CommaSeparatedList) r('limit_search_columns_to', prefs, setting=CommaSeparatedList)
fl = gui.library_view.model().db.field_metadata.get_search_terms() fl = db.field_metadata.get_search_terms()
self.opt_limit_search_columns_to.update_items_cache(fl) self.opt_limit_search_columns_to.update_items_cache(fl)
self.clear_history_button.clicked.connect(self.clear_histories) self.clear_history_button.clicked.connect(self.clear_histories)
self.gst_explanation.setText('<p>' + _(
"<b>Grouped search terms</b> are search names that permit a query to automatically "
"search across more than one column. For example, if you create a grouped "
"search term <code>allseries</code> with the value "
"<code>series, #myseries, #myseries2</code>, then "
"the query <code>allseries:adhoc</code> will find 'adhoc' in any of the "
"columns <code>series</code>, <code>#myseries</code>, and "
"<code>#myseries2</code>.<p> Enter the name of the "
"grouped search term in the drop-down box, enter the list of columns "
"to search in the value box, then push the Save button. "
"<p>Note: Search terms are forced to lower case; <code>MySearch</code> "
"and <code>mysearch</code> are the same term."
"<p>You can have your grouped search term show up as user categories in "
" the Tag Browser. Just add the grouped search term names to the Make user "
"categories from box. You can add multiple terms separated by commas. "
"The new user category will be automatically "
"populated with all the items in the categories included in the grouped "
"search term. <p>Automatic user categories permit you to see easily "
"all the category items that "
"are in the columns contained in the grouped search term. Using the above "
"<code>allseries</code> example, the automatically-generated user category "
"will contain all the series mentioned in <code>series</code>, "
"<code>#myseries</code>, and <code>#myseries2</code>. This "
"can be useful to check for duplicates, to find which column contains "
"a particular item, or to have hierarchical categories (categories "
"that contain categories)."))
self.gst = db.prefs.get('grouped_search_terms', {})
self.orig_gst_keys = self.gst.keys()
fl = []
for f in db.all_field_keys():
fm = db.metadata_for_field(f)
if not fm['search_terms']:
continue
if not fm['is_category']:
continue
fl.append(f)
self.gst_value.update_items_cache(fl)
self.fill_gst_box(select=None)
self.gst_delete_button.setEnabled(False)
self.gst_save_button.setEnabled(False)
self.gst_names.currentIndexChanged[int].connect(self.gst_index_changed)
self.gst_names.editTextChanged.connect(self.gst_text_changed)
self.gst_value.textChanged.connect(self.gst_text_changed)
self.gst_save_button.clicked.connect(self.gst_save_clicked)
self.gst_delete_button.clicked.connect(self.gst_delete_clicked)
self.gst_changed = False
if db.prefs.get('grouped_search_make_user_categories', None) is None:
db.prefs.set('grouped_search_make_user_categories', [])
r('grouped_search_make_user_categories', db.prefs, setting=CommaSeparatedList)
self.muc_changed = False
self.opt_grouped_search_make_user_categories.editingFinished.connect(
self.muc_box_changed)
def muc_box_changed(self):
self.muc_changed = True
def gst_save_clicked(self):
idx = self.gst_names.currentIndex()
name = icu_lower(unicode(self.gst_names.currentText()))
if not name:
return error_dialog(self.gui, _('Grouped Search Terms'),
_('The search term cannot be blank'),
show=True)
if idx != 0:
orig_name = unicode(self.gst_names.itemData(idx).toString())
else:
orig_name = ''
if name != orig_name:
if name in self.db.field_metadata.get_search_terms() and \
name not in self.orig_gst_keys:
return error_dialog(self.gui, _('Grouped Search Terms'),
_('That name is already used for a column or grouped search term'),
show=True)
if name in [icu_lower(p) for p in self.db.prefs.get('user_categories', {})]:
return error_dialog(self.gui, _('Grouped Search Terms'),
_('That name is already used for a user category'),
show=True)
val = [v.strip() for v in unicode(self.gst_value.text()).split(',') if v.strip()]
if not val:
return error_dialog(self.gui, _('Grouped Search Terms'),
_('The value box cannot be empty'), show=True)
if orig_name and name != orig_name:
del self.gst[orig_name]
self.gst_changed = True
self.gst[name] = val
self.fill_gst_box(select=name)
self.changed_signal.emit()
def gst_delete_clicked(self):
if self.gst_names.currentIndex() == 0:
return error_dialog(self.gui, _('Grouped Search Terms'),
_('The empty grouped search term cannot be deleted'), show=True)
name = unicode(self.gst_names.currentText())
if name in self.gst:
del self.gst[name]
self.fill_gst_box(select='')
self.changed_signal.emit()
self.gst_changed = True
def fill_gst_box(self, select=None):
terms = sorted(self.gst.keys())
self.opt_grouped_search_make_user_categories.update_items_cache(terms)
self.gst_names.blockSignals(True)
self.gst_names.clear()
self.gst_names.addItem('', '')
for t in terms:
self.gst_names.addItem(t, t)
self.gst_names.blockSignals(False)
if select is not None:
if select == '':
self.gst_index_changed(0)
elif select in terms:
self.gst_names.setCurrentIndex(self.gst_names.findText(select))
def gst_text_changed(self):
self.gst_delete_button.setEnabled(False)
self.gst_save_button.setEnabled(True)
def gst_index_changed(self, idx):
self.gst_delete_button.setEnabled(idx != 0)
self.gst_save_button.setEnabled(False)
self.gst_value.blockSignals(True)
if idx == 0:
self.gst_value.setText('')
else:
name = unicode(self.gst_names.itemData(idx).toString())
self.gst_value.setText(','.join(self.gst[name]))
self.gst_value.blockSignals(False)
def commit(self):
if self.gst_changed:
self.db.prefs.set('grouped_search_terms', self.gst)
self.db.field_metadata.add_grouped_search_terms(self.gst)
return ConfigWidgetBase.commit(self)
def refresh_gui(self, gui): def refresh_gui(self, gui):
if self.muc_changed:
gui.tags_view.set_new_model()
gui.search.search_as_you_type(config['search_as_you_type']) gui.search.search_as_you_type(config['search_as_you_type'])
gui.library_view.model().set_highlight_only(config['highlight_search_matches']) gui.library_view.model().set_highlight_only(config['highlight_search_matches'])
gui.search.do_search() gui.search.do_search()

View File

@@ -7,7 +7,7 @@
     <x>0</x>
     <y>0</y>
     <width>670</width>
-    <height>392</height>
+    <height>556</height>
    </rect>
   </property>
   <property name="windowTitle">
@@ -77,19 +77,6 @@
      </layout>
     </widget>
    </item>
-   <item row="4" column="0">
-    <spacer name="verticalSpacer">
-     <property name="orientation">
-      <enum>Qt::Vertical</enum>
-     </property>
-     <property name="sizeHint" stdset="0">
-      <size>
-       <width>0</width>
-       <height>0</height>
-      </size>
-     </property>
-    </spacer>
-   </item>
    <item row="3" column="0">
     <widget class="QPushButton" name="clear_history_button">
      <property name="toolTip">
@@ -100,6 +87,120 @@
      </property>
     </widget>
    </item>
+   <item row="4" column="0">
+    <widget class="QGroupBox" name="groupBox_2">
+     <property name="title">
+      <string>Grouped Search Terms</string>
+     </property>
+     <layout class="QGridLayout" name="gridLayout_3">
+      <item row="0" column="0">
+       <layout class="QHBoxLayout" name="l12">
+        <item>
+         <widget class="QLabel" name="la10">
+          <property name="text">
+           <string>&amp;Names:</string>
+          </property>
+          <property name="buddy">
+           <cstring>gst_names</cstring>
+          </property>
+         </widget>
+        </item>
+        <item>
+         <widget class="QComboBox" name="gst_names">
+          <property name="editable">
+           <bool>true</bool>
+          </property>
+          <property name="minimumContentsLength">
+           <number>10</number>
+          </property>
+          <property name="toolTip">
+           <string>Contains the names of the currently-defined grouped search terms.
+Create a new name by entering it into the empty box, then
+pressing Save. Rename a search term by selecting it, then
+changing the name and pressing Save. Change the value of
+a search term by changing the value box, then pressing Save.</string>
+          </property>
+         </widget>
+        </item>
+        <item>
+         <widget class="QToolButton" name="gst_delete_button">
+          <property name="toolTip">
+           <string>Delete the current search term</string>
+          </property>
+          <property name="text">
+           <string>...</string>
+          </property>
+          <property name="icon">
+           <iconset resource="../../../../resources/images.qrc">
+            <normaloff>:/images/trash.png</normaloff>:/images/trash.png</iconset>
+          </property>
+         </widget>
+        </item>
+        <item>
+         <widget class="MultiCompleteLineEdit" name="gst_value"/>
+        </item>
+        <item>
+         <widget class="QToolButton" name="gst_save_button">
+          <property name="toolTip">
+           <string>Save the current search term. You can rename a search term by
+changing the name, then pressing Save. You can change the value
+of a search term by changing the value box, then pressing Save.</string>
+          </property>
+          <property name="text">
+           <string>&amp;Save</string>
+          </property>
+         </widget>
+        </item>
+       </layout>
+      </item>
+      <item row="0" column="1" rowspan="3">
+       <widget class="QTextBrowser" name="gst_explanation">
+        <property name="sizePolicy">
+         <sizepolicy hsizetype="Expanding" vsizetype="Expanding">
+          <horstretch>0</horstretch>
+          <verstretch>100</verstretch>
+         </sizepolicy>
+        </property>
+       </widget>
+      </item>
+      <item row="1" column="0">
+       <layout class="QHBoxLayout">
+        <item>
+         <widget class="QLabel" name="l11">
+          <property name="text">
+           <string>Make &amp;user categories from:</string>
+          </property>
+          <property name="buddy">
+           <cstring>opt_grouped_search_make_user_categories</cstring>
+          </property>
+         </widget>
+        </item>
+        <item>
+         <widget class="MultiCompleteLineEdit" name="opt_grouped_search_make_user_categories">
+          <property name="toolTip">
+           <string>Enter the names of any grouped search terms you wish
+to be shown as user categories</string>
+          </property>
+         </widget>
+        </item>
+       </layout>
+      </item>
+      <item row="2" column="0">
+       <spacer name="verticalSpacer">
+        <property name="orientation">
+         <enum>Qt::Vertical</enum>
+        </property>
+        <property name="sizeHint" stdset="0">
+         <size>
+          <width>20</width>
+          <height>40</height>
+         </size>
+        </property>
+       </spacer>
+      </item>
+     </layout>
+    </widget>
+   </item>
  </layout>
 </widget>
 <customwidgets>
@@ -109,6 +210,8 @@
   <header>calibre/gui2/complete.h</header>
  </customwidget>
 </customwidgets>
-<resources/>
+<resources>
+ <include location="../../../../resources/images.qrc"/>
+</resources>
 <connections/>
</ui>
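
The widgets declared above (gst_names, gst_value, the Save and Delete buttons) are the ones the Python handlers in the previous file wire up. As a rough sketch, a form like this can be previewed outside calibre's build system as follows (PyQt4-era code; calibre itself compiles .ui files to Python at build time rather than loading them at runtime, and the path here is assumed):

    import sys
    from PyQt4 import QtGui, uic

    app = QtGui.QApplication(sys.argv)
    form = uic.loadUi('search.ui')  # the form edited above
    form.show()
    app.exec_()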

@@ -466,10 +466,7 @@ class TagTreeItem(object): # {{{
             icon_map[0] = data.icon
         self.tag, self.icon_state_map = data, list(map(QVariant, icon_map))
         if tooltip:
-            if tooltip.endswith(':'):
-                self.tooltip = tooltip + ' '
-            else:
-                self.tooltip = tooltip + ': '
+            self.tooltip = tooltip + ' '
         else:
             self.tooltip = ''
@@ -589,11 +586,17 @@ class TagsModel(QAbstractItemModel): # {{{
         # get_node_tree cannot return None here, because row_map is empty
         data = self.get_node_tree(config['sort_tags_by'])
+        gst = db.prefs.get('grouped_search_terms', {})
         self.root_item = TagTreeItem()
         for i, r in enumerate(self.row_map):
             if self.hidden_categories and self.categories[i] in self.hidden_categories:
                 continue
-            tt = _(u'The lookup/search name is "{0}"').format(r)
+            if r.startswith('@') and r[1:] in gst:
+                tt = _(u'The grouped search term name is "{0}"').format(r[1:])
+            elif r == 'news':
+                tt = ''
+            else:
+                tt = _(u'The lookup/search name is "{0}"').format(r)
             TagTreeItem(parent=self.root_item,
                     data=self.categories[i],
                     category_icon=self.category_icon_map[r],
@@ -735,6 +738,14 @@ class TagsModel(QAbstractItemModel): # {{{
         self.row_map = []
         self.categories = []
+        # Get the categories
+        if self.search_restriction:
+            data = self.db.get_categories(sort=sort,
+                        icon_map=self.category_icon_map,
+                        ids=self.db.search('', return_matches=True))
+        else:
+            data = self.db.get_categories(sort=sort, icon_map=self.category_icon_map)
+
         # Reconstruct the user categories, putting them into metadata
         self.db.field_metadata.remove_dynamic_categories()
         tb_cats = self.db.field_metadata
@@ -746,17 +757,16 @@ class TagsModel(QAbstractItemModel): # {{{
             except ValueError:
                 import traceback
                 traceback.print_exc()
+        for cat in sorted(self.db.prefs.get('grouped_search_terms', {}),
+                          key=sort_key):
+            if (u'@' + cat) in data:
+                tb_cats.add_user_category(label=u'@' + cat, name=cat)
+        self.db.data.change_search_locations(self.db.field_metadata.get_search_terms())
+
         if len(saved_searches().names()):
             tb_cats.add_search_category(label='search', name=_('Searches'))
-        # Now get the categories
-        if self.search_restriction:
-            data = self.db.get_categories(sort=sort,
-                        icon_map=self.category_icon_map,
-                        ids=self.db.search('', return_matches=True))
-        else:
-            data = self.db.get_categories(sort=sort, icon_map=self.category_icon_map)
         if self.filter_categories_by:
             for category in data.keys():
                 data[category] = [t for t in data[category]
@@ -767,6 +777,7 @@ class TagsModel(QAbstractItemModel): # {{{
             if category in data: # The search category can come and go
                 self.row_map.append(category)
                 self.categories.append(tb_categories[category]['name'])
+
         if len(old_row_map) != 0 and len(old_row_map) != len(self.row_map):
             # A category has been added or removed. We must force a rebuild of
             # the model
@@ -822,6 +833,7 @@ class TagsModel(QAbstractItemModel): # {{{
                     not self.db.field_metadata[r]['is_custom'] and \
                     not self.db.field_metadata[r]['kind'] == 'user' \
                     else False
+            tt = r if self.db.field_metadata[r]['kind'] == 'user' else None
             for idx,tag in enumerate(data[r]):
                 if clear_rating:
                     tag.avg_rating = None
@@ -861,10 +873,10 @@ class TagsModel(QAbstractItemModel): # {{{
                                 category_icon = category_node.icon,
                                 tooltip = None,
                                 category_key=category_node.category_key)
-                    t = TagTreeItem(parent=sub_cat, data=tag, tooltip=r,
+                    t = TagTreeItem(parent=sub_cat, data=tag, tooltip=tt,
                                     icon_map=self.icon_state_map)
                 else:
-                    t = TagTreeItem(parent=category, data=tag, tooltip=r,
+                    t = TagTreeItem(parent=category, data=tag, tooltip=tt,
                                     icon_map=self.icon_state_map)
         self.endInsertRows()
         return True
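
Condensed, the tooltip selection this diff adds to TagsModel reads as follows (a restatement of the logic above, with plain strings standing in for the translated templates):

    def category_tooltip(r, gst):
        # Rows prefixed with '@' that are backed by a grouped search term
        # get a GST tooltip; the news category gets none.
        if r.startswith('@') and r[1:] in gst:
            return u'The grouped search term name is "%s"' % r[1:]
        if r == 'news':
            return u''
        return u'The lookup/search name is "%s"' % r

    assert category_tooltip('@allseries', {'allseries': []}).endswith('"allseries"')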

@@ -433,6 +433,10 @@ class ResultCache(SearchQueryParser): # {{{
         if len(candidates) == 0:
             return matches
+        if len(location) > 2 and location.startswith('@') and \
+                    location[1:] in self.db_prefs['grouped_search_terms']:
+            location = location[1:]
+
         if query and query.strip():
             # get metadata key associated with the search term. Eliminates
             # dealing with plurals and other aliases
@@ -440,9 +444,16 @@ class ResultCache(SearchQueryParser): # {{{
         # grouped search terms
         if isinstance(location, list):
             if allow_recursion:
+                if query.lower() == 'false':
+                    invert = True
+                    query = 'true'
+                else:
+                    invert = False
                 for loc in location:
                     matches |= self.get_matches(loc, query,
                             candidates=candidates, allow_recursion=False)
+                if invert:
+                    matches = self.universal_set() - matches
                 return matches
             raise ParseException(query, len(query), 'Recursive query group detected', self)
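
The 'false' handling above is De Morgan's law in action: a grouped term is false for a book exactly when none of its member locations match true, so the code searches the members for 'true' and complements the union. A self-contained illustration with plain sets:

    # Books 1-5; two member locations of a grouped term match these ids:
    universe = {1, 2, 3, 4, 5}
    matches_by_location = [{1, 2}, {2, 3}]

    # Searching the group for 'true' ORs the member matches together...
    true_matches = set().union(*matches_by_location)  # {1, 2, 3}
    # ...and 'false' is the complement against the universal set:
    false_matches = universe - true_matches
    assert false_matches == {4, 5}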

@@ -1481,23 +1481,36 @@ class EPUB_MOBI(CatalogPlugin):
             current_author = authors[0]
             for (i,author) in enumerate(authors):
                 if author != current_author and i:
-                    # Exit if author matches previous, but author_sort doesn't match
                     if author[0] == current_author[0]:
-                        error_msg = _('''
-Inconsistent Author Sort values for Author '{0}':
-'{1}' <> '{2}',
-unable to build catalog.\n
-Select all books by '{0}', apply correct Author Sort value in Edit Metadata dialog,
-then rebuild the catalog.\n''').format(author[0],author[1],current_author[1])
-                        self.opts.log.warn('\n*** Metadata error ***')
-                        self.opts.log.warn(error_msg)
-                        self.error.append('Metadata error')
-                        self.error.append(error_msg)
-                        return False
+                        if self.opts.fmt == 'mobi':
+                            # Exit if building MOBI
+                            error_msg = _(
+'''Inconsistent Author Sort values for
+Author '{0}':
+'{1}' <> '{2}'
+Unable to build MOBI catalog.\n
+Select all books by '{0}', apply correct Author Sort value in Edit Metadata dialog, then rebuild the catalog.\n''').format(author[0],author[1],current_author[1])
+                            self.opts.log.warn('\n*** Metadata error ***')
+                            self.opts.log.warn(error_msg)
+                            self.error.append('Author Sort mismatch')
+                            self.error.append(error_msg)
+                            return False
+                        else:
+                            # Warning if building non-MOBI
+                            if not self.error:
+                                self.error.append('Author Sort mismatch')
+                            error_msg = _(
+'''Warning: inconsistent Author Sort values for
+Author '{0}':
+'{1}' <> '{2}'\n''').format(author[0],author[1],current_author[1])
+                            self.opts.log.warn('\n*** Metadata warning ***')
+                            self.opts.log.warn(error_msg)
+                            self.error.append(error_msg)
                 current_author = author
         self.booksByAuthor = sorted(self.booksByAuthor, key=self.booksByAuthorSorter_author_sort)
         # Build the unique_authors set from existing data
@@ -2135,7 +2148,7 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1])
                         if author_count == 1:
                             divOpeningTag.insert(dotc, pBookTag)
                             dotc += 1
-                        else:
+                        elif divRunningTag:
                             divRunningTag.insert(drtc,pBookTag)
                             drtc += 1
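
The check above fires when two adjacent entries share an author name but carry different author_sort values; MOBI builds abort, other formats only log a warning. A standalone sketch of the same detection (sample data hypothetical):

    from collections import defaultdict

    # (author name, author_sort) pairs as the catalog generator sees them
    authors = [('Jane Doe', 'Doe, Jane'), ('Jane Doe', 'Jane Doe')]

    sorts_seen = defaultdict(set)
    for name, author_sort in authors:
        sorts_seen[name].add(author_sort)

    conflicts = [n for n, s in sorts_seen.items() if len(s) > 1]
    assert conflicts == ['Jane Doe']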

@@ -188,6 +188,17 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
             migrate_preference('saved_searches', {})
             set_saved_searches(self, 'saved_searches')
+            # migrate grouped_search_terms
+            if self.prefs.get('grouped_search_terms', None) is None:
+                try:
+                    ogst = tweaks.get('grouped_search_terms', {})
+                    ngst = {}
+                    for t in ogst:
+                        ngst[icu_lower(t)] = ogst[t]
+                    self.prefs.set('grouped_search_terms', ngst)
+                except:
+                    pass
+
         # Rename any user categories with names that differ only in case
         user_cats = self.prefs.get('user_categories', [])
         catmap = {}
@@ -349,12 +360,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
         if len(saved_searches().names()):
             tb_cats.add_search_category(label='search', name=_('Searches'))
-        gst = tweaks['grouped_search_terms']
-        for t in gst:
-            try:
-                self.field_metadata._add_search_terms_to_map(gst[t], [t])
-            except ValueError:
-                traceback.print_exc()
+        self.field_metadata.add_grouped_search_terms(
+                        self.prefs.get('grouped_search_terms', {}))
         self.book_on_device_func = None
         self.data = ResultCache(self.FIELD_MAP, self.field_metadata, db_prefs=self.prefs)
@@ -1293,7 +1300,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
         # icon_map is not None if get_categories is to store an icon and
         # possibly a tooltip in the tag structure.
         icon = None
-        tooltip = ''
+        tooltip = '(' + category + ')'
         label = tb_cats.key_to_label(category)
         if icon_map:
             if not tb_cats.is_custom_field(category):
@@ -1379,7 +1386,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
         categories['formats'].sort(key = lambda x:x.name)
         #### Now do the user-defined categories. ####
-        user_categories = self.prefs['user_categories']
+        user_categories = dict.copy(self.prefs['user_categories'])
         # We want to use same node in the user category as in the source
         # category. To do that, we need to find the original Tag node. There is
@@ -1390,6 +1397,17 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
         for c in categories.keys():
             taglist[c] = dict(map(lambda t:(t.name, t), categories[c]))
+        muc = self.prefs.get('grouped_search_make_user_categories', [])
+        gst = self.prefs.get('grouped_search_terms', {})
+        for c in gst:
+            if c not in muc:
+                continue
+            user_categories[c] = []
+            for sc in gst[c]:
+                if sc in categories.keys():
+                    for t in categories[sc]:
+                        user_categories[c].append([t.name, sc, 0])
+
         for user_cat in sorted(user_categories.keys(), key=sort_key):
             items = []
             for (name,label,ign) in user_categories[user_cat]:
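
The loop above materialises each grouped search term the user has opted into (via grouped_search_make_user_categories) as a user category, one [name, source, 0] row per tag found in a member category. A sketch with hypothetical data, using plain strings where the real code stores Tag objects:

    gst = {'allseries': ['series', '#myseries']}
    muc = ['allseries']
    categories = {'series': ['Dune'], '#myseries': ['Foundation']}

    user_categories = {}
    for c in gst:
        if c not in muc:
            continue
        user_categories[c] = []
        for sc in gst[c]:
            if sc in categories:
                for t in categories[sc]:
                    user_categories[c].append([t, sc, 0])

    assert user_categories == {'allseries': [['Dune', 'series', 0],
                                             ['Foundation', '#myseries', 0]]}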

@@ -3,7 +3,7 @@ Created on 25 May 2010
 @author: charles
 '''
-import copy
+import copy, traceback
 from calibre.utils.ordered_dict import OrderedDict
 from calibre.utils.config import tweaks
@@ -488,6 +488,20 @@ class FieldMetadata(dict):
                 del self._search_term_map[k]
         del self._tb_cats[key]
+    def _remove_grouped_search_terms(self):
+        to_remove = [v for v in self._search_term_map
+                     if isinstance(self._search_term_map[v], list)]
+        for v in to_remove:
+            del self._search_term_map[v]
+
+    def add_grouped_search_terms(self, gst):
+        self._remove_grouped_search_terms()
+        for t in gst:
+            try:
+                self._add_search_terms_to_map(gst[t], [t])
+            except ValueError:
+                traceback.print_exc()
+
     def cc_series_index_column_for(self, key):
         return self._tb_cats[key]['rec_index'] + 1
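
add_grouped_search_terms relies on a convention visible in _remove_grouped_search_terms: grouped entries are the only values in the search-term map that are lists, so they can be cleared and rebuilt wholesale on every save. A minimal model of that map (shapes assumed for illustration):

    # Ordinary terms map to a single location; grouped terms map to a list,
    # which is how _remove_grouped_search_terms finds them again.
    search_term_map = {
        'title': 'title',
        'allseries': ['series', '#myseries'],
    }
    grouped = [k for k, v in search_term_map.items() if isinstance(v, list)]
    assert grouped == ['allseries']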

File diff suppressed because it is too large

@@ -6,12 +6,12 @@ meaning as possible.
 import os
 from math import ceil
-from calibre.ebooks.unidecode.unidecoder import Unidecoder
 from calibre import sanitize_file_name
 from calibre.constants import preferred_encoding, iswindows
+from calibre.utils.localization import get_udc
-udc = Unidecoder()
 def ascii_text(orig):
+    udc = get_udc()
     try:
         ascii = udc.decode(orig)
     except:
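
With the module-level Unidecoder gone, ascii_text now fetches the shared decoder lazily, so the transliteration tables are only loaded on first use. Hypothetical usage, assuming a working calibre environment:

    from calibre.utils.filenames import ascii_text

    # The first call constructs the decoder via get_udc(); later calls reuse it.
    print(ascii_text(u'D\xe9j\xe0 vu'))  # e.g. 'Deja vu', depending on the decoder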

@@ -169,3 +169,13 @@ def set_qt_translator(translator):
             return translator.load(p)
     return False
+_udc = None
+
+def get_udc():
+    global _udc
+    if _udc is None:
+        from calibre.ebooks.unihandecode import Unihandecoder
+        _udc = Unihandecoder(lang=get_lang())
+    return _udc
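
get_udc is a lazy module-level singleton: constructing Unihandecoder (and importing its tables) is deferred until the first caller needs transliteration, then the instance is cached for the life of the process. The same pattern in isolation:

    _cache = None

    def get_expensive():
        # Nothing is built until the first call; later calls reuse the instance.
        global _cache
        if _cache is None:
            _cache = object()  # stand-in for Unihandecoder(lang=get_lang())
        return _cache

    assert get_expensive() is get_expensive()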