Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-09 03:04:10 -04:00)

Commit 3d20aadbd3: merge from trunk

COPYRIGHT (27 lines added):
@@ -193,6 +193,33 @@ License: GPL-3
  The full text of the GPL is distributed as in
  /usr/share/common-licenses/GPL-3 on Debian systems.
+
+Files: src/calibre/ebooks/unihandecode/pykakasi/*
+Copyright: 2011, Hiroshi Miura <miurahr@linux.com>
+Copyright: 1992, Hironobu Takahashi
+License: GPL-2+
+ The full text of the GPL is distributed as in
+ /usr/share/common-licenses/GPL on Debian systems.
+
+Files: resources/kanwadict2.db
+Files: resources/itaijidict2.pickle
+Copyright: 2011, Hiroshi Miura <miurahr@linux.com>
+Copyright: 1992 1993 1994, Hironobu Takahashi (takahasi@tiny.or.jp),
+Copyright: 1992 1993 1994, Masahiko Sato (masahiko@sato.riec.tohoku.ac.jp),
+Copyright: 1992 1993 1994, Yukiyoshi Kameyama, Miki Inooka, Akihiko Sasaki, Dai Ando, Junichi Okukawa,
+Copyright: 1992 1993 1994, Katsushi Sato and Nobuhiro Yamagishi
+License: GPL-2+
+ The full text of the GPL is distributed as in
+ /usr/share/common-licenses/GPL on Debian systems.
+
+Files: src/calibre/ebooks/unihandecode/*
+Copyright: 2010-2011, Hiroshi Miura <miurahr@linux.com>
+Copyright: 2009, John Schember
+Copyright: 2007, Russell Norris
+Copyright: 2001, Sean M. Burke
+License: GPL-3, Perl
+ The full text of the GPL is distributed as in
+ /usr/share/common-licenses/GPL-3 on Debian systems.
+
 Files: src/encutils/__init__.py
 Copyright: 2005-2008: Christof Hoeke
 License: LGPL-3+, CC-BY-3.0
news.svg (4.1 KiB before, 4.2 KiB after):

@@ -13,12 +13,12 @@
    id="Layer_1"
    x="0px"
    y="0px"
-   width="134.77701"
-   height="199.99901"
-   viewBox="0 0 134.777 199.999"
+   width="200"
+   height="200"
+   viewBox="0 0 199.99999 199.99999"
    enable-background="new 0 0 595.28 841.89"
    xml:space="preserve"
-   inkscape:version="0.47 r22583"
+   inkscape:version="0.48.0 r9654"
    sodipodi:docname="news.svg"><metadata
    id="metadata26"><rdf:RDF><cc:Work
    rdf:about=""><dc:format>image/svg+xml</dc:format><dc:type
@@ -38,22 +38,22 @@
    guidetolerance="10"
    inkscape:pageopacity="0"
    inkscape:pageshadow="2"
-   inkscape:window-width="640"
-   inkscape:window-height="489"
+   inkscape:window-width="1680"
+   inkscape:window-height="997"
    id="namedview22"
    showgrid="false"
    inkscape:zoom="0.28032165"
    inkscape:cx="67.389001"
    inkscape:cy="99.722002"
-   inkscape:window-x="0"
-   inkscape:window-y="41"
-   inkscape:window-maximized="0"
+   inkscape:window-x="-4"
+   inkscape:window-y="30"
+   inkscape:window-maximized="1"
    inkscape:current-layer="Layer_1" />
  <g
    id="g3"
-   transform="translate(-230.25101,-320.668)">
+   transform="translate(-194.57771,-320.66701)">
   <polygon
-   points="360.241,366.109 345.29,359.678 345.29,343.405 329.945,343.405 324.265,329.15 309.147,335.175 297.64,323.667 286.79,334.517 272.693,328.454 266.263,343.405 249.988,343.405 249.988,358.749 235.734,364.429 241.759,379.548 230.251,391.056 241.101,401.906 235.039,416.002 249.988,422.432 249.988,438.706 265.333,438.706 271.013,452.961 277.817,450.25 277.817,475.111 252.085,475.111 297.64,520.667 343.193,475.111 317.463,475.111 317.463,451.453 322.585,453.656 329.016,438.706 345.29,438.706 345.29,423.362 359.546,417.682 353.521,402.563 365.028,391.056 354.178,380.205 "
+   points="286.79,334.517 272.693,328.454 266.263,343.405 249.988,343.405 249.988,358.749 235.734,364.429 241.759,379.548 230.251,391.056 241.101,401.906 235.039,416.002 249.988,422.432 249.988,438.706 265.333,438.706 271.013,452.961 277.817,450.25 277.817,475.111 252.085,475.111 297.64,520.667 343.193,475.111 317.463,475.111 317.463,451.453 322.585,453.656 329.016,438.706 345.29,438.706 345.29,423.362 359.546,417.682 353.521,402.563 365.028,391.056 354.178,380.205 360.241,366.109 345.29,359.678 345.29,343.405 329.945,343.405 324.265,329.15 309.147,335.175 297.64,323.667 "
    id="polygon5"
    style="fill:#ffffff" />
  <linearGradient
@@ -73,7 +73,7 @@
    id="stop10" />
  </linearGradient>
  <polygon
-   points="360.241,363.11 345.29,356.679 345.29,340.406 329.945,340.406 324.265,326.151 309.147,332.176 297.64,320.668 286.79,331.518 272.693,325.455 266.263,340.406 249.988,340.406 249.988,355.75 235.734,361.43 241.759,376.549 230.251,388.057 241.101,398.907 235.039,413.003 249.988,419.433 249.988,435.707 265.333,435.707 271.013,449.962 277.817,447.251 277.817,472.112 252.085,472.112 297.64,517.668 343.193,472.112 317.463,472.112 317.463,448.454 322.585,450.657 329.016,435.707 345.29,435.707 345.29,420.363 359.546,414.683 353.521,399.564 365.028,388.057 354.178,377.206 "
+   points="286.79,331.518 272.693,325.455 266.263,340.406 249.988,340.406 249.988,355.75 235.734,361.43 241.759,376.549 230.251,388.057 241.101,398.907 235.039,413.003 249.988,419.433 249.988,435.707 265.333,435.707 271.013,449.962 277.817,447.251 277.817,472.112 252.085,472.112 297.64,517.668 343.193,472.112 317.463,472.112 317.463,448.454 322.585,450.657 329.016,435.707 345.29,435.707 345.29,420.363 359.546,414.683 353.521,399.564 365.028,388.057 354.178,377.206 360.241,363.11 345.29,356.679 345.29,340.406 329.945,340.406 324.265,326.151 309.147,332.176 297.64,320.668 "
    id="polygon12"
    style="fill:url(#SVGID_1_)" />
  <g
@@ -81,14 +81,16 @@
   <path
    d="m 273.311,419.168 v -56.752 h 17.935 l 9.01,17.43 c 3.115,5.641 6.399,13.22 8.926,19.873 h 0.252 c -0.842,-7.494 -1.178,-15.41 -1.178,-23.83 v -13.472 h 13.893 v 56.752 H 306.15 l -9.684,-18.861 c -3.116,-5.978 -6.82,-13.641 -9.515,-20.461 h -0.336 c 0.42,7.663 0.589,16.167 0.589,25.345 v 13.978 h -13.893 z"
    id="path16"
-   style="fill:#993720" />
+   style="fill:#993720"
+   inkscape:connector-curvature="0" />
  </g>
  <g
   id="g18">
   <path
    d="m 273.311,416.873 v -56.752 h 17.935 l 9.01,17.43 c 3.115,5.641 6.399,13.22 8.926,19.873 h 0.252 c -0.842,-7.494 -1.178,-15.41 -1.178,-23.83 v -13.472 h 13.893 v 56.752 H 306.15 l -9.684,-18.861 c -3.116,-5.978 -6.82,-13.641 -9.515,-20.461 h -0.336 c 0.42,7.663 0.589,16.167 0.589,25.345 v 13.978 h -13.893 z"
    id="path20"
-   style="fill:#f0efef" />
+   style="fill:#f0efef"
+   inkscape:connector-curvature="0" />
  </g>
  </g>
 </svg>
@@ -105,7 +105,7 @@ bool_custom_columns_are_tristate = 'yes'
 # title within authors.
 sort_columns_at_startup = None
 
-#; Control how dates are displayed
+#: Control how dates are displayed
 # Format to be used for publication date and the timestamp (date).
 # A string controlling how the publication date is displayed in the GUI
 # d    the day as number without a leading zero (1 to 31)
@@ -245,19 +245,6 @@ sony_collection_name_template='{value}{category:| (|)}'
 sony_collection_sorting_rules = []
 
 
-#: Create search terms to apply a query across several built-in search terms.
-#  Syntax: {'new term':['existing term 1', 'term 2', ...], 'new':['old'...] ...}
-#  Example: create the term 'myseries' that when used as myseries:foo would
-#  search all of the search categories 'series', '#myseries', and '#myseries2':
-#  grouped_search_terms={'myseries':['series','#myseries', '#myseries2']}
-#  Example: two search terms 'a' and 'b' both that search 'tags' and '#mytags':
-#  grouped_search_terms={'a':['tags','#mytags'], 'b':['tags','#mytags']}
-#  Note: You cannot create a search term that is a duplicate of an existing term.
-#  Such duplicates will be silently ignored. Also note that search terms ignore
-#  case. 'MySearch' and 'mysearch' are the same term.
-grouped_search_terms = {}
-
-
 #: Control how tags are applied when copying books to another library
 # Set this to True to ensure that tags in 'Tags to add when adding
 # a book' are added when copying books to another library
Binary file not shown (5.6 KiB before, 6.2 KiB after).

resources/images/news/de_standaard.png (new binary file, 834 B; not shown)
resources/recipes/adevarul.recipe (new file, 50 lines):

# -*- coding: utf-8 -*-
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
adevarul.ro
'''

from calibre.web.feeds.news import BasicNewsRecipe

class Adevarul(BasicNewsRecipe):
    title                 = u'Adev\u0103rul'
    language              = 'ro'
    __author__            = u'Silviu Cotoar\u0103'
    description           = u'\u0218tiri din Rom\u00e2nia'
    publisher             = 'Adevarul'
    category              = 'Ziare,Stiri,Romania'
    oldest_article        = 5
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'utf-8'
    remove_javascript     = True
    cover_url             = 'http://upload.wikimedia.org/wikipedia/en/d/d6/Logo_noul_adevarul.png'

    conversion_options = {
            'comments'  : description
            ,'tags'     : category
            ,'language' : language
            ,'publisher': publisher
        }

    keep_only_tags = [ dict(name='div', attrs={'class':'article_header'})
                     , dict(name='div', attrs={'class':'bd'})
                     ]

    remove_tags = [ dict(name='div', attrs={'class':'bb-wg-article_related_attachements'})
                  , dict(name='div', attrs={'class':'bb-md bb-md-article_comments'})
                  , dict(name='form', attrs={'id':'bb-comment-create-form'})
                  ]

    remove_tags_after = [ dict(name='form', attrs={'id':'bb-comment-create-form'}) ]

    feeds = [ (u'\u0218tiri', u'http://www.adevarul.ro/rss/latest') ]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)
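All the Romanian recipes added in this commit follow the same BasicNewsRecipe pattern, so a quick load check catches most syntax mistakes before a full fetch. A minimal sketch, assuming a calibre checkout of this era is importable; compile_recipe is the helper calibre itself uses to load recipe source, and the path is simply this new file:

    # Sanity-check sketch: load the recipe source and inspect the class.
    # Assumes calibre of this era is importable; Python 2 syntax.
    from calibre.web.feeds.recipes import compile_recipe

    raw = open('resources/recipes/adevarul.recipe', 'rb').read()
    recipe = compile_recipe(raw)           # returns the recipe class
    print recipe.title, recipe.language    # u'Adev\u0103rul' ro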
resources/recipes/aprospect.recipe: mode changed from executable to normal file (0 lines changed)
resources/recipes/capital.recipe (new file, 44 lines):

# -*- coding: utf-8 -*-
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
capital.ro
'''

from calibre.web.feeds.news import BasicNewsRecipe

class Capital(BasicNewsRecipe):
    title                 = 'Capital'
    __author__            = u'Silviu Cotoar\u0103'
    description           = u'\u0218tiri din Rom\u00e2nia'
    oldest_article        = 5
    language              = 'ro'
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    category              = 'Ziare,Stiri,Romania'
    encoding              = 'utf-8'
    remove_javascript     = True
    publisher             = 'Capital'
    cover_url             = 'http://www.mediapress.ro/imagini/sigla-capital-s16.gif'

    conversion_options = {
            'comments'  : description
            ,'tags'     : category
            ,'language' : language
            ,'publisher' : publisher
        }

    keep_only_tags = [ dict(name='div', attrs={'class':'single one_article'})
                     ]

    remove_tags = [ dict(name='div', attrs={'class':'single_details'})
                  , dict(name='div', attrs={'class':'tx-addoceansbanners-pi1'})
                  ]

    feeds = [ (u'\u0218tiri', u'http://www.capital.ro/rss.html') ]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)
resources/recipes/catavencu.recipe (new file, 53 lines):

# -*- coding: utf-8 -*-
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
catavencu.ro
'''

from calibre.web.feeds.news import BasicNewsRecipe

class Catavencu(BasicNewsRecipe):
    title                 = u'Academia Ca\u0163avencu'
    __author__            = u'Silviu Cotoar\u0103'
    description           = 'Tagma cum laude'
    publisher             = 'Catavencu'
    oldest_article        = 5
    language              = 'ro'
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    category              = 'Ziare'
    encoding              = 'utf-8'
    cover_url             = 'http://upload.wikimedia.org/wikipedia/en/1/1e/Academia_Catavencu.jpg'

    conversion_options = {
            'comments'  : description
            ,'tags'     : category
            ,'language' : language
            ,'publisher' : publisher
        }

    keep_only_tags = [
        dict(name='ul', attrs={'class':'articles'})
        ]

    remove_tags = [
          dict(name='div', attrs={'class':['tools']})
        , dict(name='div', attrs={'class':['share']})
        , dict(name='div', attrs={'class':['category']})
        , dict(name='div', attrs={'id':['comments']})
        ]

    remove_tags_after = [
        dict(name='div', attrs={'id':'comments'})
        ]

    feeds = [
        (u'Feeds', u'http://catavencu.ro/feed/rss')
        ]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)
@@ -1,5 +1,5 @@
 __license__   = 'GPL v3'
-__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
 '''
 standaard.be
 '''
@@ -9,14 +9,15 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class DeStandaard(BasicNewsRecipe):
     title                 = u'De Standaard'
     __author__            = u'Darko Miletic'
     language              = 'nl_BE'
-
     description           = u'News from Belgium in Dutch'
     oldest_article        = 7
     max_articles_per_feed = 100
     no_stylesheets        = True
     use_embedded_content  = False
     encoding              = 'utf-8'
+    masthead_url          = 'http://www.standaard.be/extra/css/images/masthead/logo_340x45.png'
+    publication_type      = 'newspaper'
 
     keep_only_tags = [dict(name='div' , attrs={'id':['intro','continued']})]
 
@@ -27,4 +28,4 @@ class DeStandaard(BasicNewsRecipe):
         return article.get('guid', None)
 
     def print_version(self, url):
-        return url.replace('/Detail.aspx?','/PrintArtikel.aspx?')
+        return url.replace('/artikel/detail.aspx?','/Artikel/PrintArtikel.aspx?')
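print_version rewrites each article URL to its printer-friendly counterpart before download, which is why a plain string replace is enough here. An illustration with an invented article id:

    # Illustration only; the article id is invented.
    url = 'http://www.standaard.be/artikel/detail.aspx?artikelid=XYZ123'
    print url.replace('/artikel/detail.aspx?', '/Artikel/PrintArtikel.aspx?')
    # http://www.standaard.be/Artikel/PrintArtikel.aspx?artikelid=XYZ123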
resources/recipes/gandul.recipe (new file, 47 lines):

# -*- coding: utf-8 -*-
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
gandul.info
'''

from calibre.web.feeds.news import BasicNewsRecipe

class Gandul(BasicNewsRecipe):
    title                 = u'G\u00E2ndul'
    __author__            = u'Silviu Cotoar\u0103'
    publisher             = 'Gandul'
    description           = 'Cotidian Online'
    oldest_article        = 5
    language              = 'ro'
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    category              = 'Ziare,Stiri,Romania'
    encoding              = 'utf-8'
    cover_url             = 'http://storage0.dms.mpinteractiv.ro/media/1/1/1706/1064063/1/logo.jpg?width=400'

    conversion_options = {
            'comments'  : description
            ,'tags'     : category
            ,'language' : language
            ,'publisher' : publisher
        }

    keep_only_tags = [
        dict(name='div', attrs={'class':'article'})
        ]

    remove_tags = [
          dict(name='a', attrs={'class':'photo'})
        , dict(name='div', attrs={'class':'ad'})
        ]

    feeds = [
        (u'\u0218tiri', u'http://www.gandul.info/rss-stiri-prima-pagina.xml')
        ]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)
resources/recipes/hotnews.recipe (new file, 46 lines):

# -*- coding: utf-8 -*-
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
hotnews.ro
'''

from calibre.web.feeds.news import BasicNewsRecipe

class Hotnews(BasicNewsRecipe):
    title                 = 'Hotnews'
    __author__            = u'Silviu Cotoar\u0103'
    description           = u'\u0218tiri din Rom\u00e2nia'
    publisher             = 'Hotnews'
    oldest_article        = 5
    language              = 'ro'
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    category              = 'Ziare,Stiri,Romania'
    encoding              = 'utf-8'
    cover_url             = 'http://www.hotnews.ro/images/new/logo.gif'

    conversion_options = {
            'comments'  : description
            ,'tags'     : category
            ,'language' : language
            ,'publisher' : publisher
        }

    keep_only_tags = [
         dict(name='h1', attrs={'class':'title'})
        ,dict(name='div', attrs={'id':'articleContent'})
        ]

    feeds = [ (u'\u0218tiri', u'http://www.hotnews.ro/rss/actualitate')
            ,(u'English', u'http://www.hotnews.ro/rss/english')
            ]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)
resources/recipes/jurnalulnational.recipe (new file, 54 lines):

# -*- coding: utf-8 -*-
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
jurnalul.ro
'''

from calibre.web.feeds.news import BasicNewsRecipe

class JurnalulNational(BasicNewsRecipe):
    title                 = u'Jurnalul Na\u0163ional'
    __author__            = u'Silviu Cotoar\u0103'
    description           = u'\u0218tiri din Rom\u00e2nia'
    publisher             = 'Jurnalul National'
    oldest_article        = 5
    language              = 'ro'
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    category              = 'Ziare,Stiri,Romania'
    encoding              = 'utf-8'
    cover_url             = 'http://www.jurnalul.ro/images/sigla.png'

    conversion_options = {
            'comments'  : description
            ,'tags'     : category
            ,'language' : language
            ,'publisher' : publisher
        }

    keep_only_tags = [
         dict(name='h1', attrs={'class':'h3 art_title'})
        ,dict(name='div', attrs={'class':'only_text'})
        ]

    feeds = [
         (u'\u0218tiri', u'http://www.jurnalul.ro/rss/stiri-3028.html')
        ,(u'Special', u'http://www.jurnalul.ro/rss/special-3001.html')
        ,(u'Sport', u'http://www.jurnalul.ro/rss/sport-3035.html')
        ,(u'Bani Afaceri', u'http://www.jurnalul.ro/rss/bani-afaceri-3006.html')
        ,(u'Viata Sanatoasa', u'http://www.jurnalul.ro/rss/viata-sanatoasa-3010.html')
        ,(u'Stiinta Tehnica', u'http://www.jurnalul.ro/rss/stiinta-tehnica-3019.html')
        ,(u'Timp Liber', u'http://www.jurnalul.ro/rss/timp-liber-3022.html')
        ,(u'Fun', u'http://www.jurnalul.ro/rss/fun-3038.html')
        ,(u'Acum 20 de ani', u'http://www.jurnalul.ro/rss/acum-20-de-ani-3073.html')
        ]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)
@@ -8,7 +8,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
 
 class Lifehacker(BasicNewsRecipe):
     title       = 'Lifehacker'
-    __author__  = 'NA'
+    __author__  = 'Kovid Goyal'
     description = "Computers make us more productive. Yeah, right. Lifehacker recommends the software downloads and web sites that actually save time. Don't live to geek; geek to live."
     publisher   = 'lifehacker.com'
     category    = 'news, IT, Internet, gadgets, tips and tricks, howto, diy'
@@ -32,14 +32,20 @@ class Lifehacker(BasicNewsRecipe):
         , 'language' : language
     }
 
-    remove_attributes = ['width','height']
-    keep_only_tags = [dict(attrs={'class':'content permalink'})]
+    remove_attributes = ['width', 'height', 'style']
+
     remove_tags_before = dict(name='h1')
-    remove_tags = [dict(attrs={'class':'contactinfo'})]
-    remove_tags_after = dict(attrs={'class':'contactinfo'})
+    keep_only_tags = [dict(id='container')]
+    remove_tags_after = dict(attrs={'class':'post-body'})
+    remove_tags = [
+        dict(id="sharemenu"),
+        {'class': 'related'},
+        ]
 
     feeds = [(u'Articles', u'http://feeds.gawker.com/lifehacker/full')]
 
     def preprocess_html(self, soup):
         return self.adeify_images(soup)
+
+    def print_version(self, url):
+        return url.replace('#!', '?_escaped_fragment_=')
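The new print_version targets Lifehacker's hashbang URLs: under the Google AJAX-crawling convention of the time, the content behind a #! fragment could be fetched as a static page by requesting ?_escaped_fragment_= instead. An illustration with an invented post id:

    # Illustration only; the post id is invented.
    url = 'http://lifehacker.com/#!5712345/example-post'
    print url.replace('#!', '?_escaped_fragment_=')
    # http://lifehacker.com/?_escaped_fragment_=5712345/example-post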
resources/recipes/mediafax.recipe (new file, 52 lines):

# -*- coding: utf-8 -*-
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
mediafax.ro
'''

from calibre.web.feeds.news import BasicNewsRecipe

class Mediafax(BasicNewsRecipe):
    title                 = 'Mediafax'
    __author__            = u'Silviu Cotoar\u0103'
    description           = u'\u0218tiri din Rom\u00e2nia'
    publisher             = 'Mediafax'
    oldest_article        = 5
    language              = 'ro'
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    category              = 'Ziare,Stiri,Romania'
    encoding              = 'utf-8'
    cover_url             = 'http://storage0.dms.mpinteractiv.ro/media/1/1/1706/4134575/2/logo-mediafax-mass-media-news.jpg?width=400'

    conversion_options = {
            'comments'  : description
            ,'tags'     : category
            ,'language' : language
            ,'publisher' : publisher
        }

    keep_only_tags = [
        dict(name='div', attrs={'class':'news tabs-container'})
        ]

    remove_tags = [
         dict(name='ul', attrs={'class':['CategoryNews']})
        ,dict(name='div', attrs={'class':['read']})
        ]

    remove_tags_after = [ dict(name='div', attrs={'class':'cmsItemViews'}) ]

    feeds = [
        (u'Actualitate', u'http://www.mediafax.ro/rss/')
        ]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)
resources/recipes/moneyro.recipe (new file, 54 lines):

# -*- coding: utf-8 -*-
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
money.ro
'''

from calibre.web.feeds.news import BasicNewsRecipe

class MoneyRo(BasicNewsRecipe):
    title                 = 'Money Ro'
    __author__            = u'Silviu Cotoar\u0103'
    description           = u'\u0218tiri din Rom\u00e2nia'
    publisher             = 'MoneyRo'
    oldest_article        = 5
    language              = 'ro'
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    category              = 'Ziare,Stiri,Romania'
    encoding              = 'utf-8'
    remove_javascript     = True
    cover_url             = 'http://assets.moneyweb.ro/images/logo_money.jpg'

    conversion_options = {
            'comments'  : description
            ,'tags'     : category
            ,'language' : language
            ,'publisher' : publisher
        }

    keep_only_tags = [ dict(name='div', attrs={'id':'titluArticol'})
                     , dict(name='img', attrs={'id':'objImage'})
                     , dict(name='div', attrs={'class':'leftColumnArticle'})
                     ]

    remove_tags_after = [ dict(name='div', attrs={'id':'articleTags'}) ]

    remove_tags = [ dict(name='div', attrs={'id':'ads'})
                  , dict(name='div', attrs={'id':'aus'})
                  , dict(name='div', attrs={'id':'bb-comment-create-form'})
                  , dict(name='div', attrs={'id':'articleTags'})
                  , dict(name='div', attrs={'class':'breadcrumb'})
                  ]

    feeds = [(u'\u0218tiri', u'http://moneyro.feedsportal.com/c/32533/fe.ed/rss.money.ro/stiri.xml') ]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)
@@ -38,7 +38,7 @@ class Pagina12(BasicNewsRecipe):
     keep_only_tags = [dict(attrs={'class':'uitstekendekeus'})]
     remove_tags = [
         dict(name=['meta','base','link','object','embed'])
-        ,dict(attrs={'class':['reclamespace','tags-and-sharing']})
+        ,dict(attrs={'class':['reclamespace','tags-and-sharing','sharing-is-caring']})
         ]
     remove_attributes=['lang']
 
@@ -668,7 +668,7 @@ class NYTimes(BasicNewsRecipe):
 
         try:
             #remove "Related content" bar
-            runAroundsFound = soup.findAll('div',{'class':['articleInline runaroundLeft','articleInline doubleRule runaroundLeft','articleInline runaroundLeft firstArticleInline','articleInline runaroundLeft ']})
+            runAroundsFound = soup.findAll('div',{'class':['articleInline runaroundLeft','articleInline doubleRule runaroundLeft','articleInline runaroundLeft firstArticleInline','articleInline runaroundLeft ','articleInline runaroundLeft lastArticleInline']})
             if runAroundsFound:
                 for runAround in runAroundsFound:
                     #find all section headers
resources/recipes/prosport.recipe (new file, 49 lines):

# -*- coding: utf-8 -*-
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
prosport.ro
'''

from calibre.web.feeds.news import BasicNewsRecipe

class Prosport(BasicNewsRecipe):
    title                 = 'Prosport'
    __author__            = u'Silviu Cotoar\u0103'
    publisher             = 'Prosport'
    description           = u'\u0218tiri Sportive din Rom\u00e2nia'
    oldest_article        = 5
    language              = 'ro'
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    category              = 'Ziare,Stiri,Romania,Sport'
    encoding              = 'utf-8'
    cover_url             = 'http://storage0.dms.mpinteractiv.ro/media/401/581/7946/3688311/1/logo-pro.jpg?width=610'

    conversion_options = {
            'comments'  : description
            ,'tags'     : category
            ,'language' : language
            ,'publisher' : publisher
        }

    keep_only_tags = [
         dict(name='h1', attrs={'class':'a-title'})
        ,dict(name='div', attrs={'class':'a-entry'})
        ]

    remove_tags = [ dict(name='div', attrs={'class':'utils'})
                  ,dict(name='div', attrs={'class':'g-slide'})
                  ]

    feeds = [ (u'\u0218tiri', u'http://www.prosport.ro/rss.xml')]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)
resources/recipes/realitatea.recipe (new file, 45 lines):

# -*- coding: utf-8 -*-
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
realitatea.net
'''

from calibre.web.feeds.news import BasicNewsRecipe

class Realitatea(BasicNewsRecipe):
    title                 = 'Realitatea'
    __author__            = u'Silviu Cotoar\u0103'
    publisher             = 'Realitatea'
    description           = u'\u0218tiri din Rom\u00e2nia'
    oldest_article        = 5
    language              = 'ro'
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    category              = 'Ziare,Stiri,Romania'
    encoding              = 'utf-8'
    cover_url             = 'http://assets.realitatea.ro/images/logo.jpg'

    conversion_options = {
            'comments'  : description
            ,'tags'     : category
            ,'language' : language
            ,'publisher' : publisher
        }

    keep_only_tags = [
         dict(name='div', attrs={'class':'articleTitle '})
        ,dict(name='div', attrs={'class':'articleBody'})
        ]

    remove_tags = [ dict(name='div', attrs={'id':'aus'}) ]

    feeds = [ (u'\u0218tiri', u'http://realitatea.feedsportal.com/c/32533/fe.ed/rss.realitatea.net/stiri.xml') ]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)
@@ -1,5 +1,5 @@
 __license__   = 'GPL v3'
-__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
 '''
 smh.com.au
 '''
@@ -22,7 +22,11 @@ class Smh_au(BasicNewsRecipe):
     remove_empty_feeds = True
     masthead_url       = 'http://images.smh.com.au/2010/02/02/1087188/smh-620.jpg'
     publication_type   = 'newspaper'
-    extra_css          = ' h1{font-family: Georgia,"Times New Roman",Times,serif } body{font-family: Arial,Helvetica,sans-serif} .cT-imageLandscape{font-size: x-small} '
+    extra_css          = """
+                         h1{font-family: Georgia,"Times New Roman",Times,serif }
+                         body{font-family: Arial,Helvetica,sans-serif}
+                         .cT-imageLandscape,.cT-imagePortrait{font-size: x-small}
+                         """
 
     conversion_options = {
         'comment' : description
@@ -38,7 +42,11 @@ class Smh_au(BasicNewsRecipe):
         ]
     remove_tags_after = [dict(name='div',attrs={'class':'articleBody'})]
     keep_only_tags    = [dict(name='div',attrs={'id':'content'})]
-    remove_attributes = ['width','height']
+    remove_tags = [
+        dict(attrs={'class':'hidden'}),
+        dict(name=['link','meta','base','embed','object','iframe'])
+        ]
+    remove_attributes = ['width','height','lang']
 
     def parse_index(self):
         articles = []
@@ -66,3 +74,14 @@ class Smh_au(BasicNewsRecipe):
                 ,'description':description
                 })
         return [(self.tag_to_string(soup.find('title')), articles)]
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        for item in soup.findAll('bod'):
+            item.name = 'div'
+        for item in soup.findAll('img'):
+            if not item.has_key('alt'):
+                item['alt'] = 'image'
+        return soup
resources/recipes/standardmoney.recipe (new file, 46 lines):

# -*- coding: utf-8 -*-
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
standard.money.ro
'''

from calibre.web.feeds.news import BasicNewsRecipe

class StandardMoneyRo(BasicNewsRecipe):
    title                 = 'Standard Money Ro'
    __author__            = u'Silviu Cotoar\u0103'
    publisher             = 'Standard Money'
    description           = 'Portal de Business'
    oldest_article        = 5
    language              = 'ro'
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    category              = 'Ziare,Stiri,Romania'
    encoding              = 'utf-8'
    cover_url             = 'http://assets.standard.ro/wp-content/themes/standard/images/standard-logo.gif'

    conversion_options = {
            'comments'  : description
            ,'tags'     : category
            ,'language' : language
            ,'publisher' : publisher
        }

    keep_only_tags = [
          dict(name='h1', attrs={'class':'post-title'})
        , dict(name='div', attrs={'class':'content_post'})
        ]

    feeds = [
        (u'Actualitate', u'http://standard.money.ro/feed')
        ]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)
resources/recipes/ziarulfinanciar.recipe (new file, 45 lines):

# -*- coding: utf-8 -*-
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
zf.ro
'''

from calibre.web.feeds.news import BasicNewsRecipe

class ZiarulFinanciar(BasicNewsRecipe):
    title                 = 'Ziarul Financiar'
    __author__            = u'Silviu Cotoar\u0103'
    description           = u'\u0218tiri din Business'
    publisher             = 'Ziarul Financiar'
    oldest_article        = 5
    language              = 'ro'
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    category              = 'Ziare,Stiri,Romania'
    encoding              = 'utf-8'
    cover_url             = 'http://storage0.dms.mpinteractiv.ro/media/1/1/1706/7462721/1/ziarul-financiar-big.jpg?width=400'

    conversion_options = {
            'comments'  : description
            ,'tags'     : category
            ,'language' : language
            ,'publisher' : publisher
        }

    keep_only_tags = [
        dict(name='div', attrs={'class':'article'})
        ]

    feeds = [
        (u'\u0218tiri', u'http://www.zf.ro/rss/zf-24/')
        ]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)
@@ -6,9 +6,10 @@ __license__ = 'GPL v3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 
-import os, cPickle
+import os, cPickle, re, anydbm, shutil
+from zlib import compress
 
-from setup import Command, basenames
+from setup import Command, basenames, __appname__
 
 def get_opts_from_parser(parser):
     def do_opt(opt):
@@ -26,6 +27,9 @@ class Resources(Command):
 
     description = 'Compile various needed calibre resources'
 
+    KAKASI_PATH = os.path.join(Command.SRC, __appname__,
+            'ebooks', 'unihandecode', 'pykakasi')
+
     def run(self, opts):
         scripts = {}
         for x in ('console', 'gui'):
@@ -101,11 +105,107 @@ class Resources(Command):
         import json
         json.dump(function_dict, open(dest, 'wb'), indent=4)
 
+        self.run_kakasi(opts)
+
+    def run_kakasi(self, opts):
+        self.records = {}
+        src = self.j(self.KAKASI_PATH, 'kakasidict.utf8')
+        dest = self.j(self.RESOURCES, 'localization',
+                'pykakasi','kanwadict2.db')
+        base = os.path.dirname(dest)
+        if not os.path.exists(base):
+            os.makedirs(base)
+
+        if self.newer(dest, src):
+            self.info('\tGenerating Kanwadict')
+
+            for line in open(src, "r"):
+                self.parsekdict(line)
+            self.kanwaout(dest)
+
+        src = self.j(self.KAKASI_PATH, 'itaijidict.utf8')
+        dest = self.j(self.RESOURCES, 'localization',
+                'pykakasi','itaijidict2.pickle')
+
+        if self.newer(dest, src):
+            self.info('\tGenerating Itaijidict')
+            self.mkitaiji(src, dest)
+
+        src = self.j(self.KAKASI_PATH, 'kanadict.utf8')
+        dest = self.j(self.RESOURCES, 'localization',
+                'pykakasi','kanadict2.pickle')
+
+        if self.newer(dest, src):
+            self.info('\tGenerating kanadict')
+            self.mkkanadict(src, dest)
+        return
+
+    def mkitaiji(self, src, dst):
+        dic = {}
+        for line in open(src, "r"):
+            line = line.decode("utf-8").strip()
+            if line.startswith(';;'): # skip comment
+                continue
+            if re.match(r"^$",line):
+                continue
+            pair = re.sub(r'\\u([0-9a-fA-F]{4})', lambda x:unichr(int(x.group(1),16)), line)
+            dic[pair[0]] = pair[1]
+        cPickle.dump(dic, open(dst, 'w'), protocol=-1) #pickle
+
+    def mkkanadict(self, src, dst):
+        dic = {}
+        for line in open(src, "r"):
+            line = line.decode("utf-8").strip()
+            if line.startswith(';;'): # skip comment
+                continue
+            if re.match(r"^$",line):
+                continue
+            (alpha, kana) = line.split(' ')
+            dic[kana] = alpha
+        cPickle.dump(dic, open(dst, 'w'), protocol=-1) #pickle
+
+    def parsekdict(self, line):
+        line = line.decode("utf-8").strip()
+        if line.startswith(';;'): # skip comment
+            return
+        (yomi, kanji) = line.split(' ')
+        if ord(yomi[-1:]) <= ord('z'):
+            tail = yomi[-1:]
+            yomi = yomi[:-1]
+        else:
+            tail = ''
+        self.updaterec(kanji, yomi, tail)
+
+    def updaterec(self, kanji, yomi, tail):
+        key = "%04x"%ord(kanji[0])
+        if key in self.records:
+            if kanji in self.records[key]:
+                rec = self.records[key][kanji]
+                rec.append((yomi,tail))
+                self.records[key].update( {kanji: rec} )
+            else:
+                self.records[key][kanji]=[(yomi, tail)]
+        else:
+            self.records[key] = {}
+            self.records[key][kanji]=[(yomi, tail)]
+
+    def kanwaout(self, out):
+        dic = anydbm.open(out, 'c')
+        for (k, v) in self.records.iteritems():
+            dic[k] = compress(cPickle.dumps(v, -1))
+        dic.close()
+
     def clean(self):
         for x in ('scripts', 'recipes', 'ebook-convert-complete'):
             x = self.j(self.RESOURCES, x+'.pickle')
             if os.path.exists(x):
                 os.remove(x)
+        kakasi = self.j(self.RESOURCES, 'localization', 'pykakasi')
+        if os.path.exists(kakasi):
+            shutil.rmtree(kakasi)
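kanwaout keys the anydbm database by the four-hex-digit code point of each record's leading kanji and stores, under each key, a zlib-compressed pickle of {kanji: [(yomi, tail), ...]}. A sketch of the matching read path, on the assumption that the pykakasi runtime reader mirrors this write format:

    # Read-back sketch (assumption: the pykakasi runtime reader mirrors
    # kanwaout's write format). Python 2, like the build code above.
    import anydbm, cPickle
    from zlib import decompress

    db = anydbm.open('resources/localization/pykakasi/kanwadict2.db', 'r')
    key = "%04x" % ord(u'\u6f22')                 # leading-kanji code point
    records = cPickle.loads(decompress(db[key]))  # {kanji: [(yomi, tail), ...]}
    db.close()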
@@ -2,11 +2,14 @@ import os.path
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 
-import textwrap, os, glob, functools
+import textwrap, os, glob, functools, re
+from calibre import guess_type
 from calibre.customize import FileTypePlugin, MetadataReaderPlugin, \
     MetadataWriterPlugin, PreferencesPlugin, InterfaceActionBase
 from calibre.constants import numeric_version
 from calibre.ebooks.metadata.archive import ArchiveExtract, get_cbz_metadata
+from calibre.ebooks.metadata.opf2 import metadata_to_opf
+from calibre.ebooks.oeb.base import OEB_IMAGES
 
 # To archive plugins {{{
 class HTML2ZIP(FileTypePlugin):
@@ -82,6 +85,77 @@ class PML2PMLZ(FileTypePlugin):
 
         return of.name
 
+class TXT2TXTZ(FileTypePlugin):
+    name = 'TXT to TXTZ'
+    author = 'John Schember'
+    description = _('Create a TXTZ archive when a TXT file is imported '
+        'containing Markdown or Textile references to images. The referenced '
+        'images as well as the TXT file are added to the archive.')
+    version = numeric_version
+    file_types = set(['txt'])
+    supported_platforms = ['windows', 'osx', 'linux']
+    on_import = True
+
+    def _get_image_references(self, txt, base_dir):
+        images = []
+
+        # Textile
+        for m in re.finditer(ur'(?mu)(?:[\[{])?\!(?:\. )?(?P<path>[^\s(!]+)\s?(?:\(([^\)]+)\))?\!(?::(\S+))?(?:[\]}]|(?=\s|$))', txt):
+            path = m.group('path')
+            if path and not os.path.isabs(path) and guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)):
+                images.append(path)
+
+        # Markdown inline
+        for m in re.finditer(ur'(?mu)\!\[([^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*)\]\s*\((?P<path>[^\)]*)\)', txt):
+            path = m.group('path')
+            if path and not os.path.isabs(path) and guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)):
+                images.append(path)
+
+        # Markdown reference
+        refs = {}
+        for m in re.finditer(ur'(?mu)^(\ ?\ ?\ ?)\[(?P<id>[^\]]*)\]:\s*(?P<path>[^\s]*)$', txt):
+            if m.group('id') and m.group('path'):
+                refs[m.group('id')] = m.group('path')
+        for m in re.finditer(ur'(?mu)\!\[([^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*)\]\s*\[(?P<id>[^\]]*)\]', txt):
+            path = refs.get(m.group('id'), None)
+            if path and not os.path.isabs(path) and guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)):
+                images.append(path)
+
+        # Remove duplicates
+        return list(set(images))
+
+    def run(self, path_to_ebook):
+        with open(path_to_ebook, 'rb') as ebf:
+            txt = ebf.read()
+        base_dir = os.path.dirname(path_to_ebook)
+        images = self._get_image_references(txt, base_dir)
+
+        if images:
+            # Create TXTZ and put file plus images inside of it.
+            import zipfile
+            of = self.temporary_file('_plugin_txt2txtz.txtz')
+            txtz = zipfile.ZipFile(of.name, 'w')
+            # Add selected TXT file to archive.
+            txtz.write(path_to_ebook, os.path.basename(path_to_ebook), zipfile.ZIP_DEFLATED)
+            # metadata.opf
+            if os.path.exists(os.path.join(base_dir, 'metadata.opf')):
+                txtz.write(os.path.join(base_dir, 'metadata.opf'), 'metadata.opf', zipfile.ZIP_DEFLATED)
+            else:
+                from calibre.ebooks.metadata.txt import get_metadata
+                with open(path_to_ebook, 'rb') as ebf:
+                    mi = get_metadata(ebf)
+                opf = metadata_to_opf(mi)
+                txtz.writestr('metadata.opf', opf, zipfile.ZIP_DEFLATED)
+            # images
+            for image in images:
+                txtz.write(os.path.join(base_dir, image), image)
+            txtz.close()
+
+            return of.name
+        else:
+            # No images so just import the TXT file.
+            return path_to_ebook
+
 # }}}
 
 # Metadata reader plugins {{{
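TXT2TXTZ only collects image paths that are relative and resolve to a real file next to the TXT source. A reduced sketch of the Markdown-inline branch, with a deliberately simplified regex (the full nested-bracket pattern is in the plugin above) and an invented file name:

    # Reduced illustration of the Markdown-inline branch; the regex is
    # simplified and 'cover.png' is an invented example.
    import re

    txt = 'Intro text ![a cover](cover.png) and more text'
    for m in re.finditer(r'!\[[^\]]*\]\s*\((?P<path>[^\)]*)\)', txt):
        print m.group('path')   # cover.png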
@@ -511,14 +585,14 @@ from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \
 from calibre.ebooks.metadata.douban import DoubanBooks
 from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers
 from calibre.ebooks.metadata.covers import OpenLibraryCovers, \
-    LibraryThingCovers, DoubanCovers
+    AmazonCovers, DoubanCovers
 from calibre.library.catalog import CSV_XML, EPUB_MOBI, BIBTEX
 from calibre.ebooks.epub.fix.unmanifested import Unmanifested
 from calibre.ebooks.epub.fix.epubcheck import Epubcheck
 
-plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon,
+plugins = [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon,
     KentDistrictLibrary, DoubanBooks, NiceBooks, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
-    Epubcheck, OpenLibraryCovers, LibraryThingCovers, DoubanCovers,
+    Epubcheck, OpenLibraryCovers, AmazonCovers, DoubanCovers,
     NiceBooksCovers]
 plugins += [
     ComicInput,
@@ -24,6 +24,7 @@ from calibre.utils.logging import Log
 from calibre.utils.zipfile import ZipFile
 
 from PIL import Image as PILImage
+from lxml import etree
 
 if isosx:
     try:
@@ -2515,23 +2516,23 @@ class ITUNES(DriverBase):
             fnames = zf_opf.namelist()
             opf = [x for x in fnames if '.opf' in x][0]
             if opf:
-                opf_raw = cStringIO.StringIO(zf_opf.read(opf))
-                soup = BeautifulSoup(opf_raw.getvalue())
-                opf_raw.close()
-
-                # Touch existing calibre timestamp
-                md = soup.find('metadata')
-                if md:
-                    ts = md.find('meta',attrs={'name':'calibre:timestamp'})
-                    if ts:
-                        timestamp = ts['content']
-                        old_ts = parse_date(timestamp)
-                        metadata.timestamp = datetime.datetime(old_ts.year, old_ts.month, old_ts.day, old_ts.hour,
-                            old_ts.minute, old_ts.second, old_ts.microsecond+1, old_ts.tzinfo)
-                    else:
-                        metadata.timestamp = now()
-                        if DEBUG:
-                            self.log.info(" add timestamp: %s" % metadata.timestamp)
+                opf_tree = etree.fromstring(zf_opf.read(opf))
+                ns_map = opf_tree.nsmap.keys()
+                for item in ns_map:
+                    ns = opf_tree.nsmap[item]
+                    md_el = opf_tree.find(".//{%s}metadata" % ns)
+                    if md_el is not None:
+                        ts = md_el.find('.//{%s}meta[@name="calibre:timestamp"]')
+                        if ts:
+                            timestamp = ts.get('content')
+                            old_ts = parse_date(timestamp)
+                            metadata.timestamp = datetime.datetime(old_ts.year, old_ts.month, old_ts.day, old_ts.hour,
+                                old_ts.minute, old_ts.second, old_ts.microsecond+1, old_ts.tzinfo)
+                        else:
+                            metadata.timestamp = now()
+                            if DEBUG:
+                                self.log.info(" add timestamp: %s" % metadata.timestamp)
+                        break
             else:
                 metadata.timestamp = now()
                 if DEBUG:
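The lxml rewrite walks the OPF document's namespace map and uses namespaced find patterns instead of BeautifulSoup tag searches. A minimal sketch of the same lookup with the namespace substituted into both patterns (note that the meta pattern needs the namespace formatted in, just like the metadata one, and that lxml element checks should use is not None):

    # Sketch: locating calibre:timestamp in a parsed OPF tree with lxml.
    # Assumes opf_tree and ns are set as in the hunk above.
    md_el = opf_tree.find('.//{%s}metadata' % ns)
    if md_el is not None:
        ts = md_el.find('.//{%s}meta[@name="calibre:timestamp"]' % ns)
        if ts is not None:
            print ts.get('content')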
@@ -2839,7 +2840,7 @@ class ITUNES(DriverBase):
     def _xform_metadata_via_plugboard(self, book, format):
         ''' Transform book metadata from plugboard templates '''
         if DEBUG:
-            self.log.info(" ITUNES._update_metadata_from_plugboard()")
+            self.log.info(" ITUNES._xform_metadata_via_plugboard()")
 
         if self.plugboard_func:
             pb = self.plugboard_func(self.DEVICE_PLUGBOARD_NAME, format, self.plugboards)
@@ -11,44 +11,42 @@ Generates and writes an APNX page mapping file.
 import struct
 import uuid
 
+from calibre.ebooks.mobi.reader import MobiReader
 from calibre.ebooks.pdb.header import PdbHeaderReader
+from calibre.utils.logging import default_log
 
 class APNXBuilder(object):
     '''
-    2300 characters of uncompressed text per page. This is
-    not meant to map 1 to 1 to a print book but to be a
-    close enough measure.
-
-    A test book was chosen and the characters were counted
-    on one page. This number was round to 2240 then 60
-    characters of markup were added to the total giving
-    2300.
-
-    Uncompressed text length is used because it's easily
-    accessible in MOBI files (part of the header). Also,
-    It's faster to work off of the length then to
-    decompress and parse the actual text.
-
-    A better but much more resource intensive and slower
-    method to calculate the page length would be to parse
-    the uncompressed text. For each paragraph we would
-    want to find how many lines it would occupy in a paper
-    back book. 70 characters per line and 32 lines per page.
-    So divide the number of characters (minus markup) in
-    each paragraph by 70. If there are less than 70
-    characters in the paragraph then it is 1 line. Then,
-    count every 32 lines and mark that location as a page.
+    Create an APNX file using a pseudo page mapping.
     '''
 
-    def write_apnx(self, mobi_file_path, apnx_path):
+    def write_apnx(self, mobi_file_path, apnx_path, accurate=True):
+        # Check that this is really a MOBI file.
         with open(mobi_file_path, 'rb') as mf:
-            phead = PdbHeaderReader(mf)
-            r0 = phead.section_data(0)
-            text_length = struct.unpack('>I', r0[4:8])[0]
+            ident = PdbHeaderReader(mf).identity()
+            if ident != 'BOOKMOBI':
+                raise Exception(_('Not a valid MOBI file. Reports identity of %s') % ident)
 
-        pages = self.get_pages(text_length)
+        # Get the pages depending on the chosen parser
+        pages = []
+        if accurate:
+            try:
+                pages = self.get_pages_accurate(mobi_file_path)
+            except:
+                # Fall back to the fast parser if we can't
+                # use the accurate one. Typically this is
+                # due to the file having DRM.
+                pages = self.get_pages_fast(mobi_file_path)
+        else:
+            pages = self.get_pages_fast(mobi_file_path)
+
+        if not pages:
+            raise Exception(_('Could not generate page mapping.'))
+
+        # Generate the APNX file from the page mapping.
         apnx = self.generate_apnx(pages)
 
+        # Write the APNX.
         with open(apnx_path, 'wb') as apnxf:
             apnxf.write(apnx)
 
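The hunk above reworks write_apnx() so callers choose between the two page generators. A minimal usage sketch, under stated assumptions: the import path matches where this file lives in the tree (src/calibre/devices/kindle/apnx.py), and the .mobi/.apnx paths are purely hypothetical:

    # Hedged sketch: build an APNX file next to a MOBI file.
    from calibre.devices.kindle.apnx import APNXBuilder

    builder = APNXBuilder()
    # accurate=True tries the text parser first; write_apnx() falls back
    # to the fast length-based estimate if it fails (e.g. DRMed books).
    builder.write_apnx('/tmp/book.mobi', '/tmp/book.mobi.apnx', accurate=True)
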
@@ -73,18 +71,126 @@ class APNXBuilder(object):
         apnx += struct.pack('>H', 32)
         apnx += page_header
 
-        # write page values to apnx
+        # Write page values to APNX.
         for page in pages:
-            apnx += struct.pack('>L', page)
+            apnx += struct.pack('>I', page)
 
         return apnx
 
-    def get_pages(self, text_length):
+    def get_pages_fast(self, mobi_file_path):
+        '''
+        2300 characters of uncompressed text per page. This is
+        not meant to map 1 to 1 to a print book but to be a
+        close enough measure.
+
+        A test book was chosen and the characters were counted
+        on one page. This number was round to 2240 then 60
+        characters of markup were added to the total giving
+        2300.
+
+        Uncompressed text length is used because it's easily
+        accessible in MOBI files (part of the header). Also,
+        It's faster to work off of the length then to
+        decompress and parse the actual text.
+        '''
+        text_length = 0
         pages = []
         count = 0
 
+        with open(mobi_file_path, 'rb') as mf:
+            phead = PdbHeaderReader(mf)
+            r0 = phead.section_data(0)
+            text_length = struct.unpack('>I', r0[4:8])[0]
+
         while count < text_length:
             pages.append(count)
             count += 2300
 
         return pages
 
+    def get_pages_accurate(self, mobi_file_path):
+        '''
+        A more accurate but much more resource intensive and slower
+        method to calculate the page length.
+
+        Parses the uncompressed text. In an average paper back book
+        There are 32 lines per page and a maximum of 70 characters
+        per line.
+
+        Each paragraph starts a new line and every 70 characters
+        (minus markup) in a paragraph starts a new line. The
+        position after every 30 lines will be marked as a new
+        page.
+
+        This can be make more accurate by accounting for
+        <div class="mbp_pagebreak" /> as a new page marker.
+        And <br> elements as an empty line.
+        '''
+        pages = []
+
+        # Get the MOBI html.
+        mr = MobiReader(mobi_file_path, default_log)
+        if mr.book_header.encryption_type != 0:
+            # DRMed book
+            return self.get_pages_fast(mobi_file_path)
+        mr.extract_text()
+
+        # States
+        in_tag = False
+        in_p = False
+        check_p = False
+        closing = False
+        p_char_count = 0
+
+        # Get positions of every line
+        # A line is either a paragraph starting
+        # or every 70 characters in a paragraph.
+        lines = []
+        pos = -1
+        # We want this to be as fast as possible so we
+        # are going to do one pass across the text. re
+        # and string functions will parse the text each
+        # time they are called.
+        #
+        # We can can use .lower() here because we are
+        # not modifying the text. In this case the case
+        # doesn't matter just the absolute character and
+        # the position within the stream.
+        for c in mr.mobi_html.lower():
+            pos += 1
+
+            # Check if we are starting or stopping a p tag.
+            if check_p:
+                if c == '/':
+                    closing = True
+                    continue
+                elif c == 'p':
+                    if closing:
+                        in_p = False
+                    else:
+                        in_p = True
+                        lines.append(pos - 2)
+                    check_p = False
+                    closing = False
+                    continue
+
+            if c == '<':
+                in_tag = True
+                check_p = True
+                continue
+            elif c == '>':
+                in_tag = False
+                check_p = False
+                continue
+
+            if in_p and not in_tag:
+                p_char_count += 1
+                if p_char_count == 70:
+                    lines.append(pos)
+                    p_char_count = 0
+
+        # Every 30 lines is a new page
+        for i in xrange(0, len(lines), 32):
+            pages.append(lines[i])
+
+        return pages
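The docstring above describes the page model: a paragraph contributes one line per 70 characters (minimum one line), and every 32nd line position becomes a page start (the comment says every 30 lines; the loop actually steps by 32). A standalone sketch of just that arithmetic, independent of the MOBI plumbing:

    # Sketch of the line/page model: 70 chars per line, 32 lines per page.
    def estimate_pages(paragraph_lengths):
        lines = 0
        for n in paragraph_lengths:
            lines += max(1, -(-n // 70))  # ceil(n / 70), at least one line
        return max(1, -(-lines // 32))    # ceil(lines / 32)

    # e.g. 100 paragraphs of 200 characters -> 300 lines -> 10 pages
    print estimate_pages([200] * 100)
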
@@ -176,6 +176,28 @@ class KINDLE2(KINDLE):
     PRODUCT_ID = [0x0002, 0x0004]
     BCD        = [0x0100]
 
+    EXTRA_CUSTOMIZATION_MESSAGE = [
+        _('Send page number information when sending books') +
+            ':::' +
+            _('The Kindle 3 and newer versions can use page number information '
+              'in MOBI files. With this option, calibre will calculate and send'
+              ' this information to the Kindle when uploading MOBI files by'
+              ' USB. Note that the page numbers do not correspond to any paper'
+              ' book.'),
+        _('Use slower but more accurate page number generation') +
+            ':::' +
+            _('There are two ways to generate the page number information. Using the more accurate '
+              'generator will produce pages that correspond better to a printed book. '
+              'However, this method is slower and will slow down sending files '
+              'to the Kindle.'),
+    ]
+    EXTRA_CUSTOMIZATION_DEFAULT = [
+        True,
+        False,
+    ]
+    OPT_APNX          = 0
+    OPT_APNX_ACCURATE = 1
+
     def books(self, oncard=None, end_session=True):
         bl = USBMS.books(self, oncard=oncard, end_session=end_session)
         # Read collections information
@@ -212,13 +234,17 @@ class KINDLE2(KINDLE):
         '''
         Hijacking this function to write the apnx file.
         '''
-        if not filepath.lower().endswith('.mobi'):
+        opts = self.settings()
+        if not opts.extra_customization[self.OPT_APNX]:
+            return
+
+        if os.path.splitext(filepath.lower())[1] not in ('.azw', '.mobi', '.prc'):
             return
 
         apnx_path = '%s.apnx' % os.path.join(path, filename)
         apnx_builder = APNXBuilder()
         try:
-            apnx_builder.write_apnx(filepath, apnx_path)
+            apnx_builder.write_apnx(filepath, apnx_path, accurate=opts.extra_customization[self.OPT_APNX_ACCURATE])
         except:
             print 'Failed to generate APNX'
             import traceback
@@ -152,8 +152,17 @@ def check_ebook_format(stream, current_guess):
         stream.seek(0)
     return ans
 
+def normalize(x):
+    if isinstance(x, unicode):
+        import unicodedata
+        x = unicodedata.normalize('NFKC', x)
+    return x
+
 def calibre_cover(title, author_string, series_string=None,
         output_format='jpg', title_size=46, author_size=36):
+    title = normalize(title)
+    author_string = normalize(author_string)
+    series_string = normalize(series_string)
     from calibre.utils.magick.draw import create_cover_page, TextLine
     lines = [TextLine(title, title_size), TextLine(author_string, author_size)]
     if series_string:
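The new normalize() helper applies Unicode NFKC normalization, which folds compatibility characters (full-width forms, ligatures, and the like) into their canonical equivalents before the text reaches the cover renderer. A small self-contained illustration of what NFKC does:

    # -*- coding: utf-8 -*-
    import unicodedata

    # Full-width "ABC" and the "fi" ligature fold to plain ASCII under NFKC.
    print unicodedata.normalize('NFKC', u'\uff21\uff22\uff23') == u'ABC'  # True
    print unicodedata.normalize('NFKC', u'\ufb01') == u'fi'               # True
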
@@ -402,8 +402,8 @@ OptionRecommendation(name='asciiize',
             'with "Mikhail Gorbachiov". Also, note that in '
             'cases where there are multiple representations of a character '
             '(characters shared by Chinese and Japanese for instance) the '
-            'representation used by the largest number of people will be '
-            'used (Chinese in the previous example).')%\
+            'representation based on the current calibre interface language will be '
+            'used.')%\
             u'\u041c\u0438\u0445\u0430\u0438\u043b '
             u'\u0413\u043e\u0440\u0431\u0430\u0447\u0451\u0432'
         )
@@ -543,9 +543,9 @@ class HTMLPreProcessor(object):
         html = XMLDECL_RE.sub('', html)
 
         if getattr(self.extra_opts, 'asciiize', False):
-            from calibre.ebooks.unidecode.unidecoder import Unidecoder
-            unidecoder = Unidecoder()
-            html = unidecoder.decode(html)
+            from calibre.utils.localization import get_udc
+            unihandecoder = get_udc()
+            html = unihandecoder.decode(html)
 
         if getattr(self.extra_opts, 'enable_heuristics', False):
             from calibre.ebooks.conversion.utils import HeuristicProcessor
@@ -557,10 +557,10 @@ class HTMLPreProcessor(object):
 
         unsupported_unicode_chars = self.extra_opts.output_profile.unsupported_unicode_chars
         if unsupported_unicode_chars:
-            from calibre.ebooks.unidecode.unidecoder import Unidecoder
-            unidecoder = Unidecoder()
+            from calibre.utils.localization import get_udc
+            unihandecoder = get_udc()
             for char in unsupported_unicode_chars:
-                asciichar = unidecoder.decode(char)
+                asciichar = unihandecoder.decode(char)
                 html = html.replace(char, asciichar)
 
         return html
@@ -271,6 +271,8 @@ def check_isbn13(isbn):
     return None
 
 def check_isbn(isbn):
+    if not isbn:
+        return None
     isbn = re.sub(r'[^0-9X]', '', isbn.upper())
     if len(isbn) == 10:
         return check_isbn10(isbn)
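check_isbn10() and check_isbn13(), defined earlier in this file, carry the actual validation; check_isbn() only guards against empty input, strips separators, and dispatches on length. For reference, the standard ISBN-10 test is a weighted checksum: each digit is multiplied by a weight from 10 down to 1 ('X' counting as 10) and the sum must be divisible by 11. A hedged sketch of that check (the helper name is illustrative, not this module's):

    # Sketch of the standard ISBN-10 checksum ('X' counts as 10).
    def isbn10_ok(isbn):
        total = sum((10 - i) * (10 if c == 'X' else int(c))
                    for i, c in enumerate(isbn))
        return total % 11 == 0

    print isbn10_ok('0743273567')  # True: the weighted sum is 209 = 19 * 11
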
@@ -7,6 +7,7 @@ __docformat__ = 'restructuredtext en'
 Fetch metadata using Amazon AWS
 '''
 import sys, re
+from threading import RLock
 
 from lxml import html
 from lxml.html import soupparser
@@ -17,6 +18,10 @@ from calibre.ebooks.metadata.book.base import Metadata
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.library.comments import sanitize_comments_html
 
+asin_cache = {}
+cover_url_cache = {}
+cache_lock = RLock()
+
 def find_asin(br, isbn):
     q = 'http://www.amazon.com/s?field-keywords='+isbn
     raw = br.open_novisit(q).read()
@@ -29,6 +34,12 @@ def find_asin(br, isbn):
     return revs[0]
 
 def to_asin(br, isbn):
+    with cache_lock:
+        ans = asin_cache.get(isbn, None)
+    if ans:
+        return ans
+    if ans is False:
+        return None
     if len(isbn) == 13:
         try:
             asin = find_asin(br, isbn)
@@ -38,8 +49,11 @@ def to_asin(br, isbn):
             asin = None
     else:
         asin = isbn
+    with cache_lock:
+        asin_cache[isbn] = ans if ans else False
     return asin
 
 
 def get_social_metadata(title, authors, publisher, isbn):
     mi = Metadata(title, authors)
     if not isbn:
@@ -58,6 +72,68 @@ def get_social_metadata(title, authors, publisher, isbn):
             return mi
     return mi
 
+def get_cover_url(isbn, br):
+    isbn = check_isbn(isbn)
+    if not isbn:
+        return None
+    with cache_lock:
+        ans = cover_url_cache.get(isbn, None)
+    if ans:
+        return ans
+    if ans is False:
+        return None
+    asin = to_asin(br, isbn)
+    if asin:
+        ans = _get_cover_url(br, asin)
+        if ans:
+            with cache_lock:
+                cover_url_cache[isbn] = ans
+            return ans
+    from calibre.ebooks.metadata.xisbn import xisbn
+    for i in xisbn.get_associated_isbns(isbn):
+        asin = to_asin(br, i)
+        if asin:
+            ans = _get_cover_url(br, asin)
+            if ans:
+                with cache_lock:
+                    cover_url_cache[isbn] = ans
+                    cover_url_cache[i] = ans
+                return ans
+    with cache_lock:
+        cover_url_cache[isbn] = False
+    return None
+
+def _get_cover_url(br, asin):
+    q = 'http://amzn.com/'+asin
+    try:
+        raw = br.open_novisit(q).read()
+    except Exception, e:
+        if callable(getattr(e, 'getcode', None)) and \
+                e.getcode() == 404:
+            return None
+        raise
+    if '<title>404 - ' in raw:
+        return None
+    raw = xml_to_unicode(raw, strip_encoding_pats=True,
+            resolve_entities=True)[0]
+    try:
+        root = soupparser.fromstring(raw)
+    except:
+        return False
+
+    imgs = root.xpath('//img[@id="prodImage" and @src]')
+    if imgs:
+        src = imgs[0].get('src')
+        parts = src.split('/')
+        if len(parts) > 3:
+            bn = parts[-1]
+            sparts = bn.split('_')
+            if len(sparts) > 2:
+                bn = sparts[0] + sparts[-1]
+                return ('/'.join(parts[:-1]))+'/'+bn
+    return None
+
+
 def get_metadata(br, asin, mi):
     q = 'http://amzn.com/'+asin
     try:
@@ -111,18 +187,25 @@ def get_metadata(br, asin, mi):
 
 
 def main(args=sys.argv):
-    # Test xisbn
-    print get_social_metadata('Learning Python', None, None, '8324616489')
-    print
-
-    # Test sophisticated comment formatting
-    print get_social_metadata('Angels & Demons', None, None, '9781416580829')
-    print
-
-    # Random tests
-    print get_social_metadata('Star Trek: Destiny: Mere Mortals', None, None, '9781416551720')
-    print
-    print get_social_metadata('The Great Gatsby', None, None, '0743273567')
+    import tempfile, os
+    tdir = tempfile.gettempdir()
+    br = browser()
+    for title, isbn in [
+            ('Learning Python', '8324616489'), # Test xisbn
+            ('Angels & Demons', '9781416580829'), # Test sophisticated comment formatting
+            # Random tests
+            ('Star Trek: Destiny: Mere Mortals', '9781416551720'),
+            ('The Great Gatsby', '0743273567'),
+            ]:
+        cpath = os.path.join(tdir, title+'.jpg')
+        curl = get_cover_url(isbn, br)
+        if curl is None:
+            print 'No cover found for', title
+        else:
+            open(cpath, 'wb').write(br.open_novisit(curl).read())
+            print 'Cover for', title, 'saved to', cpath
+
+        print get_social_metadata(title, None, None, isbn)
+        print
 
     return 0
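Both caches above follow the same three-state convention: a missing key means "never looked up", a truthy value is a hit, and False means "looked up before and known to be absent", so negative results are never re-fetched. (Note the committed to_asin() stores `ans if ans else False`, where `asin` looks like the intended value.) A minimal sketch of the pattern outside the Amazon-specific code; all names here are hypothetical:

    from threading import RLock

    _cache = {}
    _lock = RLock()

    def lookup(key, fetch):
        with _lock:
            ans = _cache.get(key, None)
        if ans:           # previous hit
            return ans
        if ans is False:  # previous definitive miss: don't retry
            return None
        ans = fetch(key)
        with _lock:
            _cache[key] = ans if ans else False
        return ans
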
@@ -5,7 +5,7 @@ __license__ = 'GPL v3'
 __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 
-import traceback, socket, re, sys
+import traceback, socket, sys
 from functools import partial
 from threading import Thread, Event
 from Queue import Queue, Empty
@@ -15,7 +15,6 @@ import mechanize
 
 from calibre.customize import Plugin
 from calibre import browser, prints
-from calibre.ebooks.BeautifulSoup import BeautifulSoup
 from calibre.constants import preferred_encoding, DEBUG
 
 class CoverDownload(Plugin):
@@ -112,73 +111,38 @@ class OpenLibraryCovers(CoverDownload): # {{{
 
 # }}}
 
-class LibraryThingCovers(CoverDownload): # {{{
+class AmazonCovers(CoverDownload): # {{{
 
-    name = 'librarything.com covers'
-    description = _('Download covers from librarything.com')
+    name = 'amazon.com covers'
+    description = _('Download covers from amazon.com')
     author = 'Kovid Goyal'
 
-    LIBRARYTHING = 'http://www.librarything.com/isbn/'
-
-    def get_cover_url(self, isbn, br, timeout=5.):
-
-        try:
-            src = br.open_novisit('http://www.librarything.com/isbn/'+isbn,
-                    timeout=timeout).read().decode('utf-8', 'replace')
-        except Exception, err:
-            if isinstance(getattr(err, 'args', [None])[0], socket.timeout):
-                err = Exception(_('LibraryThing.com timed out. Try again later.'))
-            raise err
-        else:
-            if '/wiki/index.php/HelpThing:Verify' in src:
-                raise Exception('LibraryThing is blocking calibre.')
-            s = BeautifulSoup(src)
-            url = s.find('td', attrs={'class':'left'})
-            if url is None:
-                if s.find('div', attrs={'class':'highloadwarning'}) is not None:
-                    raise Exception(_('Could not fetch cover as server is experiencing high load. Please try again later.'))
-                raise Exception(_('ISBN: %s not found')%isbn)
-            url = url.find('img')
-            if url is None:
-                raise Exception(_('LibraryThing.com server error. Try again later.'))
-            url = re.sub(r'_S[XY]\d+', '', url['src'])
-            return url
-
     def has_cover(self, mi, ans, timeout=5.):
-        return False
-        if not mi.isbn or not self.site_customization:
+        if not mi.isbn:
             return False
-        from calibre.ebooks.metadata.library_thing import get_browser, login
-        br = get_browser()
-        un, _, pw = self.site_customization.partition(':')
-        login(br, un, pw)
+        from calibre.ebooks.metadata.amazon import get_cover_url
+        br = browser()
         try:
-            self.get_cover_url(mi.isbn, br, timeout=timeout)
+            get_cover_url(mi.isbn, br)
             self.debug('cover for', mi.isbn, 'found')
            ans.set()
         except Exception, e:
             self.debug(e)
 
     def get_covers(self, mi, result_queue, abort, timeout=5.):
-        if not mi.isbn or not self.site_customization:
+        if not mi.isbn:
             return
-        from calibre.ebooks.metadata.library_thing import get_browser, login
-        br = get_browser()
-        un, _, pw = self.site_customization.partition(':')
-        login(br, un, pw)
+        from calibre.ebooks.metadata.amazon import get_cover_url
+        br = browser()
         try:
-            url = self.get_cover_url(mi.isbn, br, timeout=timeout)
+            url = get_cover_url(mi.isbn, br)
             cover_data = br.open_novisit(url).read()
             result_queue.put((True, cover_data, 'jpg', self.name))
         except Exception, e:
             result_queue.put((False, self.exception_to_string(e),
                 traceback.format_exc(), self.name))
 
-    def customization_help(self, gui=False):
-        ans = _('To use librarything.com you must sign up for a %sfree account%s '
-                'and enter your username and password separated by a : below.')
-        return '<p>'+ans%('<a href="http://www.librarything.com">', '</a>')
-
 # }}}
 
 def check_for_cover(mi, timeout=5.): # {{{
@@ -12,6 +12,7 @@ __docformat__ = 'restructuredtext en'
 from struct import pack, unpack
 from cStringIO import StringIO
 
+from calibre.ebooks import normalize
 from calibre.ebooks.mobi import MobiError
 from calibre.ebooks.mobi.writer import rescale_image, MAX_THUMB_DIMEN
 from calibre.ebooks.mobi.langcodes import iana2mobi
@@ -311,6 +312,7 @@ class MetadataUpdater(object):
         return StreamSlicer(self.stream, start, stop)
 
     def update(self, mi):
+        mi.title = normalize(mi.title)
         def update_exth_record(rec):
             recs.append(rec)
             if rec[0] in self.original_exth_records:
@@ -331,12 +333,12 @@ class MetadataUpdater(object):
         kindle_pdoc = None
         if mi.author_sort and pas:
             authors = mi.author_sort
-            update_exth_record((100, authors.encode(self.codec, 'replace')))
+            update_exth_record((100, normalize(authors).encode(self.codec, 'replace')))
         elif mi.authors:
             authors = ';'.join(mi.authors)
-            update_exth_record((100, authors.encode(self.codec, 'replace')))
+            update_exth_record((100, normalize(authors).encode(self.codec, 'replace')))
         if mi.publisher:
-            update_exth_record((101, mi.publisher.encode(self.codec, 'replace')))
+            update_exth_record((101, normalize(mi.publisher).encode(self.codec, 'replace')))
         if mi.comments:
             # Strip user annotations
             a_offset = mi.comments.find('<div class="user_annotations">')
@@ -345,12 +347,12 @@ class MetadataUpdater(object):
                 mi.comments = mi.comments[:a_offset]
             if ad_offset >= 0:
                 mi.comments = mi.comments[:ad_offset]
-            update_exth_record((103, mi.comments.encode(self.codec, 'replace')))
+            update_exth_record((103, normalize(mi.comments).encode(self.codec, 'replace')))
         if mi.isbn:
             update_exth_record((104, mi.isbn.encode(self.codec, 'replace')))
         if mi.tags:
             subjects = '; '.join(mi.tags)
-            update_exth_record((105, subjects.encode(self.codec, 'replace')))
+            update_exth_record((105, normalize(subjects).encode(self.codec, 'replace')))
 
             if kindle_pdoc and kindle_pdoc in mi.tags:
                 update_exth_record((501, str('PDOC')))
@@ -367,6 +367,9 @@ class MobiMLizer(object):
             istate.attrib['src'] = elem.attrib['src']
             istate.attrib['align'] = 'baseline'
             cssdict = style.cssdict()
+            valign = cssdict.get('vertical-align', None)
+            if valign in ('top', 'bottom', 'middle'):
+                istate.attrib['align'] = valign
             for prop in ('width', 'height'):
                 if cssdict[prop] != 'auto':
                     value = style[prop]
@@ -451,8 +454,11 @@ class MobiMLizer(object):
             text = COLLAPSE.sub(' ', elem.text)
         valign = style['vertical-align']
         not_baseline = valign in ('super', 'sub', 'text-top',
-                'text-bottom')
-        vtag = 'sup' if valign in ('super', 'text-top') else 'sub'
+                'text-bottom') or (
+                isinstance(valign, (float, int)) and abs(valign) != 0)
+        issup = valign in ('super', 'text-top') or (
+                isinstance(valign, (float, int)) and valign > 0)
+        vtag = 'sup' if issup else 'sub'
         if not_baseline and not ignore_valign and tag not in NOT_VTAGS and not isblock:
             nroot = etree.Element(XHTML('html'), nsmap=MOBI_NSMAP)
             vbstate = BlockState(etree.SubElement(nroot, XHTML('body')))
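The change above extends super/subscript detection from the CSS keywords to numeric vertical-align values computed by the style engine: any non-zero number leaves the baseline, with positive numbers behaving like 'super'. The decision table in isolation, as a small sketch:

    def vtag_for(valign):
        # Mirrors the new logic: a keyword or a non-zero numeric valign
        # leaves the baseline; positive numbers map to <sup>, the rest to <sub>.
        not_baseline = valign in ('super', 'sub', 'text-top', 'text-bottom') or (
                isinstance(valign, (float, int)) and abs(valign) != 0)
        issup = valign in ('super', 'text-top') or (
                isinstance(valign, (float, int)) and valign > 0)
        return ('sup' if issup else 'sub') if not_baseline else None

    print vtag_for('super')  # sup
    print vtag_for(-2.5)     # sub
    print vtag_for(0)        # None (stays on the baseline)
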
@@ -14,8 +14,9 @@ import re
 from struct import pack
 import time
 from urlparse import urldefrag
 
 from cStringIO import StringIO
 
+from calibre.ebooks import normalize
 from calibre.ebooks.mobi.langcodes import iana2mobi
 from calibre.ebooks.mobi.mobiml import MBP_NS
 from calibre.ebooks.oeb.base import OEB_DOCS
@@ -1365,7 +1366,7 @@ class MobiWriter(object):
             self._text_length,
             self._text_nrecords-1, RECORD_SIZE, 0, 0)) # 0 - 15 (0x0 - 0xf)
         uid = random.randint(0, 0xffffffff)
-        title = unicode(metadata.title[0]).encode('utf-8')
+        title = normalize(unicode(metadata.title[0])).encode('utf-8')
         # The MOBI Header
 
         # 0x0 - 0x3
@@ -1523,12 +1524,12 @@ class MobiWriter(object):
             items = oeb.metadata[term]
             if term == 'creator':
                 if self._prefer_author_sort:
-                    creators = [unicode(c.file_as or c) for c in items]
+                    creators = [normalize(unicode(c.file_as or c)) for c in items]
                 else:
-                    creators = [unicode(c) for c in items]
+                    creators = [normalize(unicode(c)) for c in items]
                 items = ['; '.join(creators)]
             for item in items:
-                data = self.COLLAPSE_RE.sub(' ', unicode(item))
+                data = self.COLLAPSE_RE.sub(' ', normalize(unicode(item)))
                 if term == 'identifier':
                     if data.lower().startswith('urn:isbn:'):
                         data = data[9:]
@@ -1542,7 +1543,7 @@ class MobiWriter(object):
                 nrecs += 1
             if term == 'rights' :
                 try:
-                    rights = unicode(oeb.metadata.rights[0]).encode('utf-8')
+                    rights = normalize(unicode(oeb.metadata.rights[0])).encode('utf-8')
                 except:
                     rights = 'Unknown'
                 exth.write(pack('>II', EXTH_CODES['rights'], len(rights) + 8))
@@ -207,7 +207,14 @@ class CSSFlattener(object):
         font_size = self.sbase if self.sbase is not None else \
             self.context.source.fbase
         if 'align' in node.attrib:
-            cssdict['text-align'] = node.attrib['align']
+            if tag != 'img':
+                cssdict['text-align'] = node.attrib['align']
+            else:
+                val = node.attrib['align']
+                if val in ('middle', 'bottom', 'top'):
+                    cssdict['vertical-align'] = val
+                elif val in ('left', 'right'):
+                    cssdict['text-align'] = val
             del node.attrib['align']
         if node.tag == XHTML('font'):
             node.tag = XHTML('span')
@@ -4,10 +4,9 @@ __license__ = 'GPL 3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 
-import glob
 import os
 
-from calibre import _ent_pat, xml_entity_to_unicode
+from calibre import _ent_pat, walk, xml_entity_to_unicode
 from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
 from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator
 from calibre.ebooks.chardet import detect
@@ -16,7 +15,6 @@ from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \
     preserve_spaces, detect_paragraph_type, detect_formatting_type, \
     normalize_line_endings, convert_textile, remove_indents, block_to_single_line, \
     separate_hard_scene_breaks
-from calibre.ptempfile import TemporaryDirectory
 from calibre.utils.zipfile import ZipFile
 
 class TXTInput(InputFormatPlugin):
@@ -28,20 +26,23 @@ class TXTInput(InputFormatPlugin):
 
     options = set([
         OptionRecommendation(name='paragraph_type', recommended_value='auto',
-            choices=['auto', 'block', 'single', 'print', 'unformatted'],
+            choices=['auto', 'block', 'single', 'print', 'unformatted', 'off'],
             help=_('Paragraph structure.\n'
-                   'choices are [\'auto\', \'block\', \'single\', \'print\', \'unformatted\']\n'
+                   'choices are [\'auto\', \'block\', \'single\', \'print\', \'unformatted\', \'off\']\n'
                   '* auto: Try to auto detect paragraph type.\n'
                   '* block: Treat a blank line as a paragraph break.\n'
                   '* single: Assume every line is a paragraph.\n'
                   '* print: Assume every line starting with 2+ spaces or a tab '
-                   'starts a paragraph.'
-                   '* unformatted: Most lines have hard line breaks, few/no blank lines or indents.')),
+                   'starts a paragraph.\n'
+                   '* unformatted: Most lines have hard line breaks, few/no blank lines or indents. '
+                   'Tries to determine structure and reformat the differentiate elements.\n'
+                   '* off: Don\'t modify the paragraph structure. This is useful when combined with '
+                   'Markdown or Textile formatting to ensure no formatting is lost.')),
         OptionRecommendation(name='formatting_type', recommended_value='auto',
-            choices=['auto', 'none', 'heuristic', 'textile', 'markdown'],
+            choices=['auto', 'plain', 'heuristic', 'textile', 'markdown'],
             help=_('Formatting used within the document.'
                   '* auto: Automatically decide which formatting processor to use.\n'
-                   '* none: Do not process the document formatting. Everything is a '
+                   '* plain: Do not process the document formatting. Everything is a '
                   'paragraph and no styling is applied.\n'
                   '* heuristic: Process using heuristics to determine formatting such '
                   'as chapter headings and italic text.\n'
@@ -64,18 +65,17 @@ class TXTInput(InputFormatPlugin):
         txt = ''
         log.debug('Reading text from file...')
         length = 0
+        # [(u'path', mime),]
 
         # Extract content from zip archive.
         if file_ext == 'txtz':
-            log.debug('De-compressing content to temporary directory...')
-            with TemporaryDirectory('_untxtz') as tdir:
-                zf = ZipFile(stream)
-                zf.extractall(tdir)
-
-                txts = glob.glob(os.path.join(tdir, '*.txt'))
-                for t in txts:
-                    with open(t, 'rb') as tf:
-                        txt += tf.read()
+            zf = ZipFile(stream)
+            zf.extractall('.')
+
+            for x in walk('.'):
+                if os.path.splitext(x)[1].lower() == '.txt':
+                    with open(x, 'rb') as tf:
+                        txt += tf.read() + '\n\n'
         else:
             txt = stream.read()
 
@@ -134,7 +134,7 @@ class TXTInput(InputFormatPlugin):
             preprocessor = HeuristicProcessor(options, log=getattr(self, 'log', None))
             txt = preprocessor.punctuation_unwrap(length, txt, 'txt')
             txt = separate_paragraphs_single_line(txt)
-        else:
+        elif options.paragraph_type == 'block':
             txt = separate_hard_scene_breaks(txt)
             txt = block_to_single_line(txt)
 
@@ -178,7 +178,7 @@ class TXTInput(InputFormatPlugin):
             setattr(options, opt.option.name, opt.recommended_value)
         options.input_encoding = 'utf-8'
         base = os.getcwdu()
-        if hasattr(stream, 'name'):
+        if file_ext != 'txtz' and hasattr(stream, 'name'):
             base = os.path.dirname(stream.name)
         fname = os.path.join(base, 'index.html')
         c = 0
@@ -190,7 +190,7 @@ class TXTInput(InputFormatPlugin):
             htmlfile.write(html.encode('utf-8'))
             odi = options.debug_pipeline
             options.debug_pipeline = None
-            # Generate oeb from htl conversion.
+            # Generate oeb from html conversion.
             oeb = html_input.convert(open(htmlfile.name, 'rb'), options, 'html', log,
                 {})
             options.debug_pipeline = odi
@@ -126,7 +126,7 @@ def separate_hard_scene_breaks(txt):
             return '\n%s\n' % line
         else:
             return line
-    txt = re.sub(u'(?miu)^[ \t-=~\/]+$', lambda mo: sep_break(mo.group()), txt)
+    txt = re.sub(u'(?miu)^[ \t-=~\/_]+$', lambda mo: sep_break(mo.group()), txt)
     return txt
 
 def block_to_single_line(txt):
(file diff suppressed because it is too large)

src/calibre/ebooks/unihandecode/__init__.py (new file, 57 lines)
@@ -0,0 +1,57 @@
+# -*- coding: utf-8 -*-
+
+__license__ = 'GPL 3'
+__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
+__docformat__ = 'restructuredtext en'
+__all__ = ["Unihandecoder"]
+
+'''
+Decode unicode text to an ASCII representation of the text.
+Translate unicode characters to ASCII.
+
+Inspired from John Schember's unidecode library which was created as part
+of calibre.
+
+Copyright(c) 2009, John Schember
+
+Tranliterate the string from unicode characters to ASCII in Chinese and others.
+
+'''
+import unicodedata
+
+class Unihandecoder(object):
+    preferred_encoding = None
+    decoder = None
+
+    def __init__(self, lang="zh", encoding='utf-8'):
+        self.preferred_encoding = encoding
+        lang = lang.lower()
+        if lang[:2] == u'ja':
+            from calibre.ebooks.unihandecode.jadecoder import Jadecoder
+            self.decoder = Jadecoder()
+        elif lang[:2] == u'kr' or lang == u'korean':
+            from calibre.ebooks.unihandecode.krdecoder import Krdecoder
+            self.decoder = Krdecoder()
+        elif lang[:2] == u'vn' or lang == u'vietnum':
+            from calibre.ebooks.unihandecode.vndecoder import Vndecoder
+            self.decoder = Vndecoder()
+        else: #zh and others
+            from calibre.ebooks.unihandecode.unidecoder import Unidecoder
+            self.decoder = Unidecoder()
+
+    def decode(self, text):
+        try:
+            unicode # python2
+            if not isinstance(text, unicode):
+                try:
+                    text = unicode(text)
+                except:
+                    try:
+                        text = text.decode(self.preferred_encoding)
+                    except:
+                        text = text.decode('utf-8', 'replace')
+        except: # python3, str is unicode
+            pass
+        #at first unicode normalize it. (see Unicode standards)
+        ntext = unicodedata.normalize('NFKC', text)
+        return self.decoder.decode(ntext)
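A short usage sketch for the new class; the sample strings are arbitrary. The language hint only selects a specialised decoder (Japanese routes through kakasi), everything else falls back to the generic Unidecoder:

    # -*- coding: utf-8 -*-
    from calibre.ebooks.unihandecode import Unihandecoder

    d = Unihandecoder(lang='ja')
    print d.decode(u'\u65e5\u672c\u8a9e')  # romanised via kakasi, roughly 'nihongo'
    print Unihandecoder(lang='zh').decode(u'\u4e2d\u6587')  # generic CJK fallback
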
src/calibre/ebooks/unihandecode/jacodepoints.py (new file, 5251 lines)
(file diff suppressed because it is too large)

src/calibre/ebooks/unihandecode/jadecoder.py (new file, 41 lines)
@@ -0,0 +1,41 @@
+# coding:utf8
+__license__ = 'GPL 3'
+__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
+__docformat__ = 'restructuredtext en'
+
+'''
+Decode unicode text to an ASCII representation of the text for Japanese.
+Translate unicode string to ASCII roman string.
+
+API is based on the python unidecode,
+which is based on Ruby gem (http://rubyforge.org/projects/unidecode/)
+and perl module Text::Unidecode
+(http://search.cpan.org/~sburke/Text-Unidecode-0.04/).
+
+This functionality is owned by Kakasi Japanese processing engine.
+
+Copyright (c) 2010 Hiroshi Miura
+'''
+
+import re
+from calibre.ebooks.unihandecode.unidecoder import Unidecoder
+from calibre.ebooks.unihandecode.unicodepoints import CODEPOINTS
+from calibre.ebooks.unihandecode.jacodepoints import CODEPOINTS as JACODES
+from calibre.ebooks.unihandecode.pykakasi.kakasi import kakasi
+
+class Jadecoder(Unidecoder):
+    kakasi = None
+    codepoints = {}
+
+    def __init__(self):
+        self.codepoints = CODEPOINTS
+        self.codepoints.update(JACODES)
+        self.kakasi = kakasi()
+
+    def decode(self, text):
+        try:
+            result=self.kakasi.do(text)
+            return re.sub('[^\x00-\x7f]', lambda x: self.replace_point(x.group()),result)
+        except:
+            return re.sub('[^\x00-\x7f]', lambda x: self.replace_point(x.group()),text)
+
src/calibre/ebooks/unihandecode/krcodepoints.py (new file, 5251 lines)
(file diff suppressed because it is too large)

src/calibre/ebooks/unihandecode/krdecoder.py (new file, 24 lines)
@@ -0,0 +1,24 @@
+# -*- coding: utf-8 -*-
+
+__license__ = 'GPL 3'
+__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
+__docformat__ = 'restructuredtext en'
+
+'''
+Decode unicode text to an ASCII representation of the text in Korean.
+Based on unidecoder.
+
+'''
+
+from calibre.ebooks.unihandecode.unidecoder import Unidecoder
+from calibre.ebooks.unihandecode.krcodepoints import CODEPOINTS as HANCODES
+from calibre.ebooks.unihandecode.unicodepoints import CODEPOINTS
+
+class Krdecoder(Unidecoder):
+
+    codepoints = {}
+
+    def __init__(self):
+        self.codepoints = CODEPOINTS
+        self.codepoints.update(HANCODES)
+
src/calibre/ebooks/unihandecode/pykakasi/__init__.py (new file, 5 lines)
@@ -0,0 +1,5 @@
+from calibre.ebooks.unihandecode.pykakasi.kakasi import kakasi
+kakasi
+
+__all__ = ["pykakasi"]
+
src/calibre/ebooks/unihandecode/pykakasi/h2a.py (new file, 185 lines)
@@ -0,0 +1,185 @@
+# -*- coding: utf-8 -*-
+# h2a.py
+#
+# Copyright 2011 Hiroshi Miura <miurahr@linux.com>
+#
+# Original copyright:
+# * KAKASI (Kanji Kana Simple inversion program)
+# * $Id: jj2.c,v 1.7 2001-04-12 05:57:34 rug Exp $
+# * Copyright (C) 1992
+# * Hironobu Takahashi (takahasi@tiny.or.jp)
+# *
+# * This program is free software; you can redistribute it and/or modify
+# * it under the terms of the GNU General Public License as published by
+# * the Free Software Foundation; either versions 2, or (at your option)
+# * any later version.
+# *
+# * This program is distributed in the hope that it will be useful
+# * but WITHOUT ANY WARRANTY; without even the implied warranty of
+# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# * GNU General Public License for more details.
+# *
+# * You should have received a copy of the GNU General Public License
+# * along with KAKASI, see the file COPYING.  If not, write to the Free
+# * Software Foundation Inc., 59 Temple Place - Suite 330, Boston, MA
+# * 02111-1307, USA.
+# */
+
+class H2a (object):
+
+    H2a_table = {
+        u"\u3041":"a", u"\u3042":"a",
+        u"\u3043":"i", u"\u3044":"i",
+        u"\u3045":"u", u"\u3046":"u",
+        u"\u3046\u309b":"vu", u"\u3046\u309b\u3041":"va",
+        u"\u3046\u309b\u3043":"vi", u"\u3046\u309b\u3047":"ve",
+        u"\u3046\u309b\u3049":"vo",
+        u"\u3047":"e", u"\u3048":"e",
+        u"\u3049":"o", u"\u304a":"o",
+
+        u"\u304b":"ka", u"\u304c":"ga",
+        u"\u304d":"ki", u"\u304d\u3041":"kya",
+        u"\u304d\u3045":"kyu", u"\u304d\u3049":"kyo",
+        u"\u304e":"gi", u"\u3050\u3083":"gya",
+        u"\u304e\u3045":"gyu", u"\u304e\u3087":"gyo",
+        u"\u304f":"ku", u"\u3050":"gu",
+        u"\u3051":"ke", u"\u3052":"ge",
+        u"\u3053":"ko", u"\u3054":"go",
+
+        u"\u3055":"sa", u"\u3056":"za",
+        u"\u3057":"shi", u"\u3057\u3083":"sha",
+        u"\u3057\u3085":"shu", u"\u3057\u3087":"sho",
+        u"\u3058":"ji", u"\u3058\u3083":"ja",
+        u"\u3058\u3085":"ju", u"\u3058\u3087":"jo",
+        u"\u3059":"su", u"\u305a":"zu",
+        u"\u305b":"se", u"\u305c":"ze",
+        u"\u305d":"so", u"\u305e":"zo",
+
+        u"\u305f":"ta", u"\u3060":"da",
+        u"\u3061":"chi", u"\u3061\u3047":"che", u"\u3061\u3083":"cha",
+        u"\u3061\u3085":"chu", u"\u3061\u3087":"cho",
+        u"\u3062":"ji", u"\u3062\u3083":"ja",
+        u"\u3062\u3085":"ju", u"\u3062\u3087":"jo",
+
+        u"\u3063":"tsu",
+        u"\u3063\u3046\u309b":"vvu",
+        u"\u3063\u3046\u309b\u3041":"vva",
+        u"\u3063\u3046\u309b\u3043":"vvi",
+        u"\u3063\u3046\u309b\u3047":"vve",
+        u"\u3063\u3046\u309b\u3049":"vvo",
+        u"\u3063\u304b":"kka", u"\u3063\u304c":"gga",
+        u"\u3063\u304d":"kki", u"\u3063\u304d\u3083":"kkya",
+        u"\u3063\u304d\u3085":"kkyu", u"\u3063\u304d\u3087":"kkyo",
+        u"\u3063\u304e":"ggi", u"\u3063\u304e\u3083":"ggya",
+        u"\u3063\u304e\u3085":"ggyu", u"\u3063\u304e\u3087":"ggyo",
+        u"\u3063\u304f":"kku", u"\u3063\u3050":"ggu",
+        u"\u3063\u3051":"kke", u"\u3063\u3052":"gge",
+        u"\u3063\u3053":"kko", u"\u3063\u3054":"ggo",
+        u"\u3063\u3055":"ssa", u"\u3063\u3056":"zza",
+        u"\u3063\u3057":"sshi", u"\u3063\u3057\u3083":"ssha",
+        u"\u3063\u3057\u3085":"sshu", u"\u3063\u3057\u3087":"ssho",
+        u"\u3063\u3058":"jji", u"\u3063\u3058\u3083":"jja",
+        u"\u3063\u3058\u3085":"jju", u"\u3063\u3058\u3087":"jjo",
+        u"\u3063\u3059":"ssu", u"\u3063\u305a":"zzu",
+        u"\u3063\u305b":"sse", u"\u3063\u305e":"zze",
+        u"\u3063\u305d":"sso", u"\u3063\u305e":"zzo",
+        u"\u3063\u305f":"tta", u"\u3063\u3060":"dda",
+        u"\u3063\u3061":"tchi", u"\u3063\u3061\u3083":"tcha",
+        u"\u3063\u3061\u3085":"tchu", u"\u3063\u3061\u3087":"tcho",
+        u"\u3063\u3062":"jji", u"\u3063\u3062\u3083":"jjya",
+        u"\u3063\u3062\u3085":"jjyu", u"\u3063\u3062\u3087":"jjyo",
+        u"\u3063\u3064":"ttsu", u"\u3063\u3065":"zzu",
+        u"\u3063\u3066":"tte", u"\u3063\u3067":"dde",
+        u"\u3063\u3068":"tto", u"\u3063\u3069":"ddo",
+        u"\u3063\u306f":"hha", u"\u3063\u3070":"bba",
+        u"\u3063\u3071":"ppa",
+        u"\u3063\u3072":"hhi", u"\u3063\u3072\u3083":"hhya",
+        u"\u3063\u3072\u3085":"hhyu", u"\u3063\u3072\u3087":"hhyo",
+        u"\u3063\u3073":"bbi", u"\u3063\u3073\u3083":"bbya",
+        u"\u3063\u3073\u3085":"bbyu", u"\u3063\u3073\u3087":"bbyo",
+        u"\u3063\u3074":"ppi", u"\u3063\u3074\u3083":"ppya",
+        u"\u3063\u3074\u3085":"ppyu", u"\u3063\u3074\u3087":"ppyo",
+        u"\u3063\u3075":"ffu", u"\u3063\u3075\u3041":"ffa",
+        u"\u3063\u3075\u3043":"ffi", u"\u3063\u3075\u3047":"ffe",
+        u"\u3063\u3075\u3049":"ffo",
+        u"\u3063\u3076":"bbu", u"\u3063\u3077":"ppu",
+        u"\u3063\u3078":"hhe", u"\u3063\u3079":"bbe",
+        u"\u3063\u307a":"ppe",
+        u"\u3063\u307b":"hho", u"\u3063\u307c":"bbo",
+        u"\u3063\u307d":"ppo",
+        u"\u3063\u3084":"yya", u"\u3063\u3086":"yyu",
+        u"\u3063\u3088":"yyo",
+        u"\u3063\u3089":"rra", u"\u3063\u308a":"rri",
+        u"\u3063\u308a\u3083":"rrya", u"\u3063\u308a\u3085":"rryu",
+        u"\u3063\u308a\u3087":"rryo",
+        u"\u3063\u308b":"rru", u"\u3063\u308c":"rre",
+        u"\u3063\u308d":"rro",
+
+        u"\u3064":"tsu", u"\u3065":"zu",
+        u"\u3066":"te", u"\u3067":"de", u"\u3067\u3043":"di",
+        u"\u3068":"to", u"\u3069":"do",
+
+        u"\u306a":"na",
+        u"\u306b":"ni", u"\u306b\u3083":"nya",
+        u"\u306b\u3085":"nyu", u"\u306b\u3087":"nyo",
+        u"\u306c":"nu", u"\u306d":"ne", u"\u306e":"no",
+
+        u"\u306f":"ha", u"\u3070":"ba", u"\u3071":"pa",
+        u"\u3072":"hi", u"\u3072\u3083":"hya",
+        u"\u3072\u3085":"hyu", u"\u3072\u3087":"hyo",
+        u"\u3073":"bi", u"\u3073\u3083":"bya",
+        u"\u3073\u3085":"byu", u"\u3073\u3087":"byo",
+        u"\u3074":"pi", u"\u3074\u3083":"pya",
+        u"\u3074\u3085":"pyu", u"\u3074\u3087":"pyo",
+        u"\u3075":"fu", u"\u3075\u3041":"fa",
+        u"\u3075\u3043":"fi", u"\u3075\u3047":"fe",
+        u"\u3075\u3049":"fo",
+        u"\u3076":"bu", u"\u3077":"pu",
+        u"\u3078":"he", u"\u3079":"be", u"\u307a":"pe",
+        u"\u307b":"ho", u"\u307c":"bo", u"\u307d":"po",
+
+        u"\u307e":"ma",
+        u"\u307f":"mi", u"\u307f\u3083":"mya",
+        u"\u307f\u3085":"myu", u"\u307f\u3087":"myo",
+        u"\u3080":"mu", u"\u3081":"me", u"\u3082":"mo",
+
+        u"\u3083":"ya", u"\u3084":"ya",
+        u"\u3085":"yu", u"\u3086":"yu",
+        u"\u3087":"yo", u"\u3088":"yo",
+
+        u"\u3089":"ra",
+        u"\u308a":"ri", u"\u308a\u3083":"rya",
+        u"\u308a\u3085":"ryu", u"\u308a\u3087":"ryo",
+        u"\u308b":"ru", u"\u308c":"re", u"\u308d":"ro",
+
+        u"\u308e":"wa", u"\u308f":"wa",
+        u"\u3090":"i", u"\u3091":"e",
+        u"\u3092":"wo", u"\u3093":"n",
+
+        u"\u3093\u3042":"n'a", u"\u3093\u3044":"n'i",
+        u"\u3093\u3046":"n'u", u"\u3093\u3048":"n'e",
+        u"\u3093\u304a":"n'o",
+        }
+
+    # this class is Borg
+    _shared_state = {}
+
+    def __new__(cls, *p, **k):
+        self = object.__new__(cls, *p, **k)
+        self.__dict__ = cls._shared_state
+        return self
+
+    def isHiragana(self, char):
+        return ( 0x3040 < ord(char) and ord(char) < 0x3094)
+
+    def convert(self, text):
+        Hstr = ""
+        max_len = -1
+        r = min(4, len(text)+1)
+        for x in xrange(r):
+            if text[:x] in self.H2a_table:
+                if max_len < x:
+                    max_len = x
+                    Hstr = self.H2a_table[text[:x]]
+        return (Hstr, max_len)
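convert() does greedy longest-prefix lookup against H2a_table and returns the romaji plus the number of characters consumed, so a caller can advance through a string one match at a time. (Note xrange(min(4, len(text)+1)) only tests prefixes up to three characters, so the four-character table entries such as the "vva" row appear unreachable as committed.) A usage sketch, assuming the surrounding pykakasi package is importable:

    # -*- coding: utf-8 -*-
    from calibre.ebooks.unihandecode.pykakasi.h2a import H2a

    h2a = H2a()
    text = u'\u3057\u3083\u307f'          # hiragana "sha" + "mi"
    romaji, consumed = h2a.convert(text)  # longest prefix match wins
    print romaji, consumed                # -> sha 2
    print h2a.convert(text[consumed:])    # -> ('mi', 1)
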
src/calibre/ebooks/unihandecode/pykakasi/itaijidict.utf8 (new file, 564 lines)
@@ -0,0 +1,564 @@
|
|||||||
|
芦蘆
壱一
苅刈
舘館
曽曾
菟兎
島嶋
盃杯
冨富
峯峰
亘亙
弌一
乘乗
亂乱
豫予
亊事
弍二
亞亜
亰京
从従
仭仞
佛仏
來来
儘侭
伜倅
假仮
會会
做作
傳伝
僞偽
價価
儉倹
兒児
兔兎
竸競
兩両
囘回
册冊
冢塚
冩写
决決
冱冴
冰氷
况況
凉涼
處処
凾函
刄刃
刔抉
刧劫
剩剰
劍剣
劔剣
劒剣
剱剣
劑剤
辨弁
勞労
勳勲
勵励
勸勧
區区
卆卒
丗世
凖準
夘卯
卻却
卷巻
厠廁
厦廈
厮廝
厰廠
參参
雙双
咒呪
單単
噐器
營営
嚏嚔
嚴厳
囑嘱
囓齧
圀国
圈圏
國国
圍囲
圓円
團団
圖図
埀垂
埓埒
塲場
壞壊
墮堕
壓圧
壘塁
壥廛
壤壌
壯壮
壺壷
壹一
壻婿
壽寿
夂夊
夛多
梦夢
竒奇
奧奥
奬奨
侫佞
姙妊
嫻嫺
孃嬢
學学
斈学
寃冤
寇冦
寢寝
寫写
寶宝
寳宝
尅剋
將将
專専
對対
尓爾
尢尤
屆届
屬属
峽峡
嶌嶋
嵜崎
崙崘
嵳嵯
嶽岳
巛川
巵卮
帋紙
帶帯
幤幣
廐厩
廏厩
廣広
廚厨
廢廃
廳庁
廰庁
廸迪
弃棄
弉奘
彜彝
彈弾
彌弥
弯彎
徃往
徑径
從従
徠来
悳徳
恠怪
恆恒
悧俐
惡悪
惠恵
忰悴
惱悩
愼慎
愽博
慘惨
慚慙
憇憩
應応
懷懐
懴懺
戀恋
戞戛
戰戦
戲戯
拔抜
拏拿
擔担
拜拝
拂払
挾挟
搜捜
插挿
搖揺
攝摂
攪撹
據拠
擇択
擧拳
舉拳
抬擡
擴拡
攜携
攵攴
攷考
收収
效効
敕勅
敍叙
敘叙
數数
變変
斷断
旙旛
昜陽
晄晃
晉晋
晝昼
晰晢
暎映
曉暁
暸瞭
昿曠
曵曳
朖朗
朞期
霸覇
杤栃
杰傑
枩松
檜桧
條条
檮梼
梹檳
棊棋
棧桟
棕椶
楙茂
榮栄
槨椁
樂楽
權権
樞枢
樣様
樓楼
橢楕
檢検
櫻桜
鬱欝
盜盗
飮飲
歐嘔
歡歓
歸帰
殘残
殱殲
殼殻
毆殴
毓育
氣気
沒没
泪涙
濤涛
渕淵
渊淵
淨浄
淺浅
滿満
溂剌
溪渓
灌潅
滯滞
澁渋
澀渋
潛潜
濳潜
澂澄
澑溜
澤沢
濟済
濕湿
濱浜
濾滬
灣湾
烱炯
烟煙
熈煕
熏燻
燒焼
爐炉
爭争
爲為
爼俎
犁犂
犹猶
犲豺
狹狭
獎奨
默黙
獨独
獸獣
獵猟
獻献
珎珍
璢瑠
瑯琅
珱瓔
瓣弁
甞嘗
甼町
畄留
畍界
畊耕
畆畝
畧略
畫画
當当
畴疇
疊畳
疉畳
疂畳
癡痴
發発
皃猊
皈帰
皹皸
盖蓋
盡尽
蘯盪
眞真
眦眥
礦鉱
礪砺
碎砕
碯瑙
祕秘
祿禄
齋斎
禪禅
禮礼
禀稟
稱称
稻稲
稾稿
穗穂
穩穏
龝穐
穰穣
窗窓
竈竃
窰窯
竊窃
竝並
筺筐
笋筍
箟箘
筝箏
簔蓑
籠篭
籘籐
籖籤
粹粋
糺糾
絲糸
經経
總総
緜綿
縣県
縱縦
繪絵
繩縄
繼継
緕纃
續続
纖繊
纎繊
纜繿
缺欠
罐缶
罸罰
羃冪
羣群
羮羹
譱善
翆翠
翦剪
耻恥
聟婿
聨聯
聲声
聰聡
聽聴
肅粛
冐冒
脉脈
腦脳
腟膣
膓腸
膸髄
膽胆
臈臘
臟臓
臺台
與与
舊旧
舍舎
舖舗
舩船
艢檣
舮艫
艷艶
莖茎
莊荘
莵兎
菷帚
萠萌
蕚萼
蒂蔕
萬万
葢蓋
蘂蕊
蕋蕊
藪薮
藏蔵
藝芸
藥薬
蘓蘇
乕虎
號号
蠣蛎
蝨虱
蠅蝿
螢蛍
蟆蟇
蟲虫
蠏蟹
蟷螳
蟒蠎
蠶蚕
蠧蠹
蠻蛮
衂衄
衞衛
袵衽
裝装
襃褒
褝襌
覩睹
覺覚
覽覧
觀観
觧解
觸触
誡戒
謌歌
諡謚
謠謡
證証
譛譖
譯訳
譽誉
讀読
讓譲
讚賛
豐豊
貉狢
貍狸
貎猊
豼貔
貘獏
戝財
貭質
貳弐
貮弐
賤賎
賣売
贊賛
賍贓
赱走
踈疎
踴踊
躰体
軆体
軈軅
軣轟
輕軽
輙輒
輌輛
轉転
辭辞
辯弁
迯逃
逹達
逎遒
遞逓
遲遅
邊辺
邉辺
邨村
鄰隣
醉酔
醫医
釀醸
釋釈
釡釜
釼剣
銕鉄
錢銭
鎭鎮
鐵鉄
鐡鉄
鑒鑑
鑄鋳
鑛鉱
鈩鑪
鑚鑽
閇閉
濶闊
關関
阯址
陷陥
險険
隱隠
隸隷
襍雑
雜雑
靈霊
靜静
靱靭
韭韮
韲齏
韵韻
顏顔
顯顕
飃飄
餘余
餝飾
餠餅
騷騒
驅駆
驛駅
驗験
髓髄
體体
髮髪
鬪闘
鰺鯵
鰛鰮
鳬鳧
鳫鴈
鵄鴟
鵞鵝
鷄鶏
鷏鷆
鹽塩
麥麦
麸麩
麪麺
點点
黨党
皷鼓
鼡鼠
齊斉
齒歯
齡齢
龜亀
槇槙
遙遥
瑤瑶
凜凛
熙煕
83 src/calibre/ebooks/unihandecode/pykakasi/j2h.py Normal file
@@ -0,0 +1,83 @@
# -*- coding: utf-8 -*-
# j2h.py
#
# Copyright 2011 Hiroshi Miura <miurahr@linux.com>
#
# Original Copyright:
# * KAKASI (Kanji Kana Simple inversion program)
# * $Id: jj2.c,v 1.7 2001-04-12 05:57:34 rug Exp $
# * Copyright (C) 1992
# * Hironobu Takahashi (takahasi@tiny.or.jp)
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either versions 2, or (at your option)
# * any later version.
# *
# * This program is distributed in the hope that it will be useful
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with KAKASI, see the file COPYING.  If not, write to the Free
# * Software Foundation Inc., 59 Temple Place - Suite 330, Boston, MA
# * 02111-1307, USA.
# */

from calibre.ebooks.unihandecode.pykakasi.jisyo import jisyo
import re

class J2H (object):

    kanwa = None

    cl_table = [
        "","aiueow", "aiueow", "aiueow", "aiueow", "aiueow", "aiueow", "aiueow",
        "aiueow", "aiueow", "aiueow", "k", "g", "k", "g", "k", "g", "k", "g", "k",
        "g", "s", "zj", "s", "zj", "s", "zj", "s", "zj", "s", "zj", "t", "d", "tc",
        "d", "aiueokstchgzjfdbpw", "t", "d", "t", "d", "t", "d", "n", "n", "n", "n",
        "n", "h", "b", "p", "h", "b", "p", "hf", "b", "p", "h", "b", "p", "h", "b",
        "p", "m", "m", "m", "m", "m", "y", "y", "y", "y", "y", "y", "rl", "rl",
        "rl", "rl", "rl", "wiueo", "wiueo", "wiueo", "wiueo", "w", "n", "v", "k",
        "k", "", "", "", "", "", "", "", "", ""]

    def __init__(self):
        self.kanwa = jisyo()

    def isKanji(self, c):
        return ( 0x3400 <= ord(c) and ord(c) < 0xfa2e)

    def isCletter(self, l, c):
        if (ord(u"ぁ") <= ord(c) and ord(c) <= 0x309f) and ( l in self.cl_table[ord(c) - ord(u"ぁ")-1]):
            return True
        return False

    def itaiji_conv(self, text):
        r = []
        for c in text:
            if c in self.kanwa.itaijidict:
                r.append(c)
        for c in r:
            text = re.sub(c, self.kanwa.itaijidict[c], text)
        return text

    def convert(self, text):
        max_len = 0
        Hstr = ""
        table = self.kanwa.load_jisyo(text[0])
        if table is None:
            return ("", 0)
        for (k,v) in table.iteritems():
            length = len(k)
            if len(text) >= length:
                if text.startswith(k):
                    for (yomi, tail) in v:
                        if tail == '':
                            if max_len < length:
                                Hstr = yomi
                                max_len = length
                        elif max_len < length+1 and len(text) > length and self.isCletter(tail, text[length]):
                            Hstr = ''.join([yomi, text[length]])
                            max_len = length+1
        return (Hstr, max_len)
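J2H.convert applies the same longest-match rule to the kanwa (kanji-to-reading) dictionary, with one twist: a dictionary entry may carry a tail consonant class that has to agree with the okurigana character following the kanji, which is what isCletter checks against cl_table. A hedged usage sketch (the reading returned depends entirely on the bundled dictionary):

    j2h = J2H()
    (yomi, consumed) = j2h.convert(u'日本語')
    # yomi is the hiragana reading of the matched prefix,
    # consumed is how many input characters that match covered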
53 src/calibre/ebooks/unihandecode/pykakasi/jisyo.py Normal file
@@ -0,0 +1,53 @@
# -*- coding: utf-8 -*-
# jisyo.py
#
# Copyright 2011 Hiroshi Miura <miurahr@linux.com>
from cPickle import load
import anydbm, marshal
from zlib import decompress
import os

import calibre.utils.resources as resources

class jisyo (object):
    kanwadict = None
    itaijidict = None
    kanadict = None
    jisyo_table = {}

    # this class is Borg
    _shared_state = {}

    def __new__(cls, *p, **k):
        self = object.__new__(cls, *p, **k)
        self.__dict__ = cls._shared_state
        return self

    def __init__(self):
        if self.kanwadict is None:
            dictpath = resources.get_path(os.path.join('localization','pykakasi','kanwadict2.db'))
            self.kanwadict = anydbm.open(dictpath,'r')
        if self.itaijidict is None:
            itaijipath = resources.get_path(os.path.join('localization','pykakasi','itaijidict2.pickle'))
            itaiji_pkl = open(itaijipath, 'rb')
            self.itaijidict = load(itaiji_pkl)
        if self.kanadict is None:
            kanadictpath = resources.get_path(os.path.join('localization','pykakasi','kanadict2.pickle'))
            kanadict_pkl = open(kanadictpath, 'rb')
            self.kanadict = load(kanadict_pkl)

    def load_jisyo(self, char):
        try: #python2
            key = "%04x"%ord(unicode(char))
        except: #python3
            key = "%04x"%ord(char)

        try: #already exist?
            table = self.jisyo_table[key]
        except:
            try:
                table = self.jisyo_table[key] = marshal.loads(decompress(self.kanwadict[key]))
            except:
                return None
        return table
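The _shared_state assignment makes jisyo a Borg (monostate) class: every instance shares one __dict__, so the dictionaries are unpickled at most once no matter how many jisyo() objects the converters construct. The pattern in isolation:

    class Borg(object):
        _shared_state = {}

        def __new__(cls, *p, **k):
            self = object.__new__(cls)
            self.__dict__ = cls._shared_state   # all instances share this dict
            return self

    a, b = Borg(), Borg()
    a.loaded = True
    print b.loaded   # True: distinct objects, one shared attribute namespace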
50 src/calibre/ebooks/unihandecode/pykakasi/k2a.py Normal file
@@ -0,0 +1,50 @@
# -*- coding: utf-8 -*-
# k2a.py
#
# Copyright 2011 Hiroshi Miura <miurahr@linux.com>
#
# Original copyright:
# * KAKASI (Kanji Kana Simple inversion program)
# * $Id: jj2.c,v 1.7 2001-04-12 05:57:34 rug Exp $
# * Copyright (C) 1992
# * Hironobu Takahashi (takahasi@tiny.or.jp)
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either versions 2, or (at your option)
# * any later version.
# *
# * This program is distributed in the hope that it will be useful
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with KAKASI, see the file COPYING.  If not, write to the Free
# * Software Foundation Inc., 59 Temple Place - Suite 330, Boston, MA
# * 02111-1307, USA.
# */

from calibre.ebooks.unihandecode.pykakasi.jisyo import jisyo

class K2a (object):

    kanwa = None

    def __init__(self):
        self.kanwa = jisyo()

    def isKatakana(self, char):
        return ( 0x30a0 < ord(char) and ord(char) < 0x30f7)

    def convert(self, text):
        Hstr = ""
        max_len = -1
        r = min(10, len(text)+1)
        for x in xrange(r):
            if text[:x] in self.kanwa.kanadict:
                if max_len < x:
                    max_len = x
                    Hstr = self.kanwa.kanadict[text[:x]]
        return (Hstr, max_len)
101 src/calibre/ebooks/unihandecode/pykakasi/kakasi.py Normal file
@@ -0,0 +1,101 @@
# -*- coding: utf-8 -*-
# kakasi.py
#
# Copyright 2011 Hiroshi Miura <miurahr@linux.com>
#
# Original Copyright:
# * KAKASI (Kanji Kana Simple inversion program)
# * $Id: jj2.c,v 1.7 2001-04-12 05:57:34 rug Exp $
# * Copyright (C) 1992
# * Hironobu Takahashi (takahasi@tiny.or.jp)
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either versions 2, or (at your option)
# * any later version.
# *
# * This program is distributed in the hope that it will be useful
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with KAKASI, see the file COPYING.  If not, write to the Free
# * Software Foundation Inc., 59 Temple Place - Suite 330, Boston, MA
# * 02111-1307, USA.
# */

from calibre.ebooks.unihandecode.pykakasi.j2h import J2H
from calibre.ebooks.unihandecode.pykakasi.h2a import H2a
from calibre.ebooks.unihandecode.pykakasi.k2a import K2a

class kakasi(object):

    j2h = None
    h2a = None
    k2a = None

    def __init__(self):
        self.j2h = J2H()
        self.h2a = H2a()
        self.k2a = K2a()

    def do(self, text):
        otext = ''
        i = 0
        while True:
            if i >= len(text):
                break

            if self.j2h.isKanji(text[i]):
                (t, l) = self.j2h.convert(text[i:])
                if l <= 0:
                    otext = otext + text[i]
                    i = i + 1
                    continue
                i = i + l
                m = 0
                tmptext = ""
                while True:
                    if m >= len(t):
                        break
                    (s, n) = self.h2a.convert(t[m:])
                    if n <= 0:
                        break
                    m = m + n
                    tmptext = tmptext + s
                if i >= len(text):
                    otext = otext + tmptext.capitalize()
                else:
                    otext = otext + tmptext.capitalize() + ' '
            elif self.h2a.isHiragana(text[i]):
                tmptext = ''
                while True:
                    (t, l) = self.h2a.convert(text[i:])
                    tmptext = tmptext + t
                    i = i + l
                    if i >= len(text):
                        otext = otext + tmptext
                        break
                    elif not self.h2a.isHiragana(text[i]):
                        otext = otext + tmptext + ' '
                        break
            elif self.k2a.isKatakana(text[i]):
                tmptext = ''
                while True:
                    (t, l) = self.k2a.convert(text[i:])
                    tmptext = tmptext + t
                    i = i + l
                    if i >= len(text):
                        otext = otext + tmptext
                        break
                    elif not self.k2a.isKatakana(text[i]):
                        otext = otext + tmptext + ' '
                        break
            else:
                otext = otext + text[i]
                i += 1

        return otext
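kakasi.do is the driver: it walks the input, hands each run of kanji, hiragana, or katakana to the matching converter, capitalizes romanized kanji chunks, and joins chunks with spaces. A hedged usage sketch (the exact romanization depends on the bundled dictionaries):

    from calibre.ebooks.unihandecode.pykakasi.kakasi import kakasi
    print kakasi().do(u'日本語テキスト')   # e.g. 'Nihongo tekisuto'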
121826 src/calibre/ebooks/unihandecode/pykakasi/kakasidict.utf8 Normal file
File diff suppressed because it is too large
317 src/calibre/ebooks/unihandecode/pykakasi/kanadict.utf8 Normal file
@@ -0,0 +1,317 @@
;; Kana-Alphabet mapping dictionary
;;
;; To use this mapping table,
;; you should unicode normalize NFKC form.
;;
;; basic mapping
;;
a ァ
a ア
ba バ
bba ッバ
bbe ッベ
bbi ッビ
bbo ッボ
bbu ッブ
bbya ッビャ
bbyo ッビョ
bbyu ッビュ
be ベ
bi ビ
bo ボ
bu ブ
bya ビャ
byo ビョ
byu ビュ
cha チャ
che チェ
chi チ
cho チョ
chu チュ
da ダ
dda ッダ
dde ッデ
ddo ッド
de デ
di ディ
do ド
e ェ
e エ
e ヱ
fa ファ
fe フェ
ffa ッファ
ffe ッフェ
ffi ッフィ
ffo ッフォ
ffu ッフ
fi フィ
fo フォ
fu フ
ga ガ
ge ゲ
gga ッガ
gge ッゲ
ggi ッギ
ggo ッゴ
ggu ッグ
ggya ッギャ
ggyo ッギョ
ggyu ッギュ
gi ギ
go ゴ
gu グ
gya グャ
gyo ギョ
gyu ギゥ
ha ハ
he ヘ
hha ッハ
hhe ッヘ
hhi ッヒ
hho ッホ
hhya ッヒャ
hhyo ッヒョ
hhyu ッヒュ
hi ヒ
ho ホ
hya ヒャ
hyo ヒョ
hyu ヒュ
i ィ
i イ
i ヰ
ja ジャ
ja ヂャ
ji ジ
ji ヂ
jja ッジャ
jji ッジ
jji ッヂ
jjo ッジョ
jju ッジュ
jjya ッヂャ
jjyo ッヂョ
jjyu ッヂュ
jo ジョ
jo ヂョ
ju ジュ
ju ヂュ
ka カ
ka ヵ
ke ケ
ke ヶ
ki キ
kka ッカ
kke ッケ
kki ッキ
kko ッコ
kku ック
kkya ッキャ
kkyo ッキョ
kkyu ッキュ
ko コ
ku ク
kya キァ
kyo キォ
kyu キゥ
ma マ
me メ
mi ミ
mo モ
mu ム
mya ミャ
myo ミョ
myu ミュ
n ン
n'a ンア
n'e ンエ
n'i ンイ
n'o ンオ
n'u ンウ
na ナ
ne ネ
ni ニ
no ノ
nu ヌ
nya ニャ
nyo ニョ
nyu ニュ
o ォ
o オ
pa パ
pe ペ
pi ピ
po ポ
ppa ッパ
ppe ッペ
ppi ッピ
ppo ッポ
ppu ップ
ppya ッピャ
ppyo ッピョ
ppyu ッピュ
pu プ
pya ピャ
pyo ピョ
pyu ピュ
ra ラ
re レ
ri リ
ro ロ
rra ッラ
rre ッレ
rri ッリ
rro ッロ
rru ッル
rrya ッリャ
rryo ッリョ
rryu ッリュ
ru ル
rya リャ
ryo リョ
ryu リュ
sa サ
se セ
sha シャ
shi シ
sho ショ
shu シュ
so ソ
ssa ッサ
sse ッセ
ssha ッシャ
sshi ッシ
ssho ッショ
sshu ッシュ
sso ッソ
ssu ッス
su ス
ta タ
tcha ッチャ
tchi ッチ
tcho ッチョ
tchu ッチュ
te テ
to ト
tsu ッ
tsu ツ
tta ッタ
tte ッテ
tto ット
ttsu ッツ
u ゥ
u ウ
va ヴァ
ve ヴェ
vi ヴィ
vo ヴォ
vu ヴ
vva ッヴァ
vve ッヴェ
vvi ッヴィ
vvo ッヴォ
vvu ッヴ
wa ヮ
wa ワ
wo ヲ
ya ャ
ya ヤ
yo ョ
yo ヨ
yu ュ
yu ユ
yya ッヤ
yyo ッヨ
yyu ッユ
za ザ
ze ゼ
zo ゾ
zu ズ
zu ヅ
zza ッザ
zzo ッゾ
zzu ッズ
zzu ッヅ
;;
;; extended characters
;;
;;
;; gairai terms
;;
all オール
algrism アルゴリズム
answer アンサー
base ベース
begineer ビギナー
connection コネクション
contents コンテンツ
creator クリエーター
comic コミック
comics コミックス
culture カルチャー
debug デバッグ
debugging デバッギング
design デザイン
digital デジタル
dillenma ジレンマ
directory ディレクトリ
disk ディスク
document ドキュメント
download ダウンロード
electric エレクトリック
facebook フェイスブック
firefox ファイアーフォックス
folder フォルダ
format フォーマット
forum フォーラム
fox フォックス
free フリー
gnome ノーム
gnu グヌー
gozilla ゴジラ
guide ガイド
harvard ハーバード
help ヘルプ
highlight ハイライト
japan ジャパン
journal ジャーナル
library ライブラリ
line ライン
love ラヴ
love ラブ
mail メール
main メイン
mystery ミステリ
mozilla モジラ
network ネットワーク
next ネクスト
new ニュー
news ニュース
native ネイティブ
online オンライン
open オープン
professional プロフェッショナル
profile プロファイル
programmer プログラマ
sample サンプル
series シリーズ
share シェア
social ソーシャル
society ソサエティ
software ソフトウエア
source ソース
street ストリート
system システム
tag タグ
text テキスト
thunderbird サンダーバード
training トレーニング
twitter ツイッター
unicode ユニコード
wall ウオール
wall ウォール
welcome ウェルカム
welcome ウエルカム
wikinomics ウィキノミクス
york ヨーク
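As the header comment notes, lookups against this table assume the katakana input has already been normalized to NFKC, so half-width and composed variants collapse to the forms listed here. A sketch of that step with the standard unicodedata module:

    import unicodedata
    text = unicodedata.normalize('NFKC', u'ﾃﾞｨｽｸ')   # half-width katakana -> ディスク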
1798 src/calibre/ebooks/unihandecode/unicodepoints.py Normal file
File diff suppressed because it is too large
@@ -1,12 +1,17 @@
 # -*- coding: utf-8 -*-
 
 __license__ = 'GPL 3'
-__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
 __docformat__ = 'restructuredtext en'
 
 '''
-Decode unicode text to an ASCII representation of the text. Transliterate
-unicode characters to ASCII.
+Decode unicode text to an ASCII representation of the text in Chinese.
+Transliterate unicode characters to ASCII based on chinese pronounce.
+
+Derived from John Schember's unidecode library. Which was created
+as part of calibre.
+
+Copyright(c) 2009, John Schember <john@nachtimwald.com>
+
 Based on the ruby unidecode gem (http://rubyforge.org/projects/unidecode/) which
 is based on the perl module Text::Unidecode
@@ -55,29 +60,20 @@ it under the same terms as Perl itself.
 '''
 
 import re
-from calibre.ebooks.unidecode.unicodepoints import CODEPOINTS
-from calibre.constants import preferred_encoding
+from calibre.ebooks.unihandecode.unicodepoints import CODEPOINTS
+from calibre.ebooks.unihandecode.zhcodepoints import CODEPOINTS as HANCODES
 
 class Unidecoder(object):
 
+    codepoints = {}
+
+    def __init__(self):
+        self.codepoints = CODEPOINTS
+        self.codepoints.update(HANCODES)
+
     def decode(self, text):
-        '''
-        Tranliterate the string from unicode characters to ASCII.
-        '''
-        # The keys for CODEPOINTS is unicode characters, we want to be sure the
-        # input text is unicode.
-        if not isinstance(text, unicode):
-            try:
-                text = unicode(text)
-            except:
-                try:
-                    text = text.decode(preferred_encoding)
-                except:
-                    text = text.decode('utf-8', 'replace')
         # Replace characters larger than 127 with their ASCII equivelent.
-        return re.sub('[^\x00-\x7f]', lambda x: self.replace_point(x.group()),
-            text)
+        return re.sub('[^\x00-\x7f]',lambda x: self.replace_point(x.group()), text)
@@ -87,7 +83,7 @@ class Unidecoder(object):
             # Split the unicode character xABCD into parts 0xAB and 0xCD.
             # 0xAB represents the group within CODEPOINTS to query and 0xCD
             # represents the position in the list of characters for the group.
-            return CODEPOINTS[self.code_group(codepoint)][self.grouped_point(
+            return self.codepoints[self.code_group(codepoint)][self.grouped_point(
                 codepoint)]
         except:
             return '?'
@@ -97,12 +93,18 @@ class Unidecoder(object):
         Find what group character is a part of.
         '''
         # Code groups withing CODEPOINTS take the form 'xAB'
-        return u'x%02x' % (ord(unicode(character)) >> 8)
+        try: #python2
+            return 'x%02x' % (ord(unicode(character)) >> 8)
+        except:
+            return 'x%02x' % (ord(character) >> 8)
 
     def grouped_point(self, character):
         '''
         Return the location the replacement character is in the list for a
         the group character is a part of.
         '''
-        return ord(unicode(character)) & 255
+        try: #python2
+            return ord(unicode(character)) & 255
+        except:
+            return ord(character) & 255
src/calibre/ebooks/unihandecode/vncodepoints.py
Normal file
5251
src/calibre/ebooks/unihandecode/vncodepoints.py
Normal file
File diff suppressed because it is too large
Load Diff
23
src/calibre/ebooks/unihandecode/vndecoder.py
Normal file
23
src/calibre/ebooks/unihandecode/vndecoder.py
Normal file
@ -0,0 +1,23 @@
|
# -*- coding: utf-8 -*-

__license__ = 'GPL 3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
__docformat__ = 'restructuredtext en'

'''
Decode unicode text to an ASCII representation of the text in Vietnamese.

'''

from calibre.ebooks.unihandecode.unidecoder import Unidecoder
from calibre.ebooks.unihandecode.vncodepoints import CODEPOINTS as HANCODES
from calibre.ebooks.unihandecode.unicodepoints import CODEPOINTS

class Vndecoder(Unidecoder):

    codepoints = {}

    def __init__(self):
        self.codepoints = CODEPOINTS
        self.codepoints.update(HANCODES)
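Each language-specific decoder is just the generic Unidecoder with that language's code-point tables overlaid: the base Latin table is assigned first and then updated, so the language-specific entries win on overlapping groups. A hedged usage sketch (the output depends on the bundled tables):

    from calibre.ebooks.unihandecode.vndecoder import Vndecoder
    print Vndecoder().decode(u'Việt Nam')   # e.g. 'Viet Nam'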
5251 src/calibre/ebooks/unihandecode/zhcodepoints.py Normal file
File diff suppressed because it is too large
@@ -271,11 +271,6 @@ class DeleteAction(InterfaceAction):
                         partial(self.library_ids_deleted, current_row=row))
         # Device view is visible.
         else:
-            if not confirm('<p>'+_('The selected books will be '
-                                   '<b>permanently deleted</b> '
-                                   'from your device. Are you sure?')
-                                +'</p>', 'device_delete_books', self.gui):
-                return
             if self.gui.stack.currentIndex() == 1:
                 view = self.gui.memory_view
             elif self.gui.stack.currentIndex() == 2:
@@ -283,8 +278,14 @@ class DeleteAction(InterfaceAction):
             else:
                 view = self.gui.card_b_view
             paths = view.model().paths(rows)
+            ids = view.model().indices(rows)
+            if not confirm('<p>'+_('The selected books will be '
+                                   '<b>permanently deleted</b> '
+                                   'from your device. Are you sure?')
+                                +'</p>', 'device_delete_books', self.gui):
+                return
             job = self.gui.remove_paths(paths)
             self.delete_memory[job] = (paths, view.model())
-            view.model().mark_for_deletion(job, rows)
+            view.model().mark_for_deletion(job, ids, rows_are_ids=True)
             self.gui.status_bar.show_message(_('Deleting books from device.'), 1000)
 
@@ -213,6 +213,8 @@ class CheckLibraryDialog(QDialog):
 
         self.log = QTreeWidget(self)
         self.log.itemChanged.connect(self.item_changed)
+        self.log.itemExpanded.connect(self.item_expanded_or_collapsed)
+        self.log.itemCollapsed.connect(self.item_expanded_or_collapsed)
         self._layout.addWidget(self.log)
 
         self.check_button = QPushButton(_('&Run the check again'))
@@ -333,11 +335,15 @@ class CheckLibraryDialog(QDialog):
         for check in CHECKS:
             builder(t, checker, check)
 
-        t.setColumnWidth(0, 200)
-        t.setColumnWidth(1, 400)
+        t.resizeColumnToContents(0)
+        t.resizeColumnToContents(1)
         self.delete_button.setEnabled(False)
         self.text_results = '\n'.join(plaintext)
 
+    def item_expanded_or_collapsed(self, item):
+        self.log.resizeColumnToContents(0)
+        self.log.resizeColumnToContents(1)
+
     def item_changed(self, item, column):
         self.fix_button.setEnabled(False)
         for it in self.top_level_items.values():
@@ -120,11 +120,10 @@ class BooksModel(QAbstractTableModel): # {{{
 
     def set_device_connected(self, is_connected):
         self.device_connected = is_connected
-        self.refresh_ondevice()
 
     def refresh_ondevice(self):
         self.db.refresh_ondevice()
-        self.refresh() # does a resort()
+        self.resort()
         self.research()
 
     def set_book_on_device_func(self, func):
@@ -826,7 +825,7 @@ class BooksModel(QAbstractTableModel): # {{{
             return False
         val = int(value.toInt()[0]) if column == 'rating' else \
               value.toDate() if column in ('timestamp', 'pubdate') else \
-              unicode(value.toString())
+              unicode(value.toString()).strip()
         id = self.db.id(row)
         books_to_refresh = set([id])
         if column == 'rating':
@@ -551,8 +551,10 @@ class BooksView(QTableView): # {{{
         return mods & Qt.ControlModifier or mods & Qt.ShiftModifier
 
     def mousePressEvent(self, event):
-        if event.button() == Qt.LeftButton and not self.event_has_mods():
-            self.drag_start_pos = event.pos()
+        ep = event.pos()
+        if self.indexAt(ep) in self.selectionModel().selectedIndexes() and \
+                event.button() == Qt.LeftButton and not self.event_has_mods():
+            self.drag_start_pos = ep
         return QTableView.mousePressEvent(self, event)
 
     def mouseMoveEvent(self, event):
@@ -10,13 +10,15 @@ from PyQt4.Qt import QApplication
 from calibre.gui2.preferences import ConfigWidgetBase, test_widget, \
     CommaSeparatedList
 from calibre.gui2.preferences.search_ui import Ui_Form
-from calibre.gui2 import config
+from calibre.gui2 import config, error_dialog
 from calibre.utils.config import prefs
 
 class ConfigWidget(ConfigWidgetBase, Ui_Form):
 
     def genesis(self, gui):
         self.gui = gui
+        db = gui.library_view.model().db
+        self.db = db
 
         r = self.register
 
@@ -24,11 +26,153 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
         r('highlight_search_matches', config)
         r('limit_search_columns', prefs)
         r('limit_search_columns_to', prefs, setting=CommaSeparatedList)
-        fl = gui.library_view.model().db.field_metadata.get_search_terms()
+        fl = db.field_metadata.get_search_terms()
         self.opt_limit_search_columns_to.update_items_cache(fl)
         self.clear_history_button.clicked.connect(self.clear_histories)
+
+        self.gst_explanation.setText('<p>' + _(
+            "<b>Grouped search terms</b> are search names that permit a query to automatically "
+            "search across more than one column. For example, if you create a grouped "
+            "search term <code>allseries</code> with the value "
+            "<code>series, #myseries, #myseries2</code>, then "
+            "the query <code>allseries:adhoc</code> will find 'adhoc' in any of the "
+            "columns <code>series</code>, <code>#myseries</code>, and "
+            "<code>#myseries2</code>.<p> Enter the name of the "
+            "grouped search term in the drop-down box, enter the list of columns "
+            "to search in the value box, then push the Save button. "
+            "<p>Note: Search terms are forced to lower case; <code>MySearch</code> "
+            "and <code>mysearch</code> are the same term."
+            "<p>You can have your grouped search term show up as user categories in "
+            " the Tag Browser. Just add the grouped search term names to the Make user "
+            "categories from box. You can add multiple terms separated by commas. "
+            "The new user category will be automatically "
+            "populated with all the items in the categories included in the grouped "
+            "search term. <p>Automatic user categories permit you to see easily "
+            "all the category items that "
+            "are in the columns contained in the grouped search term. Using the above "
+            "<code>allseries</code> example, the automatically-generated user category "
+            "will contain all the series mentioned in <code>series</code>, "
+            "<code>#myseries</code>, and <code>#myseries2</code>. This "
+            "can be useful to check for duplicates, to find which column contains "
+            "a particular item, or to have hierarchical categories (categories "
+            "that contain categories)."))
+        self.gst = db.prefs.get('grouped_search_terms', {})
+        self.orig_gst_keys = self.gst.keys()
+
+        fl = []
+        for f in db.all_field_keys():
+            fm = db.metadata_for_field(f)
+            if not fm['search_terms']:
+                continue
+            if not fm['is_category']:
+                continue
+            fl.append(f)
+        self.gst_value.update_items_cache(fl)
+        self.fill_gst_box(select=None)
+
+        self.gst_delete_button.setEnabled(False)
+        self.gst_save_button.setEnabled(False)
+        self.gst_names.currentIndexChanged[int].connect(self.gst_index_changed)
+        self.gst_names.editTextChanged.connect(self.gst_text_changed)
+        self.gst_value.textChanged.connect(self.gst_text_changed)
+        self.gst_save_button.clicked.connect(self.gst_save_clicked)
+        self.gst_delete_button.clicked.connect(self.gst_delete_clicked)
+        self.gst_changed = False
+
+        if db.prefs.get('grouped_search_make_user_categories', None) is None:
+            db.prefs.set('grouped_search_make_user_categories', [])
+        r('grouped_search_make_user_categories', db.prefs, setting=CommaSeparatedList)
+        self.muc_changed = False
+        self.opt_grouped_search_make_user_categories.editingFinished.connect(
+                                                        self.muc_box_changed)
+
+    def muc_box_changed(self):
+        self.muc_changed = True
+
+    def gst_save_clicked(self):
+        idx = self.gst_names.currentIndex()
+        name = icu_lower(unicode(self.gst_names.currentText()))
+        if not name:
+            return error_dialog(self.gui, _('Grouped Search Terms'),
+                                _('The search term cannot be blank'),
+                                show=True)
+        if idx != 0:
+            orig_name = unicode(self.gst_names.itemData(idx).toString())
+        else:
+            orig_name = ''
+        if name != orig_name:
+            if name in self.db.field_metadata.get_search_terms() and \
+                    name not in self.orig_gst_keys:
+                return error_dialog(self.gui, _('Grouped Search Terms'),
+                    _('That name is already used for a column or grouped search term'),
+                    show=True)
+            if name in [icu_lower(p) for p in self.db.prefs.get('user_categories', {})]:
+                return error_dialog(self.gui, _('Grouped Search Terms'),
+                    _('That name is already used for user category'),
+                    show=True)
+
+        val = [v.strip() for v in unicode(self.gst_value.text()).split(',') if v.strip()]
+        if not val:
+            return error_dialog(self.gui, _('Grouped Search Terms'),
+                                _('The value box cannot be empty'), show=True)
+
+        if orig_name and name != orig_name:
+            del self.gst[orig_name]
+        self.gst_changed = True
+        self.gst[name] = val
+        self.fill_gst_box(select=name)
+        self.changed_signal.emit()
+
+    def gst_delete_clicked(self):
+        if self.gst_names.currentIndex() == 0:
+            return error_dialog(self.gui, _('Grouped Search Terms'),
+                _('The empty grouped search term cannot be deleted'), show=True)
+        name = unicode(self.gst_names.currentText())
+        if name in self.gst:
+            del self.gst[name]
+            self.fill_gst_box(select='')
+            self.changed_signal.emit()
+            self.gst_changed = True
+
+    def fill_gst_box(self, select=None):
+        terms = sorted(self.gst.keys())
+        self.opt_grouped_search_make_user_categories.update_items_cache(terms)
+        self.gst_names.blockSignals(True)
+        self.gst_names.clear()
+        self.gst_names.addItem('', '')
+        for t in terms:
+            self.gst_names.addItem(t, t)
+        self.gst_names.blockSignals(False)
+        if select is not None:
+            if select == '':
+                self.gst_index_changed(0)
+            elif select in terms:
+                self.gst_names.setCurrentIndex(self.gst_names.findText(select))
+
+    def gst_text_changed(self):
+        self.gst_delete_button.setEnabled(False)
+        self.gst_save_button.setEnabled(True)
+
+    def gst_index_changed(self, idx):
+        self.gst_delete_button.setEnabled(idx != 0)
+        self.gst_save_button.setEnabled(False)
+        self.gst_value.blockSignals(True)
+        if idx == 0:
+            self.gst_value.setText('')
+        else:
+            name = unicode(self.gst_names.itemData(idx).toString())
+            self.gst_value.setText(','.join(self.gst[name]))
+        self.gst_value.blockSignals(False)
+
+    def commit(self):
+        if self.gst_changed:
+            self.db.prefs.set('grouped_search_terms', self.gst)
+            self.db.field_metadata.add_grouped_search_terms(self.gst)
+        return ConfigWidgetBase.commit(self)
+
     def refresh_gui(self, gui):
+        if self.muc_changed:
+            gui.tags_view.set_new_model()
         gui.search.search_as_you_type(config['search_as_you_type'])
         gui.library_view.model().set_highlight_only(config['highlight_search_matches'])
         gui.search.do_search()
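The machinery above stores grouped search terms as an ordinary database preference: a dict mapping the lower-cased term name to the list of search locations it expands to. A hedged illustration of the stored shape, using the names from the explanation text (db standing in for the LibraryDatabase2 instance):

    db.prefs.set('grouped_search_terms',
                 {'allseries': ['series', '#myseries', '#myseries2']})
    # After commit, the query  allseries:adhoc  searches all three columns at once.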
@@ -7,7 +7,7 @@
     <x>0</x>
     <y>0</y>
     <width>670</width>
-    <height>392</height>
+    <height>556</height>
    </rect>
   </property>
   <property name="windowTitle">
@@ -77,19 +77,6 @@
      </layout>
     </widget>
    </item>
-   <item row="4" column="0">
-    <spacer name="verticalSpacer">
-     <property name="orientation">
-      <enum>Qt::Vertical</enum>
-     </property>
-     <property name="sizeHint" stdset="0">
-      <size>
-       <width>0</width>
-       <height>0</height>
-      </size>
-     </property>
-    </spacer>
-   </item>
    <item row="3" column="0">
     <widget class="QPushButton" name="clear_history_button">
      <property name="toolTip">
@@ -100,6 +87,120 @@
      </property>
     </widget>
    </item>
+   <item row="4" column="0">
+    <widget class="QGroupBox" name="groupBox_2">
+     <property name="title">
+      <string>Grouped Search Terms</string>
+     </property>
+     <layout class="QGridLayout" name="gridLayout_3">
+      <item row="0" column="0">
+       <layout class="QHBoxLayout" name="l12">
+        <item>
+         <widget class="QLabel" name="la10">
+          <property name="text">
+           <string>&amp;Names:</string>
+          </property>
+          <property name="buddy">
+           <cstring>gst_names</cstring>
+          </property>
+         </widget>
+        </item>
+        <item>
+         <widget class="QComboBox" name="gst_names">
+          <property name="editable">
+           <bool>true</bool>
+          </property>
+          <property name="minimumContentsLength">
+           <number>10</number>
+          </property>
+          <property name="toolTip">
+           <string>Contains the names of the currently-defined group search terms.
+Create a new name by entering it into the empty box, then
+pressing Save. Rename a search term by selecting it then
+changing the name and pressing Save. Change the value of
+a search term by changing the value box then pressing Save.</string>
+          </property>
+         </widget>
+        </item>
+        <item>
+         <widget class="QToolButton" name="gst_delete_button">
+          <property name="toolTip">
+           <string>Delete the current search term</string>
+          </property>
+          <property name="text">
+           <string>...</string>
+          </property>
+          <property name="icon">
+           <iconset resource="../../../../resources/images.qrc">
+            <normaloff>:/images/trash.png</normaloff>:/images/trash.png</iconset>
+          </property>
+         </widget>
+        </item>
+        <item>
+         <widget class="MultiCompleteLineEdit" name="gst_value"/>
+        </item>
+        <item>
+         <widget class="QToolButton" name="gst_save_button">
+          <property name="toolTip">
+           <string>Save the current search term. You can rename a search term by
+changing the name then pressing Save. You can change the value
+of a search term by changing the value box then pressing Save.</string>
+          </property>
+          <property name="text">
+           <string>&amp;Save</string>
+          </property>
+         </widget>
+        </item>
+       </layout>
+      </item>
+      <item row="0" column="1" rowspan="3">
+       <widget class="QTextBrowser" name="gst_explanation">
+        <property name="sizePolicy">
+         <sizepolicy hsizetype="Expanding" vsizetype="Expanding">
+          <horstretch>0</horstretch>
+          <verstretch>100</verstretch>
+         </sizepolicy>
+        </property>
+       </widget>
+      </item>
+      <item row="1" column="0">
+       <layout class="QHBoxLayout">
+        <item>
+         <widget class="QLabel" name="l11">
+          <property name="text">
+           <string>Make &amp;user categories from:</string>
+          </property>
+          <property name="buddy">
+           <cstring>opt_grouped_search_make_user_categories</cstring>
+          </property>
+         </widget>
+        </item>
+        <item>
+         <widget class="MultiCompleteLineEdit" name="opt_grouped_search_make_user_categories">
+          <property name="toolTip">
+           <string>Enter the names of any grouped search terms you wish
+to be shown as user categories</string>
+          </property>
+         </widget>
+        </item>
+       </layout>
+      </item>
+      <item row="2" column="0">
+       <spacer name="verticalSpacer">
+        <property name="orientation">
+         <enum>Qt::Vertical</enum>
+        </property>
+        <property name="sizeHint" stdset="0">
+         <size>
+          <width>20</width>
+          <height>40</height>
+         </size>
+        </property>
+       </spacer>
+      </item>
+     </layout>
+    </widget>
+   </item>
   </layout>
  </widget>
 <customwidgets>
@@ -109,6 +210,8 @@
   <header>calibre/gui2/complete.h</header>
  </customwidget>
 </customwidgets>
-<resources/>
+<resources>
+ <include location="../../../../resources/images.qrc"/>
+</resources>
 <connections/>
</ui>
@@ -466,10 +466,7 @@ class TagTreeItem(object): # {{{
             icon_map[0] = data.icon
         self.tag, self.icon_state_map = data, list(map(QVariant, icon_map))
         if tooltip:
-            if tooltip.endswith(':'):
-                self.tooltip = tooltip + ' '
-            else:
-                self.tooltip = tooltip + ': '
+            self.tooltip = tooltip + ' '
         else:
             self.tooltip = ''
 
@@ -589,11 +586,17 @@ class TagsModel(QAbstractItemModel): # {{{
 
         # get_node_tree cannot return None here, because row_map is empty
         data = self.get_node_tree(config['sort_tags_by'])
+        gst = db.prefs.get('grouped_search_terms', {})
        self.root_item = TagTreeItem()
         for i, r in enumerate(self.row_map):
             if self.hidden_categories and self.categories[i] in self.hidden_categories:
                 continue
-            tt = _(u'The lookup/search name is "{0}"').format(r)
+            if r.startswith('@') and r[1:] in gst:
+                tt = _(u'The grouped search term name is "{0}"').format(r[1:])
+            elif r == 'news':
+                tt = ''
+            else:
+                tt = _(u'The lookup/search name is "{0}"').format(r)
             TagTreeItem(parent=self.root_item,
                         data=self.categories[i],
                         category_icon=self.category_icon_map[r],
@@ -735,6 +738,14 @@ class TagsModel(QAbstractItemModel): # {{{
         self.row_map = []
         self.categories = []
+
+        # Get the categories
+        if self.search_restriction:
+            data = self.db.get_categories(sort=sort,
+                        icon_map=self.category_icon_map,
+                        ids=self.db.search('', return_matches=True))
+        else:
+            data = self.db.get_categories(sort=sort, icon_map=self.category_icon_map)
+
         # Reconstruct the user categories, putting them into metadata
         self.db.field_metadata.remove_dynamic_categories()
         tb_cats = self.db.field_metadata
@@ -746,17 +757,16 @@ class TagsModel(QAbstractItemModel): # {{{
             except ValueError:
                 import traceback
                 traceback.print_exc()
 
+        for cat in sorted(self.db.prefs.get('grouped_search_terms', {}),
+                          key=sort_key):
+            if (u'@' + cat) in data:
+                tb_cats.add_user_category(label=u'@' + cat, name=cat)
+        self.db.data.change_search_locations(self.db.field_metadata.get_search_terms())
+
         if len(saved_searches().names()):
             tb_cats.add_search_category(label='search', name=_('Searches'))
 
-        # Now get the categories
-        if self.search_restriction:
-            data = self.db.get_categories(sort=sort,
-                        icon_map=self.category_icon_map,
-                        ids=self.db.search('', return_matches=True))
-        else:
-            data = self.db.get_categories(sort=sort, icon_map=self.category_icon_map)
-
         if self.filter_categories_by:
             for category in data.keys():
                 data[category] = [t for t in data[category]
@@ -767,6 +777,7 @@ class TagsModel(QAbstractItemModel): # {{{
             if category in data: # The search category can come and go
                 self.row_map.append(category)
                 self.categories.append(tb_categories[category]['name'])
+
         if len(old_row_map) != 0 and len(old_row_map) != len(self.row_map):
             # A category has been added or removed. We must force a rebuild of
             # the model
@@ -822,6 +833,7 @@ class TagsModel(QAbstractItemModel): # {{{
                     not self.db.field_metadata[r]['is_custom'] and \
                     not self.db.field_metadata[r]['kind'] == 'user' \
                 else False
+            tt = r if self.db.field_metadata[r]['kind'] == 'user' else None
             for idx,tag in enumerate(data[r]):
                 if clear_rating:
                     tag.avg_rating = None
@@ -861,10 +873,10 @@ class TagsModel(QAbstractItemModel): # {{{
                             category_icon = category_node.icon,
                             tooltip = None,
                             category_key=category_node.category_key)
-                t = TagTreeItem(parent=sub_cat, data=tag, tooltip=r,
+                t = TagTreeItem(parent=sub_cat, data=tag, tooltip=tt,
                                 icon_map=self.icon_state_map)
             else:
-                t = TagTreeItem(parent=category, data=tag, tooltip=r,
+                t = TagTreeItem(parent=category, data=tag, tooltip=tt,
                                 icon_map=self.icon_state_map)
         self.endInsertRows()
         return True
@@ -433,6 +433,10 @@ class ResultCache(SearchQueryParser): # {{{
         if len(candidates) == 0:
             return matches
 
+        if len(location) > 2 and location.startswith('@') and \
+                    location[1:] in self.db_prefs['grouped_search_terms']:
+            location = location[1:]
+
         if query and query.strip():
             # get metadata key associated with the search term. Eliminates
             # dealing with plurals and other aliases
@@ -440,9 +444,16 @@ class ResultCache(SearchQueryParser): # {{{
             # grouped search terms
             if isinstance(location, list):
                 if allow_recursion:
+                    if query.lower() == 'false':
+                        invert = True
+                        query = 'true'
+                    else:
+                        invert = False
                     for loc in location:
                         matches |= self.get_matches(loc, query,
                                 candidates=candidates, allow_recursion=False)
+                    if invert:
+                        matches = self.universal_set() - matches
                     return matches
                 raise ParseException(query, len(query), 'Recursive query group detected', self)
 
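The invert branch above handles a subtlety of searching a grouped term for 'false' (empty): OR-ing the per-column matches finds books where any member column is non-empty, but 'false' should mean empty in every member column. So the code searches for 'true' instead, unions the results, and complements the union, relying on not(A or B) = not A and not B. The same idea with plain sets:

    universe = set(range(10))               # stand-in for universal_set()
    a_true, b_true = {1, 2}, {2, 3}         # per-column 'true' matches
    false_matches = universe - (a_true | b_true)   # empty in every member column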
@ -1481,23 +1481,36 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
current_author = authors[0]
|
current_author = authors[0]
|
||||||
for (i,author) in enumerate(authors):
|
for (i,author) in enumerate(authors):
|
||||||
if author != current_author and i:
|
if author != current_author and i:
|
||||||
# Exit if author matches previous, but author_sort doesn't match
|
|
||||||
if author[0] == current_author[0]:
|
if author[0] == current_author[0]:
|
||||||
error_msg = _('''
|
if self.opts.fmt == 'mobi':
|
||||||
Inconsistent Author Sort values for Author '{0}':
|
# Exit if building MOBI
|
||||||
'{1}' <> '{2}',
|
error_msg = _(
|
||||||
unable to build catalog.\n
|
'''Inconsistent Author Sort values for
|
||||||
Select all books by '{0}', apply correct Author Sort value in Edit Metadata dialog,
|
Author '{0}':
|
||||||
then rebuild the catalog.\n''').format(author[0],author[1],current_author[1])
|
'{1}' <> '{2}'
|
||||||
self.opts.log.warn('\n*** Metadata error ***')
|
Unable to build MOBI catalog.\n
|
||||||
self.opts.log.warn(error_msg)
|
Select all books by '{0}', apply correct Author Sort value in Edit Metadata dialog, then rebuild the catalog.\n''').format(author[0],author[1],current_author[1])
|
||||||
|
self.opts.log.warn('\n*** Metadata error ***')
|
||||||
|
self.opts.log.warn(error_msg)
|
||||||
|
|
||||||
|
self.error.append('Author Sort mismatch')
|
||||||
|
self.error.append(error_msg)
|
||||||
|
return False
|
||||||
|
else:
|
||||||
|
# Warning if building non-MOBI
|
||||||
|
if not self.error:
|
||||||
|
self.error.append('Author Sort mismatch')
|
||||||
|
|
||||||
|
error_msg = _(
|
||||||
|
'''Warning: inconsistent Author Sort values for
|
||||||
|
Author '{0}':
|
||||||
|
'{1}' <> '{2}'\n''').format(author[0],author[1],current_author[1])
|
||||||
|
self.opts.log.warn('\n*** Metadata warning ***')
|
||||||
|
self.opts.log.warn(error_msg)
|
||||||
|
self.error.append(error_msg)
|
||||||
|
|
||||||
self.error.append('Metadata error')
|
|
||||||
self.error.append(error_msg)
|
|
||||||
return False
|
|
||||||
current_author = author
|
current_author = author
|
||||||
|
|
||||||
|
|
||||||
self.booksByAuthor = sorted(self.booksByAuthor, key=self.booksByAuthorSorter_author_sort)
|
self.booksByAuthor = sorted(self.booksByAuthor, key=self.booksByAuthorSorter_author_sort)
|
||||||
|
|
||||||
# Build the unique_authors set from existing data
|
# Build the unique_authors set from existing data
|
||||||
@@ -2135,7 +2148,7 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1])
                         if author_count == 1:
                             divOpeningTag.insert(dotc, pBookTag)
                             dotc += 1
-                        else:
+                        elif divRunningTag:
                             divRunningTag.insert(drtc,pBookTag)
                             drtc += 1

@@ -188,6 +188,17 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
             migrate_preference('saved_searches', {})
             set_saved_searches(self, 'saved_searches')

+        # migrate grouped_search_terms
+        if self.prefs.get('grouped_search_terms', None) is None:
+            try:
+                ogst = tweaks.get('grouped_search_terms', {})
+                ngst = {}
+                for t in ogst:
+                    ngst[icu_lower(t)] = ogst[t]
+                self.prefs.set('grouped_search_terms', ngst)
+            except:
+                pass
+
         # Rename any user categories with names that differ only in case
         user_cats = self.prefs.get('user_categories', [])
         catmap = {}
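
This hunk migrates grouped search terms from the static tweaks file into per-library preferences exactly once, lowercasing the keys (via icu_lower) so later lookups are case-insensitive. A minimal sketch of the same write-once migration, with plain dicts standing in for calibre's tweaks and prefs stores and str.lower in place of icu_lower:

    # Illustrative stand-ins for calibre's tweaks and database-backed prefs.
    tweaks = {'grouped_search_terms': {'MyPeople': ['authors', '#editors']}}
    prefs = {}

    def migrate_grouped_search_terms():
        # Only migrate if the preference has never been written.
        if prefs.get('grouped_search_terms', None) is None:
            old = tweaks.get('grouped_search_terms', {})
            # Lowercase keys so later lookups are case-insensitive.
            prefs['grouped_search_terms'] = {k.lower(): v for k, v in old.items()}

    migrate_grouped_search_terms()
    assert prefs['grouped_search_terms'] == {'mypeople': ['authors', '#editors']}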
@@ -349,12 +360,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
         if len(saved_searches().names()):
             tb_cats.add_search_category(label='search', name=_('Searches'))

-        gst = tweaks['grouped_search_terms']
-        for t in gst:
-            try:
-                self.field_metadata._add_search_terms_to_map(gst[t], [t])
-            except ValueError:
-                traceback.print_exc()
+        self.field_metadata.add_grouped_search_terms(
+                                    self.prefs.get('grouped_search_terms', {}))

         self.book_on_device_func = None
         self.data = ResultCache(self.FIELD_MAP, self.field_metadata, db_prefs=self.prefs)
@@ -1293,7 +1300,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
             # icon_map is not None if get_categories is to store an icon and
             # possibly a tooltip in the tag structure.
             icon = None
-            tooltip = ''
+            tooltip = '(' + category + ')'
             label = tb_cats.key_to_label(category)
             if icon_map:
                 if not tb_cats.is_custom_field(category):
@@ -1379,7 +1386,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
         categories['formats'].sort(key = lambda x:x.name)

         #### Now do the user-defined categories. ####
-        user_categories = self.prefs['user_categories']
+        user_categories = dict.copy(self.prefs['user_categories'])

         # We want to use same node in the user category as in the source
         # category. To do that, we need to find the original Tag node. There is
@@ -1390,6 +1397,17 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
         for c in categories.keys():
             taglist[c] = dict(map(lambda t:(t.name, t), categories[c]))

+        muc = self.prefs.get('grouped_search_make_user_categories', [])
+        gst = self.prefs.get('grouped_search_terms', {})
+        for c in gst:
+            if c not in muc:
+                continue
+            user_categories[c] = []
+            for sc in gst[c]:
+                if sc in categories.keys():
+                    for t in categories[sc]:
+                        user_categories[c].append([t.name, sc, 0])
+
         for user_cat in sorted(user_categories.keys(), key=sort_key):
             items = []
             for (name,label,ign) in user_categories[user_cat]:
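
The added loop synthesizes a user category for each grouped search term the user has opted into via grouped_search_make_user_categories, flattening the tags of the group's source categories into [name, source, 0] triples. An illustrative sketch with plain tag names in place of calibre's Tag objects:

    # Illustrative: build user categories from grouped search terms.
    gst = {'people': ['authors', '#editors']}        # group -> source categories
    muc = ['people']                                 # groups exposed as user categories
    categories = {'authors': ['Austen', 'Borges'],   # category -> tag names
                  '#editors': ['Chiang']}

    user_categories = {}
    for group, sources in gst.items():
        if group not in muc:
            continue  # user did not ask for this group as a category
        user_categories[group] = [[name, src, 0]
                                  for src in sources if src in categories
                                  for name in categories[src]]

    assert user_categories == {'people': [['Austen', 'authors', 0],
                                          ['Borges', 'authors', 0],
                                          ['Chiang', '#editors', 0]]}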
@@ -3,7 +3,7 @@ Created on 25 May 2010

@author: charles
'''
-import copy
+import copy, traceback

 from calibre.utils.ordered_dict import OrderedDict
 from calibre.utils.config import tweaks
@@ -488,6 +488,20 @@ class FieldMetadata(dict):
             del self._search_term_map[k]
         del self._tb_cats[key]

+    def _remove_grouped_search_terms(self):
+        to_remove = [v for v in self._search_term_map
+                     if isinstance(self._search_term_map[v], list)]
+        for v in to_remove:
+            del self._search_term_map[v]
+
+    def add_grouped_search_terms(self, gst):
+        self._remove_grouped_search_terms()
+        for t in gst:
+            try:
+                self._add_search_terms_to_map(gst[t], [t])
+            except ValueError:
+                traceback.print_exc()
+
     def cc_series_index_column_for(self, key):
         return self._tb_cats[key]['rec_index'] + 1
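
add_grouped_search_terms is idempotent: grouped terms are the list-valued entries in the search-term map, so stripping those before re-registering means repeated calls converge on the same state. A toy version of the remove-then-re-add pattern (the flat dict below is a stand-in for FieldMetadata's internal map):

    # Toy idempotent re-registration: grouped terms are the list-valued entries.
    search_term_map = {'title': 'title', 'people': ['authors', '#editors']}

    def add_grouped_search_terms(gst):
        # Drop previously registered groups (list-valued) before re-adding,
        # so calling this twice with the same input is a no-op.
        for k in [k for k, v in search_term_map.items() if isinstance(v, list)]:
            del search_term_map[k]
        search_term_map.update(gst)

    add_grouped_search_terms({'staff': ['authors']})
    add_grouped_search_terms({'staff': ['authors']})  # idempotent
    assert search_term_map == {'title': 'title', 'staff': ['authors']}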
File diff suppressed because it is too large
@@ -6,12 +6,12 @@ meaning as possible.
 import os
 from math import ceil

-from calibre.ebooks.unidecode.unidecoder import Unidecoder
 from calibre import sanitize_file_name
 from calibre.constants import preferred_encoding, iswindows
-udc = Unidecoder()
+from calibre.utils.localization import get_udc

 def ascii_text(orig):
+    udc = get_udc()
     try:
         ascii = udc.decode(orig)
     except:
@@ -169,3 +169,13 @@ def set_qt_translator(translator):
             return translator.load(p)
     return False

+
+_udc = None
+
+def get_udc():
+    global _udc
+    if _udc is None:
+        from calibre.ebooks.unihandecode import Unihandecoder
+        _udc = Unihandecoder(lang=get_lang())
+    return _udc
+
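
get_udc() defers both the unihandecode import and the decoder construction to first use, caching the instance in a module-level global; the old udc = Unidecoder() it replaces paid that cost at import time. A minimal sketch of the same lazy module-level singleton, with a hypothetical expensive class in place of Unihandecoder:

    # Lazy module-level singleton: pay construction cost only on first use.
    _decoder = None

    class ExpensiveDecoder:          # hypothetical stand-in for Unihandecoder
        def __init__(self):
            print('loading large transliteration tables...')
        def decode(self, text):
            return text.encode('ascii', 'replace').decode('ascii')

    def get_decoder():
        global _decoder
        if _decoder is None:         # constructed at most once per process
            _decoder = ExpensiveDecoder()
        return _decoder

    get_decoder().decode('café')     # prints the loading message
    get_decoder().decode('naïve')    # reuses the cached instance, no message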