mirror of https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00

commit ce56d9ea72: KG 0.7.45

Changelog.yaml (100 changed lines)
@ -19,6 +19,106 @@
# new recipes:
#   - title:

- version: 0.7.45
  date: 2011-02-11

  new features:
    - title: "Add plugin to download series information from the Kent District Library"

    - title: "Kindle driver: When uploading MOBI files to the device, upload page number information as well (used by the not yet released Kindle 3.1 firmware)"

    - title: "When automatically sending news to device, send to main memory preferentially, if it has enough space."
      tickets: [8877]

    - title: "Allow customization of which metadata fields are searched by default (click the preferences icon next to the search box)"

    - title: "New format TXTZ: a zip file containing the TXT file and associated images + metadata. calibre can convert to and from this format and read/write metadata to it."

    - title: "New option to control how automerge handles duplicate formats when adding books to your calibre library. See Preferences->Adding books"

    - title: "Driver for Nokia X6, Nexus S, WiBook, MyTouch 4G and Huawei Ideos S7"

    - title: "Nicer interface for editing tweaks"

    - title: "Add tweak to remove yellow lines from the edges of the book list"

    - title: "Completion: Restore adding of comma at end after completion for tags type fields. Add a tweak to control if an & is added after completion for author type fields"

    - title: "Turn search as you type off by default for searching the book list. You can turn it on by clicking the preferences button next to the search bar."

    - title: "TXT Input: Add option to remove indents and fix bug where spaces were not retained properly."

  bug fixes:
    - title: "Fix a regression in 0.7.44 that could cause setting authors to fail on windows when the author name is very long"
      tickets: [8797]

    - title: "E-book viewer: Fix bug that could cause the bottom of chapters to get cut off if the topmost element had a large top margin."
      tickets: [8791]

    - title: "Fix regression that caused a spurious error message after moving a library. Also ensure that the entries in the Copy to Library menu are updated after a library is moved/renamed/deleted."
      tickets: [8905]

    - title: "PML Input: New handling of t and T tags. T's that do not start the line are ignored. t's that start and end the line use a margin for the text block"

    - title: "News download: Remove all invalid ASCII control characters from article descriptions as they cause XML parsing to fail"

    - title: "MOBI Output: Fix bug that was discarding non-breaking spaces at the start of a paragraph when they were followed immediately by a tag."
      tickets: [4887]

    - title: "LIT Input: Fix a regression in handling LIT files that contain txt rather than html data"
      tickets: [8904]

    - title: "Fix bug in search box in the plugins dialog"
      tickets: [8882]

    - title: "Fix renaming of categories via the Tag Browser"
      tickets: [8807]

    - title: "Content server: Do not send mobile version to iPad"
      tickets: [8820]

    - title: "Fix undefined publication date appearing in book jacket as 101"
      tickets: [8799]

    - title: "Heuristics: Fix issue with invalid markup from italicize patterns."

    - title: "TXT Input: De-hyphenate textile and markdown input as well. Fix inline TOC not showing all items."

    - title: "RTF Input: More encoding token splitting fixes."

    - title: "Fix regression that broke the convenience Email to xxx entry in the connect/share menu."
      tickets: [8775]

    - title: "Fix editing of series type custom columns in the book list."
      tickets: [8765]

  improved recipes:
    - El periodico de Aragon
    - B92
    - French Belgian news sources

  new recipes:
    - title: "ABC.es"
      author: "Ricardo Jurado"

    - title: "Korespondent and Kopalnia Wiedzy"
      author: "Attis"

    - title: "Radio Prague"
      author: "Francois Pellicaan"

    - title: "Europa Press"
      author: "Luis Hernandez"

    - title: "Interoperability Happens and njuz.net"
      author: "Darko Miletic"

    - title: "Weblogs SL"
      author: "desUBIKado"

    - title: "Kompas and Jakarta Post"
      author: "Adrian Gunawan"

- version: 0.7.44
  date: 2011-02-04
format_docs/pdb/apnx.txt (new file, 69 lines)
@ -0,0 +1,69 @@
APNX
----

apnx files are used by the Amazon Kindle (firmware revision 3.1+) to
map pages from a print book to the Kindle version. Integers within
the file are big-endian.


Layout
------

bytes  content        comments

4      00010001       Format identifier. Value of 65537 big-endian.
4      start of next  The offset after the ending location of the first header.
                      Starts a new sequence of header info
4      length         Length of first header
N      first header   String containing content header
       Starts next sequence
2      unknown        Always 1
2      length         Length of second header
2      page count     Total number of bytes after second header that
                      represent pages. This total includes bytes that
                      are ignored by the pageMap.
2      unknown        Always 32
N      second header  String containing the page mapping header
4*N    padding        The first number given in the page mapping header
                      indicates the number of 0 bytes.
4*N    page list


Content Header
--------------

The content header is a string enclosed in {} containing key, value pairs.

content         comments

contentGuid     Guid.
asin            Amazon identifier for the Kindle version of the book.
cdeType         MOBI cdeType. Should always be EBOK for ebooks.
fileRevisionId  Revision of this file.

Example:
{"contentGuid":"d8c14b0","asin":"B000JML5VM","cdeType":"EBOK","fileRevisionId":"1296874359405"}


Page Mapping Header
-------------------

The page mapping header is a string enclosed in {} containing key, value pairs.

content  comments

asin     The ISBN 10 for the paper book the pages correspond to
pageMap  Three value tuple. Looks like: "(N,N,N)"
         1) Number of bytes after header that starts the page numbering sequence
         2) unknown
         3) unknown

Example:
{"asin":"1906694184","pageMap":"(4,a,1)"}


Page List
---------

The page list is a sequence of offsets in the uncompressed HTML. Each
value is the beginning of a new page. Each entry is a 4 byte big-endian
int. The list is ordered lowest to highest.
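Decoding the layout above is mechanical. The following is a minimal reader sketch (not part of calibre; it assumes a well-formed file, treats the page count field as the number of 4-byte entries, as calibre's writer further down does, and skips padding handling since the padding length depends on the pageMap header):

    import struct

    def read_apnx(path):
        with open(path, 'rb') as f:
            data = f.read()
        # First sequence: identifier, offset of the next sequence, header length
        ident, next_off, clen = struct.unpack('>III', data[0:12])
        assert ident == 0x00010001  # 65537, big-endian
        content_header = data[12:12 + clen]
        # Second sequence: unknown (1), header length, page count, unknown (32)
        unknown1, plen, page_count, unknown2 = struct.unpack(
            '>HHHH', data[next_off:next_off + 8])
        page_header = data[next_off + 8:next_off + 8 + plen]
        # Page list: page_count big-endian 4-byte offsets into the HTML
        start = next_off + 8 + plen
        pages = struct.unpack('>%dI' % page_count,
                              data[start:start + 4 * page_count])
        return content_header, page_header, pages
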
@ -126,12 +126,14 @@ sort_columns_at_startup = None

gui_pubdate_display_format = 'MMM yyyy'
gui_timestamp_display_format = 'dd MMM yyyy'

#: Control sorting of titles and series in the display
# Control title and series sorting in the library view.
# If set to 'library_order', leading articles such as The and A will be ignored.
# If set to 'strictly_alphabetic', the titles will be sorted without processing.
# For example, with library_order, The Client will sort under 'C'. With
# strictly_alphabetic, the book will sort under 'T'.
#: Control sorting of titles and series in the library display
# Control title and series sorting in the library view. If set to
# 'library_order', the title sort field will be used instead of the title.
# Unless you have manually edited the title sort field, leading articles such as
# The and A will be ignored. If set to 'strictly_alphabetic', the titles will be
# sorted as-is (sort by title instead of title sort). For example, with
# library_order, The Client will sort under 'C'. With strictly_alphabetic, the
# book will sort under 'T'.
# This flag affects Calibre's library display. It has no effect on devices. In
# addition, titles for books added before changing the flag will retain their
# order until the title is edited. Double-clicking on a title and hitting return

@ -140,11 +142,15 @@ title_series_sorting = 'library_order'

#: Control formatting of title and series when used in templates
# Control how title and series names are formatted when saving to disk/sending
# to device. If set to library_order, leading articles such as The and A will
# be put at the end.
# If set to 'strictly_alphabetic', the titles will be sorted without processing.
# For example, with library_order, "The Client" will become "Client, The". With
# strictly_alphabetic, it would remain "The Client".
# to device. The behavior depends on the field being processed. If processing
# title, then if this tweak is set to 'library_order', the title will be
# replaced with title_sort. If it is set to 'strictly_alphabetic', then the
# title will not be changed. If processing series, then if set to
# 'library_order', articles such as 'The' and 'An' will be moved to the end. If
# set to 'strictly_alphabetic', the series will be sent without change.
# For example, if the tweak is set to library_order, "The Lord of the Rings"
# will become "Lord of the Rings, The". If the tweak is set to
# strictly_alphabetic, it would remain "The Lord of the Rings".
save_template_title_series_sorting = 'library_order'
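The two tweaks are independent of each other; for example, a configuration that ignores leading articles when sorting the library view but leaves titles untouched when saving to disk or sending to a device would be (illustrative values only):

    title_series_sorting = 'library_order'
    save_template_title_series_sorting = 'strictly_alphabetic'
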
#: Set the list of words considered to be "articles" for sort strings
resources/recipes/abc_es.recipe (new file, 68 lines)
@ -0,0 +1,68 @@
__license__ = 'GPL v3'
__author__ = 'Ricardo Jurado'
__copyright__ = 'Ricardo Jurado'
__version__ = 'v0.4'
__date__ = '11 February 2011'

'''
http://www.abc.es/
'''

from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1296604369(BasicNewsRecipe):

    title = u'ABC.es'
    masthead_url = 'http://www.abc.es/img/logo-abc.gif'
    cover_url = 'http://www.abc.es/img/logo-abc.gif'
    publisher = u'Grupo VOCENTO'

    __author__ = 'Ricardo Jurado'
    description = 'Noticias de Spain y el mundo'
    category = 'News,Spain,National,International,Economy'
    oldest_article = 2
    max_articles_per_feed = 10

    no_stylesheets = True
    use_embedded_content = False
    encoding = 'ISO-8859-1'
    remove_javascript = True
    language = 'es'

    extra_css = """
        p{text-align: justify; font-size: 100%}
        body{ text-align: left; font-size:100% }
        h3{font-family: sans-serif; font-size:120%; font-weight:bold; text-align: justify; }
        h2{font-family: sans-serif; font-size:100%; font-weight:bold; text-align: justify; }
        h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
        """

    keep_only_tags = [
        # dict(name='h2', attrs={'class':['logos']}),
        dict(name='h3', attrs={'class':['overhead']}),
        dict(name='h1', attrs={'class':'headline'}),
        dict(name='h3', attrs={'class':['subhead']}),
        dict(name='div', attrs={'class':'datosi'}),
        dict(name='div', attrs={'class':'photo-alt1'}),
        dict(name='div', attrs={'class':'text'})
        ]

    # remove_tags_before = dict(name='div', attrs={'id':['cabecera2']})

    feeds = [
          (u'PORTADA', u'http://www.abc.es/rss/feeds/abcPortada.xml')
         ,(u'ULTIMAS', u'http://www.abc.es/rss/feeds/abc_ultima.xml')
         ,(u'NACIONAL', u'http://www.abc.es/rss/feeds/abc_EspanaEspana.xml')
         ,(u'INTERNACIONAL', u'http://www.abc.es/rss/feeds/abc_Internacional.xml')
         ,(u'OPINION', u'http://www.abc.es/rss/feeds/abc_opinioncompleto.xml')
         ,(u'BLOGS ABC', u'http://www.abc.es/rss/feeds/blogs-abc.xml')
         ,(u'ECONOMIA', u'http://www.abc.es/rss/feeds/abc_Economia.xml')
         ,(u'CIENCIA Y TECNOLOGIA', u'http://www.abc.es/rss/feeds/abc_Ciencia_Tecnologia.xml')
         ,(u'CULTURA', u'http://www.abc.es/rss/feeds/abc_Cultura.xml')
         ,(u'LIBROS', u'http://www.abc.es/rss/feeds/abc_Libros.xml')
         ,(u'MEDIOS Y REDES', u'http://www.abc.es/rss/feeds/ABC_Medios_Redes.xml')
         ,(u'EVASION', u'http://www.abc.es/rss/feeds/abc_evasion.xml')
         ,(u'ESPECTACULOS', u'http://www.abc.es/rss/feeds/abc_Espectaculos.xml')
         ,(u'GENTE', u'http://www.abc.es/rss/feeds/abc_Gente.xml')
         ,(u'DEPORTES', u'http://www.abc.es/rss/feeds/abc_Deportes.xml')
        ]
@ -5,8 +5,8 @@ __license__ = 'GPL v3'
__copyright__ = '04 December 2010, desUBIKado'
__author__ = 'desUBIKado'
__description__ = 'Daily newspaper from Aragon'
__version__ = 'v0.05'
__date__ = '07, December 2010'
__version__ = 'v0.07'
__date__ = '06, February 2011'
'''
elperiodicodearagon.com
'''

@ -38,22 +38,26 @@ class elperiodicodearagon(BasicNewsRecipe):
        ,'publisher' : publisher
        }

    feeds = [(u'Arag\xf3n', u'http://elperiodicodearagon.com/RSS/2.xml'),
             (u'Internacional', u'http://elperiodicodearagon.com/RSS/4.xml'),
             (u'Espa\xf1a', u'http://elperiodicodearagon.com/RSS/3.xml'),
             (u'Econom\xeda', u'http://elperiodicodearagon.com/RSS/5.xml'),
             (u'Deportes', u'http://elperiodicodearagon.com/RSS/7.xml'),
             (u'Real Zaragoza', u'http://elperiodicodearagon.com/RSS/10.xml'),
             (u'Opini\xf3n', u'http://elperiodicodearagon.com/RSS/103.xml'),
             (u'Escenarios', u'http://elperiodicodearagon.com/RSS/105.xml'),
             (u'Sociedad', u'http://elperiodicodearagon.com/RSS/104.xml'),
             (u'Gente', u'http://elperiodicodearagon.com/RSS/330.xml')]
    feeds = [
             (u'Arag\xf3n', u'http://elperiodicodearagon.com/RSS/2.xml'),
             (u'Internacional', u'http://elperiodicodearagon.com/RSS/4.xml'),
             (u'Espa\xf1a', u'http://elperiodicodearagon.com/RSS/3.xml'),
             (u'Econom\xeda', u'http://elperiodicodearagon.com/RSS/5.xml'),
             (u'Deportes', u'http://elperiodicodearagon.com/RSS/7.xml'),
             (u'Real Zaragoza', u'http://elperiodicodearagon.com/RSS/10.xml'),
             (u'Opini\xf3n', u'http://elperiodicodearagon.com/RSS/103.xml'),
             (u'Escenarios', u'http://elperiodicodearagon.com/RSS/105.xml'),
             (u'Sociedad', u'http://elperiodicodearagon.com/RSS/104.xml'),
             (u'Gente', u'http://elperiodicodearagon.com/RSS/330.xml')
            ]


    extra_css = '''
        h3{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:xx-large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        dd{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        h3 {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:30px;}
        h2 {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:18px;}
        h4 {font-family:Arial,Helvetica,sans-serif; font-style:italic; font-weight:normal;font-size:20px;}
        .columnaDeRecursosRelacionados {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:14px;}
        img{margin-bottom: 0.4em}
        '''

    remove_attributes = ['height','width']

@ -82,6 +86,7 @@ class elperiodicodearagon(BasicNewsRecipe):
        dict(name='a', attrs={'class':'AvisoComentario'}),
        dict(name='div', attrs={'class':'CajaAvisoComentario'}),
        dict(name='div', attrs={'class':'navegaNoticias'}),
        dict(name='div', attrs={'class':'Mensaje'}),
        dict(name='div', attrs={'id':'PaginadorDiCom'}),
        dict(name='div', attrs={'id':'CajaAccesoCuentaUsuario'}),
        dict(name='div', attrs={'id':'CintilloComentario'}),

@ -107,3 +112,15 @@ class elperiodicodearagon(BasicNewsRecipe):
        (re.compile(r'<p> </p>', re.DOTALL|re.IGNORECASE), lambda match: ''),
        (re.compile(r'<p id="">', re.DOTALL|re.IGNORECASE), lambda match: '<p>')
    ]

    # Replace the embedded YouTube video with an image

    def preprocess_html(self, soup):
        for video_yt in soup.findAll('iframe',{'title':'YouTube video player'}):
            if video_yt:
                video_yt.name = 'img'
                fuente = video_yt['src']
                fuente2 = fuente.replace('http://www.youtube.com/embed/','http://img.youtube.com/vi/')
                video_yt['src'] = fuente2 + '/0.jpg'

        return soup
@ -182,6 +182,10 @@ class NYTimes(BasicNewsRecipe):
                    'mediaOverlay slideshow',
                    'headlinesOnly multiline flush',
                    'wideThumb',
                    'video', #added 02-11-2011
                    'videoHeader',#added 02-11-2011
                    'articleInlineVideoHolder', #added 02-11-2011
                    'assetCompanionAd',
                    re.compile('^subNavigation'),
                    re.compile('^leaderboard'),
                    re.compile('^module'),

@ -664,7 +668,7 @@ class NYTimes(BasicNewsRecipe):

        try:
            #remove "Related content" bar
            runAroundsFound = soup.findAll('div',{'class':['articleInline runaroundLeft','articleInline doubleRule runaroundLeft','articleInline runaroundLeft firstArticleInline']})
            runAroundsFound = soup.findAll('div',{'class':['articleInline runaroundLeft','articleInline doubleRule runaroundLeft','articleInline runaroundLeft firstArticleInline','articleInline runaroundLeft ']})
            if runAroundsFound:
                for runAround in runAroundsFound:
                    #find all section headers
@ -672,6 +676,12 @@ class NYTimes(BasicNewsRecipe):
                    if hlines:
                        for hline in hlines:
                            hline.extract()

                    #find all section headers
                    hlines = runAround.findAll('h6')
                    if hlines:
                        for hline in hlines:
                            hline.extract()
        except:
            self.log("Error removing related content bar")
resources/recipes/tedneward.recipe (new file, 33 lines)
@ -0,0 +1,33 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
blogs.tedneward.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

class InteroperabilityHappens(BasicNewsRecipe):
    title = 'Interoperability Happens'
    __author__ = 'Darko Miletic'
    description = 'Tech blog by Ted Neward'
    oldest_article = 15
    max_articles_per_feed = 100
    language = 'en'
    encoding = 'utf-8'
    no_stylesheets = True
    use_embedded_content = True
    publication_type = 'blog'
    extra_css = """
        body{font-family: Verdana,Arial,Helvetica,sans-serif}
        """

    conversion_options = {
          'comment'  : description
        , 'tags'     : 'blog, technology, microsoft, programming, C#, Java'
        , 'publisher': 'Ted Neward'
        , 'language' : language
        }

    feeds = [(u'Posts', u'http://blogs.tedneward.com/SyndicationService.asmx/GetRss')]
resources/recipes/weblogs_sl.recipe (new file, 104 lines)
@ -0,0 +1,104 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '4 February 2011, desUBIKado'
__author__ = 'desUBIKado'
__version__ = 'v0.05'
__date__ = '9, February 2011'
'''
http://www.weblogssl.com/
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe

class weblogssl(BasicNewsRecipe):
    __author__ = 'desUBIKado'
    description = u'Weblogs colectivos dedicados a seguir la actualidad sobre tecnologia, entretenimiento, estilos de vida, motor, deportes y economia.'
    title = u'Weblogs SL (Xataka, Genbeta, VidaExtra, Blog de Cine y otros)'
    publisher = 'Weblogs SL'
    category = 'Gadgets, Tech news, Product reviews, mobiles, science, cinema, entertainment, culture, tv, food, recipes, life style, motor, F1, sports, economy'
    language = 'es'
    timefmt = '[%a, %d %b, %Y]'
    oldest_article = 1.5
    max_articles_per_feed = 100
    encoding = 'utf-8'
    use_embedded_content = False
    remove_empty_feeds = True
    remove_javascript = True
    no_stylesheets = True

    # If you do not want to fetch all of the blogs, you can skip any of them by
    # putting a # character in front of it, i.e. # (u'Applesfera', u'http://feeds.weblogssl.com/applesfera'),
    # would stop Applesfera from being downloaded. NOTE: the last feed must not
    # end with a trailing comma.

    feeds = [
             (u'Xataka', u'http://feeds.weblogssl.com/xataka2'),
             (u'Xataka M\xf3vil', u'http://feeds.weblogssl.com/xatakamovil'),
             (u'Xataka Android', u'http://feeds.weblogssl.com/xatakandroid'),
             (u'Xataka Foto', u'http://feeds.weblogssl.com/xatakafoto'),
             (u'Xataka ON', u'http://feeds.weblogssl.com/xatakaon'),
             (u'Xataka Ciencia', u'http://feeds.weblogssl.com/xatakaciencia'),
             (u'Genbeta', u'http://feeds.weblogssl.com/genbeta'),
             (u'Applesfera', u'http://feeds.weblogssl.com/applesfera'),
             (u'Vida Extra', u'http://feeds.weblogssl.com/vidaextra'),
             (u'Naci\xf3n Red', u'http://feeds.weblogssl.com/nacionred'),
             (u'Blog de Cine', u'http://feeds.weblogssl.com/blogdecine'),
             (u'Vaya tele', u'http://feeds.weblogssl.com/vayatele2'),
             (u'Hipers\xf3nica', u'http://feeds.weblogssl.com/hipersonica'),
             (u'Diario del viajero', u'http://feeds.weblogssl.com/diariodelviajero'),
             (u'Papel en blanco', u'http://feeds.weblogssl.com/papelenblanco'),
             (u'Pop rosa', u'http://feeds.weblogssl.com/poprosa'),
             (u'Zona FandoM', u'http://feeds.weblogssl.com/zonafandom'),
             (u'Fandemia', u'http://feeds.weblogssl.com/fandemia'),
             (u'Noctamina', u'http://feeds.weblogssl.com/noctamina'),
             (u'Tendencias', u'http://feeds.weblogssl.com/trendencias'),
             (u'Beb\xe9s y m\xe1s', u'http://feeds.weblogssl.com/bebesymas'),
             (u'Directo al paladar', u'http://feeds.weblogssl.com/directoalpaladar'),
             (u'Compradicci\xf3n', u'http://feeds.weblogssl.com/compradiccion'),
             (u'Decoesfera', u'http://feeds.weblogssl.com/decoesfera'),
             (u'Embelezzia', u'http://feeds.weblogssl.com/embelezzia'),
             (u'Vit\xf3nica', u'http://feeds.weblogssl.com/vitonica'),
             (u'Ambiente G', u'http://feeds.weblogssl.com/ambienteg'),
             (u'Arrebatadora', u'http://feeds.weblogssl.com/arrebatadora'),
             (u'Mensencia', u'http://feeds.weblogssl.com/mensencia'),
             (u'Peques y m\xe1s', u'http://feeds.weblogssl.com/pequesymas'),
             (u'Motorpasi\xf3n', u'http://feeds.weblogssl.com/motorpasion'),
             (u'Motorpasi\xf3n F1', u'http://feeds.weblogssl.com/motorpasionf1'),
             (u'Motorpasi\xf3n Moto', u'http://feeds.weblogssl.com/motorpasionmoto'),
             (u'Notas de futbol', u'http://feeds.weblogssl.com/notasdefutbol'),
             (u'Fuera de l\xedmites', u'http://feeds.weblogssl.com/fueradelimites'),
             (u'Salir a ganar', u'http://feeds.weblogssl.com/saliraganar'),
             (u'El blog salm\xf3n', u'http://feeds.weblogssl.com/elblogsalmon2'),
             (u'Pymes y aut\xf3nomos', u'http://feeds.weblogssl.com/pymesyautonomos'),
             (u'Tecnolog\xeda Pyme', u'http://feeds.weblogssl.com/tecnologiapyme'),
             (u'Ahorro diario', u'http://feeds.weblogssl.com/ahorrodiario')
            ]


    keep_only_tags = [dict(name='div', attrs={'id':'infoblock'}),
                      dict(name='div', attrs={'class':'post'}),
                      dict(name='div', attrs={'id':'blog-comments'})
                     ]

    remove_tags = [dict(name='div', attrs={'id':'comment-nav'})]

    def print_version(self, url):
        return url.replace('http://www.', 'http://m.')

    preprocess_regexps = [
        # Insert a blank line between one comment and the next
        (re.compile(r'<li id="c', re.DOTALL|re.IGNORECASE), lambda match: '<br><br><li id="c')
    ]

    # Replace the embedded YouTube video with an image

    def preprocess_html(self, soup):
        for video_yt in soup.findAll('iframe',{'title':'YouTube video player'}):
            if video_yt:
                video_yt.name = 'img'
                fuente = video_yt['src']
                fuente2 = fuente.replace('http://www.youtube.com/embed/','http://img.youtube.com/vi/')
                fuente3 = fuente2.replace('?rel=0','')
                video_yt['src'] = fuente3 + '/0.jpg'

        return soup
@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = 'calibre'
__version__ = '0.7.44'
__version__ = '0.7.45'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"

import re
@ -507,7 +507,7 @@ from calibre.devices.kobo.driver import KOBO
from calibre.devices.bambook.driver import BAMBOOK

from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \
    LibraryThing
    KentDistrictLibrary
from calibre.ebooks.metadata.douban import DoubanBooks
from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers
from calibre.ebooks.metadata.covers import OpenLibraryCovers, \

@ -517,7 +517,7 @@ from calibre.ebooks.epub.fix.unmanifested import Unmanifested
from calibre.ebooks.epub.fix.epubcheck import Epubcheck

plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon,
        LibraryThing, DoubanBooks, NiceBooks, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
        KentDistrictLibrary, DoubanBooks, NiceBooks, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
        Epubcheck, OpenLibraryCovers, LibraryThingCovers, DoubanCovers,
        NiceBooksCovers]
plugins += [
@ -83,7 +83,7 @@ class ANDROID(USBMS):
            'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
            'SCH-I500_CARD', 'SPH-D700_CARD', 'MB810', 'GT-P1000', 'DESIRE',
            'SGH-T849', '_MB300', 'A70S', 'S_ANDROID', 'A101IT', 'A70H',
            'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE']
            'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE', 'SCH-I800_CARD']

    WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
            'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
            'A70S', 'A101IT']
src/calibre/devices/kindle/apnx.py (new file, 68 lines)
@ -0,0 +1,68 @@
# -*- coding: utf-8 -*-

__license__ = 'GPL v3'
__copyright__ = '2011, John Schember <john at nachtimwald.com>'
__docformat__ = 'restructuredtext en'

'''
Generates and writes an APNX page mapping file.
'''

import struct
import uuid

from calibre.ebooks.pdb.header import PdbHeaderReader

class APNXBuilder(object):
    '''
    Currently uses the Adobe 1024 byte count equal one page formula.
    '''

    def write_apnx(self, mobi_file_path, apnx_path):
        with open(mobi_file_path, 'rb') as mf:
            phead = PdbHeaderReader(mf)
            r0 = phead.section_data(0)
            text_length = struct.unpack('>I', r0[4:8])[0]

        pages = self.get_pages(text_length)
        apnx = self.generate_apnx(pages)

        with open(apnx_path, 'wb') as apnxf:
            apnxf.write(apnx)

    def generate_apnx(self, pages):
        apnx = ''

        content_vals = {
            'guid': str(uuid.uuid4()).replace('-', '')[:8],
            'isbn': '',
        }

        content_header = '{"contentGuid":"%(guid)s","asin":"%(isbn)s","cdeType":"EBOK","fileRevisionId":"1"}' % content_vals
        page_header = '{"asin":"%(isbn)s","pageMap":"(1,a,1)"}' % content_vals

        apnx += struct.pack('>I', 65537)
        apnx += struct.pack('>I', 12 + len(content_header))
        apnx += struct.pack('>I', len(content_header))
        apnx += content_header
        apnx += struct.pack('>H', 1)
        apnx += struct.pack('>H', len(page_header))
        apnx += struct.pack('>H', len(pages))
        apnx += struct.pack('>H', 32)
        apnx += page_header

        # write page values to apnx
        for page in pages:
            apnx += struct.pack('>L', page)

        return apnx

    def get_pages(self, text_length):
        pages = []
        count = 0

        while count < text_length:
            pages.append(count)
            count += 1024

        return pages
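A quick usage sketch (the paths are hypothetical; this mirrors what the Kindle driver below does from upload_cover):

    builder = APNXBuilder()
    builder.write_apnx('/tmp/book.mobi', '/tmp/book.apnx')

With the 1024 byte formula, a book whose uncompressed text length is 300,000 bytes gets 293 page entries, at offsets 0, 1024, 2048, and so on.
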
src/calibre/devices/kindle/bookmark.py (new file, 315 lines)
@ -0,0 +1,315 @@
# -*- coding: utf-8 -*-

__license__ = 'GPL v3'
__docformat__ = 'restructuredtext en'

import os
from cStringIO import StringIO
from struct import unpack

class Bookmark(): # {{{
    '''
    A simple class fetching bookmark data
    Kindle-specific
    '''
    def __init__(self, path, id, book_format, bookmark_extension):
        self.book_format = book_format
        self.bookmark_extension = bookmark_extension
        self.book_length = 0
        self.id = id
        self.last_read = 0
        self.last_read_location = 0
        self.path = path
        self.timestamp = 0
        self.user_notes = None

        self.get_bookmark_data()
        self.get_book_length()
        try:
            self.percent_read = min(float(100*self.last_read / self.book_length),100)
        except:
            self.percent_read = 0

    def record(self, n):
        from calibre.ebooks.metadata.mobi import StreamSlicer
        if n >= self.nrecs:
            raise ValueError('non-existent record %r' % n)
        offoff = 78 + (8 * n)
        start, = unpack('>I', self.data[offoff + 0:offoff + 4])
        stop = None
        if n < (self.nrecs - 1):
            stop, = unpack('>I', self.data[offoff + 8:offoff + 12])
        return StreamSlicer(self.stream, start, stop)

    def get_bookmark_data(self):
        ''' Return the timestamp and last_read_location '''
        from calibre.ebooks.metadata.mobi import StreamSlicer
        user_notes = {}
        if self.bookmark_extension == 'mbp':
            MAGIC_MOBI_CONSTANT = 150
            with open(self.path,'rb') as f:
                stream = StringIO(f.read())
                data = StreamSlicer(stream)
                self.timestamp, = unpack('>I', data[0x24:0x28])
                bpar_offset, = unpack('>I', data[0x4e:0x52])
                lrlo = bpar_offset + 0x0c
                self.last_read = int(unpack('>I', data[lrlo:lrlo+4])[0])
                self.last_read_location = self.last_read/MAGIC_MOBI_CONSTANT + 1
                entries, = unpack('>I', data[0x4a:0x4e])

                # Store the annotations/locations
                bpl = bpar_offset + 4
                bpar_len, = unpack('>I', data[bpl:bpl+4])
                bpar_len += 8
                #print "bpar_len: 0x%x" % bpar_len
                eo = bpar_offset + bpar_len

                # Walk bookmark entries
                #print " --- %s --- " % self.path
                current_entry = 1
                sig = data[eo:eo+4]
                previous_block = None

                while sig == 'DATA':
                    text = None
                    entry_type = None
                    rec_len, = unpack('>I', data[eo+4:eo+8])
                    if rec_len == 0:
                        current_block = "empty_data"
                    elif data[eo+8:eo+12] == "EBAR":
                        current_block = "data_header"
                        #entry_type = "data_header"
                        location, = unpack('>I', data[eo+0x34:eo+0x38])
                        #print "data_header location: %d" % location
                    else:
                        current_block = "text_block"
                        if previous_block == 'empty_data':
                            entry_type = 'Note'
                        elif previous_block == 'data_header':
                            entry_type = 'Highlight'
                        text = data[eo+8:eo+8+rec_len].decode('utf-16-be')

                    if entry_type:
                        displayed_location = location/MAGIC_MOBI_CONSTANT + 1
                        user_notes[location] = dict(id=self.id,
                                                    displayed_location=displayed_location,
                                                    type=entry_type,
                                                    text=text)

                    eo += rec_len + 8
                    current_entry += 1
                    previous_block = current_block
                    sig = data[eo:eo+4]

                while sig == 'BKMK':
                    # Fix start location for Highlights using BKMK data
                    end_loc, = unpack('>I', data[eo+0x10:eo+0x14])

                    if end_loc in user_notes and \
                       (user_notes[end_loc]['type'] == 'Highlight' or \
                        user_notes[end_loc]['type'] == 'Note'):
                        # Switch location to start (0x08:0x0c)
                        start, = unpack('>I', data[eo+8:eo+12])
                        user_notes[start] = user_notes[end_loc]
                        '''
                        print " %s: swapping 0x%x (%d) to 0x%x (%d)" % (user_notes[end_loc]['type'],
                                                                        end_loc,
                                                                        end_loc/MAGIC_MOBI_CONSTANT + 1,
                                                                        start,
                                                                        start//MAGIC_MOBI_CONSTANT + 1)
                        '''
                        user_notes[start]['displayed_location'] = start/MAGIC_MOBI_CONSTANT + 1
                        user_notes.pop(end_loc)
                    else:
                        # If a bookmark coincides with a user annotation, the locs could
                        # be the same - cheat by nudging -1
                        # Skip bookmark for last_read_location
                        if end_loc != self.last_read:
                            # print " adding Bookmark at 0x%x (%d)" % (end_loc, end_loc/MAGIC_MOBI_CONSTANT + 1)
                            displayed_location = end_loc/MAGIC_MOBI_CONSTANT + 1
                            user_notes[end_loc - 1] = dict(id=self.id,
                                                           displayed_location=displayed_location,
                                                           type='Bookmark',
                                                           text=None)
                    rec_len, = unpack('>I', data[eo+4:eo+8])
                    eo += rec_len + 8
                    sig = data[eo:eo+4]

        elif self.bookmark_extension == 'tan':
            from calibre.ebooks.metadata.topaz import get_metadata as get_topaz_metadata

            def get_topaz_highlight(displayed_location):
                # Parse My Clippings.txt for a matching highlight
                # Search looks for book title match, highlight match, and location match
                # Author is not matched
                # This will find the first instance of a clipping only
                book_fs = self.path.replace('.%s' % self.bookmark_extension,'.%s' % self.book_format)
                with open(book_fs,'rb') as f2:
                    stream = StringIO(f2.read())
                    mi = get_topaz_metadata(stream)
                my_clippings = self.path
                split = my_clippings.find('documents') + len('documents/')
                my_clippings = my_clippings[:split] + "My Clippings.txt"
                try:
                    with open(my_clippings, 'r') as f2:
                        marker_found = 0
                        text = ''
                        search_str1 = '%s' % (mi.title)
                        search_str2 = '- Highlight Loc. %d' % (displayed_location)
                        for line in f2:
                            if marker_found == 0:
                                if line.startswith(search_str1):
                                    marker_found = 1
                            elif marker_found == 1:
                                if line.startswith(search_str2):
                                    marker_found = 2
                            elif marker_found == 2:
                                if line.startswith('=========='):
                                    break
                                text += line.strip()
                        else:
                            raise Exception('error')
                except:
                    text = '(Unable to extract highlight text from My Clippings.txt)'
                return text

            MAGIC_TOPAZ_CONSTANT = 33.33
            self.timestamp = os.path.getmtime(self.path)
            with open(self.path,'rb') as f:
                stream = StringIO(f.read())
                data = StreamSlicer(stream)
                self.last_read = int(unpack('>I', data[5:9])[0])
                self.last_read_location = self.last_read/MAGIC_TOPAZ_CONSTANT + 1
                entries, = unpack('>I', data[9:13])
                current_entry = 0
                e_base = 0x0d
                while current_entry < entries:
                    location, = unpack('>I', data[e_base+2:e_base+6])
                    text = None
                    text_len, = unpack('>I', data[e_base+0xA:e_base+0xE])
                    e_type, = unpack('>B', data[e_base+1])
                    if e_type == 0:
                        e_type = 'Bookmark'
                    elif e_type == 1:
                        e_type = 'Highlight'
                        text = get_topaz_highlight(location/MAGIC_TOPAZ_CONSTANT + 1)
                    elif e_type == 2:
                        e_type = 'Note'
                        text = data[e_base+0x10:e_base+0x10+text_len]
                    else:
                        e_type = 'Unknown annotation type'

                    displayed_location = location/MAGIC_TOPAZ_CONSTANT + 1
                    user_notes[location] = dict(id=self.id,
                                                displayed_location=displayed_location,
                                                type=e_type,
                                                text=text)
                    if text_len == 0xFFFFFFFF:
                        e_base = e_base + 14
                    else:
                        e_base = e_base + 14 + 2 + text_len
                    current_entry += 1
                for location in user_notes:
                    if location == self.last_read:
                        user_notes.pop(location)
                        break

        elif self.bookmark_extension == 'pdr':
            self.timestamp = os.path.getmtime(self.path)
            with open(self.path,'rb') as f:
                stream = StringIO(f.read())
                data = StreamSlicer(stream)
                self.last_read = int(unpack('>I', data[5:9])[0])
                entries, = unpack('>I', data[9:13])
                current_entry = 0
                e_base = 0x0d
                self.pdf_page_offset = 0
                while current_entry < entries:
                    '''
                    location, = unpack('>I', data[e_base+2:e_base+6])
                    text = None
                    text_len, = unpack('>I', data[e_base+0xA:e_base+0xE])
                    e_type, = unpack('>B', data[e_base+1])
                    if e_type == 0:
                        e_type = 'Bookmark'
                    elif e_type == 1:
                        e_type = 'Highlight'
                        text = get_topaz_highlight(location/MAGIC_TOPAZ_CONSTANT + 1)
                    elif e_type == 2:
                        e_type = 'Note'
                        text = data[e_base+0x10:e_base+0x10+text_len]
                    else:
                        e_type = 'Unknown annotation type'

                    if self.book_format in ['tpz','azw1']:
                        displayed_location = location/MAGIC_TOPAZ_CONSTANT + 1
                    elif self.book_format == 'pdf':
                        # *** This needs implementation
                        displayed_location = location
                    user_notes[location] = dict(id=self.id,
                                                displayed_location=displayed_location,
                                                type=e_type,
                                                text=text)
                    if text_len == 0xFFFFFFFF:
                        e_base = e_base + 14
                    else:
                        e_base = e_base + 14 + 2 + text_len
                    current_entry += 1
                    '''
                    # Use label as page number
                    pdf_location, = unpack('>I', data[e_base+1:e_base+5])
                    label_len, = unpack('>H', data[e_base+5:e_base+7])
                    location = int(data[e_base+7:e_base+7+label_len])
                    displayed_location = location
                    e_type = 'Bookmark'
                    text = None
                    user_notes[location] = dict(id=self.id,
                                                displayed_location=displayed_location,
                                                type=e_type,
                                                text=text)
                    self.pdf_page_offset = pdf_location - location
                    e_base += (7 + label_len)
                    current_entry += 1

                self.last_read_location = self.last_read - self.pdf_page_offset

        else:
            print "unsupported bookmark_extension: %s" % self.bookmark_extension
        self.user_notes = user_notes

    def get_book_length(self):
        from calibre.ebooks.metadata.mobi import StreamSlicer
        book_fs = self.path.replace('.%s' % self.bookmark_extension,'.%s' % self.book_format)

        self.book_length = 0
        if self.bookmark_extension == 'mbp':
            # Read the book len from the header
            try:
                with open(book_fs,'rb') as f:
                    self.stream = StringIO(f.read())
                    self.data = StreamSlicer(self.stream)
                    self.nrecs, = unpack('>H', self.data[76:78])
                    record0 = self.record(0)
                    self.book_length = int(unpack('>I', record0[0x04:0x08])[0])
            except:
                pass
        elif self.bookmark_extension == 'tan':
            # Read bookLength from metadata
            from calibre.ebooks.metadata.topaz import MetadataUpdater
            try:
                with open(book_fs,'rb') as f:
                    mu = MetadataUpdater(f)
                    self.book_length = mu.book_length
            except:
                pass
        elif self.bookmark_extension == 'pdr':
            from calibre import plugins
            try:
                self.book_length = plugins['pdfreflow'][0].get_numpages(open(book_fs).read())
            except:
                pass

        else:
            print "unsupported bookmark_extension: %s" % self.bookmark_extension

# }}}
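A hypothetical usage sketch of the class above, reading the annotations out of a Kindle .mbp sidecar file (the path and id value are made up; print statements follow the Python 2 style used throughout this file):

    bm = Bookmark('/mnt/kindle/documents/book.mbp', 3,
                  book_format='mobi', bookmark_extension='mbp')
    print bm.last_read_location, bm.percent_read
    for loc in sorted(bm.user_notes):
        note = bm.user_notes[loc]
        print note['displayed_location'], note['type'], note['text']
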
@ -7,10 +7,11 @@ __docformat__ = 'restructuredtext en'
'''
Device driver for Amazon's Kindle
'''
import datetime, os, re, sys, json, hashlib

from cStringIO import StringIO
from struct import unpack

import datetime, os, re, sys, json, hashlib

from calibre.devices.kindle.apnx import APNXBuilder
from calibre.devices.kindle.bookmark import Bookmark
from calibre.devices.usbms.driver import USBMS

'''

@ -170,6 +171,8 @@ class KINDLE2(KINDLE):

    description = _('Communicate with the Kindle 2/3 eBook reader.')

    FORMATS = KINDLE.FORMATS + ['pdf']
    DELETE_EXTS = KINDLE.DELETE_EXTS + ['.apnx']

    PRODUCT_ID = [0x0002, 0x0004]
    BCD = [0x0100]

@ -205,6 +208,23 @@ class KINDLE2(KINDLE):
        if h in path_map:
            book.device_collections = list(sorted(path_map[h]))

    def upload_cover(self, path, filename, metadata, filepath):
        '''
        Hijacking this function to write the apnx file.
        '''
        if not filepath.lower().endswith('.mobi'):
            return

        apnx_path = '%s.apnx' % os.path.join(path, filename)
        apnx_builder = APNXBuilder()
        try:
            apnx_builder.write_apnx(filepath, apnx_path)
        except:
            print 'Failed to generate APNX'
            import traceback
            traceback.print_exc()


class KINDLE_DX(KINDLE2):

    name = 'Kindle DX Device Interface'

@ -214,310 +234,3 @@ class KINDLE_DX(KINDLE2):

    PRODUCT_ID = [0x0003]
    BCD = [0x0100]
    [... 310 removed lines: the Bookmark class, moved verbatim to the new
    src/calibre/devices/kindle/bookmark.py shown above ...]
@ -76,11 +76,11 @@ class E52(USBMS):
    supported_platforms = ['windows', 'linux', 'osx']

    VENDOR_ID = [0x421]
    PRODUCT_ID = [0x1CD]
    PRODUCT_ID = [0x1CD, 0x273]
    BCD = [0x100]


    FORMATS = ['mobi', 'prc']
    FORMATS = ['epub', 'fb2', 'mobi', 'prc', 'txt']

    EBOOK_DIR_MAIN = 'eBooks'
    SUPPORTS_SUB_DIRS = True
@ -216,21 +216,22 @@ class EPUBOutput(OutputFormatPlugin):
        encryption = self.encrypt_fonts(encrypted_fonts, tdir, uuid)

        from calibre.ebooks.epub import initialize_container
        epub = initialize_container(output_path, os.path.basename(opf),
                extra_entries=extra_entries)
        epub.add_dir(tdir)
        if encryption is not None:
            epub.writestr('META-INF/encryption.xml', encryption)
        if metadata_xml is not None:
            epub.writestr('META-INF/metadata.xml',
                    metadata_xml.encode('utf-8'))
        with initialize_container(output_path, os.path.basename(opf),
                extra_entries=extra_entries) as epub:
            epub.add_dir(tdir)
            if encryption is not None:
                epub.writestr('META-INF/encryption.xml', encryption)
            if metadata_xml is not None:
                epub.writestr('META-INF/metadata.xml',
                        metadata_xml.encode('utf-8'))
        if opts.extract_to is not None:
            from calibre.utils.zipfile import ZipFile
            if os.path.exists(opts.extract_to):
                shutil.rmtree(opts.extract_to)
            os.mkdir(opts.extract_to)
            epub.extractall(path=opts.extract_to)
            with ZipFile(output_path) as zf:
                zf.extractall(path=opts.extract_to)
            self.log.info('EPUB extracted to', opts.extract_to)
        epub.close()

    def encrypt_fonts(self, uris, tdir, uuid): # {{{
        from binascii import unhexlify
@ -247,30 +247,24 @@ class Amazon(MetadataSource): # {{{

# }}}

class LibraryThing(MetadataSource): # {{{
class KentDistrictLibrary(MetadataSource): # {{{

    name = 'LibraryThing'
    name = 'Kent District Library'
    metadata_type = 'social'
    description = _('Downloads series/covers/rating information from librarything.com')
    description = _('Downloads series information from ww2.kdl.org')

    def fetch(self):
        if not self.isbn or not self.site_customization:
        if not self.title or not self.book_author:
            return
        from calibre.ebooks.metadata.library_thing import get_social_metadata
        un, _, pw = self.site_customization.partition(':')
        from calibre.ebooks.metadata.kdl import get_series
        try:
            self.results = get_social_metadata(self.title, self.book_author,
                    self.publisher, self.isbn, username=un, password=pw)
            self.results = get_series(self.title, self.book_author)
        except Exception, e:
            import traceback
            traceback.print_exc()
            self.exception = e
            self.tb = traceback.format_exc()

    @property
    def string_customization_help(self):
        ans = _('To use librarything.com you must sign up for a %sfree account%s '
                'and enter your username and password separated by a : below.')
        return '<p>'+ans%('<a href="http://www.librarything.com">', '</a>')

# }}}
src/calibre/ebooks/metadata/kdl.py (new file, 79 lines)
@ -0,0 +1,79 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import re, urllib, urlparse

from calibre.ebooks.metadata.book.base import Metadata
from calibre import browser
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.chardet import xml_to_unicode

URL = \
"http://ww2.kdl.org/libcat/WhatsNext.asp?AuthorLastName={0}&AuthorFirstName=&SeriesName=&BookTitle={1}&CategoryID=0&cmdSearch=Search&Search=1&grouping="

_ignore_starts = u'\'"'+u''.join(unichr(x) for x in range(0x2018, 0x201e)+[0x2032, 0x2033])

def get_series(title, authors):
    mi = Metadata(title, authors)
    if title and title[0] in _ignore_starts:
        title = title[1:]
    title = re.sub(r'^(A|The|An)\s+', '', title).strip()
    if not title:
        return mi
    if isinstance(title, unicode):
        title = title.encode('utf-8')

    title = urllib.quote_plus(title)

    author = authors[0].strip()
    if not author:
        return mi
    if ',' in author:
        author = author.split(',')[0]
    else:
        author = author.split()[-1]

    url = URL.format(author, title)
    br = browser()
    raw = br.open(url).read()
    if 'see the full results' not in raw:
        return mi
    raw = xml_to_unicode(raw)[0]
    soup = BeautifulSoup(raw)
    searcharea = soup.find('div', attrs={'class':'searcharea'})
    if searcharea is None:
        return mi
    ss = searcharea.find('div', attrs={'class':'seriessearch'})
    if ss is None:
        return mi
    a = ss.find('a', href=True)
    if a is None:
        return mi
    href = a['href'].partition('?')[-1]
    data = urlparse.parse_qs(href)
    series = data.get('SeriesName', [])
    if not series:
        return mi
    series = series[0]
    series = re.sub(r' series$', '', series).strip()
    if series:
        mi.series = series
        ns = ss.nextSibling
        if ns.contents:
            raw = unicode(ns.contents[0])
            raw = raw.partition('.')[0].strip()
            try:
                mi.series_index = int(raw)
            except:
                pass
    return mi


if __name__ == '__main__':
    import sys
    print get_series(sys.argv[-2], [sys.argv[-1]])
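For reference, a quick way to exercise the new module from Python (the output shown is illustrative; it depends on what the KDL catalogue returns for the query):

    from calibre.ebooks.metadata.kdl import get_series

    mi = get_series('The Fellowship of the Ring', ['J. R. R. Tolkien'])
    print mi.series, mi.series_index   # e.g. Lord of the Rings 1

or equivalently from the command line, via the __main__ block above:

    python src/calibre/ebooks/metadata/kdl.py "The Fellowship of the Ring" "J. R. R. Tolkien"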
@ -39,6 +39,13 @@ def asfloat(value):
         return 0.0
     return float(value)

+def isspace(text):
+    if not text:
+        return True
+    if u'\xa0' in text:
+        return False
+    return text.isspace()
+
 class BlockState(object):
     def __init__(self, body):
         self.body = body

@ -438,7 +445,7 @@ class MobiMLizer(object):
         if elem.text:
             if istate.preserve:
                 text = elem.text
-            elif len(elem) > 0 and elem.text.isspace():
+            elif len(elem) > 0 and isspace(elem.text):
                 text = None
             else:
                 text = COLLAPSE.sub(' ', elem.text)

@ -481,7 +488,7 @@ class MobiMLizer(object):
         if child.tail:
             if istate.preserve:
                 tail = child.tail
-            elif bstate.para is None and child.tail.isspace():
+            elif bstate.para is None and isspace(child.tail):
                 tail = None
             else:
                 tail = COLLAPSE.sub(' ', child.tail)
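The new isspace() helper exists because Python 2's unicode.isspace() counts the non-breaking space (U+00A0) as whitespace, so text made up only of NBSPs was collapsed away at the two call sites above. A quick check of the standard behaviour:

    print u'\xa0'.isspace()       # -> True: the built-in treats NBSP as whitespace
    print u'\xa0word'.isspace()   # -> False
    # With the helper, isspace(u'\xa0') is False, so runs of non-breaking
    # spaces survive the whitespace-collapsing pass in MOBI output.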
@ -70,7 +70,7 @@ class PML_HTMLizer(object):
         'c': ('<div style="text-align: center; margin: auto;">', '</div>'),
         'r': ('<div style="text-align: right;">', '</div>'),
         't': ('<div style="margin-left: 5%;">', '</div>'),
-        'T': ('<div style="margin-left: %s;">', '</div>'),
+        'T': ('<div style="text-indent: %s;">', '</div>'),
         'i': ('<span style="font-style: italic;">', '</span>'),
         'u': ('<span style="text-decoration: underline;">', '</span>'),
         'd': ('<span style="text-decoration: line-through;">', '</span>'),
@ -499,7 +499,13 @@ class PML_HTMLizer(object):
         self.toc = []
         self.file_name = file_name

-        indent_state = {'t': False, 'T': False}
+        # t: Are we in an open \t tag set?
+        # T: Are we in an open \T?
+        # st: Did the \t start the line?
+        # sT: Did the \T start the line?
+        # et: Did the \t end the line?
+        indent_state = {'t': False, 'T': False, 'st': False, 'sT': False, 'et': False}
         basic_indent = False
         adv_indent_val = ''
         # Keep track of the number of empty lines
         # between paragraphs. When we reach a set number
@ -512,8 +518,26 @@ class PML_HTMLizer(object):
         for line in pml.splitlines():
             parsed = []
             empty = True

             basic_indent = indent_state['t']
-            adv_indent = indent_state['T']
-            indent_state['T'] = False
+            # Determine if the \t starts the line or if we are
+            # in an open \t block.
+            if line.lstrip().startswith('\\t') or basic_indent:
+                basic_indent = True
+                indent_state['st'] = True
+            else:
+                indent_state['st'] = False
+            # Determine if the \T starts the line.
+            if line.lstrip().startswith('\\T'):
+                indent_state['sT'] = True
+            else:
+                indent_state['sT'] = False
+            # Determine if the \t ends the line.
+            if line.rstrip().endswith('\\t'):
+                indent_state['et'] = True
+            else:
+                indent_state['et'] = False

             # Must use StringIO, cStringIO does not support unicode
             line = StringIO.StringIO(line)
@ -575,13 +599,10 @@ class PML_HTMLizer(object):
                     empty = False
                     text = '<hr width="%s" />' % self.code_value(line)
                 elif c == 't':
-                    indent_state[c] = not indent_state[c]
-                    if indent_state[c]:
-                        basic_indent = True
+                    indent_state['t'] = not indent_state['t']
                 elif c == 'T':
+                    # Ensure we only store the value on the first T set for the line.
+                    if not indent_state['T']:
-                        adv_indent = True
                         adv_indent_val = self.code_value(line)
+                    else:
+                        # We detected a T previously on this line.
@ -610,10 +631,23 @@ class PML_HTMLizer(object):
                     text = self.end_line()
                 parsed.append(text)

+            # Basic indent will be set if the \t starts the line or
+            # if we are in a continuing \t block.
             if basic_indent:
+                # if the \t started the line and either it ended the line or the \t
+                # block is still open use a left margin.
+                if indent_state['st'] and (indent_state['et'] or indent_state['t']):
                     parsed.insert(0, self.STATES_TAGS['t'][0])
                     parsed.append(self.STATES_TAGS['t'][1])
-            elif adv_indent:
+                # Use a text indent instead of a margin.
+                # This handles cases such as:
+                # \tO\tne upon a time...
+                else:
+                    parsed.insert(0, self.STATES_TAGS['T'][0] % '5%')
+                    parsed.append(self.STATES_TAGS['T'][1])
+            # \t will override \T's on the line.
+            # We only handle \T's that started the line.
+            elif indent_state['T'] and indent_state['sT']:
                 parsed.insert(0, self.STATES_TAGS['T'][0] % adv_indent_val)
                 parsed.append(self.STATES_TAGS['T'][1])
+                indent_state['T'] = False
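Put together, the new state flags change what \t produces depending on where it opens and closes. A sketch of the intended translations, assumed from the rules above rather than captured from calibre output:

    # \tIndented paragraph\t       (starts and ends the line) -> margin, as before:
    #     <div style="margin-left: 5%;">Indented paragraph</div>
    # \tO\tne upon a time...       (\t wraps only the first letter) -> first-line indent:
    #     <div style="text-indent: 5%;">One upon a time...</div>
    # A \T that does not start its line is now ignored; one that does
    # produces <div style="text-indent: ...;"> with the \T's value.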
@ -237,6 +237,7 @@ class ChooseLibraryAction(InterfaceAction):
             return
         self.stats.rename(location, newloc)
         self.build_menus()
+        self.gui.iactions['Copy To Library'].build_menus()

     def delete_requested(self, name, location):
         loc = location.replace('/', os.sep)

@ -253,6 +254,7 @@ class ChooseLibraryAction(InterfaceAction):
             pass
         self.stats.remove(location)
         self.build_menus()
+        self.gui.iactions['Copy To Library'].build_menus()

     def backup_status(self, location):
         dirty_text = 'no'

@ -329,6 +331,7 @@ class ChooseLibraryAction(InterfaceAction):
                 ' libraries.')%loc, show=True)
             self.stats.remove(location)
             self.build_menus()
+            self.gui.iactions['Copy To Library'].build_menus()
             return

         prefs['library_path'] = loc
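The same pair of calls now follows every rename, delete and move, so the Copy to Library menu can no longer go stale. A hypothetical refactor (not part of this commit) would capture the pattern in one helper:

    def _rebuild_library_menus(self):
        # Keep this action's own menu and the Copy To Library
        # menu in sync with the current list of libraries.
        self.build_menus()
        self.gui.iactions['Copy To Library'].build_menus()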
@ -371,9 +374,20 @@ class ChooseLibraryAction(InterfaceAction):
         if not self.change_library_allowed():
             return
         from calibre.gui2.dialogs.choose_library import ChooseLibrary
         self.gui.library_view.save_state()
         db = self.gui.library_view.model().db
-        c = ChooseLibrary(db, self.gui.library_moved, self.gui)
+        location = self.stats.canonicalize_path(db.library_path)
+        self.pre_choose_dialog_location = location
+        c = ChooseLibrary(db, self.choose_library_callback, self.gui)
         c.exec_()
+        self.choose_dialog_library_renamed = getattr(c, 'library_renamed', False)
+
+    def choose_library_callback(self, newloc, copy_structure=False):
+        self.gui.library_moved(newloc, copy_structure=copy_structure)
+        if getattr(self, 'choose_dialog_library_renamed', False):
+            self.stats.rename(self.pre_choose_dialog_location, prefs['library_path'])
+        self.build_menus()
+        self.gui.iactions['Copy To Library'].build_menus()

     def change_library_allowed(self):
         if os.environ.get('CALIBRE_OVERRIDE_DATABASE_PATH', None):
@ -71,6 +71,8 @@ class ChooseLibrary(QDialog, Ui_Dialog):
             prefs['library_path'] = loc
             self.callback(loc, copy_structure=self.copy_structure.isChecked())
         else:
+            self.db.prefs.disable_setting = True
+            self.library_renamed = True
             move_library(self.db.library_path, loc, self.parent(),
                     self.callback)
@ -60,7 +60,8 @@ class Tweak(object): # {{{
         return ans

     def __cmp__(self, other):
-        return cmp(self.is_customized, getattr(other, 'is_customized', False))
+        return -1 * cmp(self.is_customized,
+                getattr(other, 'is_customized', False))

     @property
     def is_customized(self):
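Negating the comparison makes customized tweaks sort before default ones, since True > False. A standalone illustration of the ordering (sketch, Python 2 semantics):

    class T(object):
        def __init__(self, customized):
            self.is_customized = customized
        def __cmp__(self, other):
            # customized entries compare as "smaller", i.e. first
            return -1 * cmp(self.is_customized, other.is_customized)

    ts = [T(False), T(True), T(False)]
    ts.sort()
    print [t.is_customized for t in ts]   # -> [True, False, False]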
@ -111,7 +112,10 @@ class Tweaks(QAbstractListModel): # {{{
         if role == Qt.ToolTipRole:
             tt = _('This tweak has its default value')
             if tweak.is_customized:
-                tt = _('This tweak has been customized')
+                tt = '<p>'+_('This tweak has been customized')
+                tt += '<pre>'
+                for varn, val in tweak.custom_values.iteritems():
+                    tt += '%s = %r\n\n'%(varn, val)
             return tt
         if role == Qt.UserRole:
             return tweak
@ -136,6 +140,7 @@ class Tweaks(QAbstractListModel): # {{{
             pos = self.read_tweak(lines, pos, dl, l)
             pos += 1

+        self.tweaks.sort()
         default_keys = set(dl.iterkeys())
         custom_keys = set(l.iterkeys())
@ -227,8 +232,12 @@ class PluginTweaks(QDialog): # {{{
         self.highlighter = PythonHighlighter(self.edit.document())
         self.l = QVBoxLayout()
         self.setLayout(self.l)
-        self.l.addWidget(QLabel(
-            _('Add/edit tweaks for any custom plugins you have installed.')))
+        self.msg = QLabel(
+            _('Add/edit tweaks for any custom plugins you have installed. '
+                'Documentation for these tweaks should be available '
+                'on the website from where you downloaded the plugins.'))
+        self.msg.setWordWrap(True)
+        self.l.addWidget(self.msg)
         self.l.addWidget(self.edit)
         self.edit.setPlainText(raw)
         self.bb = QDialogButtonBox(QDialogButtonBox.Ok|QDialogButtonBox.Cancel,
@ -440,16 +440,17 @@ class Document(QWebPage): # {{{

     @property
     def height(self):
-        j = self.javascript('document.body.offsetHeight', 'int')
+        # Note that document.body.offsetHeight does not include top and bottom
+        # margins on body and in some cases does not include the top margin on
+        # the first element inside body either. See ticket #8791 for an example
+        # of the latter.
         q = self.mainFrame().contentsSize().height()
-        if q == j:
-            return j
-        if min(j, q) <= 0:
-            return max(j, q)
-        window_height = self.window_height
-        if j == window_height:
-            return j if q < 1.2*j else q
-        return j
+        if q < 0:
+            # Don't know if this is still needed, but it can't hurt
+            j = self.javascript('document.body.offsetHeight', 'int')
+            if j >= 0:
+                q = j
+        return q

     @property
     def width(self):
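The old heuristic comparisons against offsetHeight are gone; the property now trusts QtWebKit's contentsSize() and falls back to JavaScript only when it reports a negative height. Distilled into a plain function (a sketch of the logic above, not calibre code):

    def effective_height(contents_height, offset_height):
        q = contents_height
        if q < 0 and offset_height >= 0:
            # fall back to document.body.offsetHeight
            q = offset_height
        return q

    print effective_height(1200, 900)   # -> 1200 (contentsSize wins)
    print effective_height(-1, 900)     # -> 900  (fallback)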
@ -7,7 +7,6 @@ __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

 import os, traceback, cStringIO, re, shutil
-from functools import partial

 from calibre.constants import DEBUG
 from calibre.utils.config import Config, StringConfig, tweaks

@ -142,11 +141,19 @@ class SafeFormat(TemplateFormatter):
 def get_components(template, mi, id, timefmt='%b %Y', length=250,
         sanitize_func=ascii_filename, replace_whitespace=False,
         to_lowercase=False):
-    tsfmt = partial(title_sort, order=tweaks['save_template_title_series_sorting'])

+    tsorder = tweaks['save_template_title_series_sorting']
     format_args = FORMAT_ARGS.copy()
     format_args.update(mi.all_non_none_fields())
     if mi.title:
-        format_args['title'] = tsfmt(mi.title)
+        if tsorder == 'strictly_alphabetic':
+            v = mi.title
+        else:
+            # title_sort might be missing or empty. Check both conditions
+            v = mi.get('title_sort', None)
+            if not v:
+                v = title_sort(mi.title, order=tsorder)
+        format_args['title'] = v
     if mi.authors:
         format_args['authors'] = mi.format_authors()
         format_args['author'] = format_args['authors']

@ -157,7 +164,7 @@ def get_components(template, mi, id, timefmt='%b %Y', length=250,
         else:
             format_args['tags'] = ''
         if mi.series:
-            format_args['series'] = tsfmt(mi.series)
+            format_args['series'] = title_sort(mi.series, order=tsorder)
             if mi.series_index is not None:
                 format_args['series_index'] = mi.format_series_index()
         else:

@ -176,7 +183,7 @@ def get_components(template, mi, id, timefmt='%b %Y', length=250,
             cm = custom_metadata[key]
             ## TODO: NEWMETA: should ratings be divided by 2? The standard rating isn't...
             if cm['datatype'] == 'series':
-                format_args[key] = tsfmt(format_args[key])
+                format_args[key] = title_sort(format_args[key], order=tsorder)
                 if key+'_index' in format_args:
                     format_args[key+'_index'] = fmt_sidx(format_args[key+'_index'])
             elif cm['datatype'] == 'datetime':
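Net effect: the save template now honours the save_template_title_series_sorting tweak without going through functools.partial, and prefers an explicit title_sort field when one exists. For example (assuming the usual import location, and expected values taken from the tweak's documented behaviour):

    from calibre.ebooks.metadata import title_sort

    print title_sort('The Great Gatsby', order='library_order')
    # -> 'Great Gatsby, The'  (leading article moved)
    print title_sort('The Great Gatsby', order='strictly_alphabetic')
    # -> 'The Great Gatsby'   (title left untouched)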
@ -561,9 +561,10 @@ format, whether input or output are available in the conversion dialog under the
 Convert Microsoft Word documents
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

-|app| does not directly convert .doc files from Microsoft Word. However, in Word, you can save the document
+|app| does not directly convert .doc/.docx files from Microsoft Word. However, in Word, you can save the document
 as HTML and then convert the resulting HTML file with |app|. When saving as HTML, be sure to use the
-"Save as Web Page, Filtered" option as this will produce clean HTML that will convert well.
+"Save as Web Page, Filtered" option, as this will produce clean HTML that converts well. Note that Word
+produces really messy HTML, so converting it can take a long time; be patient.

 There is a Word macro package that can automate the conversion of Word documents using |app|. It also makes
 generating the Table of Contents much simpler. It is called BookCreator and is available for free
File diff suppressed because it is too large
@ -8,11 +8,13 @@ import re, htmlentitydefs
 _ascii_pat = None

 def clean_ascii_chars(txt, charlist=None):
-    'remove ASCII invalid chars : 0 to 8 and 11-14 to 24-26-27 by default'
+    '''
+    Remove ASCII control chars: 0 to 8 and 11, 12, 14-31 by default
+    This is all control chars except \\t,\\n and \\r
+    '''
     global _ascii_pat
     if _ascii_pat is None:
-        chars = list(range(8)) + [0x0B, 0x0E, 0x0F] + list(range(0x10, 0x19)) \
-                + [0x1A, 0x1B]
+        chars = list(range(8)) + [0x0B, 0x0C] + list(range(0x0E, 0x1F))
         _ascii_pat = re.compile(u'|'.join(map(unichr, chars)))

     if charlist is None:
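A quick check of the new behaviour (sketch): control characters are stripped, while tab, newline and carriage return survive, which is what keeps XML parsing of article descriptions from failing:

    from calibre.utils.cleantext import clean_ascii_chars

    raw = u'ok\x00\x1a\ttabbed\nline'
    print repr(clean_ascii_chars(raw))   # -> u'ok\ttabbed\nline'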
@ -13,6 +13,7 @@ from calibre.web.feeds.feedparser import parse
 from calibre.utils.logging import default_log
 from calibre import entity_to_unicode, strftime
 from calibre.utils.date import dt_factory, utcnow, local_tz
+from calibre.utils.cleantext import clean_ascii_chars

 class Article(object):

@ -43,7 +44,7 @@ class Article(object):
                 print summary.encode('utf-8')
                 traceback.print_exc()
                 summary = u''
-        self.text_summary = summary
+        self.text_summary = clean_ascii_chars(summary)
         self.author = author
         self.content = content
         self.date = published