From 2f5f2a9d335a77abaa97fe34ef86592c3acab5e3 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Mon, 26 Jul 2010 22:43:11 +0200
Subject: [PATCH 001/163] Bug correction: negative values of first line indent
 were converted to positive values, causing a lot of formatting problems
---
src/calibre/ebooks/rtf2xml/process_tokens.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/calibre/ebooks/rtf2xml/process_tokens.py b/src/calibre/ebooks/rtf2xml/process_tokens.py
index 19a7d38135..9cb7c3c6a4 100755
--- a/src/calibre/ebooks/rtf2xml/process_tokens.py
+++ b/src/calibre/ebooks/rtf2xml/process_tokens.py
@@ -680,7 +680,7 @@ class ProcessTokens:
return the_string
def divide_num(self, numerator, denominator):
try:
- numerator = float(re.search('[0-9.]+', numerator).group())
+ numerator = float(re.search('[0-9.\-]+', numerator).group()) #calibre: why ignore negative numbers? Wrong in case of \fi
except TypeError, msg:
if self.__run_level > 3:
msg = 'no number to process?\n'
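A minimal standalone sketch of the behaviour this fix targets (the token value below is illustrative, not taken from a real RTF file): a first-line indent such as \fi-360 carries a leading minus sign, and a character class without the '-' silently drops it, turning the indent positive.

    import re

    old_pat = re.compile('[0-9.]+')      # drops the sign
    new_pat = re.compile('[0-9.\-]+')    # keeps the sign

    token = '\\fi-360'   # negative first-line indent, in twips
    print(float(old_pat.search(token).group()))   # 360.0
    print(float(new_pat.search(token).group()))   # -360.0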
From a2702d99c29c2a2eb86c1f957141544f2e11399b Mon Sep 17 00:00:00 2001
From: Sengian
Date: Tue, 27 Jul 2010 19:33:12 +0200
Subject: [PATCH 002/163] Formatting
---
resources/templates/rtf.xsl | 4 ----
src/calibre/ebooks/rtf/input.py | 7 -------
2 files changed, 11 deletions(-)
diff --git a/resources/templates/rtf.xsl b/resources/templates/rtf.xsl
index bf016efaaf..ae054186d4 100644
--- a/resources/templates/rtf.xsl
+++ b/resources/templates/rtf.xsl
@@ -81,7 +81,6 @@
-
@@ -182,14 +181,12 @@
-
-
unnamed
@@ -386,7 +383,6 @@
-
truetruefalse
diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py
index 50f5571d58..df74a7b3cb 100644
--- a/src/calibre/ebooks/rtf/input.py
+++ b/src/calibre/ebooks/rtf/input.py
@@ -141,7 +141,6 @@ class RTFInput(InputFormatPlugin):
return name
-
def write_inline_css(self, ic):
font_size_classes = ['span.fs%d { font-size: %spt }'%(i, x) for i, x in
enumerate(ic.font_sizes)]
@@ -152,17 +151,11 @@ class RTFInput(InputFormatPlugin):
text-decoration: none; font-weight: normal;
font-style: normal; font-variant: normal
}
-
span.italics { font-style: italic }
-
span.bold { font-weight: bold }
-
span.small-caps { font-variant: small-caps }
-
span.underlined { text-decoration: underline }
-
span.strike-through { text-decoration: line-through }
-
''')
css += '\n'+'\n'.join(font_size_classes)
css += '\n' +'\n'.join(color_classes)
From 3cf9f7986a174a4404764790800272f2ecdf787d Mon Sep 17 00:00:00 2001
From: Sengian
Date: Wed, 28 Jul 2010 00:47:31 +0200
Subject: [PATCH 003/163] Implementation of a multiple replace class based on
 dict substitutions. Very fast for large dictionaries.
---
src/calibre/utils/mreplace.py | 32 ++++++++++++++++++++++++++++++++
1 file changed, 32 insertions(+)
create mode 100644 src/calibre/utils/mreplace.py
diff --git a/src/calibre/utils/mreplace.py b/src/calibre/utils/mreplace.py
new file mode 100644
index 0000000000..dff5fab578
--- /dev/null
+++ b/src/calibre/utils/mreplace.py
@@ -0,0 +1,32 @@
+#multiple replace from a dictionary: http://code.activestate.com/recipes/81330/
+__license__ = 'GPL v3'
+__copyright__ = '2010, sengian '
+__docformat__ = 'restructuredtext en'
+
+import re
+from UserDict import UserDict
+
+class MReplace(UserDict):
+ def __init__(self, dict = None):
+ UserDict.__init__(self, dict)
+ self.re = None
+ self.regex = None
+ self.compile_regex()
+
+ def compile_regex(self):
+ if len(self.data) > 0:
+ keys = sorted(self.data.keys(), key=len)
+ keys.reverse()
+ tmp = "(%s)" % "|".join([re.escape(item) for item in keys])
+ if self.re != tmp:
+ self.re = tmp
+ self.regex = re.compile(self.re)
+
+ def __call__(self, mo):
+ return self[mo.string[mo.start():mo.end()]]
+
+ def mreplace(self, text):
+ #Replace without regex compile
+ if len(self.data) < 1 or self.re is None:
+ return text
+ return self.regex.sub(self, text)
\ No newline at end of file
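A short usage sketch of the new class; the substitution table here is made up for illustration, only the import path and the mreplace() call come from this patch:

    from calibre.utils.mreplace import MReplace

    # All keys are combined into a single alternation regex, sorted longest
    # first so overlapping entries behave predictably.
    subst = MReplace({'&amp;': '&', '&lt;': '<', '&gt;': '>'})
    print(subst.mreplace('a &lt;b&gt; &amp; c'))   # a <b> & c

Because the whole table is compiled into one regular expression, the input is scanned only once, which is what makes this fast for very large dictionaries.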
From 7ebf416513125cee88fc487aa3306a25e4ac6681 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Wed, 28 Jul 2010 00:49:37 +0200
Subject: [PATCH 004/163] Modifications of BIBTEX catalog generation: create a
 class for bibtex functions, use the new MReplace class as the dictionary
 is very large. Divides the total execution time by 10.
---
src/calibre/library/catalog.py | 41 ++++++-----
src/calibre/utils/bibtex.py | 125 ++++++++++++++++-----------------
2 files changed, 85 insertions(+), 81 deletions(-)
diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py
index a540a8a660..5ee0683b87 100644
--- a/src/calibre/library/catalog.py
+++ b/src/calibre/library/catalog.py
@@ -270,10 +270,10 @@ class BIBTEX(CatalogPlugin):
from calibre.library.save_to_disk import preprocess_template
#Bibtex functions
- from calibre.utils.bibtex import bibtex_author_format, utf8ToBibtex, ValidateCitationKey
+ from calibre.utils.bibtex import BibTeX
def create_bibtex_entry(entry, fields, mode, template_citation,
- asccii_bibtex = True, citation_bibtex = True):
+ bibtexdict, citation_bibtex = True):
#Bibtex doesn't like UTF-8 but keep unicode until writing
#Define starting chain or if book valid strict and not book return a Fail string
@@ -289,7 +289,8 @@ class BIBTEX(CatalogPlugin):
if citation_bibtex :
# Citation tag
- bibtex_entry.append(make_bibtex_citation(entry, template_citation, asccii_bibtex))
+ bibtex_entry.append(make_bibtex_citation(entry, template_citation,
+ bibtexdict))
bibtex_entry = [u' '.join(bibtex_entry)]
for field in fields:
@@ -304,11 +305,11 @@ class BIBTEX(CatalogPlugin):
pass
if field == 'authors' :
- bibtex_entry.append(u'author = "%s"' % bibtex_author_format(item))
+ bibtex_entry.append(u'author = "%s"' % bibtexdict.bibtex_author_format(item))
elif field in ['title', 'publisher', 'cover', 'uuid',
'author_sort', 'series'] :
- bibtex_entry.append(u'%s = "%s"' % (field, utf8ToBibtex(item, asccii_bibtex)))
+ bibtex_entry.append(u'%s = "%s"' % (field, bibtexdict.utf8ToBibtex(item)))
elif field == 'id' :
bibtex_entry.append(u'calibreid = "%s"' % int(item))
@@ -321,13 +322,13 @@ class BIBTEX(CatalogPlugin):
elif field == 'tags' :
#A list to flatten
- bibtex_entry.append(u'tags = "%s"' % utf8ToBibtex(u', '.join(item), asccii_bibtex))
+ bibtex_entry.append(u'tags = "%s"' % bibtexdict.utf8ToBibtex(u', '.join(item)))
elif field == 'comments' :
#\n removal
item = item.replace(u'\r\n',u' ')
item = item.replace(u'\n',u' ')
- bibtex_entry.append(u'note = "%s"' % utf8ToBibtex(item, asccii_bibtex))
+ bibtex_entry.append(u'note = "%s"' % bibtexdict.utf8ToBibtex(item))
elif field == 'isbn' :
# Could be 9, 10 or 13 digits
@@ -345,8 +346,7 @@ class BIBTEX(CatalogPlugin):
elif field == 'pubdate' :
bibtex_entry.append(u'year = "%s"' % item.year)
- bibtex_entry.append(u'month = "%s"' % utf8ToBibtex(strftime("%b", item),
- asccii_bibtex))
+ bibtex_entry.append(u'month = "%s"' % bibtexdict.utf8ToBibtex(strftime("%b", item)))
bibtex_entry = u',\n '.join(bibtex_entry)
bibtex_entry += u' }\n\n'
@@ -363,7 +363,7 @@ class BIBTEX(CatalogPlugin):
else :
return True
- def make_bibtex_citation(entry, template_citation, asccii_bibtex):
+ def make_bibtex_citation(entry, template_citation, bibtexclass):
#define a function to replace the template entry by its value
def tpl_replace(objtplname) :
@@ -384,8 +384,9 @@ class BIBTEX(CatalogPlugin):
return u''
if len(template_citation) >0 :
- tpl_citation = utf8ToBibtex(ValidateCitationKey(re.sub(u'\{[^{}]*\}',
- tpl_replace, template_citation)), asccii_bibtex)
+ tpl_citation = bibtexclass.utf8ToBibtex(
+ bibtexclass.ValidateCitationKey(re.sub(u'\{[^{}]*\}',
+ tpl_replace, template_citation)))
if len(tpl_citation) >0 :
return tpl_citation
@@ -397,9 +398,9 @@ class BIBTEX(CatalogPlugin):
template_citation = u'%s' % str(entry["id"])
if asccii_bibtex :
- return ValidateCitationKey(template_citation.encode('ascii', 'replace'))
+ return bibtexclass.ValidateCitationKey(template_citation.encode('ascii', 'replace'))
else :
- return ValidateCitationKey(template_citation)
+ return bibtexclass.ValidateCitationKey(template_citation)
self.fmt = path_to_output.rpartition('.')[2]
self.notification = notification
@@ -467,13 +468,16 @@ class BIBTEX(CatalogPlugin):
if not len(data):
log.error("\nNo matching database entries for search criteria '%s'" % opts.search_text)
+ #Initialize BibTeX class
+ bibtexc = BibTeX()
+
#Entries writing after Bibtex formating (or not)
if bibfile_enc != 'ascii' :
- asccii_bibtex = False
+ bibtexc.ascii_bibtex = False
else :
- asccii_bibtex = True
+ bibtexc.ascii_bibtex = True
- #Check and go to default in case of bad CLI
+ #Check citation choice and go to default in case of bad CLI
if isinstance(opts.impcit, (StringType, UnicodeType)) :
if opts.impcit == 'False' :
citation_bibtex= False
@@ -485,6 +489,7 @@ class BIBTEX(CatalogPlugin):
else :
citation_bibtex= opts.impcit
+ #Preprocess for error and light correction
template_citation = preprocess_template(opts.bib_cit)
#Open output and write entries
@@ -506,7 +511,7 @@ class BIBTEX(CatalogPlugin):
for entry in data:
outfile.write(create_bibtex_entry(entry, fields, bib_entry, template_citation,
- asccii_bibtex, citation_bibtex))
+ bibtexc, citation_bibtex))
outfile.close()
diff --git a/src/calibre/utils/bibtex.py b/src/calibre/utils/bibtex.py
index f6e596e8f0..5b9193d16d 100644
--- a/src/calibre/utils/bibtex.py
+++ b/src/calibre/utils/bibtex.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
""" Collection of python utility-methodes commonly used by other
bibliograph packages.
From http://pypi.python.org/pypi/bibliograph.core/
@@ -62,10 +60,14 @@
DAMAGE.
"""
-__docformat__ = 'reStructuredText'
__author__ = 'sengian '
+__docformat__ = 'restructuredtext en'
import re, string
+from UserDict import UserDict
+
+from calibre.constants import preferred_encoding
+from calibre.utils.mreplace import MReplace
utf8enc2latex_mapping = {
# This is a mapping of Unicode characters to LaTeX equivalents.
@@ -2842,69 +2844,66 @@ entity_mapping = {
'"':'{"}',
}
-def ValidateCitationKey(text):
- """
- removes characters not allowed in BibTeX keys
+class BibTeX:
+ def __init__(self):
+ self.rep_utf8 = MReplace(utf8enc2latex_mapping)
+ self.rep_ent = MReplace(entity_mapping)
+ #Set default conversion to ASCII BibTeX
+ self.ascii_bibtex = True
+ # This substitution is based on the description of cite key restrictions at
+ # http://bibdesk.sourceforge.net/manual/BibDesk%20Help_2.html
+ self.invalid_cit = re.compile(u'[ "@\',\\#}{~%&$^]')
+ self.upper = re.compile(u'[' +
+ string.uppercase.decode(preferred_encoding) + u']')
+ self.escape = re.compile(u'[~#&%_]')
+
+ def ValidateCitationKey(self, text):
+ """
+ removes characters not allowed in BibTeX keys
+ >>> ValidateCitationKey(DummyEntry('my@id'))
+ 'myid'
+ """
+ return self.invalid_cit.sub(u'', text)
- >>> from bibliograph.core.utils import _validKey
- >>> _validKey(DummyEntry('Foo Bar'))
- 'FooBar'
+ def braceUppercase(self, text):
+ """ Convert uppercase letters to bibtex encoded uppercase
+ >>> braceUppercase('Foo Bar')
+ '{F}oo {B}ar'
+ """
+ return self.upper.sub(lambda m: u'{%s}' % m.group(), text)
- >>> _validKey(DummyEntry('my@id'))
- 'myid'
+ def resolveEntities(self, text):
+ #for entity, entity_map in entity_mapping.iteritems():
+ # text = text.replace(entity, entity_map)
+ #return text
+ return self.rep_ent.mreplace(text)
- """
- # This substitution is based on the description of cite key restrictions at
- # http://bibdesk.sourceforge.net/manual/BibDesk%20Help_2.html
- return re.sub(u'[ "@\',\\#}{~%&$^]', u'', text)
+ def resolveUnicode(self, text):
+ #UTF-8 text as entry
+ #for unichar, latexenc in utf8enc2latex_mapping.iteritems() :
+ # text = text.replace(unichar, latexenc)
+ text = self.rep_utf8.mreplace(text)
+ return text.replace(u'$}{$', u'')
-def BraceUppercase(text):
- """ Convert uppercase letters to bibtex encoded uppercase
+ def escapeSpecialCharacters(self, text):
+ """
+ latex escaping some (not all) special characters
+ """
+ text.replace('\\', '\\\\')
+ return self.escape.sub(lambda m: u'\\%s' % m.group(), text)
- >>> from bibliograph.core.utils import _braceUppercase
- >>> _braceUppercase('foo bar')
- 'foo bar'
+ #Calibre functions
+ #Option to go to official ASCII Bibtex or unofficial UTF-8
+ #Go from an unicode entry to ASCII Bibtex format without encoding
+ def utf8ToBibtex(self, text):
+ if len(text) == 0:
+ return ''
+ text.replace('\\', '\\\\')
+ text = self.resolveEntities(text)
+ if self.ascii_bibtex :
+ text = self.resolveUnicode(text)
+ return self.escapeSpecialCharacters(text)
- >>> _braceUppercase('Foo Bar')
- '{F}oo {B}ar'
- """
- for uc in string.uppercase:
- text = text.replace(uc, u'{%s}' % uc)
- return text
-
-def resolveEntities(text):
- for entity, entity_map in entity_mapping.iteritems():
- text = text.replace(entity, entity_map)
- return text
-
-def resolveUnicode(text):
- #UTF-8 text as entry
- for unichar, latexenc in utf8enc2latex_mapping.iteritems() :
- text = text.replace(unichar, latexenc)
- return text.replace(u'$}{$', u'')
-
-def escapeSpecialCharacters(text):
- """
- latex escaping some (not all) special characters
- """
- text.replace('\\', '\\\\')
- escape = ['~', '#', '&', '%', '_']
- for c in escape:
- text = text.replace(c, '\\' + c )
- return text
-
-#Calibre functions
-#Go from an unicode entry to ASCII Bibtex format without encoding
-#Option to go to official ASCII Bibtex or unofficial UTF-8
-def utf8ToBibtex(text, asccii_bibtex = True):
- if len(text) == 0:
- return ''
- text.replace('\\', '\\\\')
- text = resolveEntities(text)
- if asccii_bibtex :
- text = resolveUnicode(text)
- return escapeSpecialCharacters(text)
-
-def bibtex_author_format(item):
- #Format authors for Bibtex compliance (get a list as input)
- return utf8ToBibtex(u' and'.join([author for author in item]))
+ def bibtex_author_format(self, item):
+ #Format authors for Bibtex compliance (get a list as input)
+ return self.utf8ToBibtex(u' and'.join([author for author in item]))
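A brief usage sketch of the refactored helper; the expected results are taken from the docstrings above, the rest is illustrative:

    from calibre.utils.bibtex import BibTeX

    bib = BibTeX()                             # ASCII BibTeX output by default
    print(bib.ValidateCitationKey(u'my@id'))   # myid -- strips characters BibTeX keys reject
    print(bib.braceUppercase(u'Foo Bar'))      # {F}oo {B}ar
    bib.ascii_bibtex = False                   # keep UTF-8 instead of the Unicode->LaTeX pass

Instantiating the class once and reusing it is the point of the change: the two MReplace tables are compiled a single time, instead of walking the whole mapping for every field of every entry.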
From 8512f57866262b66f4cd542ac96cccf2b9c05737 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Wed, 28 Jul 2010 23:08:02 +0200
Subject: [PATCH 005/163] Check if RTF is ASCII early. Will be effective after
preprocess integration in rtf2xml.
---
src/calibre/ebooks/rtf2xml/ParseRtf.py | 26 ++++++++++++++++++--------
1 file changed, 18 insertions(+), 8 deletions(-)
diff --git a/src/calibre/ebooks/rtf2xml/ParseRtf.py b/src/calibre/ebooks/rtf2xml/ParseRtf.py
index 7b89407f79..f494b7a9c1 100755
--- a/src/calibre/ebooks/rtf2xml/ParseRtf.py
+++ b/src/calibre/ebooks/rtf2xml/ParseRtf.py
@@ -17,7 +17,8 @@
#########################################################################
# $Revision: 1.41 $
# $Date: 2006/03/24 23:50:07 $
-import sys,os
+import sys, os, codecs
+
from calibre.ebooks.rtf2xml import headings_to_sections, \
line_endings, footnote, fields_small, default_encoding, \
make_lists, preamble_div, header, colors, group_borders, \
@@ -90,7 +91,6 @@ class ParseRtf:
out_file = '',
out_dir = None,
dtd = '',
- #debug = 0, #why? calibre
deb_dir = None,
convert_symbol = None,
convert_wingdings = None,
@@ -107,6 +107,7 @@ class ParseRtf:
no_dtd = 0,
char_data = '',
):
+
"""
Requires:
'file' --file to parse
@@ -125,14 +126,16 @@ class ParseRtf:
through a file. Only for debugging.
Returns: Nothing
"""
+
self.__file = in_file
self.__out_file = out_file
self.__out_dir = out_dir
self.__temp_dir = out_dir
self.__dtd_path = dtd
self.__check_file(in_file,"file_to_parse")
+ self.__check_ascii(in_file)
self.__char_data = char_data
- self.__debug_dir = deb_dir #self.__debug_dir = debug calibre
+ self.__debug_dir = deb_dir
self.__check_dir(self.__temp_dir)
self.__copy = self.__check_dir(self.__debug_dir)
self.__convert_caps = convert_caps
@@ -149,19 +152,17 @@ class ParseRtf:
self.__group_borders = group_borders
self.__empty_paragraphs = empty_paragraphs
self.__no_dtd = no_dtd
-
def __check_file(self, the_file, type):
"""Check to see if files exist"""
if hasattr(the_file, 'read'): return
if the_file == None:
if type == "file_to_parse":
- message = "You must provide a file for the script to work"
- msg = message
+ msg = "\nYou must provide a file for the script to work"
raise RtfInvalidCodeException, msg
elif os.path.exists(the_file):
pass # do nothing
else:
- message = "The file '%s' cannot be found" % the_file
+ message = "\nThe file '%s' cannot be found" % the_file
msg = message
raise RtfInvalidCodeException, msg
def __check_dir(self, the_dir):
@@ -170,7 +171,16 @@ class ParseRtf:
return
dir_exists = os.path.isdir(the_dir)
if not dir_exists:
- message = "%s is not a directory" % the_dir
+ msg = "\n%s is not a directory" % the_dir
+ raise RtfInvalidCodeException, msg
+ return 1
+ def __check_ascii(self, the_file):
+ """Check to see if the file is correct ascii"""
+ try:
+ test = codecs.open(the_file, 'r', 'ascii', 'strict')
+ test.close()
+ except UnicodeError:
+ message= "\n%s is not a correct ascii file" % the_file
msg = message
raise RtfInvalidCodeException, msg
return 1
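A standalone sketch of the check this hunk introduces. The strict codec only raises when data is actually decoded, so the read() below is what triggers the error; the hunk itself only opens and closes the file:

    import codecs

    def is_plain_ascii(path):
        # Any byte above 0x7f makes the strict ASCII codec raise UnicodeError.
        try:
            f = codecs.open(path, 'r', 'ascii', 'strict')
            try:
                f.read()
            finally:
                f.close()
            return True
        except UnicodeError:
            return False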
From 09c8f13a1f17c869d06ace0d6cf76f0ff9b3fdc7 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Sat, 31 Jul 2010 10:47:12 +0200
Subject: [PATCH 006/163] Global overhaul of rtf2xml: RTF fixes (1)
---
src/calibre/ebooks/rtf/input.py | 1 +
src/calibre/ebooks/rtf2xml/ParseRtf.py | 53 ++++++++------------
src/calibre/ebooks/rtf2xml/check_brackets.py | 10 ++--
src/calibre/ebooks/rtf2xml/line_endings.py | 52 ++++++++-----------
src/calibre/ebooks/rtf2xml/process_tokens.py | 2 -
src/calibre/ebooks/rtf2xml/tokenize.py | 6 +--
src/calibre/ebooks/txt/processor.py | 5 +-
7 files changed, 52 insertions(+), 77 deletions(-)
diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py
index df74a7b3cb..2622d82d99 100644
--- a/src/calibre/ebooks/rtf/input.py
+++ b/src/calibre/ebooks/rtf/input.py
@@ -50,6 +50,7 @@ class RTFInput(InputFormatPlugin):
parser = ParseRtf(
in_file = stream,
out_file = ofile,
+ #deb_dir = 'I:\\Calibre\\rtfdebug',
# Convert symbol fonts to unicode equivalents. Default
# is 1
convert_symbol = 1,
diff --git a/src/calibre/ebooks/rtf2xml/ParseRtf.py b/src/calibre/ebooks/rtf2xml/ParseRtf.py
index f494b7a9c1..3a804792c5 100755
--- a/src/calibre/ebooks/rtf2xml/ParseRtf.py
+++ b/src/calibre/ebooks/rtf2xml/ParseRtf.py
@@ -143,7 +143,7 @@ class ParseRtf:
self.__convert_wingdings = convert_wingdings
self.__convert_zapf = convert_zapf
self.__run_level = run_level
- self.__exit_level = 0
+ #self.__exit_level = 0
self.__indent = indent
self.__replace_illegals = replace_illegals
self.__form_lists = form_lists
@@ -162,8 +162,7 @@ class ParseRtf:
elif os.path.exists(the_file):
pass # do nothing
else:
- message = "\nThe file '%s' cannot be found" % the_file
- msg = message
+ msg = "\nThe file '%s' cannot be found" % the_file
raise RtfInvalidCodeException, msg
def __check_dir(self, the_dir):
"""Check to see if directory exists"""
@@ -180,8 +179,7 @@ class ParseRtf:
test = codecs.open(the_file, 'r', 'ascii', 'strict')
test.close()
except UnicodeError:
- message= "\n%s is not a correct ascii file" % the_file
- msg = message
+ msg = "\n%s is not a correct ascii file" % the_file
raise RtfInvalidCodeException, msg
return 1
def parse_rtf(self):
@@ -204,27 +202,29 @@ class ParseRtf:
copy_obj.set_dir(self.__debug_dir)
copy_obj.remove_files()
copy_obj.copy_file(self.__temp_file, "original_file")
- # new as of 2005-08-02. Do I want this?
+ # Function to check whether brackets are well handled
if self.__debug_dir or self.__run_level > 2:
self.__check_brack_obj = check_brackets.CheckBrackets\
(file = self.__temp_file,
bug_handler = RtfInvalidCodeException,
)
- # convert Macintosh line endings to Unix line endings
+ # convert Macintosh and Windows line endings to Unix line endings
+ #why do this if you don't wb after?
line_obj = line_endings.FixLineEndings(
in_file = self.__temp_file,
bug_handler = RtfInvalidCodeException,
copy = self.__copy,
- run_level = self.__run_level,
+ #run_level = self.__run_level,
replace_illegals = self.__replace_illegals,
)
- return_value = line_obj.fix_endings()
- self.__return_code(return_value)
+ line_obj.fix_endings()
+ #return_value = line_obj.fix_endings() #calibre: no return in this function, why keep it?
+ #self.__return_code(return_value)
tokenize_obj = tokenize.Tokenize(
bug_handler = RtfInvalidCodeException,
in_file = self.__temp_file,
- copy = self.__copy,
- run_level = self.__run_level,)
+ copy = self.__copy,)
+ #run_level = self.__run_level,)
tokenize_obj.tokenize()
process_tokens_obj = process_tokens.ProcessTokens(
in_file = self.__temp_file,
@@ -529,36 +529,27 @@ class ParseRtf:
)
output_obj.output()
os.remove(self.__temp_file)
- return self.__exit_level
+ #return self.__exit_level
def __bracket_match(self, file_name):
if self.__run_level > 2:
good_br, msg = self.__check_brack_obj.check_brackets()
if good_br:
pass
- # sys.stderr.write( msg + ' in ' + file_name + "\n")
+ #sys.stderr.write( msg + ' in ' + file_name + "\n")
else:
msg += msg + " in file '" + file_name + "'\n"
raise RtfInvalidCodeException, msg
- def __return_code(self, num):
- if num == None:
- return
- if int(num) > self.__exit_level:
- self.__exit_level = num
+ #def __return_code(self, num): calibre not used
+ # if num == None:
+ # return
+ # if int(num) > self.__exit_level:
+ # self.__exit_level = num
def __make_temp_file(self,file):
"""Make a temporary file to parse"""
write_file="rtf_write_file"
read_obj = file if hasattr(file, 'read') else open(file,'r')
write_obj = open(write_file, 'w')
- line = "dummy"
- while line:
- line = read_obj.read(1000)
- write_obj.write(line )
+ for line in read_obj:
+ write_obj.write(line)
write_obj.close()
- return write_file
- """
-mi1\n
-mi33\n
-mi 0:
length_byte = len(txt.encode('utf-8'))
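The line-endings pass mentioned in this patch normalises Macintosh and Windows endings to Unix ones before tokenizing. FixLineEndings itself is not shown here; the snippet below is only the standard normalisation, not the calibre implementation:

    def unix_line_endings(data):
        # Replace \r\n first so a Windows ending does not become two newlines.
        return data.replace('\r\n', '\n').replace('\r', '\n')

    assert unix_line_endings('a\r\nb\rc\n') == 'a\nb\nc\n'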
From 3405615e54da2f2aa7345d1f51525acd250cbd91 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Sat, 31 Jul 2010 13:15:47 +0200
Subject: [PATCH 007/163] Remove invalid ASCII characters from plain text files
---
src/calibre/ebooks/txt/input.py | 3 ++-
src/calibre/ebooks/txt/processor.py | 25 +++++++++++++++----------
2 files changed, 17 insertions(+), 11 deletions(-)
diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py
index b444bf1cf4..935a187d5d 100644
--- a/src/calibre/ebooks/txt/input.py
+++ b/src/calibre/ebooks/txt/input.py
@@ -57,6 +57,7 @@ class TXTInput(InputFormatPlugin):
txt = preserve_spaces(txt)
txt = _ent_pat.sub(xml_entity_to_unicode, txt)
+ txt = txt.encode('utf-8')
if options.markdown:
log.debug('Running text though markdown conversion...')
@@ -79,7 +80,7 @@ class TXTInput(InputFormatPlugin):
base = os.path.dirname(stream.name)
htmlfile = open(os.path.join(base, 'temp_calibre_txt_input_to_html.html'),
'wb')
- htmlfile.write(html.encode('utf-8'))
+ htmlfile.write(html) #html.encode('utf-8')
htmlfile.close()
cwd = os.getcwdu()
odi = options.debug_pipeline
diff --git a/src/calibre/ebooks/txt/processor.py b/src/calibre/ebooks/txt/processor.py
index 91c274a7b1..6bd635b6df 100644
--- a/src/calibre/ebooks/txt/processor.py
+++ b/src/calibre/ebooks/txt/processor.py
@@ -19,7 +19,7 @@ HTML_TEMPLATE = u'
]
-
+
@@ -294,7 +294,7 @@
-
+
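A hypothetical helper illustrating the idea in the subject line (this is not the calibre implementation): drop control characters that are invalid in the generated HTML while keeping tab, newline and carriage return.

    import re

    _invalid = re.compile(u'[\x00-\x08\x0b\x0c\x0e-\x1f]')

    def strip_invalid_chars(txt):
        # Remove C0 control characters other than \t, \n and \r.
        return _invalid.sub(u'', txt)

    assert strip_invalid_chars(u'ok\x00 text\x1f here') == u'ok text here'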
From 1f237c99bfe5bb875f4dc384b4b80938967d7ae9 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Sat, 31 Jul 2010 20:01:54 +0200
Subject: [PATCH 010/163] Change the bibtex conversion mapping for the euro
symbol
---
src/calibre/utils/bibtex.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/calibre/utils/bibtex.py b/src/calibre/utils/bibtex.py
index 5b9193d16d..09868ccdb1 100644
--- a/src/calibre/utils/bibtex.py
+++ b/src/calibre/utils/bibtex.py
@@ -80,7 +80,7 @@ utf8enc2latex_mapping = {
#Fix some encoding problem between cp1252 and latin1
# from http://www.microsoft.com/typography/unicode/1252.htm
- u'\x80': '{\\mbox{\\texteuro}}', # EURO SIGN
+ u'\x80': '{\\texteuro}', # EURO SIGN
u'\x82': '{,}', # SINGLE LOW-9 QUOTATION MARK
u'\x83': '$f$', # LATIN SMALL LETTER F WITH HOOK
u'\x84': '{,,}', # DOUBLE LOW-9 QUOTATION MARK
@@ -746,7 +746,7 @@ utf8enc2latex_mapping = {
u'\u205f': '{\\mkern4mu}',
u'\u2060': '{\\nolinebreak}',
u'\u20a7': '{\\ensuremath{\\Elzpes}}',
- u'\u20ac': '{\\mbox{\\texteuro}}',
+ u'\u20ac': '{\\texteuro}',
u'\u20db': '$\\dddot$',
u'\u20dc': '$\\ddddot$',
u'\u2102': '$\\mathbb{C}$',
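An illustrative round-trip with the updated mapping; the input string is made up, only the {\texteuro} replacement comes from this patch:

    from calibre.utils.bibtex import BibTeX

    bib = BibTeX()                                # ASCII output, so the Unicode->LaTeX pass runs
    print(bib.utf8ToBibtex(u'price: 10\u20ac'))   # price: 10{\texteuro}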
From 2eb20249319e551f41d4d721c831e3e64abaf72c Mon Sep 17 00:00:00 2001
From: Sengian
Date: Tue, 10 Aug 2010 12:38:59 +0200
Subject: [PATCH 011/163] Merge from trunk
---
resources/catalog/stylesheet.css | 142 +++----
resources/content_server/gui.css | 163 ++++----
resources/content_server/index.html | 103 ++---
resources/content_server/mobile.css | 91 ++---
resources/templates/html.css | 361 ++++++++---------
setup/installer/windows/en-us.xml | 19 +-
setup/installer/windows/wix-template.xml | 267 ++++++-------
src/calibre/ebooks/lrf/html/demo/demo.html | 440 +++++++++++++--------
src/calibre/manual/templates/layout.html | 24 +-
src/calibre/manual/xpath.xhtml | 30 +-
10 files changed, 871 insertions(+), 769 deletions(-)
diff --git a/resources/catalog/stylesheet.css b/resources/catalog/stylesheet.css
index 4f9ca9ac41..ea01aeb43b 100644
--- a/resources/catalog/stylesheet.css
+++ b/resources/catalog/stylesheet.css
@@ -1,102 +1,104 @@
-body { background-color: white; }
+body {
+ background-color: white;
+}
-p.title {
- margin-top:0em;
- margin-bottom:1em;
- text-align:center;
- font-style:italic;
- font-size:xx-large;
- border-bottom: solid black 4px;
- }
+p.title {
+ margin-top: 0em;
+ margin-bottom: 1em;
+ text-align: center;
+ font-style: italic;
+ font-size: xx-large;
+ border-bottom: solid black 4px;
+}
p.author {
- margin-top:0em;
- margin-bottom:0em;
+ margin-top: 0em;
+ margin-bottom: 0em;
text-align: left;
text-indent: 1em;
- font-size:large;
- }
+ font-size: large;
+}
p.tags {
- margin-top:0em;
- margin-bottom:0em;
+ margin-top: 0em;
+ margin-bottom: 0em;
text-align: left;
text-indent: 1em;
- font-size:small;
- }
+ font-size: small;
+}
p.description {
- text-align:left;
- font-style:normal;
+ text-align: left;
+ font-style: normal;
margin-top: 0em;
- }
+}
p.date_index {
- font-size:x-large;
- text-align:center;
- font-weight:bold;
- margin-top:1em;
- margin-bottom:0px;
- }
+ font-size: x-large;
+ text-align: center;
+ font-weight: bold;
+ margin-top: 1em;
+ margin-bottom: 0px;
+}
p.letter_index {
- font-size:x-large;
- text-align:center;
- font-weight:bold;
- margin-top:1em;
- margin-bottom:0px;
- }
+ font-size: x-large;
+ text-align: center;
+ font-weight: bold;
+ margin-top: 1em;
+ margin-bottom: 0px;
+}
p.author_index {
- font-size:large;
- text-align:left;
- margin-top:0px;
- margin-bottom:0px;
+ font-size: large;
+ text-align: left;
+ margin-top: 0px;
+ margin-bottom: 0px;
text-indent: 0em;
- }
+}
p.series {
text-align: left;
- margin-top:0px;
- margin-bottom:0px;
- margin-left:2em;
- text-indent:-2em;
- }
+ margin-top: 0px;
+ margin-bottom: 0px;
+ margin-left: 2em;
+ text-indent: -2em;
+}
p.read_book {
- text-align:left;
- margin-top:0px;
- margin-bottom:0px;
- margin-left:2em;
- text-indent:-2em;
- }
+ text-align: left;
+ margin-top: 0px;
+ margin-bottom: 0px;
+ margin-left: 2em;
+ text-indent: -2em;
+}
p.unread_book {
- text-align:left;
- margin-top:0px;
- margin-bottom:0px;
- margin-left:2em;
- text-indent:-2em;
- }
+ text-align: left;
+ margin-top: 0px;
+ margin-bottom: 0px;
+ margin-left: 2em;
+ text-indent: -2em;
+}
p.date_read {
- text-align:left;
- margin-top:0px;
- margin-bottom:0px;
- margin-left:6em;
- text-indent:-6em;
- }
+ text-align: left;
+ margin-top: 0px;
+ margin-bottom: 0px;
+ margin-left: 6em;
+ text-indent: -6em;
+}
hr.series_divider {
- width:50%;
- margin-left:1em;
- margin-top:0em;
- margin-bottom:0em;
- }
+ width: 50%;
+ margin-left: 1em;
+ margin-top: 0em;
+ margin-bottom: 0em;
+}
hr.annotations_divider {
- width:50%;
- margin-left:1em;
- margin-top:0em;
- margin-bottom:0em;
- }
+ width: 50%;
+ margin-left: 1em;
+ margin-top: 0em;
+ margin-bottom: 0em;
+}
\ No newline at end of file
diff --git a/resources/content_server/gui.css b/resources/content_server/gui.css
index 1bcc4e1eb0..d7a3eda51e 100644
--- a/resources/content_server/gui.css
+++ b/resources/content_server/gui.css
@@ -1,142 +1,157 @@
body {
- background-color: white;
+ background-color: white;
}
#banner {
- position: absolute;
- left: 5px; top: 0px;
+ position: absolute;
+ left: 5px;
+ top: 0px;
}
/*
Search bar
*/
#search_box {
- width: 201px;
- height: 31px;
- background: url(bg_search_box.png);
- top: 5px; right: 20px;
- position: absolute;
+ width: 201px;
+ height: 31px;
+ background: url(bg_search_box.png);
+ top: 5px;
+ right: 20px;
+ position: absolute;
}
+
#search_box #s {
- float: left;
- padding: 0;
- margin: 6px 0 0 6px;
- border-width: 0px;
- font-size: 16px;
- width: 159px;
- background: transparent;
+ float: left;
+ padding: 0;
+ margin: 6px 0 0 6px;
+ border-width: 0px;
+ font-size: 16px;
+ width: 159px;
+ background: transparent;
}
+
#search_box #go {
- float: right;
- margin: 3px 4px 0 0;
+ float: right;
+ margin: 3px 4px 0 0;
}
/*
Count bar
*/
#count_bar {
- position: absolute;
- right: 30px;
- top: 80px;
- font-size:smaller;
- padding-bottom: 5px;
+ position: absolute;
+ right: 30px;
+ top: 80px;
+ font-size: smaller;
+ padding-bottom: 5px;
}
#count_bar * img {
- cursor: pointer;
+ cursor: pointer;
}
-#count { cursor: default;}
+#count {
+ cursor: default;
+}
/*
Styles for the book list
*/
#main {
- width:95%;
- overflow: auto;
- border: solid thin black;
- position: absolute;
- top: 115px; left: 10px;
- z-index: 1;
+ width: 95%;
+ overflow: auto;
+ border: solid thin black;
+ position: absolute;
+ top: 115px;
+ left: 10px;
+ z-index: 1;
}
table#book_list thead tr td {
- width: 100%;
- padding-right: 1em; padding-left: 1em;
- text-align: center;
- font-weight: bold;
- font-size: 130%;
- border-bottom: thick solid black;
- border-top: thick solid black;
- cursor: pointer;
- font-family: serif;
- padding-top: 0.5ex; padding-bottom: 0.5ex;
+ width: 100%;
+ padding-right: 1em;
+ padding-left: 1em;
+ text-align: center;
+ font-weight: bold;
+ font-size: 130%;
+ border-bottom: thick solid black;
+ border-top: thick solid black;
+ cursor: pointer;
+ font-family: serif;
+ padding-top: 0.5ex;
+ padding-bottom: 0.5ex;
}
table#book_list tbody tr td {
- padding-right: 1em; padding-left: 1em;
- /*border-bottom: thin solid black;*/
- padding-bottom: 0.7ex; padding-top: 0.7ex;
- margin: 0pt;
- cursor: pointer;
-
+ padding-right: 1em;
+ padding-left: 1em;
+ /*border-bottom: thin solid black;*/
+ padding-bottom: 0.7ex;
+ padding-top: 0.7ex;
+ margin: 0pt;
+ cursor: pointer;
}
table#book_list * .sort_indicator {
- visibility:hidden;
- color: #9f9f9f;
+ visibility: hidden;
+ color: #9f9f9f;
}
table#book_list * .rating {
- color: #3fbbe4;
+ color: #3fbbe4;
}
table#book_list * span.subtitle {
- font-size: smaller;
+ font-size: smaller;
}
table#book_list * a.format {
- text-decoration: none;
- color: blue;
- font-family: monospace;
+ text-decoration: none;
+ color: blue;
+ font-family: monospace;
}
table#book_list * a.format:hover {
- color: red;
+ color: red;
}
table#book_list * a.format:visited {
- color: blue;
+ color: blue;
}
table#book_list * .comments {
- font-size: smaller;
- display: none;
+ font-size: smaller;
+ display: none;
}
+
/*
Loading message
*/
#loading {
- top: 10px; left: 10px;
- position: absolute;
- font-size: 160%; font-family: monospace;
- text-align: center;
- visibility: hidden;
- z-index: 10000;
- background-color: #aaaaaa;
- opacity: 0.8;
-
+ top: 10px;
+ left: 10px;
+ position: absolute;
+ font-size: 160%;
+ font-family: monospace;
+ text-align: center;
+ visibility: hidden;
+ z-index: 10000;
+ background-color: #aaaaaa;
+ opacity: 0.8;
}
#loading div {
- top: 50%; position: relative;
+ top: 50%;
+ position: relative;
}
#cover_pane {
- overflow: auto;
- position: absolute;
- visibility: hidden;
- text-align: right;
- z-index: 2;
- margin: 0pt; padding: 0pt; border-width: 0pt;
-}
+ overflow: auto;
+ position: absolute;
+ visibility: hidden;
+ text-align: right;
+ z-index: 2;
+ margin: 0pt;
+ padding: 0pt;
+ border-width: 0pt;
+}
\ No newline at end of file
diff --git a/resources/content_server/index.html b/resources/content_server/index.html
index f9f0aff491..ff11acc719 100644
--- a/resources/content_server/index.html
+++ b/resources/content_server/index.html
@@ -1,49 +1,60 @@
-
-
- calibre library
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Loading…
-
-
-
-
-
-
-
+
+
+calibre library
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Loading…
+
+
+
+
+
diff --git a/resources/content_server/mobile.css b/resources/content_server/mobile.css
index 9be755b954..e3a4b58422 100644
--- a/resources/content_server/mobile.css
+++ b/resources/content_server/mobile.css
@@ -1,83 +1,78 @@
/* CSS for the mobile version of the content server webpage */
-
.navigation table.buttons {
- width: 100%;
+ width: 100%;
}
.navigation .button {
- width: 50%;
+ width: 50%;
}
-.button a, .button:visited a {
- padding: 0.5em;
- font-size: 1.25em;
- border: 1px solid black;
- text-color: black;
- background-color: #ddd;
- border-top: 1px solid ThreeDLightShadow;
- border-right: 1px solid ButtonShadow;
- border-bottom: 1px solid ButtonShadow;
- border-left: 1 px solid ThreeDLightShadow;
- -moz-border-radius: 0.25em;
- -webkit-border-radius: 0.25em;
+.button a,.button:visited a {
+ padding: 0.5em;
+ font-size: 1.25em;
+ border: 1px solid black;
+ text-color: black;
+ background-color: #ddd;
+ border-top: 1px solid ThreeDLightShadow;
+ border-right: 1px solid ButtonShadow;
+ border-bottom: 1px solid ButtonShadow;
+ border-left: 1 px solid ThreeDLightShadow;
+ -moz-border-radius: 0.25em;
+ -webkit-border-radius: 0.25em;
}
.button:hover a {
- border-top: 1px solid #666;
- border-right: 1px solid #CCC;
- border-bottom: 1 px solid #CCC;
- border-left: 1 px solid #666;
-
-
+ border-top: 1px solid #666;
+ border-right: 1px solid #CCC;
+ border-bottom: 1 px solid #CCC;
+ border-left: 1 px solid #666;
}
div.navigation {
- padding-bottom: 1em;
- clear: both;
+ padding-bottom: 1em;
+ clear: both;
}
#search_box {
- border: 1px solid #393;
- -moz-border-radius: 0.5em;
- -webkit-border-radius: 0.5em;
- padding: 1em;
- margin-bottom: 0.5em;
- float: right;
+ border: 1px solid #393;
+ -moz-border-radius: 0.5em;
+ -webkit-border-radius: 0.5em;
+ padding: 1em;
+ margin-bottom: 0.5em;
+ float: right;
}
#listing {
- width: 100%;
- border-collapse: collapse;
+ width: 100%;
+ border-collapse: collapse;
}
+
#listing td {
- padding: 0.25em;
+ padding: 0.25em;
}
#listing td.thumbnail {
- height: 60px;
- width: 60px;
+ height: 60px;
+ width: 60px;
}
#listing tr:nth-child(even) {
-
- background: #eee;
+ background: #eee;
}
-#listing .button a{
- display: inline-block;
- width: 2.5em;
- padding-left: 0em;
- padding-right: 0em;
- overflow: hidden;
- text-align: center;
+#listing .button a {
+ display: inline-block;
+ width: 2.5em;
+ padding-left: 0em;
+ padding-right: 0em;
+ overflow: hidden;
+ text-align: center;
}
#logo {
- float: left;
+ float: left;
}
#spacer {
- clear: both;
-}
-
-
+ clear: both;
+}
\ No newline at end of file
diff --git a/resources/templates/html.css b/resources/templates/html.css
index e9b683ca34..448ec596b9 100644
--- a/resources/templates/html.css
+++ b/resources/templates/html.css
@@ -34,380 +34,367 @@
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
+@
+namespace url (http: //www.w3.org /1999/xhtml);
+ @namespace svg url (http: //www.w3.org /2000/svg);
+ /* blocks */
-@namespace url(http://www.w3.org/1999/xhtml);
-@namespace svg url(http://www.w3.org/2000/svg);
-
-/* blocks */
-
-html, div, map, dt, isindex, form {
- display: block;
+html,div,map,dt,isindex,form {
+ display: block;
}
body {
- display: block;
+ display: block;
}
-p, dl, multicol {
- display: block;
- margin: 1em 0;
+p,dl,multicol {
+ display: block;
+ margin: 1em 0;
}
dd {
- display: block;
- margin-left: 40px;
+ display: block;
+ margin-left: 40px;
}
blockquote {
- display: block;
- margin: 1em;
+ display: block;
+ margin: 1em;
}
address {
- display: block;
- font-style: italic;
+ display: block;
+ font-style: italic;
}
center {
- display: block;
- text-align: center;
+ display: block;
+ text-align: center;
}
blockquote[type=cite] {
- display: block;
- margin: 1em 0em;
- border-color: blue;
- border-width: thin;
+ display: block;
+ margin: 1em 0em;
+ border-color: blue;
+ border-width: thin;
}
span[_moz_quote=true] {
- color: blue;
+ color: blue;
}
pre[_moz_quote=true] {
- color: blue;
+ color: blue;
}
h1 {
- display: block;
- font-size: 2em;
- font-weight: bold;
- margin: .67em 0;
+ display: block;
+ font-size: 2em;
+ font-weight: bold;
+ margin: .67em 0;
}
h2 {
- display: block;
- font-size: 1.5em;
- font-weight: bold;
- margin: .83em 0;
+ display: block;
+ font-size: 1.5em;
+ font-weight: bold;
+ margin: .83em 0;
}
h3 {
- display: block;
- font-size: 1.17em;
- font-weight: bold;
- margin: 1em 0;
+ display: block;
+ font-size: 1.17em;
+ font-weight: bold;
+ margin: 1em 0;
}
h4 {
- display: block;
- font-weight: bold;
- margin: 1.33em 0;
+ display: block;
+ font-weight: bold;
+ margin: 1.33em 0;
}
h5 {
- display: block;
- font-size: 0.83em;
- font-weight: bold;
- margin: 1.67em 0;
+ display: block;
+ font-size: 0.83em;
+ font-weight: bold;
+ margin: 1.67em 0;
}
h6 {
- display: block;
- font-size: 0.67em;
- font-weight: bold;
- margin: 2.33em 0;
+ display: block;
+ font-size: 0.67em;
+ font-weight: bold;
+ margin: 2.33em 0;
}
listing {
- display: block;
- font-family: monospace;
- font-size: medium;
- white-space: pre;
- margin: 1em 0;
+ display: block;
+ font-family: monospace;
+ font-size: medium;
+ white-space: pre;
+ margin: 1em 0;
}
-xmp, pre, plaintext {
- display: block;
- font-family: monospace;
- white-space: pre;
- margin: 1em 0;
+xmp,pre,plaintext {
+ display: block;
+ font-family: monospace;
+ white-space: pre;
+ margin: 1em 0;
}
/* tables */
-
table {
- display: table;
- border-spacing: 2px;
- border-collapse: separate;
- margin-top: 0;
- margin-bottom: 0;
- text-indent: 0;
+ display: table;
+ border-spacing: 2px;
+ border-collapse: separate;
+ margin-top: 0;
+ margin-bottom: 0;
+ text-indent: 0;
}
table[align="left"] {
- float: left;
+ float: left;
}
table[align="right"] {
- float: right;
+ float: right;
}
-table[rules]:not([rules="none"]) {
- border-collapse: collapse;
+table[rules]:not ([rules="none"] ) {
+ border-collapse: collapse;
}
-
-/* caption inherits from table not table-outer */
+
+/* caption inherits from table not table-outer */
caption {
- display: table-caption;
- text-align: center;
+ display: table-caption;
+ text-align: center;
}
-table[align="center"] > caption {
- margin-left: auto;
- margin-right: auto;
+table[align="center"]>caption {
+ margin-left: auto;
+ margin-right: auto;
}
-table[align="center"] > caption[align="left"] {
- margin-right: 0;
+table[align="center"]>caption[align="left"] {
+ margin-right: 0;
}
-table[align="center"] > caption[align="right"] {
- margin-left: 0;
+table[align="center"]>caption[align="right"] {
+ margin-left: 0;
}
tr {
- display: table-row;
- vertical-align: inherit;
+ display: table-row;
+ vertical-align: inherit;
}
col {
- display: table-column;
+ display: table-column;
}
colgroup {
- display: table-column-group;
+ display: table-column-group;
}
tbody {
- display: table-row-group;
- vertical-align: middle;
+ display: table-row-group;
+ vertical-align: middle;
}
thead {
- display: table-header-group;
- vertical-align: middle;
+ display: table-header-group;
+ vertical-align: middle;
}
tfoot {
- display: table-footer-group;
- vertical-align: middle;
+ display: table-footer-group;
+ vertical-align: middle;
}
/* for XHTML tables without tbody */
-table > tr {
- vertical-align: middle;
+table>tr {
+ vertical-align: middle;
}
-td {
- display: table-cell;
- vertical-align: inherit;
- text-align: inherit;
- padding: 1px;
+td {
+ display: table-cell;
+ vertical-align: inherit;
+ text-align: inherit;
+ padding: 1px;
}
th {
- display: table-cell;
- vertical-align: inherit;
- font-weight: bold;
- padding: 1px;
+ display: table-cell;
+ vertical-align: inherit;
+ font-weight: bold;
+ padding: 1px;
}
/* inlines */
-
-b, strong {
- font-weight: bolder;
+b,strong {
+ font-weight: bolder;
}
-i, cite, em, var, dfn {
- font-style: italic;
+i,cite,em,var,dfn {
+ font-style: italic;
}
-tt, code, kbd, samp {
- font-family: monospace;
+tt,code,kbd,samp {
+ font-family: monospace;
}
-u, ins {
- text-decoration: underline;
+u,ins {
+ text-decoration: underline;
}
-s, strike, del {
- text-decoration: line-through;
+s,strike,del {
+ text-decoration: line-through;
}
blink {
- text-decoration: blink;
+ text-decoration: blink;
}
big {
- font-size: larger;
+ font-size: larger;
}
small {
- font-size: smaller;
+ font-size: smaller;
}
sub {
- vertical-align: sub;
- font-size: smaller;
- line-height: normal;
+ vertical-align: sub;
+ font-size: smaller;
+ line-height: normal;
}
sup {
- vertical-align: super;
- font-size: smaller;
- line-height: normal;
+ vertical-align: super;
+ font-size: smaller;
+ line-height: normal;
}
nobr {
- white-space: nowrap;
+ white-space: nowrap;
}
/* titles */
-abbr[title], acronym[title] {
- border-bottom: dotted 1px;
+abbr[title],acronym[title] {
+ border-bottom: dotted 1px;
}
/* lists */
-
-ul, menu, dir {
- display: block;
- list-style-type: disc;
- margin: 1em 0;
+ul,menu,dir {
+ display: block;
+ list-style-type: disc;
+ margin: 1em 0;
}
ol {
- display: block;
- list-style-type: decimal;
- margin: 1em 0;
+ display: block;
+ list-style-type: decimal;
+ margin: 1em 0;
}
li {
- display: list-item;
+ display: list-item;
}
/* nested lists have no top/bottom margins */
-ul ul, ul ol, ul dir, ul menu, ul dl,
-ol ul, ol ol, ol dir, ol menu, ol dl,
-dir ul, dir ol, dir dir, dir menu, dir dl,
-menu ul, menu ol, menu dir, menu menu, menu dl,
-dl ul, dl ol, dl dir, dl menu, dl dl {
- margin-top: 0;
- margin-bottom: 0;
+ul ul,ul ol,ul dir,ul menu,ul dl,ol ul,ol ol,ol dir,ol menu,ol dl,dir ul,dir ol,dir dir,dir menu,dir dl,menu ul,menu ol,menu dir,menu menu,menu dl,dl ul,dl ol,dl dir,dl menu,dl dl
+ {
+ margin-top: 0;
+ margin-bottom: 0;
}
/* 2 deep unordered lists use a circle */
-ol ul, ul ul, menu ul, dir ul,
-ol menu, ul menu, menu menu, dir menu,
-ol dir, ul dir, menu dir, dir dir {
- list-style-type: circle;
+ol ul,ul ul,menu ul,dir ul,ol menu,ul menu,menu menu,dir menu,ol dir,ul dir,menu dir,dir dir
+ {
+ list-style-type: circle;
}
/* 3 deep (or more) unordered lists use a square */
-ol ol ul, ol ul ul, ol menu ul, ol dir ul,
-ol ol menu, ol ul menu, ol menu menu, ol dir menu,
-ol ol dir, ol ul dir, ol menu dir, ol dir dir,
-ul ol ul, ul ul ul, ul menu ul, ul dir ul,
-ul ol menu, ul ul menu, ul menu menu, ul dir menu,
-ul ol dir, ul ul dir, ul menu dir, ul dir dir,
-menu ol ul, menu ul ul, menu menu ul, menu dir ul,
-menu ol menu, menu ul menu, menu menu menu, menu dir menu,
-menu ol dir, menu ul dir, menu menu dir, menu dir dir,
-dir ol ul, dir ul ul, dir menu ul, dir dir ul,
-dir ol menu, dir ul menu, dir menu menu, dir dir menu,
-dir ol dir, dir ul dir, dir menu dir, dir dir dir {
- list-style-type: square;
+ol ol ul,ol ul ul,ol menu ul,ol dir ul,ol ol menu,ol ul menu,ol menu menu,ol dir menu,ol ol dir,ol ul dir,ol menu dir,ol dir dir,ul ol ul,ul ul ul,ul menu ul,ul dir ul,ul ol menu,ul ul menu,ul menu menu,ul dir menu,ul ol dir,ul ul dir,ul menu dir,ul dir dir,menu ol ul,menu ul ul,menu menu ul,menu dir ul,menu ol menu,menu ul menu,menu menu menu,menu dir menu,menu ol dir,menu ul dir,menu menu dir,menu dir dir,dir ol ul,dir ul ul,dir menu ul,dir dir ul,dir ol menu,dir ul menu,dir menu menu,dir dir menu,dir ol dir,dir ul dir,dir menu dir,dir dir dir
+ {
+ list-style-type: square;
}
-
/* leafs */
-
-/* noshade and color attributes are handled completely by
+ /* noshade and color attributes are handled completely by
* the nsHTMLHRElement attribute mapping code
*/
hr {
- display: block;
- height: 2px;
- border: 1px inset;
- margin: 0.5em auto 0.5em auto;
- color: gray;
+ display: block;
+ height: 2px;
+ border: 1px inset;
+ margin: 0.5em auto 0.5em auto;
+ color: gray;
}
hr[size="1"] {
- border-style: solid none none none;
+ border-style: solid none none none;
}
-img[usemap], object[usemap] {
- color: blue;
+img[usemap],object[usemap] {
+ color: blue;
}
frameset {
- display: block ! important;
- position: static ! important;
- float: none ! important;
- border: none ! important;
+ display: block ! important;
+ position: static ! important;
+ float: none ! important;
+ border: none ! important;
}
frame {
- border: none ! important;
+ border: none ! important;
}
iframe {
- border: 2px inset;
+ border: 2px inset;
}
noframes {
- display: none;
+ display: none;
}
spacer {
- position: static ! important;
- float: none ! important;
+ position: static ! important;
+ float: none ! important;
}
/* hidden elements */
-area, base, basefont, head, meta, script, style, title,
-noembed, param, link {
- display: none;
+area,base,basefont,head,meta,script,style,title,noembed,param,link {
+ display: none;
}
/* Page breaks at body tags, to help out with LIT-generation */
body {
- page-break-before: always;
+ page-break-before: always;
}
/* Explicit line-breaks are blocks, sure... */
br {
- display: block;
+ display: block;
}
/* Images, embedded object, and SVG size defaults */
-img, object, svg|svg {
- width: auto;
- height: auto;
+img,object,svg |svg {
+ width: auto;
+ height: auto;
}
/* These are needed because ADE renders anchors the same as links */
+a {
+ text-decoration: inherit;
+ color: inherit;
+ cursor: inherit
+}
-a { text-decoration: inherit; color: inherit; cursor: inherit }
-a[href] { text-decoration: underline; color: blue; cursor: pointer }
+a[href] {
+ text-decoration: underline;
+ color: blue;
+ cursor: pointer
+}
\ No newline at end of file
diff --git a/setup/installer/windows/en-us.xml b/setup/installer/windows/en-us.xml
index 89cc25f0a2..ed181c524b 100644
--- a/setup/installer/windows/en-us.xml
+++ b/setup/installer/windows/en-us.xml
@@ -1,9 +1,16 @@
-
- If you are upgrading from a {app} version older than 0.6.17, please uninstall {app} first. Click Advanced to change installation settings.
- Computing space requirements, this may take upto five minutes...
- Computing space requirements, this may take upto five minutes...
- Computing space requirements, this may take upto five minutes...
- Please wait while the installer finishes determining your disk space requirements, this may take upto five minutes...
+
+ If you are upgrading from a {app} version older than
+ 0.6.17, please uninstall {app} first. Click Advanced to change
+ installation settings.
+ Computing space requirements, this may take upto five
+ minutes...
+ Computing space requirements, this may take upto five
+ minutes...
+ Computing space requirements, this may take upto five
+ minutes...
+ Please wait while the installer finishes determining
+ your disk space requirements, this may take upto five minutes...
diff --git a/setup/installer/windows/wix-template.xml b/setup/installer/windows/wix-template.xml
index 37dd8b25a8..1300eba956 100644
--- a/setup/installer/windows/wix-template.xml
+++ b/setup/installer/windows/wix-template.xml
@@ -1,164 +1,157 @@
-
+
-
-
-
+
-
-
-
-
-
-
-
+
-
-
-
-
-
-
-
-
-
-
+
-
- {app_components}
-
-
-
-
-
+
+
+
+
+
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
-
-
-
-
+
+ {app_components}
+
+
+
+
+
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
+
-
-
-
+
+
-
-
-
+
+
+
-
-
-
+
+
+
-
-
-
-
+
+
+
-
-
-
+
+
+
+
-
+
+
+
+
+
= 501)]]>
-
-
- NEWPRODUCTFOUND
-
-
-
- NEWPRODUCTFOUND
-
+
+
+ NEWPRODUCTFOUND
+
+
+
+ NEWPRODUCTFOUND
+
-
-
- WIXUI_EXITDIALOGOPTIONALCHECKBOX = 1 and NOT Installed
+
+
+ WIXUI_EXITDIALOGOPTIONALCHECKBOX = 1 and NOT Installed
-
+
-
-
-
-
+
+
+
+
-
-
-
-
-
+
+
+
+
+
-
-
-
+
+
+
-
+
diff --git a/src/calibre/ebooks/lrf/html/demo/demo.html b/src/calibre/ebooks/lrf/html/demo/demo.html
index 7d2f783ccc..37bed69b88 100644
--- a/src/calibre/ebooks/lrf/html/demo/demo.html
+++ b/src/calibre/ebooks/lrf/html/demo/demo.html
@@ -1,187 +1,279 @@
-
Demo of html2lrf
-
- This document contains a demonstration of the capabilities of html2lrf, the HTML to LRF converter from calibre. To obtain calibre visit http://calibre-ebook.com
-
This document contains a demonstration of the capabilities of html2lrf, the HTML to LRF
+converter from calibre. To obtain calibre visit
+http://calibre-ebook.com
- html2lrf supports both rowspan and colspan, but no other HTML table attributes, as it uses its own algorithm to determine optimal placement of cells.
-
-
- Note that if you have custom fonts on your reader, the table may not be properly aligned. Also html2lrf does not support nested tables.
-
-
- On the next page you'll see a real life example taken from a Project Gutenberg text with no modifications. It shows off html2lrf's handling of rowspan and colspan.
-
- Here I demonstrate the use of inline images in the midst of text. Here is a small image embedded in a sentence. Now we have a slightly larger image that is automatically put in its own block and finally we have a large image which is put on a page by itself. Try changing sizes from S to M to L and see how the images behave.
-
+
Nested lists
+
+
Item 1
+
+
Sub item 1
+
Sub item 2
+
+
Sub sub item 1. This is a multiline item with almost
+ correct blocking.
+
Sub sub item 2
+
+
+
Item 2
+
+
+
Definition Lists
+
+
Term 1
+
Definition of Term 1. A multi line definition showing correct
+ blocking.
This LRF file has been prepared by embedding Times New Roman and Andale Mono
- as the default serif and monospace fonts. This allows it to correctly display
- non English characters such as:
-
-
mouse in German: mūs
-
mouse in Russian: мышь
-
-
- Note that embedding fonts in LRF files slows down page turns slightly.
-
-
beautiful image based dropcaps to emphasize this
- paragraph. Image based dropcaps are specified by adding the class = 'libprs500_dropcaps'
- attribute to an <img> tag.
-
-
-
This is a plain text based dropcaps. It
- is not nearly as dramatic, but easier to code ;-)
-
-
-
-
This is an Example of small-caps.
- It can also be used to highlight the start of a paragraph very effectively.
-
-
-
A paragraph with a hanging indent. This is especially
- useful for highly structured text like verse, or dialogue.
html2lrf supports both rowspan and colspan, but no other HTML
+table attributes, as it uses its own algorithm to determine optimal
+placement of cells.
+
Note that if you have custom fonts on your reader, the table may
+not be properly aligned. Also html2lrf does not support nested tables.
+
On the next page you'll see a
+real life example taken from a Project Gutenberg text with no
+modifications. It shows off html2lrf's handling of rowspan and colspan.
+
- html2lrf follows links in HTML files that point to other files, recursively. Thus it can be used to convert a whole tree of HTML files into a single LRF file.
-
-
Here I demonstrate the use of inline images in the midst of text.
+Here is a small image embedded in a sentence.
+Now we have a slightly larger image that is automatically put in its own
+block and finally
+we have a large image which is put on a page by itself. Try changing
+sizes from S to M to L and see how the images behave.
This LRF file has been prepared by embedding Times New Roman and
+Andale Mono as the default serif and monospace fonts. This allows it to
+correctly display non English characters such as:
+
+
mouse in German: mūs
+
mouse in Russian: мышь
+
+
Note that embedding fonts in LRF files slows down page turns
+slightly.
+
beautiful image
+based dropcaps to emphasize this paragraph. Image based dropcaps are
+specified by adding the class = 'libprs500_dropcaps'
+attribute to an <img> tag.
+
+
+
This is a plain text based dropcaps. It is not
+nearly as dramatic, but easier to code ;-)
+
+
+
This is an Example
+of small-caps. It can also be used to highlight the start of a paragraph
+very effectively.
+
+
A paragraph with a hanging indent. This is
+especially useful for highly structured text like verse, or dialogue.
+
html2lrf follows links in
+HTML files that point to other files, recursively. Thus it can be used
+to convert a whole tree of HTML files into a single LRF file.
+
+
+
{% endblock %}
diff --git a/src/calibre/manual/xpath.xhtml b/src/calibre/manual/xpath.xhtml
index 7468e3d856..3a78863236 100644
--- a/src/calibre/manual/xpath.xhtml
+++ b/src/calibre/manual/xpath.xhtml
@@ -1,19 +1,19 @@
-
- A very short ebook
-
-
-
-
A very short ebook
-
Written by Kovid Goyal
-
-
A very short ebook to demonstrate the use of XPath.
-
+
+A very short ebook
+
+
+
+
A very short ebook
+
Written by Kovid Goyal
+
+
A very short ebook to demonstrate the use of XPath.
+
-
Chapter One
-
This is a truly fascinating chapter.
+
Chapter One
+
This is a truly fascinating chapter.
-
Chapter Two
-
A worthy continuation of a fine tradition.
-
+
Chapter Two
+
A worthy continuation of a fine tradition.
+
From ae8fcb1fd4579026c55f8ee6686fcc096b861b30 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Tue, 10 Aug 2010 13:07:29 +0200
Subject: [PATCH 012/163] Correct error with setup.py
---
setup.py | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 99 insertions(+)
create mode 100644 setup.py
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000000..d8bd0267ee
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,99 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import with_statement
+
+__license__ = 'GPL v3'
+__copyright__ = '2009, Kovid Goyal '
+__docformat__ = 'restructuredtext en'
+
+import sys, os, optparse
+
+sys.path.insert(0, os.path.abspath(os.path.dirname(__file__)))
+
+import setup.commands as commands
+from setup import prints, get_warnings
+
+def check_version_info():
+ vi = sys.version_info
+ if vi[0] == 2 and vi[1] > 5:
+ return None
+ return 'calibre requires python >= 2.6'
+
+def option_parser():
+ parser = optparse.OptionParser()
+ parser.add_option('-c', '--clean', default=False, action='store_true',
+ help=('Instead of running the command delete all files generated '
+ 'by the command'))
+ parser.add_option('--clean-backups', default=False, action='store_true',
+ help='Delete all backup files from the source tree')
+ parser.add_option('--clean-all', default=False, action='store_true',
+ help='Delete all machine generated files from the source tree')
+ return parser
+
+def clean_backups():
+ for root, _, files in os.walk('.'):
+ for name in files:
+ for t in ('.pyc', '.pyo', '~', '.swp', '.swo'):
+ if name.endswith(t):
+ os.remove(os.path.join(root, name))
+
+
+def main(args=sys.argv):
+ if len(args) == 1 or args[1] in ('-h', '--help'):
+ print 'Usage: python', args[0], 'command', '[options]'
+ print '\nWhere command is one of:'
+ print
+ for x in sorted(commands.__all__):
+ print '%-20s -'%x,
+ c = getattr(commands, x)
+ desc = getattr(c, 'short_description', c.description)
+ print desc
+
+ print '\nTo get help on a particular command, run:'
+ print '\tpython', args[0], 'command -h'
+ return 1
+
+ command = args[1]
+ if command not in commands.__all__:
+ print command, 'is not a recognized command.'
+ print 'Valid commands:', ', '.join(commands.__all__)
+ return 1
+
+ command = getattr(commands, command)
+
+ parser = option_parser()
+ command.add_all_options(parser)
+ parser.set_usage('Usage: python setup.py %s [options]\n\n'%args[1]+\
+ command.description)
+
+ opts, args = parser.parse_args(args)
+
+ if opts.clean_backups:
+ clean_backups()
+
+ if opts.clean:
+ prints('Cleaning', args[1])
+ command.clean()
+ return 0
+
+ if opts.clean_all:
+ for cmd in commands.__all__:
+ prints('Cleaning', cmd)
+ getattr(commands, cmd).clean()
+ return 0
+
+ command.run_all(opts)
+
+ warnings = get_warnings()
+ if warnings:
+ print
+ prints('There were', len(warnings), 'warning(s):')
+ print
+ for args, kwargs in warnings:
+ prints('*', *args, **kwargs)
+ print
+
+ return 0
+
+if __name__ == '__main__':
+ sys.exit(main())
From 7c70914ad30fc358bfcd7c099494b0a43682ba27 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Thu, 12 Aug 2010 16:25:09 +0200
Subject: [PATCH 013/163] Global overhaul of rtf2xml: RTF fixes (3): removal of
preprocessing, first draft of tokenize finished, introduction of \ud:\upr for
unicode
---
src/calibre/ebooks/rtf2xml/tokenize.py | 104 +++++++++++++++----------
1 file changed, 64 insertions(+), 40 deletions(-)
diff --git a/src/calibre/ebooks/rtf2xml/tokenize.py b/src/calibre/ebooks/rtf2xml/tokenize.py
index 3aa2079fb3..e594fed80d 100755
--- a/src/calibre/ebooks/rtf2xml/tokenize.py
+++ b/src/calibre/ebooks/rtf2xml/tokenize.py
@@ -26,7 +26,7 @@ class Tokenize:
in_file,
bug_handler,
copy = None,
- #run_level = 1,
+ run_level = 1,
):
self.__file = in_file
self.__bug_handler = bug_handler
@@ -37,17 +37,22 @@ class Tokenize:
self.__uc_char = 0
self.__uc_bin = False
self.__uc_value = [1]
-
- def __from_ms_to_utf8(self,match_obj):
- uni_char = int(match_obj.group(1))
- if uni_char < 0:
- uni_char += 65536
- return '' + str('%X' % uni_char) + ';'
-
+
def __reini_utf8_counters(self):
self.__uc_char = 0
self.__uc_bin = False
+ def __remove_uc_chars(self, startchar, token):
+ for i in xrange(startchar, len(token)):
+ if token[i] == " ":
+ continue
+ elif self.__uc_char:
+ self.__uc_char -= 1
+ else:
+ return token[i:]
+ #token contained only spaces and chars to skip
+ return ''
+
def __unicode_process(self, token):
#change scope in
if token == '\{':
@@ -55,9 +60,9 @@ class Tokenize:
#basic error handling
self.__reini_utf8_counters()
return token
- #change scope out: evaluate dict and rebuild
+ #change scope out
elif token == '\}':
- #self.__uc_value.pop()
+ self.__uc_value.pop()
self.__reini_utf8_counters()
return token
#add a uc control
@@ -65,58 +70,65 @@ class Tokenize:
self.__uc_value[-1] = int(token[3:])
self.__reini_utf8_counters()
return token
- #handle uc skippable char
+ #bin data to skip
+ elif self.__uc_bin:
+ self.__uc_bin = False
+ return ''
+ #uc char to remove
elif self.__uc_char:
- #if token[:1] == "\" and token[:1] == "\"
- pass
+ #handle \bin tag in case of uc char to skip
+ if token[:4] == '\bin':
+ self.__uc_char -=1
+ self.__uc_bin = True
+ return ''
+ elif token[:1] == "\\" :
+ self.__uc_char -=1
+ return ''
+ else:
+ return self.__remove_uc_chars(0, token)
#go for real \u token
match_obj = self.__utf_exp.match(token)
if match_obj is not None:
+ self.__reini_utf8_counters()
#get value and handle negative case
uni_char = int(match_obj.group(1))
uni_len = len(match_obj.group(1)) + 2
if uni_char < 0:
uni_char += 65536
uni_char = unichr(uni_char).encode('ascii', 'xmlcharrefreplace')
- #if not uc0
- if self.__uc_value[-1]:
- self.__uc_char = self.__uc_value[-1]
+ self.__uc_char = self.__uc_value[-1]
 #there is only a unicode char
if len(token)<= uni_len:
return uni_char
 #a unicode char and something else
 #must be after as it is split on \
- elif not self.__uc_value[-1]:
- print('not only token uc0 token: ' + uni_char + token[uni_len:])
+ #necessary? maybe for \bin?
+ elif not self.__uc_char:
return uni_char + token[uni_len:]
#if not uc0 and chars
else:
- for i in xrange(uni_len, len(token)):
- if token[i] == " ":
- continue
- elif self.__uc_char > 0:
- self.__uc_char -= 1
- else:
- return uni_char + token[i:]
- #print('uc: ' + str(self.__uc_value) + 'uni: ' + str(uni_char) + 'token: ' + token)
+ return uni_char + self.__remove_uc_chars(uni_len, token)
#default
return token
-
+
def __sub_reg_split(self,input_file):
input_file = self.__replace_spchar.mreplace(input_file)
- #input_file = re.sub(self.__utf_exp, self.__from_ms_to_utf8, input_file)
- # line = re.sub( self.__neg_utf_exp, self.__neg_unicode_func, line)
- # this is for older RTF
- #line = re.sub(self.__par_exp, '\\par ', line)
- input_file = re.sub(self.__ms_hex_exp, "\\mshex0\g<1> ", input_file)
+ input_file = self.__ms_hex_exp.sub("\\mshex0\g<1> ", input_file)
+ input_file = self.__utf_ud.sub("\\{\\uc0 \g<1>\\}", input_file)
+ #remove \n in bin data
+ input_file = self.__bin_exp.sub(lambda x: \
+ x.group().replace('\n', '') +'\n', input_file)
#split
tokens = re.split(self.__splitexp, input_file)
#remove empty tokens and \n
return filter(lambda x: len(x) > 0 and x != '\n', tokens)
+ #input_file = re.sub(self.__utf_exp, self.__from_ms_to_utf8, input_file)
+ # line = re.sub( self.__neg_utf_exp, self.__neg_unicode_func, line)
+ # this is for older RTF
+ #line = re.sub(self.__par_exp, '\\par ', line)
#return filter(lambda x: len(x) > 0, \
#(self.__remove_line.sub('', x) for x in tokens))
-
-
+
def __compile_expressions(self):
SIMPLE_RPL = {
"\\\\": "\\backslash ",
@@ -145,18 +157,25 @@ class Tokenize:
r'\\$': '\\par ',
}
self.__replace_spchar = MReplace(SIMPLE_RPL)
+ #add ;? in case of char following \u
self.__ms_hex_exp = re.compile(r"\\\'([0-9a-fA-F]{2})") #r"\\\'(..)"
- self.__utf_exp = re.compile(r"\\u(-?\d{3,6}) {0,1}") #modify this
- #self.__utf_exp = re.compile(r"^\\u(-?\d{3,6})")
+ self.__utf_exp = re.compile(r"\\u(-?\d{3,6}) ?")
+ self.__bin_exp = re.compile(r"(?:\\bin(-?\d{0,10})[\n ]+)[01\n]+")
+ #manage upr/ud situations
+ self.__utf_ud = re.compile(r"\\{[\n ]?\\upr[\n ]?(?:\\{.*?\\})[\n ]?" + \
+ r"\\{[\n ]?\\*[\n ]?\\ud[\n ]?(\\{.*?\\})[\n ]?\\}[\n ]?\\}")
#add \n in split for whole file reading
- #self.__splitexp = re.compile(r"(\\[\\{}]|{|}|\n|\\[^\s\\{}&]+(?:\s)?)")
 #why keep backslash whereas \ is replaced before?
+ #remove \n from endline char
self.__splitexp = re.compile(r"(\\[{}]|\n|\\[^\s\\{}&]+(?:[ \t\r\f\v])?)")
+ #self.__bin_exp = re.compile(r"\\bin(-?\d{1,8}) {0,1}")
+ #self.__utf_exp = re.compile(r"^\\u(-?\d{3,6})")
+ #self.__splitexp = re.compile(r"(\\[\\{}]|{|}|\n|\\[^\s\\{}&]+(?:\s)?)")
#self.__par_exp = re.compile(r'\\$')
#self.__remove_line = re.compile(r'\n+')
#self.__mixed_exp = re.compile(r"(\\[a-zA-Z]+\d+)(\D+)")
##self.num_exp = re.compile(r"(\*|:|[a-zA-Z]+)(.*)")
-
+
def tokenize(self):
"""Main class for handling other methods. Reads the file \
, uses method self.sub_reg to make basic substitutions,\
@@ -170,9 +189,9 @@ class Tokenize:
#remove '' and \n in the process
tokens = self.__sub_reg_split(input_file)
#correct unicode
- #tokens = map(self.__unicode_process, tokens)
+ tokens = map(self.__unicode_process, tokens)
#remove empty items created by removing \uc
- #tokens = filter(lambda x: len(x) > 0, tokens)
+ tokens = filter(lambda x: len(x) > 0, tokens)
#write
write_obj = open(self.__write_to, 'wb')
@@ -241,4 +260,9 @@ class Tokenize:
neg_uni_char = int(match_obj.group(1)) * -1
# sys.stderr.write(str( neg_uni_char))
uni_char = neg_uni_char + 65536
+ return '&#x' + str('%X' % uni_char) + ';'''
+ '''def __from_ms_to_utf8(self,match_obj):
+ uni_char = int(match_obj.group(1))
+ if uni_char < 0:
+ uni_char += 65536
 return '&#x' + str('%X' % uni_char) + ';'''
\ No newline at end of file
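For reference, the \ucN rule that __remove_uc_chars above implements, as a minimal standalone sketch (the function name and the example string are illustrative, not part of the patch): after a \uN control word, the next N non-space characters are ASCII fallback text and must be dropped.

    def drop_uc_fallback(tail, to_skip):
        #tail: text following the \uN control word; to_skip: current \ucN value
        for i, ch in enumerate(tail):
            if ch == ' ':
                #spaces do not count towards the fallback total
                continue
            if to_skip:
                to_skip -= 1
            else:
                return tail[i:]
        #nothing but spaces and fallback characters were left
        return ''

    #e.g. drop_uc_fallback('ab cd', 1) returns 'b cd'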
From b9ed0c6b3d579f1dc2e2c5b94df5e2e8f9ec75d4 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Thu, 12 Aug 2010 17:16:37 +0200
Subject: [PATCH 014/163] Global overhaul of rtf2xml: RTFfixes (4) -> minor
 corrections to line endings and check brackets, move the encoding check first
 to eliminate non-ASCII RTF
---
src/calibre/ebooks/rtf2xml/ParseRtf.py | 40 ++++++++++----------
src/calibre/ebooks/rtf2xml/check_brackets.py | 1 -
src/calibre/ebooks/rtf2xml/check_encoding.py | 10 +++--
src/calibre/ebooks/rtf2xml/line_endings.py | 11 ++++--
4 files changed, 33 insertions(+), 29 deletions(-)
diff --git a/src/calibre/ebooks/rtf2xml/ParseRtf.py b/src/calibre/ebooks/rtf2xml/ParseRtf.py
index 3a804792c5..76bdcc08af 100755
--- a/src/calibre/ebooks/rtf2xml/ParseRtf.py
+++ b/src/calibre/ebooks/rtf2xml/ParseRtf.py
@@ -133,7 +133,6 @@ class ParseRtf:
self.__temp_dir = out_dir
self.__dtd_path = dtd
self.__check_file(in_file,"file_to_parse")
- self.__check_ascii(in_file)
self.__char_data = char_data
self.__debug_dir = deb_dir
self.__check_dir(self.__temp_dir)
@@ -152,6 +151,7 @@ class ParseRtf:
self.__group_borders = group_borders
self.__empty_paragraphs = empty_paragraphs
self.__no_dtd = no_dtd
+
def __check_file(self, the_file, type):
"""Check to see if files exist"""
if hasattr(the_file, 'read'): return
@@ -164,6 +164,7 @@ class ParseRtf:
else:
msg = "\nThe file '%s' cannot be found" % the_file
raise RtfInvalidCodeException, msg
+
def __check_dir(self, the_dir):
"""Check to see if directory exists"""
if not the_dir :
@@ -173,15 +174,7 @@ class ParseRtf:
msg = "\n%s is not a directory" % the_dir
raise RtfInvalidCodeException, msg
return 1
- def __check_ascii(self, the_file):
- """Check to see if the file is correct ascii"""
- try:
- test = codecs.open(the_file, 'r', 'ascii', 'strict')
- test.close()
- except UnicodeError:
- msg = "\n%s is not a correct ascii file" % the_file
- raise RtfInvalidCodeException, msg
- return 1
+
def parse_rtf(self):
"""
Parse the file by calling on other classes.
@@ -192,6 +185,18 @@ class ParseRtf:
depending on the value of 'output' when the instance was created.
"""
self.__temp_file = self.__make_temp_file(self.__file)
+ #Check to see if the file is correct ascii first
+ check_encoding_obj = check_encoding.CheckEncoding(
+ bug_handler = RtfInvalidCodeException,
+ )
+ if check_encoding_obj.check_encoding(self.__file):
+ try:
+ os.remove(self.__temp_file)
+ except OSError:
+ pass
+ sys.stderr.write('File "%s" does not appear to be ascii.\n' \
+ % self.__file if isinstance(self.__file, str) else self.__file.encode('utf-8'))
+ raise InvalidRtfException
# if the self.__deb_dir is true, then create a copy object,
# set the directory to write to, remove files, and copy
# the new temporary file to this directory
@@ -214,7 +219,7 @@ class ParseRtf:
in_file = self.__temp_file,
bug_handler = RtfInvalidCodeException,
copy = self.__copy,
- #run_level = self.__run_level,
+ run_level = self.__run_level,
replace_illegals = self.__replace_illegals,
)
line_obj.fix_endings()
@@ -223,8 +228,8 @@ class ParseRtf:
tokenize_obj = tokenize.Tokenize(
bug_handler = RtfInvalidCodeException,
in_file = self.__temp_file,
- copy = self.__copy,)
- #run_level = self.__run_level,)
+ copy = self.__copy,
+ run_level = self.__run_level)
tokenize_obj.tokenize()
process_tokens_obj = process_tokens.ProcessTokens(
in_file = self.__temp_file,
@@ -240,10 +245,6 @@ class ParseRtf:
os.remove(self.__temp_file)
except OSError:
pass
- check_encoding_obj = check_encoding.CheckEncoding(
- bug_handler = RtfInvalidCodeException,
- )
- check_encoding_obj.check_encoding(self.__file)
sys.stderr.write('File "%s" does not appear to be RTF.\n' % self.__file if isinstance(self.__file, str) else self.__file.encode('utf-8'))
raise InvalidRtfException, msg
delete_info_obj = delete_info.DeleteInfo(
@@ -548,8 +549,7 @@ class ParseRtf:
"""Make a temporary file to parse"""
write_file="rtf_write_file"
read_obj = file if hasattr(file, 'read') else open(file,'r')
- write_obj = open(write_file, 'w')
- for line in read_obj:
- write_obj.write(line)
+ write_obj = open(write_file, 'wb')
+ write_obj.write(read_obj.read())
write_obj.close()
return write_file
\ No newline at end of file
diff --git a/src/calibre/ebooks/rtf2xml/check_brackets.py b/src/calibre/ebooks/rtf2xml/check_brackets.py
index 53f9363d63..8917780746 100755
--- a/src/calibre/ebooks/rtf2xml/check_brackets.py
+++ b/src/calibre/ebooks/rtf2xml/check_brackets.py
@@ -30,7 +30,6 @@ class CheckBrackets:
self.__bracket_count += 1
def close_brack(self, line):
num = line[-5:-1]
- ##self.__open_bracket_num.append(num)
try:
last_num = self.__open_bracket_num.pop()
except:
diff --git a/src/calibre/ebooks/rtf2xml/check_encoding.py b/src/calibre/ebooks/rtf2xml/check_encoding.py
index f6810e4909..1f8645bb0c 100755
--- a/src/calibre/ebooks/rtf2xml/check_encoding.py
+++ b/src/calibre/ebooks/rtf2xml/check_encoding.py
@@ -14,12 +14,11 @@ class CheckEncoding:
sys.stderr.write(str(msg) + '\n')
def check_encoding(self, path, encoding='us-ascii'):
read_obj = open(path, 'r')
- line_to_read = 1
+ input_file = read_obj.read()
+ read_obj.close()
line_num = 0
- while line_to_read:
+ for line in input_file:
line_num += 1
- line_to_read = read_obj.readline()
- line = line_to_read
try:
line.decode(encoding)
except UnicodeError:
@@ -27,6 +26,9 @@ class CheckEncoding:
self.__get_position_error(line, encoding, line_num)
else:
sys.stderr.write('line: %d has bad encoding\n'%line_num)
+ return True
+ return False
+
if __name__ == '__main__':
check_encoding_obj = CheckEncoding()
check_encoding_obj.check_encoding(sys.argv[1])
diff --git a/src/calibre/ebooks/rtf2xml/line_endings.py b/src/calibre/ebooks/rtf2xml/line_endings.py
index e77e5d747c..86546967a7 100755
--- a/src/calibre/ebooks/rtf2xml/line_endings.py
+++ b/src/calibre/ebooks/rtf2xml/line_endings.py
@@ -23,7 +23,7 @@ class FixLineEndings:
bug_handler,
in_file = None,
copy = None,
- #run_level = 1, calibre why keep it?
+ run_level = 1,
replace_illegals = 1,
):
self.__file = in_file
@@ -32,8 +32,11 @@ class FixLineEndings:
self.__write_to = tempfile.mktemp()
self.__replace_illegals = replace_illegals
def fix_endings(self):
- illegal_regx = re.compile('\x00|\x01|\x02|\x03|\x04|\x05|\x06|\x07|\x08|\x0B|\x0E|\x0F|\x10|\x11|\x12|\x13')
- # always check since I have to get rid of illegal characters
+ #remove invalid ASCII control chars: 0x00-0x07, 0x0B, 0x0E-0x0F and 0x10-0x18
+ #always check since I have to get rid of illegal characters
+ chars = list(range(8)) + [0x0B, 0x0E, 0x0F] + list(range(0x10, 0x19))
+ illegal_regx = re.compile(u'|'.join(map(unichr, chars)))
+ #illegal_regx = re.compile('\x00|\x01|\x02|\x03|\x04|\x05|\x06|\x07|\x08|\x0B|\x0E|\x0F|\x10|\x11|\x12|\x13')
#read
read_obj = open(self.__file, 'r')
input_file = read_obj.read()
@@ -42,7 +45,7 @@ class FixLineEndings:
input_file = input_file.replace ('\r\n', '\n')
input_file = input_file.replace ('\r', '\n')
if self.__replace_illegals:
- input_file = re.sub(illegal_regx, '', input_file)
+ input_file = illegal_regx.sub('', input_file)
#write
write_obj = open(self.__write_to, 'wb')
write_obj.write(input_file)
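As a standalone illustration of the control-character stripping added above, the same regex can be assembled and exercised on its own (Python 2, like the rest of rtf2xml; the sample string is made up):

    import re
    chars = list(range(8)) + [0x0B, 0x0E, 0x0F] + list(range(0x10, 0x19))
    illegal_regx = re.compile(u'|'.join(map(unichr, chars)))
    print illegal_regx.sub('', 'foo\x01bar\x13baz')
    #prints: foobarbaz

A character class built from the same codepoints would match the identical set; the alternation form simply mirrors the patch.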
From a9fd0ad4ba9acdcc07d5bfcae503c378c25a7303 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Mon, 16 Aug 2010 10:08:59 +0200
Subject: [PATCH 015/163] Global overhaul of rtf2xml: RTFfixes (5) -> minor
 corrections and a regression fix
---
src/calibre/ebooks/rtf/input.py | 2 +-
src/calibre/ebooks/rtf2xml/ParseRtf.py | 14 +-
src/calibre/ebooks/rtf2xml/check_encoding.py | 11 +-
src/calibre/ebooks/rtf2xml/copy.py | 14 +-
src/calibre/ebooks/rtf2xml/process_tokens.py | 163 ++++++++++---------
5 files changed, 104 insertions(+), 100 deletions(-)
diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py
index 824da7d6f1..f4fbdf411c 100644
--- a/src/calibre/ebooks/rtf/input.py
+++ b/src/calibre/ebooks/rtf/input.py
@@ -50,7 +50,7 @@ class RTFInput(InputFormatPlugin):
parser = ParseRtf(
in_file = stream,
out_file = ofile,
- deb_dir = 'I:\\Calibre\\rtfdebug',
+ deb_dir = 'D:\\calibre\\pierre\\debug\\rtfdebug',
# Convert symbol fonts to unicode equivalents. Default
# is 1
convert_symbol = 1,
diff --git a/src/calibre/ebooks/rtf2xml/ParseRtf.py b/src/calibre/ebooks/rtf2xml/ParseRtf.py
index 76bdcc08af..1230ae150e 100755
--- a/src/calibre/ebooks/rtf2xml/ParseRtf.py
+++ b/src/calibre/ebooks/rtf2xml/ParseRtf.py
@@ -120,8 +120,6 @@ class ParseRtf:
 script tries to output to the directory where the script is executed.)
'deb_dir' --debug directory. If a debug_dir is provided, the script
will copy each run through as a file to examine in the debug_dir
- 'perl_script'--use perl to make tokens. This runs just a bit faster.
- (I will probably phase this out.)
'check_brackets' -- make sure the brackets match up after each run
through a file. Only for debugging.
Returns: Nothing
@@ -142,7 +140,7 @@ class ParseRtf:
self.__convert_wingdings = convert_wingdings
self.__convert_zapf = convert_zapf
self.__run_level = run_level
- #self.__exit_level = 0
+ #self.__exit_level = 0 See what this means and if it is consistent
self.__indent = indent
self.__replace_illegals = replace_illegals
self.__form_lists = form_lists
@@ -184,19 +182,15 @@ class ParseRtf:
A parsed file in XML, either to standard output or to a file,
depending on the value of 'output' when the instance was created.
"""
- self.__temp_file = self.__make_temp_file(self.__file)
#Check to see if the file is correct ascii first
check_encoding_obj = check_encoding.CheckEncoding(
bug_handler = RtfInvalidCodeException,
)
if check_encoding_obj.check_encoding(self.__file):
- try:
- os.remove(self.__temp_file)
- except OSError:
- pass
sys.stderr.write('File "%s" does not appear to be ascii.\n' \
% self.__file if isinstance(self.__file, str) else self.__file.encode('utf-8'))
raise InvalidRtfException
+ self.__temp_file = self.__make_temp_file(self.__file)
# if the self.__deb_dir is true, then create a copy object,
# set the directory to write to, remove files, and copy
# the new temporary file to this directory
@@ -223,7 +217,6 @@ class ParseRtf:
replace_illegals = self.__replace_illegals,
)
line_obj.fix_endings()
- #return_value = line_obj.fix_endings() #calibre: no return in this function, why keep it?
#self.__return_code(return_value)
tokenize_obj = tokenize.Tokenize(
bug_handler = RtfInvalidCodeException,
@@ -550,6 +543,7 @@ class ParseRtf:
write_file="rtf_write_file"
read_obj = file if hasattr(file, 'read') else open(file,'r')
write_obj = open(write_file, 'wb')
- write_obj.write(read_obj.read())
+ for line in read_obj:
+ write_obj.write(line)
write_obj.close()
return write_file
\ No newline at end of file
diff --git a/src/calibre/ebooks/rtf2xml/check_encoding.py b/src/calibre/ebooks/rtf2xml/check_encoding.py
index 1f8645bb0c..444fd373e4 100755
--- a/src/calibre/ebooks/rtf2xml/check_encoding.py
+++ b/src/calibre/ebooks/rtf2xml/check_encoding.py
@@ -14,10 +14,10 @@ class CheckEncoding:
sys.stderr.write(str(msg) + '\n')
def check_encoding(self, path, encoding='us-ascii'):
read_obj = open(path, 'r')
- input_file = read_obj.read()
- read_obj.close()
+
line_num = 0
- for line in input_file:
+ error_found = False
+ for line in read_obj:
line_num += 1
try:
line.decode(encoding)
@@ -26,8 +26,9 @@ class CheckEncoding:
self.__get_position_error(line, encoding, line_num)
else:
sys.stderr.write('line: %d has bad encoding\n'%line_num)
- return True
- return False
+ error_found = True
+ read_obj.close()
+ return error_found
if __name__ == '__main__':
check_encoding_obj = CheckEncoding()
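The per-line validation used above, reduced to a self-contained helper for reference (the function name is hypothetical; 'us-ascii' is the default used by check_encoding.py):

    def find_bad_lines(path, encoding='us-ascii'):
        bad = []
        read_obj = open(path, 'r')
        for line_num, line in enumerate(read_obj, 1):
            try:
                line.decode(encoding)
            except UnicodeError:
                bad.append(line_num)
        read_obj.close()
        return bad
    #an empty list means every line decodes cleanly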
diff --git a/src/calibre/ebooks/rtf2xml/copy.py b/src/calibre/ebooks/rtf2xml/copy.py
index ff029c1841..1b620b9fbf 100755
--- a/src/calibre/ebooks/rtf2xml/copy.py
+++ b/src/calibre/ebooks/rtf2xml/copy.py
@@ -23,6 +23,7 @@ class Copy:
def __init__(self, bug_handler, file = None, deb_dir = None, ):
self.__file = file
self.__bug_handler = bug_handler
+
def set_dir(self, deb_dir):
"""Set the temporary directory to write files to"""
if deb_dir is None:
@@ -33,19 +34,11 @@ class Copy:
message = "%(deb_dir)s is not a directory" % vars()
raise self.__bug_handler , message
Copy.__dir = deb_dir
+
def remove_files(self ):
"""Remove files from directory"""
self.__remove_the_files(Copy.__dir)
- """
- list_of_files = os.listdir(Copy.__dir)
- list_of_files = os.listdir(the_dir)
- for file in list_of_files:
- rem_file = os.path.join(Copy.__dir,file)
- if os.path.isdir(rem_file):
- self.remove_files(rem_file)
- else:
- os.remove(rem_file)
- """
+
def __remove_the_files(self, the_dir):
"""Remove files from directory"""
list_of_files = os.listdir(the_dir)
@@ -58,6 +51,7 @@ class Copy:
os.remove(rem_file)
except OSError:
pass
+
def copy_file(self, file, new_file):
"""
Copy the file to a new name
diff --git a/src/calibre/ebooks/rtf2xml/process_tokens.py b/src/calibre/ebooks/rtf2xml/process_tokens.py
index 072d8b02e4..2c5c0c7df0 100755
--- a/src/calibre/ebooks/rtf2xml/process_tokens.py
+++ b/src/calibre/ebooks/rtf2xml/process_tokens.py
@@ -735,8 +735,94 @@ class ProcessTokens:
pre, token, action = self.dict_token.get(token, (None, None, None))
if action:
return action(pre, token, num)
- # unused function
- def initiate_token_actions(self):
+
+ def __check_brackets(self, in_file):
+ self.__check_brack_obj = check_brackets.CheckBrackets\
+ (file = in_file)
+ good_br = self.__check_brack_obj.check_brackets()[0]
+ if not good_br:
+ return 1
+ def process_tokens(self):
+ """Main method for handling other methods. """
+
+ read_obj= open(self.__file, 'r')
+ write_obj = open(self.__write_to, 'wb')
+
+ '''first_token = 0
+ second_token = 0'''
+ line_count = 0
+
+ for line in read_obj:
+ token = line.replace("\n","")
+ #calibre: not necessary normally, fixed in tokenize
+ '''if not token:
+ continue'''
+ line_count += 1
+ #calibre not necessary, encoding checked before
+ """try:
+ token.decode('us-ascii')
+ except UnicodeError, msg:
+ msg = str(msg)
+ msg += 'Invalid RTF: File not ascii encoded.\n'
+ raise self.__exception_handler, msg"""
+ #calibre: with tokenize, should be first and second line, why bother?
+ """if not first_token:
+ if token != '\\{':
+ msg = 'Invalid RTF: document doesn\'t start with {\n'
+ raise self.__exception_handler, msg
+ first_token = 1
+ elif line_count == and not second_token:
+ if token[0:4] != '\\rtf':
+ msg ='Invalid RTF: document doesn\'t start with \\rtf \n'
+ raise self.__exception_handler, msg
+ second_token = 1"""
+ if line_count == 1 and token != '\\{':
+ msg = 'Invalid RTF: document doesn\'t start with {\n'
+ raise self.__exception_handler, msg
+ elif line_count == 2 and token[0:4] != '\\rtf':
+ msg ='Invalid RTF: document doesn\'t start with \\rtf \n'
+ raise self.__exception_handler, msg
+
+ ##token = self.evaluate_token(token)
+ the_index = token.find('\\ ')
+ if token is not None and the_index > -1:
+ msg ='Invalid RTF: token "\\ " not valid.\n'
+ raise self.__exception_handler, msg
+ elif token[:1] == "\\":
+ line = self.process_cw(token)
+ if line is not None:
+ write_obj.write(line)
+ else:
+ fields = re.split(self.__utf_exp, token)
+ for field in fields:
+ if not field:
+ continue
+ if field[0:1] == '&':
+ write_obj.write('tx -1:
- msg ='Invalid RTF: token "\\ " not valid. \n'
- raise self.__exception_handler, msg
- elif token[0:1] == "\\":
- line = self.process_cw(token)
- if line != None:
- write_obj.write(line)
- else:
- fields = re.split(self.__utf_exp, token)
- for field in fields:
- if not field:
- continue
- if field[0:1] == '&':
- write_obj.write('tx
Date: Sun, 26 Sep 2010 17:49:59 +0200
Subject: [PATCH 016/163] Modify the debug directory path
---
src/calibre/ebooks/rtf/input.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py
index 1de064df5c..4c7dfd9260 100644
--- a/src/calibre/ebooks/rtf/input.py
+++ b/src/calibre/ebooks/rtf/input.py
@@ -51,7 +51,7 @@ class RTFInput(InputFormatPlugin):
parser = ParseRtf(
in_file = stream,
out_file = ofile,
- deb_dir = 'D:\\calibre\\pierre\\debug\\rtfdebug',
+ deb_dir = 'H:\\Temp\\Calibre\\rtfdebug',
# Convert symbol fonts to unicode equivalents. Default
# is 1
convert_symbol = 1,
From 9590ba62348930d93c496e507549a8c97d43ef16 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Mon, 11 Oct 2010 00:35:07 +0200
Subject: [PATCH 017/163] isbndb.py minor changes
---
src/calibre/ebooks/metadata/isbndb.py | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)
diff --git a/src/calibre/ebooks/metadata/isbndb.py b/src/calibre/ebooks/metadata/isbndb.py
index 221cfc13d1..2bbffc2c8b 100644
--- a/src/calibre/ebooks/metadata/isbndb.py
+++ b/src/calibre/ebooks/metadata/isbndb.py
@@ -74,14 +74,14 @@ class ISBNDBMetadata(Metadata):
if authors:
self.authors = authors
try:
- self.author_sort = self.tostring(book.find('authors').find('person'))
+ self.author_sort = tostring(book.find('authors').find('person'))
if self.authors and self.author_sort == self.authors[0]:
self.author_sort = None
except:
pass
- self.publisher = self.tostring(book.find('publishertext'))
+ self.publisher = tostring(book.find('publishertext'))
- summ = self.tostring(book.find('summary'))
+ summ = tostring(book.find('summary'))
if summ:
self.comments = 'SUMMARY:\n'+summ
@@ -141,7 +141,7 @@ def create_books(opts, args, timeout=5.):
print ('ISBNDB query: '+url)
tans = [ISBNDBMetadata(book) for book in fetch_metadata(url, timeout=timeout)]
- ans = []
+ '''ans = []
for x in tans:
add = True
for y in ans:
@@ -149,7 +149,9 @@ def create_books(opts, args, timeout=5.):
add = False
if add:
ans.append(x)
- return ans
+ return ans'''
+ #remove duplicate ISBNs
+ return dict((book.isbn, book) for book in tans).values()
def main(args=sys.argv):
parser = option_parser()
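The duplicate-removal idiom introduced above, shown in isolation with made-up records (Book here is a stand-in class, not the calibre one):

    class Book(object):
        def __init__(self, isbn, title):
            self.isbn, self.title = isbn, title

    tans = [Book('111', 'A'), Book('222', 'B'), Book('111', 'A again')]
    #keying a dict by ISBN keeps one entry per ISBN (the last one seen wins)
    unique = dict((b.isbn, b) for b in tans).values()
    print sorted(b.isbn for b in unique)
    #prints: ['111', '222']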
From 19288b38acd138e4e3702845e1b1b61ef82c0d2d Mon Sep 17 00:00:00 2001
From: Sengian
Date: Mon, 11 Oct 2010 00:36:26 +0200
Subject: [PATCH 018/163] Merge from trunk
---
resources/catalog/stylesheet.css | 142 ++++++++++++++--------------
resources/content_server/index.html | 6 +-
resources/templates/fb2.xsl | 97 ++++++++++---------
resources/templates/html.css | 35 +++++--
4 files changed, 154 insertions(+), 126 deletions(-)
diff --git a/resources/catalog/stylesheet.css b/resources/catalog/stylesheet.css
index 4f9ca9ac41..ea01aeb43b 100644
--- a/resources/catalog/stylesheet.css
+++ b/resources/catalog/stylesheet.css
@@ -1,102 +1,104 @@
-body { background-color: white; }
+body {
+ background-color: white;
+}
-p.title {
- margin-top:0em;
- margin-bottom:1em;
- text-align:center;
- font-style:italic;
- font-size:xx-large;
- border-bottom: solid black 4px;
- }
+p.title {
+ margin-top: 0em;
+ margin-bottom: 1em;
+ text-align: center;
+ font-style: italic;
+ font-size: xx-large;
+ border-bottom: solid black 4px;
+}
p.author {
- margin-top:0em;
- margin-bottom:0em;
+ margin-top: 0em;
+ margin-bottom: 0em;
text-align: left;
text-indent: 1em;
- font-size:large;
- }
+ font-size: large;
+}
p.tags {
- margin-top:0em;
- margin-bottom:0em;
+ margin-top: 0em;
+ margin-bottom: 0em;
text-align: left;
text-indent: 1em;
- font-size:small;
- }
+ font-size: small;
+}
p.description {
- text-align:left;
- font-style:normal;
+ text-align: left;
+ font-style: normal;
margin-top: 0em;
- }
+}
p.date_index {
- font-size:x-large;
- text-align:center;
- font-weight:bold;
- margin-top:1em;
- margin-bottom:0px;
- }
+ font-size: x-large;
+ text-align: center;
+ font-weight: bold;
+ margin-top: 1em;
+ margin-bottom: 0px;
+}
p.letter_index {
- font-size:x-large;
- text-align:center;
- font-weight:bold;
- margin-top:1em;
- margin-bottom:0px;
- }
+ font-size: x-large;
+ text-align: center;
+ font-weight: bold;
+ margin-top: 1em;
+ margin-bottom: 0px;
+}
p.author_index {
- font-size:large;
- text-align:left;
- margin-top:0px;
- margin-bottom:0px;
+ font-size: large;
+ text-align: left;
+ margin-top: 0px;
+ margin-bottom: 0px;
text-indent: 0em;
- }
+}
p.series {
text-align: left;
- margin-top:0px;
- margin-bottom:0px;
- margin-left:2em;
- text-indent:-2em;
- }
+ margin-top: 0px;
+ margin-bottom: 0px;
+ margin-left: 2em;
+ text-indent: -2em;
+}
p.read_book {
- text-align:left;
- margin-top:0px;
- margin-bottom:0px;
- margin-left:2em;
- text-indent:-2em;
- }
+ text-align: left;
+ margin-top: 0px;
+ margin-bottom: 0px;
+ margin-left: 2em;
+ text-indent: -2em;
+}
p.unread_book {
- text-align:left;
- margin-top:0px;
- margin-bottom:0px;
- margin-left:2em;
- text-indent:-2em;
- }
+ text-align: left;
+ margin-top: 0px;
+ margin-bottom: 0px;
+ margin-left: 2em;
+ text-indent: -2em;
+}
p.date_read {
- text-align:left;
- margin-top:0px;
- margin-bottom:0px;
- margin-left:6em;
- text-indent:-6em;
- }
+ text-align: left;
+ margin-top: 0px;
+ margin-bottom: 0px;
+ margin-left: 6em;
+ text-indent: -6em;
+}
hr.series_divider {
- width:50%;
- margin-left:1em;
- margin-top:0em;
- margin-bottom:0em;
- }
+ width: 50%;
+ margin-left: 1em;
+ margin-top: 0em;
+ margin-bottom: 0em;
+}
hr.annotations_divider {
- width:50%;
- margin-left:1em;
- margin-top:0em;
- margin-bottom:0em;
- }
+ width: 50%;
+ margin-left: 1em;
+ margin-top: 0em;
+ margin-bottom: 0em;
+}
\ No newline at end of file
diff --git a/resources/content_server/index.html b/resources/content_server/index.html
index ff11acc719..1bc13096d5 100644
--- a/resources/content_server/index.html
+++ b/resources/content_server/index.html
@@ -29,9 +29,9 @@
-
-
-
+
+
+
+
-
+
-
+
@@ -51,37 +58,37 @@
-
+
-
+
-
+
-
-
+
+
-
+
-
+
-
+
-
+
-
+
diff --git a/resources/templates/html.css b/resources/templates/html.css
index 448ec596b9..bfbb646afb 100644
--- a/resources/templates/html.css
+++ b/resources/templates/html.css
@@ -35,9 +35,9 @@
*
* ***** END LICENSE BLOCK ***** */
@
-namespace url (http: //www.w3.org /1999/xhtml);
- @namespace svg url (http: //www.w3.org /2000/svg);
- /* blocks */
+namespace url (http: //www.w3.org /1999/xhtml);
+ @namespace svg url (http: //www.w3.org /2000/svg);
+ /* blocks */
html,div,map,dt,isindex,form {
display: block;
@@ -161,10 +161,29 @@ table[align="right"] {
float: right;
}
-table[rules]:not ([rules="none"] ) {
- border-collapse: collapse;
-}
+table
+[
+rules
+]
+:not
+
+(
+[
+rules
+=
+"none"
+]
+
+)
+{
+border-collapse
+:
+
+collapse
+;
+
+}
/* caption inherits from table not table-outer */
caption {
display: table-caption;
@@ -322,7 +341,7 @@ ol ol ul,ol ul ul,ol menu ul,ol dir ul,ol ol menu,ol ul menu,ol menu menu,ol dir
}
/* leafs */
- /* noshade and color attributes are handled completely by
+/* noshade and color attributes are handled completely by
* the nsHTMLHRElement attribute mapping code
*/
hr {
@@ -381,7 +400,7 @@ br {
}
/* Images, embedded object, and SVG size defaults */
-img,object,svg |svg {
+img,object,svg |svg {
width: auto;
height: auto;
}
From 282c6aaa49006086c0887115edd3da1381d663e9 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Fri, 15 Oct 2010 08:45:09 +0200
Subject: [PATCH 019/163] Minor modification to isbndb.py
---
src/calibre/ebooks/metadata/isbndb.py | 15 ++-------------
1 file changed, 2 insertions(+), 13 deletions(-)
diff --git a/src/calibre/ebooks/metadata/isbndb.py b/src/calibre/ebooks/metadata/isbndb.py
index 2bbffc2c8b..615b4ab818 100644
--- a/src/calibre/ebooks/metadata/isbndb.py
+++ b/src/calibre/ebooks/metadata/isbndb.py
@@ -90,10 +90,8 @@ def build_isbn(base_url, opts):
return base_url + 'index1=isbn&value1='+opts.isbn
def build_combined(base_url, opts):
- query = ''
- for e in (opts.title, opts.author, opts.publisher):
- if e is not None:
- query += ' ' + e
+ query = ' '.join([e for e in (opts.title, opts.author, opts.publisher) \
+ if e is not None ])
query = query.strip()
if len(query) == 0:
raise ISBNDBError('You must specify at least one of --author, --title or --publisher')
@@ -141,15 +139,6 @@ def create_books(opts, args, timeout=5.):
print ('ISBNDB query: '+url)
tans = [ISBNDBMetadata(book) for book in fetch_metadata(url, timeout=timeout)]
- '''ans = []
- for x in tans:
- add = True
- for y in ans:
- if y.isbn == x.isbn:
- add = False
- if add:
- ans.append(x)
- return ans'''
 #remove duplicate ISBNs
return dict((book.isbn, book) for book in tans).values()
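The query-building idiom from build_combined above, in isolation (the values are illustrative):

    parts = ('Dune', None, 'Ace Books')
    query = ' '.join([e for e in parts if e is not None]).strip()
    print query
    #prints: Dune Ace Books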
From 18d2c55d4bccfaff1b32416a7fe7c7507dcaee0b Mon Sep 17 00:00:00 2001
From: Sengian
Date: Tue, 19 Oct 2010 23:10:34 +0200
Subject: [PATCH 020/163] Modify the single metadata display to include summary
 and cover checks
---
src/calibre/gui2/dialogs/fetch_metadata.py | 8 +-
src/calibre/gui2/dialogs/fetch_metadata.ui | 344 ++++++++++-----------
2 files changed, 179 insertions(+), 173 deletions(-)
diff --git a/src/calibre/gui2/dialogs/fetch_metadata.py b/src/calibre/gui2/dialogs/fetch_metadata.py
index eb6edce75d..950f014442 100644
--- a/src/calibre/gui2/dialogs/fetch_metadata.py
+++ b/src/calibre/gui2/dialogs/fetch_metadata.py
@@ -48,7 +48,7 @@ class Matches(QAbstractTableModel):
return len(self.matches)
def columnCount(self, *args):
- return 6
+ return 8
def headerData(self, section, orientation, role):
if role != Qt.DisplayRole:
@@ -61,6 +61,8 @@ class Matches(QAbstractTableModel):
elif section == 3: text = _("Publisher")
elif section == 4: text = _("ISBN")
elif section == 5: text = _("Published")
+ elif section == 6: text = _("Cover?")
+ elif section == 7: text = _("Summary?")
return QVariant(text)
else:
@@ -87,6 +89,10 @@ class Matches(QAbstractTableModel):
elif col == 5:
if hasattr(book.pubdate, 'timetuple'):
res = strftime('%b %Y', book.pubdate.timetuple())
+ elif col == 6 and book.has_cover:
+ res = 'OK'
+ elif col == 7 and book.comments:
+ res = 'OK'
if not res:
return NONE
return QVariant(res)
diff --git a/src/calibre/gui2/dialogs/fetch_metadata.ui b/src/calibre/gui2/dialogs/fetch_metadata.ui
index 03a362096c..c54ee66044 100644
--- a/src/calibre/gui2/dialogs/fetch_metadata.ui
+++ b/src/calibre/gui2/dialogs/fetch_metadata.ui
@@ -1,172 +1,172 @@
-
-
- FetchMetadata
-
-
- Qt::WindowModal
-
-
-
- 0
- 0
- 830
- 642
-
-
-
- Fetch metadata
-
-
-
- :/images/metadata.png:/images/metadata.png
-
-
-
-
-
- <p>calibre can find metadata for your books from two locations: <b>Google Books</b> and <b>isbndb.com</b>. <p>To use isbndb.com you must sign up for a <a href="http://www.isbndb.com">free account</a> and enter your access key below.
-
-
- Qt::AlignCenter
-
-
- true
-
-
- true
-
-
-
-
-
-
-
-
- &Access Key:
-
-
- key
-
-
-
-
-
-
-
-
-
- Fetch
-
-
-
-
-
-
-
-
-
-
-
- true
-
-
-
-
-
-
- Matches
-
-
-
-
-
- Select the book that most closely matches your copy from the list below
-
-
-
-
-
-
-
- 0
- 1
-
-
-
- true
-
-
- QAbstractItemView::SingleSelection
-
-
- QAbstractItemView::SelectRows
-
-
-
-
-
-
-
-
-
-
-
-
- Download &social metadata (tags/rating/etc.) for the selected book
-
-
-
-
-
-
- Overwrite author and title with author and title of selected book
-
-
-
-
-
-
- QDialogButtonBox::Cancel|QDialogButtonBox::Ok
-
-
-
-
-
-
-
-
-
-
- buttonBox
- accepted()
- FetchMetadata
- accept()
-
-
- 460
- 599
-
-
- 657
- 530
-
-
-
-
- buttonBox
- rejected()
- FetchMetadata
- reject()
-
-
- 417
- 599
-
-
- 0
- 491
-
-
-
-
-
+
+
+ FetchMetadata
+
+
+ Qt::WindowModal
+
+
+
+ 0
+ 0
+ 890
+ 642
+
+
+
+ Fetch metadata
+
+
+
+ :/images/metadata.png:/images/metadata.png
+
+
+
+
+
+ <p>calibre can find metadata for your books from two locations: <b>Google Books</b> and <b>isbndb.com</b>. <p>To use isbndb.com you must sign up for a <a href="http://www.isbndb.com">free account</a> and enter your access key below.
+
+
+ Qt::AlignCenter
+
+
+ true
+
+
+ true
+
+
+
+
+
+
+
+
+ &Access Key:
+
+
+ key
+
+
+
+
+
+
+
+
+
+ Fetch
+
+
+
+
+
+
+
+
+
+
+
+ true
+
+
+
+
+
+
+ Matches
+
+
+
+
+
+ Select the book that most closely matches your copy from the list below
+
+
+
+
+
+
+
+ 0
+ 1
+
+
+
+ true
+
+
+ QAbstractItemView::SingleSelection
+
+
+ QAbstractItemView::SelectRows
+
+
+
+
+
+
+
+
+
+
+
+
+ Download &social metadata (tags/rating/etc.) for the selected book
+
+
+
+
+
+
+ Overwrite author and title with author and title of selected book
+
+
+
+
+
+
+ QDialogButtonBox::Cancel|QDialogButtonBox::Ok
+
+
+
+
+
+
+
+
+
+
+ buttonBox
+ accepted()
+ FetchMetadata
+ accept()
+
+
+ 460
+ 599
+
+
+ 657
+ 530
+
+
+
+
+ buttonBox
+ rejected()
+ FetchMetadata
+ reject()
+
+
+ 417
+ 599
+
+
+ 0
+ 491
+
+
+
+
+
From b59631db5f348c2cba069ffc725251afc87a3a1c Mon Sep 17 00:00:00 2001
From: Sengian
Date: Sun, 24 Oct 2010 23:26:17 +0200
Subject: [PATCH 021/163] Add a get-cover option to metadata_single.py which
 overwrites the cover if one is available; still needs to be modified to
 remember the option
---
src/calibre/gui2/dialogs/fetch_metadata.ui | 11 +++++++++--
src/calibre/gui2/dialogs/metadata_single.py | 2 ++
2 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/src/calibre/gui2/dialogs/fetch_metadata.ui b/src/calibre/gui2/dialogs/fetch_metadata.ui
index c54ee66044..0b39089ee3 100644
--- a/src/calibre/gui2/dialogs/fetch_metadata.ui
+++ b/src/calibre/gui2/dialogs/fetch_metadata.ui
@@ -109,6 +109,13 @@
+
+
+
+ Overwrite author and title with author and title of selected book
+
+
+
@@ -117,9 +124,9 @@
-
+
- Overwrite author and title with author and title of selected book
+ Overwrite cover image with downloaded cover if available for the selected book
diff --git a/src/calibre/gui2/dialogs/metadata_single.py b/src/calibre/gui2/dialogs/metadata_single.py
index ef1bddca0c..65cfdf57d4 100644
--- a/src/calibre/gui2/dialogs/metadata_single.py
+++ b/src/calibre/gui2/dialogs/metadata_single.py
@@ -709,6 +709,8 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
self.title.setText(book.title)
self.authors.setText(authors_to_string(book.authors))
if book.author_sort: self.author_sort.setText(book.author_sort)
+ if d.opt_overwrite_cover_image.isChecked() and book.has_cover:
+ self.fetch_cover()
if book.publisher: self.publisher.setEditText(book.publisher)
if book.isbn: self.isbn.setText(book.isbn)
if book.pubdate:
From c7995f136f839c2719f5aada74c59239916bfd7f Mon Sep 17 00:00:00 2001
From: Sengian
Date: Sat, 30 Oct 2010 18:11:50 +0200
Subject: [PATCH 022/163] Finish the cover download option in the single
 metadata dialog and correct a bug in option saving
---
src/calibre/gui2/__init__.py | 2 ++
src/calibre/gui2/dialogs/fetch_metadata.py | 8 ++++++++
2 files changed, 10 insertions(+)
diff --git a/src/calibre/gui2/__init__.py b/src/calibre/gui2/__init__.py
index 4820bd251c..712c6b8a04 100644
--- a/src/calibre/gui2/__init__.py
+++ b/src/calibre/gui2/__init__.py
@@ -123,6 +123,8 @@ def _config():
help=_('Download social metadata (tags/rating/etc.)'))
c.add_opt('overwrite_author_title_metadata', default=True,
help=_('Overwrite author and title with new metadata'))
+ c.add_opt('overwrite_cover_image', default=False,
+ help=_('Overwrite the cover with the downloaded cover if one is available'))
c.add_opt('enforce_cpu_limit', default=True,
help=_('Limit max simultaneous jobs to number of CPUs'))
c.add_opt('tag_browser_hidden_categories', default=set(),
diff --git a/src/calibre/gui2/dialogs/fetch_metadata.py b/src/calibre/gui2/dialogs/fetch_metadata.py
index 35b5e576e6..a0ee250457 100644
--- a/src/calibre/gui2/dialogs/fetch_metadata.py
+++ b/src/calibre/gui2/dialogs/fetch_metadata.py
@@ -137,6 +137,7 @@ class FetchMetadata(QDialog, Ui_FetchMetadata):
self.fetch_metadata()
self.opt_get_social_metadata.setChecked(config['get_social_metadata'])
self.opt_overwrite_author_title_metadata.setChecked(config['overwrite_author_title_metadata'])
+ self.opt_overwrite_cover_image.setChecked(config['overwrite_cover_image'])
def show_summary(self, current, *args):
@@ -219,6 +220,13 @@ class FetchMetadata(QDialog, Ui_FetchMetadata):
_hung_fetchers.add(self.fetcher)
if hasattr(self, '_hangcheck') and self._hangcheck.isActive():
self._hangcheck.stop()
+ #save the option configuration
+ if self.opt_get_social_metadata.isChecked() != config['get_social_metadata']:
+ config.set('get_social_metadata', self.opt_get_social_metadata.isChecked())
+ if self.opt_overwrite_author_title_metadata.isChecked() != config['overwrite_author_title_metadata']:
+ config.set('overwrite_author_title_metadata', self.opt_overwrite_author_title_metadata.isChecked())
+ if self.opt_overwrite_cover_image.isChecked() != config['overwrite_cover_image']:
+ config.set('overwrite_cover_image', self.opt_overwrite_cover_image.isChecked())
def __enter__(self, *args):
return self
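The save-only-when-changed pattern used in the hunk above, reduced to a small helper (a hypothetical name, not part of calibre); writing only on change avoids touching the config file needlessly:

    def sync_option(config, key, checkbox):
        value = checkbox.isChecked()
        if value != config[key]:
            config.set(key, value)

    #e.g. sync_option(config, 'overwrite_cover_image', self.opt_overwrite_cover_image)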
From c369ff9534d597bda6b7b8910278adaed9b359e9 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Sat, 30 Oct 2010 21:59:03 +0200
Subject: [PATCH 023/163] Modify for correct HTML display
---
src/calibre/gui2/dialogs/metadata_single.ui | 1626 +++++++++----------
1 file changed, 813 insertions(+), 813 deletions(-)
diff --git a/src/calibre/gui2/dialogs/metadata_single.ui b/src/calibre/gui2/dialogs/metadata_single.ui
index 18bcf2dc4c..29f5d48a11 100644
--- a/src/calibre/gui2/dialogs/metadata_single.ui
+++ b/src/calibre/gui2/dialogs/metadata_single.ui
@@ -1,813 +1,813 @@
-
-
- MetadataSingleDialog
-
-
-
- 0
- 0
- 887
- 750
-
-
-
-
- 0
- 0
-
-
-
- Edit Meta Information
-
-
-
- :/images/edit_input.png:/images/edit_input.png
-
-
- true
-
-
- true
-
-
-
-
-
- QFrame::NoFrame
-
-
- true
-
-
-
-
- 0
- 0
- 879
- 711
-
-
-
-
- 0
-
-
-
-
-
- 800
- 665
-
-
-
- 0
-
-
-
- &Basic metadata
-
-
-
-
-
- Qt::Horizontal
-
-
-
-
-
-
- Meta information
-
-
-
-
-
- &Title:
-
-
- Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter
-
-
- title
-
-
-
-
-
-
- Change the title of this book
-
-
-
-
-
-
- Swap the author and title
-
-
- ...
-
-
-
- :/images/swap.png:/images/swap.png
-
-
-
- 16
- 16
-
-
-
-
-
-
-
- &Author(s):
-
-
- Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter
-
-
- authors
-
-
-
-
-
-
- Author S&ort:
-
-
- Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter
-
-
- author_sort
-
-
-
-
-
-
-
-
- Specify how the author(s) of this book should be sorted. For example Charles Dickens should be sorted as Dickens, Charles.
-If the box is colored green, then text matches the individual author's sort strings. If it is colored red, then the authors and this text do not match.
-
-
-
-
-
-
- Automatically create the author sort entry based on the current author entry.
-Using this button to create author sort will change author sort from red to green.
-
-
- ...
-
-
-
- :/images/auto_author_sort.png:/images/auto_author_sort.png
-
-
-
-
-
-
-
-
- &Rating:
-
-
- Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter
-
-
- rating
-
-
-
-
-
-
- Rating of this book. 0-5 stars
-
-
- Rating of this book. 0-5 stars
-
-
- QAbstractSpinBox::PlusMinus
-
-
- stars
-
-
- 5
-
-
-
-
-
-
- &Publisher:
-
-
- Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter
-
-
- publisher
-
-
-
-
-
-
- Ta&gs:
-
-
- Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter
-
-
- tags
-
-
-
-
-
-
-
-
- Tags categorize the book. This is particularly useful while searching. <br><br>They can be any words or phrases, separated by commas.
-
-
-
-
-
-
- Open Tag Editor
-
-
- Open Tag Editor
-
-
-
- :/images/chapters.png:/images/chapters.png
-
-
-
-
-
-
-
-
- &Series:
-
-
- Qt::PlainText
-
-
- Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter
-
-
- series
-
-
-
-
-
-
- 5
-
-
-
-
- List of known series. You can add new series.
-
-
- List of known series. You can add new series.
-
-
- true
-
-
- QComboBox::InsertAlphabetically
-
-
-
-
-
-
- Remove unused series (Series that have no books)
-
-
- ...
-
-
-
- :/images/trash.png:/images/trash.png
-
-
-
-
-
-
-
-
- IS&BN:
-
-
- Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter
-
-
- isbn
-
-
-
-
-
-
-
-
-
- Publishe&d:
-
-
- Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter
-
-
- pubdate
-
-
-
-
-
-
- true
-
-
-
-
-
-
- false
-
-
- Book
-
-
- 9999.989999999999782
-
-
-
-
-
-
- MMM yyyy
-
-
- true
-
-
-
-
-
-
- true
-
-
-
-
-
-
- dd MMM yyyy
-
-
- true
-
-
-
-
-
-
- &Date:
-
-
- Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter
-
-
- date
-
-
-
-
-
-
-
-
-
- &Comments
-
-
-
-
-
- true
-
-
- false
-
-
-
-
-
-
-
-
-
- &Fetch metadata from server
-
-
-
-
-
-
-
-
-
-
-
- 0
- 0
-
-
-
- Available Formats
-
-
-
-
-
-
-
-
- 0
- 0
-
-
-
-
- 16777215
- 130
-
-
-
- QAbstractItemView::DropOnly
-
-
-
- 64
- 64
-
-
-
-
-
-
-
- Add a new format for this book to the database
-
-
- ...
-
-
-
- :/images/add_book.png:/images/add_book.png
-
-
-
- 32
- 32
-
-
-
-
-
-
-
- Remove the selected formats for this book from the database.
-
-
- ...
-
-
-
- :/images/trash.png:/images/trash.png
-
-
-
- 32
- 32
-
-
-
-
-
-
-
- Set the cover for the book from the selected format
-
-
- ...
-
-
-
- :/images/book.png:/images/book.png
-
-
-
- 32
- 32
-
-
-
-
-
-
-
- Update metadata from the metadata in the selected format
-
-
-
-
-
-
- :/images/edit_input.png:/images/edit_input.png
-
-
-
- 32
- 32
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- 0
- 10
-
-
-
- Book Cover
-
-
-
-
-
-
- 0
- 100
-
-
-
-
-
-
-
- 6
-
-
- QLayout::SetMaximumSize
-
-
- 0
-
-
-
-
- Change &cover image:
-
-
- cover_path
-
-
-
-
-
-
- 6
-
-
- 0
-
-
-
-
- true
-
-
-
-
-
-
- &Browse
-
-
-
- :/images/document_open.png:/images/document_open.png
-
-
-
-
-
-
- Remove border (if any) from cover
-
-
- T&rim
-
-
-
- :/images/trim.png:/images/trim.png
-
-
- Qt::ToolButtonTextBesideIcon
-
-
-
-
-
-
- Reset cover to default
-
-
- ...
-
-
-
- :/images/trash.png:/images/trash.png
-
-
-
-
-
-
-
-
-
-
-
-
- Download co&ver
-
-
-
-
-
-
- Generate a default cover based on the title and author
-
-
- &Generate cover
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- &Custom metadata
-
-
-
-
-
-
-
-
-
-
-
-
- Qt::Horizontal
-
-
- QDialogButtonBox::Cancel|QDialogButtonBox::Ok
-
-
-
-
-
-
-
- EnLineEdit
- QLineEdit
- widgets.h
-
-
- EnComboBox
- QComboBox
- widgets.h
-
-
- TagsLineEdit
- QLineEdit
- widgets.h
-
-
- FormatList
- QListWidget
- calibre/gui2/widgets.h
-
-
- ImageView
- QWidget
- calibre/gui2/widgets.h
- 1
-
-
-
- title
- swap_button
- authors
- author_sort
- auto_author_sort
- rating
- publisher
- tags
- tag_editor_button
- series
- remove_series_button
- series_index
- isbn
- date
- pubdate
- comments
- fetch_metadata_button
- add_format_button
- remove_format_button
- button_set_cover
- button_set_metadata
- formats
- cover_path
- reset_cover
- fetch_cover_button
- generate_cover_button
- scrollArea
- central_widget
- button_box
-
-
-
-
-
-
- button_box
- accepted()
- MetadataSingleDialog
- accept()
-
-
- 261
- 710
-
-
- 157
- 274
-
-
-
-
- button_box
- rejected()
- MetadataSingleDialog
- reject()
-
-
- 329
- 710
-
-
- 286
- 274
-
-
-
-
-
+
+
+ MetadataSingleDialog
+
+
+
+ 0
+ 0
+ 887
+ 750
+
+
+
+
+ 0
+ 0
+
+
+
+ Edit Meta Information
+
+
+
+ :/images/edit_input.png:/images/edit_input.png
+
+
+ true
+
+
+ true
+
+
+
+
+
+ QFrame::NoFrame
+
+
+ true
+
+
+
+
+ 0
+ 0
+ 879
+ 711
+
+
+
+
+ 0
+
+
+
+
+
+ 800
+ 665
+
+
+
+ 0
+
+
+
+ &Basic metadata
+
+
+
+
+
+ Qt::Horizontal
+
+
+
+
+
+
+ Meta information
+
+
+
+
+
+ &Title:
+
+
+ Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter
+
+
+ title
+
+
+
+
+
+
+ Change the title of this book
+
+
+
+
+
+
+ Swap the author and title
+
+
+ ...
+
+
+
+ :/images/swap.png:/images/swap.png
+
+
+
+ 16
+ 16
+
+
+
+
+
+
+
+ &Author(s):
+
+
+ Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter
+
+
+ authors
+
+
+
+
+
+
+ Author S&ort:
+
+
+ Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter
+
+
+ author_sort
+
+
+
+
+
+
+
+
+ Specify how the author(s) of this book should be sorted. For example Charles Dickens should be sorted as Dickens, Charles.
+If the box is colored green, then text matches the individual author's sort strings. If it is colored red, then the authors and this text do not match.
+
+
+
+
+
+
+ Automatically create the author sort entry based on the current author entry.
+Using this button to create author sort will change author sort from red to green.
+
+
+ ...
+
+
+
+ :/images/auto_author_sort.png:/images/auto_author_sort.png
+
+
+
+
+
+
+
+
+ &Rating:
+
+
+ Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter
+
+
+ rating
+
+
+
+
+
+
+ Rating of this book. 0-5 stars
+
+
+ Rating of this book. 0-5 stars
+
+
+ QAbstractSpinBox::PlusMinus
+
+
+ stars
+
+
+ 5
+
+
+
+
+
+
+ &Publisher:
+
+
+ Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter
+
+
+ publisher
+
+
+
+
+
+
+ Ta&gs:
+
+
+ Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter
+
+
+ tags
+
+
+
+
+
+
+
+
+ Tags categorize the book. This is particularly useful while searching. <br><br>They can be any words or phrases, separated by commas.
+
+
+
+
+
+
+ Open Tag Editor
+
+
+ Open Tag Editor
+
+
+
+ :/images/chapters.png:/images/chapters.png
+
+
+
+
+
+
+
+
+ &Series:
+
+
+ Qt::PlainText
+
+
+ Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter
+
+
+ series
+
+
+
+
+
+
+ 5
+
+
+
+
+ List of known series. You can add new series.
+
+
+ List of known series. You can add new series.
+
+
+ true
+
+
+ QComboBox::InsertAlphabetically
+
+
+
+
+
+
+ Remove unused series (Series that have no books)
+
+
+ ...
+
+
+
+ :/images/trash.png:/images/trash.png
+
+
+
+
+
+
+
+
+ IS&BN:
+
+
+ Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter
+
+
+ isbn
+
+
+
+
+
+
+
+
+
+ Publishe&d:
+
+
+ Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter
+
+
+ pubdate
+
+
+
+
+
+
+ true
+
+
+
+
+
+
+ false
+
+
+ Book
+
+
+ 9999.989999999999782
+
+
+
+
+
+
+ MMM yyyy
+
+
+ true
+
+
+
+
+
+
+ true
+
+
+
+
+
+
+ dd MMM yyyy
+
+
+ true
+
+
+
+
+
+
+ &Date:
+
+
+ Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter
+
+
+ date
+
+
+
+
+
+
+
+
+
+ &Comments
+
+
+
+
+
+ true
+
+
+ true
+
+
+
+
+
+
+
+
+
+ &Fetch metadata from server
+
+
+
+
+
+
+
+
+
+
+
+ 0
+ 0
+
+
+
+ Available Formats
+
+
+
+
+
+
+
+
+ 0
+ 0
+
+
+
+
+ 16777215
+ 130
+
+
+
+ QAbstractItemView::DropOnly
+
+
+
+ 64
+ 64
+
+
+
+
+
+
+
+ Add a new format for this book to the database
+
+
+ ...
+
+
+
+ :/images/add_book.png:/images/add_book.png
+
+
+
+ 32
+ 32
+
+
+
+
+
+
+
+ Remove the selected formats for this book from the database.
+
+
+ ...
+
+
+
+ :/images/trash.png:/images/trash.png
+
+
+
+ 32
+ 32
+
+
+
+
+
+
+
+ Set the cover for the book from the selected format
+
+
+ ...
+
+
+
+ :/images/book.png:/images/book.png
+
+
+
+ 32
+ 32
+
+
+
+
+
+
+
+ Update metadata from the metadata in the selected format
+
+
+
+
+
+
+ :/images/edit_input.png:/images/edit_input.png
+
+
+
+ 32
+ 32
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 0
+ 10
+
+
+
+ Book Cover
+
+
+
+
+
+
+ 0
+ 100
+
+
+
+
+
+
+
+ 6
+
+
+ QLayout::SetMaximumSize
+
+
+ 0
+
+
+
+
+ Change &cover image:
+
+
+ cover_path
+
+
+
+
+
+
+ 6
+
+
+ 0
+
+
+
+
+ true
+
+
+
+
+
+
+ &Browse
+
+
+
+ :/images/document_open.png:/images/document_open.png
+
+
+
+
+
+
+ Remove border (if any) from cover
+
+
+ T&rim
+
+
+
+ :/images/trim.png:/images/trim.png
+
+
+ Qt::ToolButtonTextBesideIcon
+
+
+
+
+
+
+ Reset cover to default
+
+
+ ...
+
+
+
+ :/images/trash.png:/images/trash.png
+
+
+
+
+
+
+
+
+
+
+
+
+ Download co&ver
+
+
+
+
+
+
+ Generate a default cover based on the title and author
+
+
+ &Generate cover
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ &Custom metadata
+
+
+
+
+
+
+
+
+
+
+
+
+ Qt::Horizontal
+
+
+ QDialogButtonBox::Cancel|QDialogButtonBox::Ok
+
+
+
+
+
+
+
+ EnLineEdit
+ QLineEdit
+ widgets.h
+
+
+ EnComboBox
+ QComboBox
+ widgets.h
+
+
+ TagsLineEdit
+ QLineEdit
+ widgets.h
+
+
+ FormatList
+ QListWidget
+ calibre/gui2/widgets.h
+
+
+ ImageView
+ QWidget
+ calibre/gui2/widgets.h
+ 1
+
+
+
+ title
+ swap_button
+ authors
+ author_sort
+ auto_author_sort
+ rating
+ publisher
+ tags
+ tag_editor_button
+ series
+ remove_series_button
+ series_index
+ isbn
+ date
+ pubdate
+ comments
+ fetch_metadata_button
+ add_format_button
+ remove_format_button
+ button_set_cover
+ button_set_metadata
+ formats
+ cover_path
+ reset_cover
+ fetch_cover_button
+ generate_cover_button
+ scrollArea
+ central_widget
+ button_box
+
+
+
+
+
+
+ button_box
+ accepted()
+ MetadataSingleDialog
+ accept()
+
+
+ 261
+ 710
+
+
+ 157
+ 274
+
+
+
+
+ button_box
+ rejected()
+ MetadataSingleDialog
+ reject()
+
+
+ 329
+ 710
+
+
+ 286
+ 274
+
+
+
+
+
From dd522b051e85ccef7e153a873510ae681988e89c Mon Sep 17 00:00:00 2001
From: Sengian
Date: Sun, 31 Oct 2010 23:37:19 +0100
Subject: [PATCH 024/163] Add an option to get text instead of HTML in metadata
 plugins
---
src/calibre/ebooks/metadata/fetch.py | 11 +-
src/calibre/utils/html2text.py | 451 +++++++++++++++++++++++++++
2 files changed, 461 insertions(+), 1 deletion(-)
create mode 100644 src/calibre/utils/html2text.py
diff --git a/src/calibre/ebooks/metadata/fetch.py b/src/calibre/ebooks/metadata/fetch.py
index 9b8a42e482..87989a4d42 100644
--- a/src/calibre/ebooks/metadata/fetch.py
+++ b/src/calibre/ebooks/metadata/fetch.py
@@ -10,6 +10,7 @@ from calibre import prints
from calibre.utils.config import OptionParser
from calibre.utils.logging import default_log
from calibre.utils.titlecase import titlecase
+from calibre.utils.html2text import html2text
from calibre.customize import Plugin
from calibre.ebooks.metadata.covers import check_for_cover
@@ -79,6 +80,8 @@ class MetadataSource(Plugin): # {{{
mi.comments = None
if not c.get('tags', True):
mi.tags = []
+ if c.get('textconvert', True) and mi.comments is not None:
+ mi.comments = html2text(mi.comments)
except Exception, e:
self.exception = e
@@ -132,11 +135,17 @@ class MetadataSource(Plugin): # {{{
setattr(w, '_'+x, cb)
cb.setChecked(c.get(x, True))
w._layout.addWidget(cb)
+ #textconvert for comments
+ cb = QCheckBox(_('Convert comments from %s to text')%(self.name))
+ setattr(w, '_textconvert', cb)
+ cb.setChecked(c.get('textconvert', False))
+ w._layout.addWidget(cb)
+
return w
def save_settings(self, w):
dl_settings = {}
- for x in ('rating', 'tags', 'comments'):
+ for x in ('rating', 'tags', 'comments', 'textconvert'):
dl_settings[x] = getattr(w, '_'+x).isChecked()
c = self.config_store()
c.set(self.name, dl_settings)
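A hedged usage sketch of the conversion wired up above (html2text is the module-level function added below; the sample HTML is made up):

    from calibre.utils.html2text import html2text
    comments = '<p>A <b>great</b> read.<br/>Highly recommended.</p>'
    print html2text(comments)
    #returns Markdown-style plain text, suitable for mi.comments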
diff --git a/src/calibre/utils/html2text.py b/src/calibre/utils/html2text.py
new file mode 100644
index 0000000000..b271def4bb
--- /dev/null
+++ b/src/calibre/utils/html2text.py
@@ -0,0 +1,451 @@
+#!/usr/bin/env python
+"""html2text: Turn HTML into equivalent Markdown-structured text."""
+__version__ = "2.39"
+__author__ = "Aaron Swartz (me@aaronsw.com)"
+__copyright__ = "(C) 2004-2008 Aaron Swartz. GNU GPL 3."
+__contributors__ = ["Martin 'Joey' Schulze", "Ricardo Reyes", "Kevin Jay North"]
+
+# TODO:
+# Support decoded entities with unifiable.
+
+if not hasattr(__builtins__, 'True'): True, False = 1, 0
+import re, sys, urllib, htmlentitydefs, codecs, StringIO, types
+import sgmllib
+import urlparse
+sgmllib.charref = re.compile('&#([xX]?[0-9a-fA-F]+)[^0-9a-fA-F]')
+
+try: from textwrap import wrap
+except: pass
+
+# Use Unicode characters instead of their ascii pseudo-replacements
+UNICODE_SNOB = 0
+
+# Put the links after each paragraph instead of at the end.
+LINKS_EACH_PARAGRAPH = 0
+
+# Wrap long lines at position. 0 for no wrapping. (Requires Python 2.3.)
+BODY_WIDTH = 0
+
+# Don't show internal links (href="#local-anchor") -- corresponding link targets
+# won't be visible in the plain text file anyway.
+SKIP_INTERNAL_LINKS = True
+
+### Entity Nonsense ###
+
+def name2cp(k):
+ if k == 'apos': return ord("'")
+ if hasattr(htmlentitydefs, "name2codepoint"): # requires Python 2.3
+ return htmlentitydefs.name2codepoint[k]
+ else:
+ k = htmlentitydefs.entitydefs[k]
+ if k.startswith("&#") and k.endswith(";"): return int(k[2:-1]) # not in latin-1
+ return ord(codecs.latin_1_decode(k)[0])
+
+unifiable = {'rsquo':"'", 'lsquo':"'", 'rdquo':'"', 'ldquo':'"',
+'copy':'(C)', 'mdash':'--', 'nbsp':' ', 'rarr':'->', 'larr':'<-', 'middot':'*',
+'ndash':'-', 'oelig':'oe', 'aelig':'ae',
+'agrave':'a', 'aacute':'a', 'acirc':'a', 'atilde':'a', 'auml':'a', 'aring':'a',
+'egrave':'e', 'eacute':'e', 'ecirc':'e', 'euml':'e',
+'igrave':'i', 'iacute':'i', 'icirc':'i', 'iuml':'i',
+'ograve':'o', 'oacute':'o', 'ocirc':'o', 'otilde':'o', 'ouml':'o',
+'ugrave':'u', 'uacute':'u', 'ucirc':'u', 'uuml':'u'}
+
+unifiable_n = {}
+
+for k in unifiable.keys():
+ unifiable_n[name2cp(k)] = unifiable[k]
+
+def charref(name):
+ if name[0] in ['x','X']:
+ c = int(name[1:], 16)
+ else:
+ c = int(name)
+
+ if not UNICODE_SNOB and c in unifiable_n.keys():
+ return unifiable_n[c]
+ else:
+ return unichr(c)
+
+def entityref(c):
+ if not UNICODE_SNOB and c in unifiable.keys():
+ return unifiable[c]
+ else:
+ try: name2cp(c)
+ except KeyError: return "&" + c
+ else: return unichr(name2cp(c))
+
+def replaceEntities(s):
+ s = s.group(1)
+ if s[0] == "#":
+ return charref(s[1:])
+ else: return entityref(s)
+
+r_unescape = re.compile(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));")
+def unescape(s):
+ return r_unescape.sub(replaceEntities, s)
+
+def fixattrs(attrs):
+ # Fix bug in sgmllib.py
+ if not attrs: return attrs
+ newattrs = []
+ for attr in attrs:
+ newattrs.append((attr[0], unescape(attr[1])))
+ return newattrs
+
+### End Entity Nonsense ###
+
+def onlywhite(line):
+ """Return true if the line does only consist of whitespace characters."""
+ for c in line:
+ if c is not ' ' and c is not ' ':
+ return c is ' '
+ return line
+
+def optwrap(text):
+ """Wrap all paragraphs in the provided text."""
+ if not BODY_WIDTH:
+ return text
+
+ assert wrap, "Requires Python 2.3."
+ result = ''
+ newlines = 0
+ for para in text.split("\n"):
+ if len(para) > 0:
+ if para[0] is not ' ' and para[0] is not '-' and para[0] is not '*':
+ for line in wrap(para, BODY_WIDTH):
+ result += line + "\n"
+ result += "\n"
+ newlines = 2
+ else:
+ if not onlywhite(para):
+ result += para + "\n"
+ newlines = 1
+ else:
+ if newlines < 2:
+ result += "\n"
+ newlines += 1
+ return result
+
+def hn(tag):
+ if tag[0] == 'h' and len(tag) == 2:
+ try:
+ n = int(tag[1])
+ if n in range(1, 10): return n
+ except ValueError: return 0
+
+class _html2text(sgmllib.SGMLParser):
+ def __init__(self, out=None, baseurl=''):
+ sgmllib.SGMLParser.__init__(self)
+
+ if out is None: self.out = self.outtextf
+ else: self.out = out
+ self.outtext = u''
+ self.quiet = 0
+ self.p_p = 0
+ self.outcount = 0
+ self.start = 1
+ self.space = 0
+ self.a = []
+ self.astack = []
+ self.acount = 0
+ self.list = []
+ self.blockquote = 0
+ self.pre = 0
+ self.startpre = 0
+ self.lastWasNL = 0
+ self.abbr_title = None # current abbreviation definition
+ self.abbr_data = None # last inner HTML (for abbr being defined)
+ self.abbr_list = {} # stack of abbreviations to write later
+ self.baseurl = baseurl
+
+ def outtextf(self, s):
+ self.outtext += s
+
+ def close(self):
+ sgmllib.SGMLParser.close(self)
+
+ self.pbr()
+ self.o('', 0, 'end')
+
+ return self.outtext
+
+ def handle_charref(self, c):
+ self.o(charref(c))
+
+ def handle_entityref(self, c):
+ self.o(entityref(c))
+
+ def unknown_starttag(self, tag, attrs):
+ self.handle_tag(tag, attrs, 1)
+
+ def unknown_endtag(self, tag):
+ self.handle_tag(tag, None, 0)
+
+ def previousIndex(self, attrs):
+ """ returns the index of certain set of attributes (of a link) in the
+ self.a list
+
+ If the set of attributes is not found, returns None
+ """
+ if not attrs.has_key('href'): return None
+
+ i = -1
+ for a in self.a:
+ i += 1
+ match = 0
+
+ if a.has_key('href') and a['href'] == attrs['href']:
+ if a.has_key('title') or attrs.has_key('title'):
+ if (a.has_key('title') and attrs.has_key('title') and
+ a['title'] == attrs['title']):
+ match = True
+ else:
+ match = True
+
+ if match: return i
+
+ def handle_tag(self, tag, attrs, start):
+ attrs = fixattrs(attrs)
+
+ if hn(tag):
+ self.p()
+ if start: self.o(hn(tag)*"#" + ' ')
+
+ if tag in ['p', 'div']: self.p()
+
+ if tag == "br" and start: self.o(" \n")
+
+ if tag == "hr" and start:
+ self.p()
+ self.o("* * *")
+ self.p()
+
+ if tag in ["head", "style", 'script']:
+ if start: self.quiet += 1
+ else: self.quiet -= 1
+
+ if tag in ["body"]:
+ self.quiet = 0 # sites like 9rules.com never close
+
+ if tag == "blockquote":
+ if start:
+ self.p(); self.o('> ', 0, 1); self.start = 1
+ self.blockquote += 1
+ else:
+ self.blockquote -= 1
+ self.p()
+
+ if tag in ['em', 'i', 'u']: self.o("_")
+ if tag in ['strong', 'b']: self.o("**")
+ if tag == "code" and not self.pre: self.o('`') #TODO: `` `this` ``
+ if tag == "abbr":
+ if start:
+ attrsD = {}
+ for (x, y) in attrs: attrsD[x] = y
+ attrs = attrsD
+
+ self.abbr_title = None
+ self.abbr_data = ''
+ if attrs.has_key('title'):
+ self.abbr_title = attrs['title']
+ else:
+ if self.abbr_title != None:
+ self.abbr_list[self.abbr_data] = self.abbr_title
+ self.abbr_title = None
+ self.abbr_data = ''
+
+ if tag == "a":
+ if start:
+ attrsD = {}
+ for (x, y) in attrs: attrsD[x] = y
+ attrs = attrsD
+ if attrs.has_key('href') and not (SKIP_INTERNAL_LINKS and attrs['href'].startswith('#')):
+ self.astack.append(attrs)
+ self.o("[")
+ else:
+ self.astack.append(None)
+ else:
+ if self.astack:
+ a = self.astack.pop()
+ if a:
+ i = self.previousIndex(a)
+ if i is not None:
+ a = self.a[i]
+ else:
+ self.acount += 1
+ a['count'] = self.acount
+ a['outcount'] = self.outcount
+ self.a.append(a)
+ self.o("][" + `a['count']` + "]")
+
+ if tag == "img" and start:
+ attrsD = {}
+ for (x, y) in attrs: attrsD[x] = y
+ attrs = attrsD
+ if attrs.has_key('src'):
+ attrs['href'] = attrs['src']
+ alt = attrs.get('alt', '')
+ i = self.previousIndex(attrs)
+ if i is not None:
+ attrs = self.a[i]
+ else:
+ self.acount += 1
+ attrs['count'] = self.acount
+ attrs['outcount'] = self.outcount
+ self.a.append(attrs)
+ self.o("![")
+ self.o(alt)
+ self.o("]["+`attrs['count']`+"]")
+
+ if tag == 'dl' and start: self.p()
+ if tag == 'dt' and not start: self.pbr()
+ if tag == 'dd' and start: self.o(' ')
+ if tag == 'dd' and not start: self.pbr()
+
+ if tag in ["ol", "ul"]:
+ if start:
+ self.list.append({'name':tag, 'num':0})
+ else:
+ if self.list: self.list.pop()
+
+ self.p()
+
+ if tag == 'li':
+ if start:
+ self.pbr()
+ if self.list: li = self.list[-1]
+ else: li = {'name':'ul', 'num':0}
+                self.o(" "*len(self.list)) #TODO: line up <li>s > 9 correctly.
+ if li['name'] == "ul": self.o("* ")
+ elif li['name'] == "ol":
+ li['num'] += 1
+ self.o(`li['num']`+". ")
+ self.start = 1
+ else:
+ self.pbr()
+
+ if tag in ["table", "tr"] and start: self.p()
+ if tag == 'td': self.pbr()
+
+ if tag == "pre":
+ if start:
+ self.startpre = 1
+ self.pre = 1
+ else:
+ self.pre = 0
+ self.p()
+
+ def pbr(self):
+ if self.p_p == 0: self.p_p = 1
+
+ def p(self): self.p_p = 2
+
+ def o(self, data, puredata=0, force=0):
+ if self.abbr_data is not None: self.abbr_data += data
+
+ if not self.quiet:
+ if puredata and not self.pre:
+ data = re.sub('\s+', ' ', data)
+ if data and data[0] == ' ':
+ self.space = 1
+ data = data[1:]
+ if not data and not force: return
+
+ if self.startpre:
+ #self.out(" :") #TODO: not output when already one there
+ self.startpre = 0
+
+ bq = (">" * self.blockquote)
+ if not (force and data and data[0] == ">") and self.blockquote: bq += " "
+
+ if self.pre:
+ bq += " "
+ data = data.replace("\n", "\n"+bq)
+
+ if self.start:
+ self.space = 0
+ self.p_p = 0
+ self.start = 0
+
+ if force == 'end':
+ # It's the end.
+ self.p_p = 0
+ self.out("\n")
+ self.space = 0
+
+
+ if self.p_p:
+ self.out(('\n'+bq)*self.p_p)
+ self.space = 0
+
+ if self.space:
+ if not self.lastWasNL: self.out(' ')
+ self.space = 0
+
+ if self.a and ((self.p_p == 2 and LINKS_EACH_PARAGRAPH) or force == "end"):
+ if force == "end": self.out("\n")
+
+ newa = []
+ for link in self.a:
+ if self.outcount > link['outcount']:
+ self.out(" ["+`link['count']`+"]: " + urlparse.urljoin(self.baseurl, link['href']))
+ if link.has_key('title'): self.out(" ("+link['title']+")")
+ self.out("\n")
+ else:
+ newa.append(link)
+
+ if self.a != newa: self.out("\n") # Don't need an extra line when nothing was done.
+
+ self.a = newa
+
+ if self.abbr_list and force == "end":
+ for abbr, definition in self.abbr_list.items():
+ self.out(" *[" + abbr + "]: " + definition + "\n")
+
+ self.p_p = 0
+ self.out(data)
+ self.lastWasNL = data and data[-1] == '\n'
+ self.outcount += 1
+
+ def handle_data(self, data):
+ if r'\/script>' in data: self.quiet -= 1
+ self.o(data, 1)
+
+ def unknown_decl(self, data): pass
+
+def wrapwrite(text): sys.stdout.write(text.encode('utf8'))
+
+def html2text_file(html, out=wrapwrite, baseurl=''):
+ h = _html2text(out, baseurl)
+ h.feed(html)
+ h.feed("")
+ return h.close()
+
+def html2text(html, baseurl=''):
+ return optwrap(html2text_file(html, None, baseurl))
+
+if __name__ == "__main__":
+ baseurl = ''
+ if sys.argv[1:]:
+ arg = sys.argv[1]
+ if arg.startswith('http://') or arg.startswith('https://'):
+ baseurl = arg
+ j = urllib.urlopen(baseurl)
+ try:
+ from feedparser import _getCharacterEncoding as enc
+ except ImportError:
+ enc = lambda x, y: ('utf-8', 1)
+ text = j.read()
+ encoding = enc(j.headers, text)[0]
+ if encoding == 'us-ascii': encoding = 'utf-8'
+ data = text.decode(encoding)
+
+ else:
+ encoding = 'utf8'
+ if len(sys.argv) > 2:
+ encoding = sys.argv[2]
+ data = open(arg, 'r').read().decode(encoding)
+ else:
+ data = sys.stdin.read().decode('utf8')
+ wrapwrite(html2text(data, baseurl))
+
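For reference, a minimal usage sketch of the module imported above (the HTML snippet is invented); html2text() feeds the markup through the SGML parser and then re-wraps paragraphs with optwrap(). The calibre.utils.html2text import path matches the later patches in this series.

    # sketch only, not part of the patch
    from calibre.utils.html2text import html2text

    sample = u'<h1>Title</h1><p>Some <b>bold</b> text and a <a href="http://example.com">link</a>.</p>'
    print html2text(sample)
    # Expected shape of the output (Markdown-like):
    #   # Title
    #
    #   Some **bold** text and a [link][1].
    #
    #      [1]: http://example.com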
From 9aefafc74506ac60fbc0e0ffbe1c53d48edbc0a5 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Mon, 1 Nov 2010 01:22:47 +0100
Subject: [PATCH 025/163] Implemented a basic HTML check and a None check to
 avoid problems with html2text
---
src/calibre/ebooks/metadata/fetch.py | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/src/calibre/ebooks/metadata/fetch.py b/src/calibre/ebooks/metadata/fetch.py
index 87989a4d42..d45a299e39 100644
--- a/src/calibre/ebooks/metadata/fetch.py
+++ b/src/calibre/ebooks/metadata/fetch.py
@@ -16,6 +16,8 @@ from calibre.ebooks.metadata.covers import check_for_cover
metadata_config = None
+html_check = re.compile("([\<])([^\>]{1,})*([\>])", re.I)
+
class MetadataSource(Plugin): # {{{
'''
Represents a source to query for metadata. Subclasses must implement
@@ -78,10 +80,11 @@ class MetadataSource(Plugin): # {{{
mi.rating = None
if not c.get('comments', True):
mi.comments = None
+ if c.get('textconvert', True) and mi.comments is not None \
+ and html_check.search(mi.comments) is not None:
+ mi.comments = html2text(mi.comments)
if not c.get('tags', True):
mi.tags = []
- if c.get('textconvert', True) and mi.comments is not None:
- mi.comments = html2text(mi.comments)
except Exception, e:
self.exception = e
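A rough sketch of the effect of this guard (the sample strings are invented): downloaded comments are only passed through html2text() when they appear to contain at least one tag, otherwise they are left untouched.

    # sketch only, using the regex introduced above
    import re
    html_check = re.compile("([\<])([^\>]{1,})*([\>])", re.I)

    print bool(html_check.search('A short plain-text blurb.'))   # False -> left as-is
    print bool(html_check.search('A <b>rich</b> blurb.'))        # True  -> converted with html2text()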
From a8578eee2d4008a547f0cf2ac9c880ef02cf0a37 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Tue, 2 Nov 2010 00:05:20 +0100
Subject: [PATCH 026/163] minor corrections linked to bug 7345
---
src/calibre/ebooks/metadata/fetch.py | 20 +++++++++-----------
1 file changed, 9 insertions(+), 11 deletions(-)
diff --git a/src/calibre/ebooks/metadata/fetch.py b/src/calibre/ebooks/metadata/fetch.py
index 36a1af9c07..dedd251640 100644
--- a/src/calibre/ebooks/metadata/fetch.py
+++ b/src/calibre/ebooks/metadata/fetch.py
@@ -17,8 +17,6 @@ from calibre.utils.html2text import html2text
metadata_config = None
-html_check = re.compile("([\<])([^\>]{1,})*([\>])", re.I)
-
class MetadataSource(Plugin): # {{{
'''
Represents a source to query for metadata. Subclasses must implement
@@ -86,9 +84,6 @@ class MetadataSource(Plugin): # {{{
mi.rating = None
if not c.get('comments', True):
mi.comments = None
- if c.get('textconvert', True) and mi.comments is not None \
- and html_check.search(mi.comments) is not None:
- mi.comments = html2text(mi.comments)
if not c.get('tags', True):
mi.tags = []
if self.has_html_comments and mi.comments and \
@@ -151,18 +146,21 @@ class MetadataSource(Plugin): # {{{
setattr(w, '_'+x, cb)
cb.setChecked(c.get(x, True))
w._layout.addWidget(cb)
-
- cb = QCheckBox(_('Convert comments downloaded from %s to plain text')%(self.name))
- setattr(w, '_textcomments', cb)
- cb.setChecked(c.get('textcomments', False))
- w._layout.addWidget(cb)
+
+ if self.has_html_comments:
+ cb = QCheckBox(_('Convert comments downloaded from %s to plain text')%(self.name))
+ setattr(w, '_textcomments', cb)
+ cb.setChecked(c.get('textcomments', False))
+ w._layout.addWidget(cb)
return w
def save_settings(self, w):
dl_settings = {}
- for x in ('rating', 'tags', 'comments', 'textcomments'):
+ for x in ('rating', 'tags', 'comments'):
dl_settings[x] = getattr(w, '_'+x).isChecked()
+ if self.has_html_comments:
+ dl_settings['textcomments'] = getattr(w, '_textcomments').isChecked()
c = self.config_store()
c.set(self.name, dl_settings)
if hasattr(w, '_sc'):
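As a rough sketch of the resulting behaviour (the values are invented), a metadata source without HTML comments now persists only the three common options, while a source with has_html_comments = True also stores the plain-text conversion preference:

    # has_html_comments = False
    dl_settings = {'rating': True, 'tags': True, 'comments': True}
    # has_html_comments = True (e.g. the Fictionwise source added below)
    dl_settings = {'rating': True, 'tags': True, 'comments': True, 'textcomments': False}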
From a0fc1086364cab8d744530274ac5149ecfdda2f1 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Sat, 13 Nov 2010 15:22:18 +0100
Subject: [PATCH 027/163] Adding Fictionwise metadata source
---
src/calibre/customize/builtins.py | 4 +-
src/calibre/ebooks/metadata/fetch.py | 18 ++
src/calibre/ebooks/metadata/fictionwise.py | 351 +++++++++++++++++++++
3 files changed, 371 insertions(+), 2 deletions(-)
create mode 100644 src/calibre/ebooks/metadata/fictionwise.py
diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index bd766827a5..04364b6b28 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -481,7 +481,7 @@ from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
from calibre.devices.kobo.driver import KOBO
from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \
- LibraryThing
+ LibraryThing, Fictionwise
from calibre.ebooks.metadata.douban import DoubanBooks
from calibre.ebooks.metadata.covers import OpenLibraryCovers, \
LibraryThingCovers, DoubanCovers
@@ -490,7 +490,7 @@ from calibre.ebooks.epub.fix.unmanifested import Unmanifested
from calibre.ebooks.epub.fix.epubcheck import Epubcheck
plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon,
- LibraryThing, DoubanBooks, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
+ LibraryThing, Fictionwise, DoubanBooks, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
Epubcheck, OpenLibraryCovers, LibraryThingCovers, DoubanCovers]
plugins += [
ComicInput,
diff --git a/src/calibre/ebooks/metadata/fetch.py b/src/calibre/ebooks/metadata/fetch.py
index dedd251640..c9d6a74cb2 100644
--- a/src/calibre/ebooks/metadata/fetch.py
+++ b/src/calibre/ebooks/metadata/fetch.py
@@ -267,6 +267,24 @@ class LibraryThing(MetadataSource): # {{{
# }}}
+class Fictionwise(MetadataSource): # {{{
+
+ author = 'Sengian'
+ name = 'Fictionwise'
+ description = _('Downloads metadata from Fictionwise')
+
+ has_html_comments = True
+
+ def fetch(self):
+ from calibre.ebooks.metadata.fictionwise import search
+ try:
+ self.results = search(self.title, self.book_author, self.publisher,
+ self.isbn, max_results=10, verbose=self.verbose)
+ except Exception, e:
+ self.exception = e
+ self.tb = traceback.format_exc()
+
+ # }}}
def result_index(source, result):
if not result.isbn:
diff --git a/src/calibre/ebooks/metadata/fictionwise.py b/src/calibre/ebooks/metadata/fictionwise.py
new file mode 100644
index 0000000000..2fa9a1bcee
--- /dev/null
+++ b/src/calibre/ebooks/metadata/fictionwise.py
@@ -0,0 +1,351 @@
+from __future__ import with_statement
+__license__ = 'GPL 3'
+__copyright__ = '2010, sengian '
+__docformat__ = 'restructuredtext en'
+
+import sys, textwrap, re
+from urllib import urlencode
+
+from lxml import html, etree
+from lxml.html import soupparser
+from lxml.etree import tostring
+
+from calibre import browser, preferred_encoding
+from calibre.ebooks.chardet import xml_to_unicode
+from calibre.ebooks.metadata import MetaInformation, check_isbn, \
+ authors_to_sort_string
+from calibre.library.comments import sanitize_comments_html
+from calibre.utils.config import OptionParser
+from calibre.utils.date import parse_date, utcnow
+
+
+def report(verbose):
+ if verbose:
+ import traceback
+ traceback.print_exc()
+
+class Query(object):
+
+ BASE_URL = 'http://www.fictionwise.com/servlet/mw'
+
+ def __init__(self, title=None, author=None, publisher=None, keywords=None, max_results=20):
+ assert not(title is None and author is None and keywords is None)
+ assert (max_results < 21)
+
+ self.max_results = max_results
+
+ q = { 'template' : 'searchresults_adv.htm' ,
+ 'searchtitle' : '',
+ 'searchauthor' : '',
+ 'searchpublisher' : '',
+ 'searchkeyword' : '',
+ #possibilities startoflast, fullname, lastfirst
+ 'searchauthortype' : 'startoflast',
+ 'searchcategory' : '',
+ 'searchcategory2' : '',
+ 'searchprice_s' : '0',
+ 'searchprice_e' : 'ANY',
+ 'searchformat' : '',
+ 'searchgeo' : 'US',
+ 'searchfwdatetype' : '',
+ #maybe use dates fields if needed?
+ #'sortorder' : 'DESC',
+ #many options available: b.SortTitle, a.SortName,
+ #b.DateFirstPublished, b.FWPublishDate
+ 'sortby' : 'b.SortTitle'
+ }
+ if title is not None:
+ q['searchtitle'] = title
+ if author is not None:
+ q['searchauthor'] = author
+ if publisher is not None:
+ q['searchpublisher'] = publisher
+ if keywords is not None:
+ q['searchkeyword'] = keywords
+
+ if isinstance(q, unicode):
+ q = q.encode('utf-8')
+ self.urldata = urlencode(q)
+
+ def __call__(self, browser, verbose):
+ if verbose:
+ print 'Query:', self.BASE_URL+self.urldata
+
+ try:
+ raw = browser.open_novisit(self.BASE_URL, self.urldata).read()
+ except Exception, e:
+ report(verbose)
+ if callable(getattr(e, 'getcode', None)) and \
+ e.getcode() == 404:
+ return
+ raise
+ if '404 - ' in raw:
+ return
+ raw = xml_to_unicode(raw, strip_encoding_pats=True,
+ resolve_entities=True)[0]
+ try:
+ feed = soupparser.fromstring(raw)
+ except:
+ return
+
+ # get list of results as links
+ results = feed.xpath("//table[3]/tr/td[2]/table/tr/td/p/table[2]/tr[@valign]")
+ results = results[:self.max_results]
+ results = [i.xpath('descendant-or-self::a')[0].get('href') for i in results]
+ #return feed if no links ie normally a single book or nothing
+ if not results:
+ results = [feed]
+ return results
+
+class ResultList(list):
+
+ BASE_URL = 'http://www.fictionwise.com'
+ COLOR_VALUES = {'BLUE': 4, 'GREEN': 3, 'YELLOW': 2, 'RED': 1, 'NA': 0}
+
+ def __init__(self):
+ self.retitle = re.compile(r'\[[^\[\]]+\]')
+ self.rechkauth = re.compile(r'.*book\s*by', re.I)
+        self.redesc = re.compile(r'book\s*description\s*:\s*(<br[^>]+>)*(?P<desc>.*)' \
+                + '<br[^>]+>.{,15}publisher\s*:', re.I)
+ self.repub = re.compile(r'.*publisher\s*:\s*', re.I)
+ self.redate = re.compile(r'.*release\s*date\s*:\s*', re.I)
+ self.retag = re.compile(r'.*book\s*category\s*:\s*', re.I)
+        self.resplitbr = re.compile(r'<br[^>]+>', re.I)
+        self.recomment = re.compile(r'(?s)<!--.*?-->')
+        self.reimg = re.compile(r'<img[^>]*>', re.I)
+ self.resanitize = re.compile(r'\[HTML_REMOVED\]\s*', re.I)
+        self.renbcom = re.compile('(?P<nbcom>\d+)\s*Reader Ratings:')
+        self.recolor = re.compile('(?P<ncolor>[^/]+).gif')
+        self.resplitbrdiv = re.compile(r'(<br[^>]+>|</?div[^>]*>)', re.I)
+ self.reisbn = re.compile(r'.*ISBN\s*:\s*', re.I)
+
+ def strip_tags_etree(self, etreeobj, invalid_tags):
+ for itag in invalid_tags:
+ for elt in etreeobj.getiterator(itag):
+ elt.drop_tag()
+ return etreeobj
+
+ def clean_entry(self, entry,
+ invalid_tags = ('font', 'strong', 'b', 'ul', 'span', 'a'),
+ remove_tags_trees = ('script',)):
+ for it in entry[0].iterchildren(tag='table'):
+ entry[0].remove(it)
+ entry[0].remove(entry[0].xpath( 'descendant-or-self::p[1]')[0])
+ entry = entry[0]
+ cleantree = self.strip_tags_etree(entry, invalid_tags)
+ for itag in remove_tags_trees:
+ for elts in cleantree.getiterator(itag):
+ elts.drop_tree()
+ return cleantree
+
+ def output_entry(self, entry, prettyout = True, htmlrm="\d+"):
+ out = tostring(entry, pretty_print=prettyout)
+ reclean = re.compile('(\n+|\t+|\r+|'+htmlrm+';)')
+ return reclean.sub('', out)
+
+ def get_title(self, entry):
+ title = entry.findtext('./')
+ return self.retitle.sub('', title).strip()
+
+ def get_authors(self, entry):
+ authortext = entry.find('./br').tail
+ if not self.rechkauth.search(authortext):
+ return []
+ #TODO: parse all tag if necessary
+ authortext = self.rechkauth.sub('', authortext)
+ return [a.strip() for a in authortext.split('&')]
+
+ def get_rating(self, entrytable, verbose):
+ nbcomment = tostring(entrytable.getprevious())
+ try:
+ nbcomment = self.renbcom.search(nbcomment).group("nbcom")
+ except:
+ report(verbose)
+ return None
+ hval = dict((self.COLOR_VALUES[self.recolor.search(image.get('src', default='NA.gif')).group("ncolor")],
+ float(image.get('height', default=0))) \
+ for image in entrytable.getiterator('img'))
+ #ratings as x/20, not sure
+ return 5*sum(k*v for (k, v) in hval.iteritems())/sum(hval.itervalues())
+
+ def get_description(self, entry):
+ description = self.output_entry(entry.find('./p'),htmlrm="")
+ description = self.redesc.search(description)
+ if not description and not description.group("desc"):
+ return None
+ #remove invalid tags
+ description = self.reimg.sub('', description.group("desc"))
+ description = self.recomment.sub('', description)
+ description = self.resanitize.sub('', sanitize_comments_html(description))
+ return 'SUMMARY:\n' + re.sub(r'\n\s+','\n', description)
+
+ def get_publisher(self, entry):
+ publisher = self.output_entry(entry.find('./p'))
+ publisher = filter(lambda x: self.repub.search(x) is not None,
+ self.resplitbr.split(publisher))
+ if not len(publisher):
+ return None
+ publisher = self.repub.sub('', publisher[0])
+ return publisher.split(',')[0].strip()
+
+ def get_tags(self, entry):
+ tag = self.output_entry(entry.find('./p'))
+ tag = filter(lambda x: self.retag.search(x) is not None,
+ self.resplitbr.split(tag))
+ if not len(tag):
+ return []
+ return map(lambda x: x.strip(), self.retag.sub('', tag[0]).split('/'))
+
+ def get_date(self, entry, verbose):
+ date = self.output_entry(entry.find('./p'))
+ date = filter(lambda x: self.redate.search(x) is not None,
+ self.resplitbr.split(date))
+ if not len(date):
+ return None
+ #TODO: parse all tag if necessary
+ try:
+ d = self.redate.sub('', date[0])
+ if d:
+ default = utcnow().replace(day=15)
+ d = parse_date(d, assume_utc=True, default=default)
+ else:
+ d = None
+ except:
+ report(verbose)
+ d = None
+ return d
+
+ def get_ISBN(self, entry):
+ isbns = self.output_entry(entry.getchildren()[2])
+ isbns = filter(lambda x: self.reisbn.search(x) is not None,
+ self.resplitbrdiv.split(isbns))
+ if not len(isbns):
+ return None
+ #TODO: parse all tag if necessary
+ isbns = [self.reisbn.sub('', x) for x in isbns if check_isbn(self.reisbn.sub('', x))]
+ return sorted(isbns, cmp=lambda x,y:cmp(len(x), len(y)))[-1]
+
+ def fill_MI(self, entry, title, authors, ratings, verbose):
+ mi = MetaInformation(title, authors)
+ mi.rating = ratings
+ mi.comments = self.get_description(entry)
+ mi.publisher = self.get_publisher(entry)
+ mi.tags = self.get_tags(entry)
+ mi.pubdate = self.get_date(entry, verbose)
+ mi.isbn = self.get_ISBN(entry)
+ mi.author_sort = authors_to_sort_string(authors)
+ # mi.language = self.get_language(x, verbose)
+ return mi
+
+ def get_individual_metadata(self, browser, linkdata):
+ try:
+ raw = browser.open_novisit(self.BASE_URL + linkdata).read()
+ except Exception, e:
+ report(verbose)
+ if callable(getattr(e, 'getcode', None)) and \
+ e.getcode() == 404:
+ return
+ raise
+ if '404 - ' in raw:
+ report(verbose)
+ return
+ raw = xml_to_unicode(raw, strip_encoding_pats=True,
+ resolve_entities=True)[0]
+ try:
+ feed = soupparser.fromstring(raw)
+ except:
+ return
+
+ # get results
+ return feed.xpath("//table[3]/tr/td[2]/table[1]/tr/td/font/table/tr/td")
+
+ def populate(self, entries, browser, verbose=False):
+ for x in entries:
+ try:
+ entry = self.get_individual_metadata(browser, x)
+ entry = self.clean_entry(entry)
+ title = self.get_title(entry)
+ #ratings: get table for rating then drop
+ for elt in entry.getiterator('table'):
+ ratings = self.get_rating(elt, verbose)
+ elt.getprevious().drop_tree()
+ elt.drop_tree()
+ authors = self.get_authors(entry)
+ except Exception, e:
+ if verbose:
+ print 'Failed to get all details for an entry'
+ print e
+ continue
+ self.append(self.fill_MI(entry, title, authors, ratings, verbose))
+
+ def populate_single(self, feed, verbose=False):
+ try:
+ entry = feed.xpath("//table[3]/tr/td[2]/table[1]/tr/td/font/table/tr/td")
+ entry = self.clean_entry(entry)
+ title = self.get_title(entry)
+ #ratings: get table for rating then drop
+ for elt in entry.getiterator('table'):
+ ratings = self.get_rating(elt, verbose)
+ elt.getprevious().drop_tree()
+ elt.drop_tree()
+ authors = self.get_authors(entry)
+ except Exception, e:
+ if verbose:
+ print 'Failed to get all details for an entry'
+ print e
+ return
+ self.append(self.fill_MI(entry, title, authors, ratings, verbose))
+
+
+def search(title=None, author=None, publisher=None, isbn=None,
+ min_viewability='none', verbose=False, max_results=5,
+ keywords=None):
+ br = browser()
+ entries = Query(title=title, author=author, publisher=publisher,
+ keywords=keywords, max_results=max_results)(br, verbose)
+
+ #List of entry
+ ans = ResultList()
+ if len(entries) > 1:
+ ans.populate(entries, br, verbose)
+ else:
+ ans.populate_single(entries[0], verbose)
+ return ans
+
+
+def option_parser():
+ parser = OptionParser(textwrap.dedent(\
+ '''\
+ %prog [options]
+
+ Fetch book metadata from Fictionwise. You must specify one of title, author,
+ or keywords. No ISBN specification possible. Will fetch a maximum of 20 matches,
+ so you should make your query as specific as possible.
+ '''
+ ))
+ parser.add_option('-t', '--title', help='Book title')
+ parser.add_option('-a', '--author', help='Book author(s)')
+ parser.add_option('-p', '--publisher', help='Book publisher')
+ parser.add_option('-k', '--keywords', help='Keywords')
+ parser.add_option('-m', '--max-results', default=5,
+ help='Maximum number of results to fetch')
+ parser.add_option('-v', '--verbose', default=0, action='count',
+ help='Be more verbose about errors')
+ return parser
+
+def main(args=sys.argv):
+ parser = option_parser()
+ opts, args = parser.parse_args(args)
+ try:
+ results = search(opts.title, opts.author, publisher=opts.publisher,
+ keywords=opts.keywords, verbose=opts.verbose, max_results=opts.max_results)
+ except AssertionError:
+ report(True)
+ parser.print_help()
+ return 1
+ for result in results:
+ print unicode(result).encode(preferred_encoding, 'replace')
+ print
+
+if __name__ == '__main__':
+ sys.exit(main())
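A minimal sketch of driving the new module from Python rather than from the command line (the query values are invented); search() returns a ResultList of MetaInformation objects:

    # sketch only, not part of the patch
    from calibre.ebooks.metadata.fictionwise import search

    results = search(title='Some Title', author='Some Author',
                     max_results=5, verbose=True)
    for mi in results:
        print mi.title, '-', ', '.join(mi.authors), '-', mi.isbn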
From 041fbd293227dbc52dc9d823e37512d4ed441c0e Mon Sep 17 00:00:00 2001
From: Sengian
Date: Sat, 13 Nov 2010 22:35:32 +0100
Subject: [PATCH 028/163] Correct rating scale for fictionwise.py
---
src/calibre/ebooks/metadata/fictionwise.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/calibre/ebooks/metadata/fictionwise.py b/src/calibre/ebooks/metadata/fictionwise.py
index 2fa9a1bcee..ca438805ea 100644
--- a/src/calibre/ebooks/metadata/fictionwise.py
+++ b/src/calibre/ebooks/metadata/fictionwise.py
@@ -165,8 +165,8 @@ class ResultList(list):
hval = dict((self.COLOR_VALUES[self.recolor.search(image.get('src', default='NA.gif')).group("ncolor")],
float(image.get('height', default=0))) \
for image in entrytable.getiterator('img'))
- #ratings as x/20, not sure
- return 5*sum(k*v for (k, v) in hval.iteritems())/sum(hval.itervalues())
+ #ratings as x/5
+ return 1.25*sum(k*v for (k, v) in hval.iteritems())/sum(hval.itervalues())
def get_description(self, entry):
description = self.output_entry(entry.find('./p'),htmlrm="")
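The rating is a weighted average of the colour values (BLUE=4 ... NA=0), weighted by the pixel height of each rating-bar image, then scaled from the 0-4 colour range to 0-5 by the 1.25 factor. A worked example with invented bar heights:

    hval = {4: 60.0, 3: 30.0, 2: 10.0}   # colour value -> bar height in px (hypothetical)
    rating = 1.25 * sum(k*v for (k, v) in hval.iteritems()) / sum(hval.itervalues())
    # = 1.25 * (4*60 + 3*30 + 2*10) / 100 = 1.25 * 3.5 = 4.375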
From c92271dc2d8b71a01e6484d611ec0b28d1d9a6ae Mon Sep 17 00:00:00 2001
From: Sengian
Date: Thu, 18 Nov 2010 21:22:21 +0100
Subject: [PATCH 029/163] Minor revisions to the Fictionwise plugin
---
src/calibre/ebooks/metadata/fictionwise.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/calibre/ebooks/metadata/fictionwise.py b/src/calibre/ebooks/metadata/fictionwise.py
index ca438805ea..de60cd9dca 100644
--- a/src/calibre/ebooks/metadata/fictionwise.py
+++ b/src/calibre/ebooks/metadata/fictionwise.py
@@ -29,10 +29,10 @@ class Query(object):
BASE_URL = 'http://www.fictionwise.com/servlet/mw'
def __init__(self, title=None, author=None, publisher=None, keywords=None, max_results=20):
- assert not(title is None and author is None and keywords is None)
+ assert not(title is None and author is None and publisher is None and keywords is None)
assert (max_results < 21)
- self.max_results = max_results
+ self.max_results = int(max_results)
q = { 'template' : 'searchresults_adv.htm' ,
'searchtitle' : '',
@@ -327,7 +327,7 @@ def option_parser():
parser.add_option('-a', '--author', help='Book author(s)')
parser.add_option('-p', '--publisher', help='Book publisher')
parser.add_option('-k', '--keywords', help='Keywords')
- parser.add_option('-m', '--max-results', default=5,
+ parser.add_option('-m', '--max-results', default=20,
help='Maximum number of results to fetch')
parser.add_option('-v', '--verbose', default=0, action='count',
help='Be more verbose about errors')
From 78e4aba18ce8cd86f2e91834a866029c0f3ab476 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Thu, 18 Nov 2010 21:37:51 +0100
Subject: [PATCH 030/163] Revert
---
resources/catalog/stylesheet.css | 198 +++++++++++++++++--------------
src/calibre/ebooks/rtf/input.py | 74 ++++++++++--
2 files changed, 177 insertions(+), 95 deletions(-)
diff --git a/resources/catalog/stylesheet.css b/resources/catalog/stylesheet.css
index afda6ffc05..057c6c9f42 100644
--- a/resources/catalog/stylesheet.css
+++ b/resources/catalog/stylesheet.css
@@ -1,87 +1,98 @@
-body {
- background-color: white;
-}
+body { background-color: white; }
-p.title {
- margin-top: 0em;
- margin-bottom: 1em;
- text-align: center;
- font-style: italic;
- font-size: xx-large;
- border-bottom: solid black 4px;
-}
+p.title {
+ margin-top:0em;
+ margin-bottom:1em;
+ text-align:center;
+ font-style:italic;
+ font-size:xx-large;
+ border-bottom: solid black 2px;
+ }
p.author {
- margin-top: 0em;
- margin-bottom: 0em;
- text-align: left;
- text-indent: 1em;
- font-size: large;
-}
-
-p.tags {
- margin-top: 0em;
- margin-bottom: 0em;
- text-align: left;
- text-indent: 1em;
- font-size: small;
-}
-
-p.description {
- text-align: left;
- font-style: normal;
- margin-top: 0em;
-}
-
-p.date_index {
- font-size: x-large;
+ margin-top:0em;
+ margin-bottom:0em;
text-align: center;
- font-weight: bold;
- margin-top: 1em;
- margin-bottom: 0px;
-}
-
-p.letter_index {
- font-size: x-large;
- text-align: center;
- font-weight: bold;
- margin-top: 1em;
- margin-bottom: 0px;
-}
+ text-indent: 0em;
+ font-size:large;
+ }
p.author_index {
- font-size: large;
- text-align: left;
- margin-top: 0px;
- margin-bottom: 0px;
+ font-size:large;
+ font-weight:bold;
+ text-align:left;
+ margin-top:0px;
+ margin-bottom:-2px;
text-indent: 0em;
-}
+ }
+
+p.tags {
+ margin-top:0.5em;
+ margin-bottom:0em;
+ text-align: left;
+ text-indent: 0.0in;
+ }
+
+p.formats {
+ font-size:90%;
+ margin-top:0em;
+ margin-bottom:0.5em;
+ text-align: left;
+ text-indent: 0.0in;
+ }
+
+div.description > p:first-child {
+ margin: 0 0 0 0;
+ text-indent: 0em;
+ }
+
+div.description {
+ margin: 0 0 0 0;
+ text-indent: 1em;
+ }
+
+p.date_index {
+ font-size:x-large;
+ text-align:center;
+ font-weight:bold;
+ margin-top:1em;
+ margin-bottom:0px;
+ }
+
+p.letter_index {
+ font-size:x-large;
+ text-align:center;
+ font-weight:bold;
+ margin-top:1em;
+ margin-bottom:0px;
+ }
p.series {
- text-align: left;
- margin-top: 0px;
- margin-bottom: 0px;
- margin-left: 2em;
- text-indent: -2em;
-}
+ font-style:italic;
+ margin-top:2px;
+ margin-bottom:0px;
+ margin-left:2em;
+ text-align:left;
+ text-indent:-2em;
+ }
p.read_book {
- text-align: left;
- margin-top: 0px;
- margin-bottom: 0px;
- margin-left: 2em;
- text-indent: -2em;
-}
+ text-align:left;
+ margin-top:0px;
+ margin-bottom:0px;
+ margin-left:2em;
+ text-indent:-2em;
+ }
p.unread_book {
- text-align: left;
- margin-top: 0px;
- margin-bottom: 0px;
- margin-left: 2em;
- text-indent: -2em;
-}
+ text-align:left;
+ margin-top:0px;
+ margin-bottom:0px;
+ margin-left:2em;
+ text-indent:-2em;
+ }
-p.missing_book {
+p.wishlist_item {
text-align:left;
margin-top:0px;
margin-bottom:0px;
@@ -90,23 +101,36 @@ p.missing_book {
}
p.date_read {
- text-align: left;
- margin-top: 0px;
- margin-bottom: 0px;
- margin-left: 6em;
- text-indent: -6em;
-}
+ text-align:left;
+ margin-top:0px;
+ margin-bottom:0px;
+ margin-left:6em;
+ text-indent:-6em;
+ }
-hr.series_divider {
- width: 50%;
- margin-left: 1em;
- margin-top: 0em;
- margin-bottom: 0em;
-}
+hr.description_divider {
+ width:90%;
+ margin-left:5%;
+ border-top: solid white 0px;
+ border-right: solid white 0px;
+ border-bottom: solid black 1px;
+ border-left: solid white 0px;
+ }
hr.annotations_divider {
- width: 50%;
- margin-left: 1em;
- margin-top: 0em;
- margin-bottom: 0em;
-}
\ No newline at end of file
+ width:50%;
+ margin-left:1em;
+ margin-top:0em;
+ margin-bottom:0em;
+ }
+
+td.publisher, td.date {
+ font-weight:bold;
+ text-align:center;
+ }
+td.rating {
+ text-align: center;
+ }
+td.thumbnail img {
+ -webkit-box-shadow: 4px 4px 12px #999;
+ }
\ No newline at end of file
diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py
index ec6f9a04d3..32de91c011 100644
--- a/src/calibre/ebooks/rtf/input.py
+++ b/src/calibre/ebooks/rtf/input.py
@@ -9,6 +9,36 @@ from lxml import etree
from calibre.customize.conversion import InputFormatPlugin
from calibre.ebooks.conversion.utils import PreProcessor
+border_style_map = {
+ 'single' : 'solid',
+ 'double-thickness-border' : 'double',
+ 'shadowed-border': 'outset',
+ 'double-border': 'double',
+ 'dotted-border': 'dotted',
+ 'dashed': 'dashed',
+ 'hairline': 'solid',
+ 'inset': 'inset',
+ 'dash-small': 'dashed',
+ 'dot-dash': 'dotted',
+ 'dot-dot-dash': 'dotted',
+ 'outset': 'outset',
+ 'tripple': 'double',
+ 'thick-thin-small': 'solid',
+ 'thin-thick-small': 'solid',
+ 'thin-thick-thin-small': 'solid',
+ 'thick-thin-medium': 'solid',
+ 'thin-thick-medium': 'solid',
+ 'thin-thick-thin-medium': 'solid',
+ 'thick-thin-large': 'solid',
+ 'thin-thick-thin-large': 'solid',
+ 'wavy': 'ridge',
+ 'double-wavy': 'ridge',
+ 'striped': 'ridge',
+ 'emboss': 'inset',
+ 'engrave': 'inset',
+ 'frame': 'ridge',
+}
+
class InlineClass(etree.XSLTExtension):
FMTS = ('italics', 'bold', 'underlined', 'strike-through', 'small-caps')
@@ -51,7 +81,6 @@ class RTFInput(InputFormatPlugin):
parser = ParseRtf(
in_file = stream,
out_file = ofile,
- deb_dir = 'H:\\Temp\\Calibre\\rtfdebug',
# Convert symbol fonts to unicode equivalents. Default
# is 1
convert_symbol = 1,
@@ -138,8 +167,7 @@ class RTFInput(InputFormatPlugin):
return name
-
- def write_inline_css(self, ic):
+ def write_inline_css(self, ic, border_styles):
font_size_classes = ['span.fs%d { font-size: %spt }'%(i, x) for i, x in
enumerate(ic.font_sizes)]
color_classes = ['span.col%d { color: %s }'%(i, x) for i, x in
@@ -163,6 +191,10 @@ class RTFInput(InputFormatPlugin):
''')
css += '\n'+'\n'.join(font_size_classes)
css += '\n' +'\n'.join(color_classes)
+
+ for cls, val in border_styles.items():
+ css += '\n\n.%s {\n%s\n}'%(cls, val)
+
with open('styles.css', 'ab') as f:
f.write(css)
@@ -182,6 +214,32 @@ class RTFInput(InputFormatPlugin):
'Failed to preprocess RTF to convert unicode sequences, ignoring...')
return fname
+ def convert_borders(self, doc):
+ border_styles = []
+ style_map = {}
+ for elem in doc.xpath(r'//*[local-name()="cell"]'):
+ style = ['border-style: hidden', 'border-width: 1px',
+ 'border-color: black']
+ for x in ('bottom', 'top', 'left', 'right'):
+ bs = elem.get('border-cell-%s-style'%x, None)
+ if bs:
+ cbs = border_style_map.get(bs, 'solid')
+ style.append('border-%s-style: %s'%(x, cbs))
+ bw = elem.get('border-cell-%s-line-width'%x, None)
+ if bw:
+ style.append('border-%s-width: %spt'%(x, bw))
+ bc = elem.get('border-cell-%s-color'%x, None)
+ if bc:
+ style.append('border-%s-color: %s'%(x, bc))
+ style = ';\n'.join(style)
+ if style not in border_styles:
+ border_styles.append(style)
+ idx = border_styles.index(style)
+ cls = 'border_style%d'%idx
+ style_map[cls] = style
+ elem.set('class', cls)
+ return style_map
+
def convert(self, stream, options, file_ext, log,
accelerators):
from calibre.ebooks.metadata.meta import get_metadata
@@ -191,17 +249,16 @@ class RTFInput(InputFormatPlugin):
self.log = log
self.log('Converting RTF to XML...')
#Name of the preprocesssed RTF file
- #fname = self.preprocess(stream.name)
- fname = stream.name
+ fname = self.preprocess(stream.name)
try:
xml = self.generate_xml(fname)
except RtfInvalidCodeException, e:
raise ValueError(_('This RTF file has a feature calibre does not '
'support. Convert it to HTML first and then try it.\n%s')%e)
- dataxml = open('dataxml.xml', 'w')
+ '''dataxml = open('dataxml.xml', 'w')
dataxml.write(xml)
- dataxml.close
+ dataxml.close'''
d = glob.glob(os.path.join('*_rtf_pict_dir', 'picts.rtf'))
if d:
@@ -214,6 +271,7 @@ class RTFInput(InputFormatPlugin):
self.log('Parsing XML...')
parser = etree.XMLParser(recover=True, no_network=True)
doc = etree.fromstring(xml, parser=parser)
+ border_styles = self.convert_borders(doc)
for pict in doc.xpath('//rtf:pict[@num]',
namespaces={'rtf':'http://rtf2xml.sourceforge.net/'}):
num = int(pict.get('num'))
@@ -235,7 +293,7 @@ class RTFInput(InputFormatPlugin):
preprocessor = PreProcessor(self.options, log=getattr(self, 'log', None))
res = preprocessor(res)
f.write(res)
- self.write_inline_css(inline_class)
+ self.write_inline_css(inline_class, border_styles)
stream.seek(0)
mi = get_metadata(stream, 'rtf')
if not mi.title:
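A rough illustration (attribute values invented) of what convert_borders() does for one table cell in the rtf2xml output; each distinct style string is assigned one reusable border_styleN class, and write_inline_css() appends the corresponding rule to styles.css:

    # input cell in the intermediate XML:
    #   <cell border-cell-bottom-style="double-border"
    #         border-cell-bottom-line-width="2"
    #         border-cell-bottom-color="#000000"/>
    # the element gets class="border_style0" and the generated rule is:
    #   .border_style0 {
    #   border-style: hidden;
    #   border-width: 1px;
    #   border-color: black;
    #   border-bottom-style: double;
    #   border-bottom-width: 2pt;
    #   border-bottom-color: #000000
    #   }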
From 8f6cc227cd46db8f008720ef7f50250152a5788e Mon Sep 17 00:00:00 2001
From: Sengian
Date: Sun, 21 Nov 2010 00:08:23 +0100
Subject: [PATCH 031/163] Minor modification to mreplace.py
---
src/calibre/utils/mreplace.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/calibre/utils/mreplace.py b/src/calibre/utils/mreplace.py
index dff5fab578..b9fbc0bded 100644
--- a/src/calibre/utils/mreplace.py
+++ b/src/calibre/utils/mreplace.py
@@ -17,7 +17,7 @@ class MReplace(UserDict):
if len(self.data) > 0:
keys = sorted(self.data.keys(), key=len)
keys.reverse()
- tmp = "(%s)" % "|".join([re.escape(item) for item in keys])
+ tmp = "(%s)" % "|".join(map(re.escape, keys))
if self.re != tmp:
self.re = tmp
self.regex = re.compile(self.re)
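The change above is behaviour-preserving; the alternation built from the keys is identical either way. A tiny illustration with invented keys (longest keys come first so longer matches win):

    import re
    keys = sorted({'cat': 'dog', 'cats': 'dogs'}.keys(), key=len)
    keys.reverse()
    print "(%s)" % "|".join(map(re.escape, keys))   # -> (cats|cat)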
From 229f511202b408f0627685e4eeab39022604b450 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Sun, 21 Nov 2010 00:08:53 +0100
Subject: [PATCH 032/163] Minor modifications to fictionwise.py
---
src/calibre/ebooks/metadata/fictionwise.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/calibre/ebooks/metadata/fictionwise.py b/src/calibre/ebooks/metadata/fictionwise.py
index de60cd9dca..706d38b559 100644
--- a/src/calibre/ebooks/metadata/fictionwise.py
+++ b/src/calibre/ebooks/metadata/fictionwise.py
@@ -237,7 +237,7 @@ class ResultList(list):
# mi.language = self.get_language(x, verbose)
return mi
- def get_individual_metadata(self, browser, linkdata):
+ def get_individual_metadata(self, browser, linkdata, verbose):
try:
raw = browser.open_novisit(self.BASE_URL + linkdata).read()
except Exception, e:
@@ -262,7 +262,7 @@ class ResultList(list):
def populate(self, entries, browser, verbose=False):
for x in entries:
try:
- entry = self.get_individual_metadata(browser, x)
+ entry = self.get_individual_metadata(browser, x, verbose)
entry = self.clean_entry(entry)
title = self.get_title(entry)
#ratings: get table for rating then drop
From eb4e7154dbcb63863ee70bb8dcc14c508631272f Mon Sep 17 00:00:00 2001
From: Sengian
Date: Sun, 21 Nov 2010 00:16:24 +0100
Subject: [PATCH 033/163] Nicebooks plugin for metadata and covers. Should be
 disabled by default.
---
src/calibre/customize/builtins.py | 6 +-
src/calibre/ebooks/metadata/nicebooks.py | 458 +++++++++++++++++++++++
2 files changed, 462 insertions(+), 2 deletions(-)
create mode 100644 src/calibre/ebooks/metadata/nicebooks.py
diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index 5723da34a8..ce5275d35e 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -483,15 +483,17 @@ from calibre.devices.kobo.driver import KOBO
from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \
LibraryThing, Fictionwise
from calibre.ebooks.metadata.douban import DoubanBooks
+from calibre.ebooks.metadata.nicebooks import NiceBooks
from calibre.ebooks.metadata.covers import OpenLibraryCovers, \
LibraryThingCovers, DoubanCovers
+from calibre.ebooks.metadata.nicebooks import NiceBooksCovers
from calibre.library.catalog import CSV_XML, EPUB_MOBI, BIBTEX
from calibre.ebooks.epub.fix.unmanifested import Unmanifested
from calibre.ebooks.epub.fix.epubcheck import Epubcheck
plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon,
- LibraryThing, Fictionwise, DoubanBooks, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
- Epubcheck, OpenLibraryCovers, LibraryThingCovers, DoubanCovers]
+ LibraryThing, Fictionwise, DoubanBooks, NiceBooks,CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
+ Epubcheck, OpenLibraryCovers, LibraryThingCovers, DoubanCovers, NiceBooksCovers]
plugins += [
ComicInput,
EPUBInput,
diff --git a/src/calibre/ebooks/metadata/nicebooks.py b/src/calibre/ebooks/metadata/nicebooks.py
new file mode 100644
index 0000000000..28fb2de562
--- /dev/null
+++ b/src/calibre/ebooks/metadata/nicebooks.py
@@ -0,0 +1,458 @@
+from __future__ import with_statement
+__license__ = 'GPL 3'
+__copyright__ = '2010, sengian '
+__docformat__ = 'restructuredtext en'
+
+import sys, textwrap, re, traceback, socket, os
+from urllib import urlencode
+from functools import partial
+from math import ceil
+from copy import deepcopy
+
+from lxml import html
+from lxml.html import soupparser
+
+from calibre.utils.date import parse_date, utcnow
+from calibre import browser, preferred_encoding
+from calibre.ebooks.chardet import xml_to_unicode
+from calibre.ebooks.metadata import MetaInformation, check_isbn, \
+ authors_to_sort_string
+from calibre.ebooks.metadata.fetch import MetadataSource
+from calibre.ebooks.metadata.covers import CoverDownload
+from calibre.utils.config import OptionParser
+
+class NiceBooks(MetadataSource):
+
+ name = 'Nicebooks'
+    description = _('Downloads metadata from French Nicebooks')
+ supported_platforms = ['windows', 'osx', 'linux']
+ author = 'Sengian'
+ version = (1, 0, 0)
+
+ def fetch(self):
+ try:
+ self.results = search(self.title, self.book_author, self.publisher,
+ self.isbn, max_results=10, verbose=self.verbose)
+ except Exception, e:
+ self.exception = e
+ self.tb = traceback.format_exc()
+
+class NiceBooksCovers(CoverDownload):
+
+ name = 'Nicebooks covers'
+    description = _('Downloads covers from French Nicebooks')
+ supported_platforms = ['windows', 'osx', 'linux']
+ author = 'Sengian'
+ type = _('Cover download')
+ version = (1, 0, 0)
+
+ def has_cover(self, mi, ans, timeout=5.):
+ if not mi.isbn:
+ return False
+ br = browser()
+ try:
+ entry = Query(isbn=mi.isbn, max_results=1)(br, False, timeout)[0]
+ if Covers(isbn)(entry).check_cover():
+ self.debug('cover for', mi.isbn, 'found')
+ ans.set()
+ except Exception, e:
+ self.debug(e)
+
+ def get_covers(self, mi, result_queue, abort, timeout=5.):
+ if not mi.isbn:
+ return
+ br = browser()
+ try:
+ entry = Query(isbn=mi.isbn, max_results=1)(br, False, timeout)[0]
+ cover_data, ext = Covers(isbn)(entry).get_cover(br, timeout)
+ if not ext:
+ ext = 'jpg'
+ result_queue.put((True, cover_data, ext, self.name))
+ except Exception, e:
+ result_queue.put((False, self.exception_to_string(e),
+ traceback.format_exc(), self.name))
+
+
+def report(verbose):
+ if verbose:
+ import traceback
+ traceback.print_exc()
+
+def replace_monthsfr(datefr):
+ # Replace french months by english equivalent for parse_date
+ frtoen = {
+ u'[jJ]anvier': u'jan',
+ u'[fF].vrier': u'feb',
+ u'[mM]ars': u'mar',
+ u'[aA]vril': u'apr',
+ u'[mM]ai': u'may',
+ u'[jJ]uin': u'jun',
+ u'[jJ]uillet': u'jul',
+ u'[aA]o.t': u'aug',
+ u'[sS]eptembre': u'sep',
+ u'[Oo]ctobre': u'oct',
+ u'[nN]ovembre': u'nov',
+ u'[dD].cembre': u'dec' }
+ for k in frtoen.iterkeys():
+ tmp = re.sub(k, frtoen[k], datefr)
+ if tmp <> datefr: break
+ return tmp
+
+class Query(object):
+
+ BASE_URL = 'http://fr.nicebooks.com/'
+
+ def __init__(self, title=None, author=None, publisher=None, isbn=None, keywords=None, max_results=20):
+ assert not(title is None and author is None and publisher is None \
+ and isbn is None and keywords is None)
+ assert (max_results < 21)
+
+ self.max_results = int(max_results)
+
+ q = ''
+ if isbn is not None:
+ q += isbn
+ else:
+
+ if title is not None:
+ q += title
+ if author is not None:
+ q += author
+ if publisher is not None:
+ q += publisher
+ if keywords is not None:
+ q += keywords
+
+ if isinstance(q, unicode):
+ q = q.encode('utf-8')
+ self.urldata = 'search?' + urlencode({'q':q,'s':'Rechercher'})
+
+ def __call__(self, browser, verbose, timeout = 5.):
+ if verbose:
+ print 'Query:', self.BASE_URL+self.urldata
+
+ try:
+ raw = browser.open_novisit(self.BASE_URL+self.urldata, timeout=timeout).read()
+ except Exception, e:
+ report(verbose)
+ if callable(getattr(e, 'getcode', None)) and \
+ e.getcode() == 404:
+ return
+ raise
+ if '404 - ' in raw:
+ return
+ raw = xml_to_unicode(raw, strip_encoding_pats=True,
+ resolve_entities=True)[0]
+ try:
+ feed = soupparser.fromstring(raw)
+ except:
+ return
+
+ #nb of page to call
+ try:
+ nbresults = int(feed.xpath("//div[@id='topbar']/b")[0].text)
+ except:
+ #direct hit
+ return [feed]
+
+ nbpagetoquery = ceil(min(nbresults, self.max_results)/10)
+ pages =[feed]
+ if nbpagetoquery > 1:
+ for i in xrange(2, nbpagetoquery + 1):
+ try:
+ urldata = self.urldata + '&p=' + str(i)
+ raw = browser.open_novisit(self.BASE_URL+urldata, timeout=timeout).read()
+ except Exception, e:
+ continue
+ if '404 - ' in raw:
+ continue
+ raw = xml_to_unicode(raw, strip_encoding_pats=True,
+ resolve_entities=True)[0]
+ try:
+ feed = soupparser.fromstring(raw)
+ except:
+ continue
+ pages.append(feed)
+
+ results = []
+ for x in pages:
+ results.extend([i.find_class('title')[0].get('href') \
+ for i in x.xpath("//ul[@id='results']/li")])
+ return results[:self.max_results]
+
+class ResultList(list):
+
+ BASE_URL = 'http://fr.nicebooks.com'
+
+ def __init__(self):
+ self.repub = re.compile(r'\s*.diteur\s*', re.I)
+ self.reauteur = re.compile(r'\s*auteur.*', re.I)
+ self.reautclean = re.compile(r'\s*\(.*\)\s*')
+
+ def get_title(self, entry):
+ title = deepcopy(entry.find("div[@id='book-info']"))
+ title.remove(title.find("dl[@title='Informations sur le livre']"))
+ title = ' '.join([i.text_content() for i in title.iterchildren()])
+ return title.replace('\n', '')
+
+ def get_authors(self, entry):
+ author = entry.find("div[@id='book-info']/dl[@title='Informations sur le livre']")
+ authortext = []
+ for x in author.getiterator('dt'):
+ if self.reauteur.match(x.text):
+ elt = x.getnext()
+ i = 0
+ while elt.tag <> 'dt' and i < 20:
+ authortext.append(elt.text_content())
+ elt = elt.getnext()
+ i += 1
+ break
+ if len(authortext) == 1:
+ authortext = [self.reautclean.sub('', authortext[0])]
+ return authortext
+
+ def get_description(self, entry, verbose):
+ try:
+ return 'RESUME:\n' + entry.xpath("//p[@id='book-description']")[0].text
+ except:
+ report(verbose)
+ return None
+
+ def get_publisher(self, entry):
+ publisher = entry.find("div[@id='book-info']/dl[@title='Informations sur le livre']")
+ publitext = None
+ for x in publisher.getiterator('dt'):
+ if self.repub.match(x.text):
+ publitext = x.getnext().text_content()
+ break
+ return publitext
+
+ def get_date(self, entry, verbose):
+ date = entry.find("div[@id='book-info']/dl[@title='Informations sur le livre']")
+ for x in date.getiterator('dt'):
+ if x.text == 'Date de parution':
+ d = x.getnext().text_content()
+ break
+ if not len(d):
+ return None
+ try:
+ default = utcnow().replace(day=15)
+ d = replace_monthsfr(d)
+ d = parse_date(d, assume_utc=True, default=default)
+ except:
+ report(verbose)
+ d = None
+ return d
+
+ def get_ISBN(self, entry):
+ isbn = entry.find("div[@id='book-info']/dl[@title='Informations sur le livre']")
+ isbntext = None
+ for x in isbn.getiterator('dt'):
+ if x.text == 'ISBN':
+ isbntext = x.getnext().text_content()
+ if not check_isbn(isbntext):
+ return None
+ break
+ return isbntext
+
+ def get_language(self, entry):
+ language = entry.find("div[@id='book-info']/dl[@title='Informations sur le livre']")
+ langtext = None
+ for x in language.getiterator('dt'):
+ if x.text == 'Langue':
+ langtext = x.getnext().text_content()
+ break
+ return langtext
+
+ def fill_MI(self, entry, title, authors, verbose):
+ mi = MetaInformation(title, authors)
+ mi.comments = self.get_description(entry, verbose)
+ mi.publisher = self.get_publisher(entry)
+ mi.pubdate = self.get_date(entry, verbose)
+ mi.isbn = self.get_ISBN(entry)
+ mi.author_sort = authors_to_sort_string(authors)
+ mi.language = self.get_language(entry)
+ return mi
+
+ def get_individual_metadata(self, browser, linkdata, verbose):
+ try:
+ raw = browser.open_novisit(self.BASE_URL + linkdata).read()
+ except Exception, e:
+ report(verbose)
+ if callable(getattr(e, 'getcode', None)) and \
+ e.getcode() == 404:
+ return
+ raise
+ if '404 - ' in raw:
+ report(verbose)
+ return
+ raw = xml_to_unicode(raw, strip_encoding_pats=True,
+ resolve_entities=True)[0]
+ try:
+ feed = soupparser.fromstring(raw)
+ except:
+ return
+
+ # get results
+ return feed.xpath("//div[@id='container']")[0]
+
+ def populate(self, entries, browser, verbose=False):
+ for x in entries:
+ try:
+ entry = self.get_individual_metadata(browser, x, verbose)
+ title = self.get_title(entry)
+ authors = self.get_authors(entry)
+ except Exception, e:
+ if verbose:
+ print 'Failed to get all details for an entry'
+ print e
+ continue
+ self.append(self.fill_MI(entry, title, authors, verbose))
+
+ def populate_single(self, feed, verbose=False):
+ try:
+ entry = feed.xpath("//div[@id='container']")[0]
+ title = self.get_title(entry)
+ authors = self.get_authors(entry)
+ except Exception, e:
+ if verbose:
+ print 'Failed to get all details for an entry'
+ print e
+ return
+ self.append(self.fill_MI(entry, title, authors, verbose))
+
+class NiceBooksError(Exception):
+ pass
+
+class ISBNNotFound(NiceBooksError):
+ pass
+
+class Covers(object):
+
+ def __init__(self, isbn = None):
+ assert isbn is not None
+ self.urlimg = ''
+ self.isbn = isbn
+ self.isbnf = False
+
+ def __call__(self, entry = None):
+ try:
+ self.urlimg = entry.xpath("//div[@id='book-picture']/a")[0].get('href')
+ except:
+ return self
+ isbno = entry.get_element_by_id('book-info').find("dl[@title='Informations sur le livre']")
+ isbntext = None
+ for x in isbno.getiterator('dt'):
+ if x.text == 'ISBN':
+ isbntext = x.getnext().text_content()
+ break
+ if isbntext is not None:
+ self.isbnf = True
+ return self
+
+ def check_cover(self):
+ if self.urlimg:
+ return True
+ else:
+ return False
+
+ def get_cover(self, browser, timeout = 5.):
+ try:
+ return browser.open_novisit(self.urlimg, timeout=timeout).read(), \
+ self.urlimg.rpartition('.')[-1]
+ except Exception, err:
+ if isinstance(getattr(err, 'args', [None])[0], socket.timeout):
+ err = NiceBooksError(_('Nicebooks timed out. Try again later.'))
+ raise err
+ if not len(self.urlimg):
+ if not self.isbnf:
+ raise ISBNNotFound('ISBN: '+self.isbn+_(' not found.'))
+            raise NiceBooksError(_('An error occurred with the Nicebooks cover fetcher'))
+
+
+def search(title=None, author=None, publisher=None, isbn=None,
+ verbose=False, max_results=5, keywords=None):
+ br = browser()
+ entries = Query(title=title, author=author, isbn=isbn, publisher=publisher,
+ keywords=keywords, max_results=max_results)(br, verbose)
+
+ if entries is None:
+ return
+
+ #List of entry
+ ans = ResultList()
+ if len(entries) > 1:
+ ans.populate(entries, br, verbose)
+ else:
+ ans.populate_single(entries[0], verbose)
+ return ans
+
+def check_for_cover(isbn):
+ br = browser()
+ entry = Query(isbn=isbn, max_results=1)(br, False)[0]
+ return Covers(isbn)(entry).check_cover()
+
+def cover_from_isbn(isbn, timeout = 5.):
+ br = browser()
+ entry = Query(isbn=isbn, max_results=1)(br, False, timeout)[0]
+ return Covers(isbn)(entry).get_cover(br, timeout)
+
+
+def option_parser():
+ parser = OptionParser(textwrap.dedent(\
+ '''\
+ %prog [options]
+
+ Fetch book metadata from Nicebooks. You must specify one of title, author,
+ ISBN, publisher or keywords. Will fetch a maximum of 20 matches,
+ so you should make your query as specific as possible.
+ It can also get covers if the option is activated.
+ '''
+ ))
+ parser.add_option('-t', '--title', help='Book title')
+ parser.add_option('-a', '--author', help='Book author(s)')
+ parser.add_option('-p', '--publisher', help='Book publisher')
+ parser.add_option('-i', '--isbn', help='Book ISBN')
+ parser.add_option('-k', '--keywords', help='Keywords')
+ parser.add_option('-c', '--covers', default=0,
+ help='Covers: 1-Check/ 2-Download')
+    parser.add_option('--coverspath', default='',
+ help='Covers files path')
+ parser.add_option('-m', '--max-results', default=20,
+ help='Maximum number of results to fetch')
+ parser.add_option('-v', '--verbose', default=0, action='count',
+ help='Be more verbose about errors')
+ return parser
+
+def main(args=sys.argv):
+ parser = option_parser()
+ opts, args = parser.parse_args(args)
+ try:
+ results = search(opts.title, opts.author, isbn=opts.isbn, publisher=opts.publisher,
+ keywords=opts.keywords, verbose=opts.verbose, max_results=opts.max_results)
+ except AssertionError:
+ report(True)
+ parser.print_help()
+ return 1
+ for result in results:
+ print unicode(result).encode(preferred_encoding, 'replace')
+ covact = int(opts.covers)
+ if covact == 1:
+ textcover = 'No cover found!'
+ if check_for_cover(result.isbn):
+ textcover = 'A cover was found for this book'
+ print textcover
+ elif covact == 2:
+ cover_data, ext = cover_from_isbn(result.isbn)
+ if not ext:
+ ext = 'jpg'
+ cpath = result.isbn
+ if len(opts.coverspath):
+ cpath = os.path.normpath(opts.coverspath + '/' + result.isbn)
+ oname = os.path.abspath(cpath+'.'+ext)
+ open(oname, 'wb').write(cover_data)
+ print 'Cover saved to file ', oname
+ print
+
+if __name__ == '__main__':
+ sys.exit(main())
\ No newline at end of file
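A minimal sketch of the cover helpers defined at the end of the new module (the ISBN is invented and may not be known to Nicebooks):

    # sketch only, not part of the patch
    from calibre.ebooks.metadata.nicebooks import check_for_cover, cover_from_isbn

    isbn = '9782070360024'   # hypothetical ISBN
    if check_for_cover(isbn):
        cover_data, ext = cover_from_isbn(isbn)
        open('cover.' + (ext or 'jpg'), 'wb').write(cover_data)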
From fd711e6075e2dec43ab37c76fad9ed299fcdc71d Mon Sep 17 00:00:00 2001
From: Sengian
Date: Sun, 21 Nov 2010 01:28:56 +0100
Subject: [PATCH 034/163] Minor fix for nicebooks.py
---
src/calibre/ebooks/metadata/nicebooks.py | 49 +++++++++++-------------
1 file changed, 23 insertions(+), 26 deletions(-)
diff --git a/src/calibre/ebooks/metadata/nicebooks.py b/src/calibre/ebooks/metadata/nicebooks.py
index 28fb2de562..98ecdf3625 100644
--- a/src/calibre/ebooks/metadata/nicebooks.py
+++ b/src/calibre/ebooks/metadata/nicebooks.py
@@ -52,7 +52,7 @@ class NiceBooksCovers(CoverDownload):
br = browser()
try:
entry = Query(isbn=mi.isbn, max_results=1)(br, False, timeout)[0]
- if Covers(isbn)(entry).check_cover():
+ if Covers(mi.isbn)(entry).check_cover():
self.debug('cover for', mi.isbn, 'found')
ans.set()
except Exception, e:
@@ -64,7 +64,7 @@ class NiceBooksCovers(CoverDownload):
br = browser()
try:
entry = Query(isbn=mi.isbn, max_results=1)(br, False, timeout)[0]
- cover_data, ext = Covers(isbn)(entry).get_cover(br, timeout)
+ cover_data, ext = Covers(mi.isbn)(entry).get_cover(br, timeout)
if not ext:
ext = 'jpg'
result_queue.put((True, cover_data, ext, self.name))
@@ -109,20 +109,12 @@ class Query(object):
self.max_results = int(max_results)
- q = ''
if isbn is not None:
- q += isbn
+ q = isbn
else:
-
- if title is not None:
- q += title
- if author is not None:
- q += author
- if publisher is not None:
- q += publisher
- if keywords is not None:
- q += keywords
-
+ q = ' '.join([i for i in (title, author, publisher, keywords) \
+ if i is not None])
+
if isinstance(q, unicode):
q = q.encode('utf-8')
self.urldata = 'search?' + urlencode({'q':q,'s':'Rechercher'})
@@ -185,15 +177,15 @@ class ResultList(list):
BASE_URL = 'http://fr.nicebooks.com'
def __init__(self):
- self.repub = re.compile(r'\s*.diteur\s*', re.I)
- self.reauteur = re.compile(r'\s*auteur.*', re.I)
- self.reautclean = re.compile(r'\s*\(.*\)\s*')
+ self.repub = re.compile(u'\s*.diteur\s*', re.I)
+ self.reauteur = re.compile(u'\s*auteur.*', re.I)
+ self.reautclean = re.compile(u'\s*\(.*\)\s*')
def get_title(self, entry):
title = deepcopy(entry.find("div[@id='book-info']"))
title.remove(title.find("dl[@title='Informations sur le livre']"))
title = ' '.join([i.text_content() for i in title.iterchildren()])
- return title.replace('\n', '')
+ return unicode(title.replace('\n', ''))
def get_authors(self, entry):
author = entry.find("div[@id='book-info']/dl[@title='Informations sur le livre']")
@@ -203,7 +195,7 @@ class ResultList(list):
elt = x.getnext()
i = 0
while elt.tag <> 'dt' and i < 20:
- authortext.append(elt.text_content())
+ authortext.append(unicode(elt.text_content()))
elt = elt.getnext()
i += 1
break
@@ -213,7 +205,7 @@ class ResultList(list):
def get_description(self, entry, verbose):
try:
- return 'RESUME:\n' + entry.xpath("//p[@id='book-description']")[0].text
+ return 'RESUME:\n' + unicode(entry.xpath("//p[@id='book-description']")[0].text)
except:
report(verbose)
return None
@@ -225,15 +217,16 @@ class ResultList(list):
if self.repub.match(x.text):
publitext = x.getnext().text_content()
break
- return publitext
+ return unicode(publitext).strip()
def get_date(self, entry, verbose):
date = entry.find("div[@id='book-info']/dl[@title='Informations sur le livre']")
+ d = ''
for x in date.getiterator('dt'):
if x.text == 'Date de parution':
d = x.getnext().text_content()
break
- if not len(d):
+ if len(d) == 0:
return None
try:
default = utcnow().replace(day=15)
@@ -252,8 +245,9 @@ class ResultList(list):
isbntext = x.getnext().text_content()
if not check_isbn(isbntext):
return None
+ isbntext = isbntext.replace('-', '')
break
- return isbntext
+ return unicode(isbntext)
def get_language(self, entry):
language = entry.find("div[@id='book-info']/dl[@title='Informations sur le livre']")
@@ -262,7 +256,7 @@ class ResultList(list):
if x.text == 'Langue':
langtext = x.getnext().text_content()
break
- return langtext
+ return unicode(langtext).strip()
def fill_MI(self, entry, title, authors, verbose):
mi = MetaInformation(title, authors)
@@ -371,12 +365,12 @@ class Covers(object):
def search(title=None, author=None, publisher=None, isbn=None,
- verbose=False, max_results=5, keywords=None):
+ max_results=5, verbose=False, keywords=None):
br = browser()
entries = Query(title=title, author=author, isbn=isbn, publisher=publisher,
keywords=keywords, max_results=max_results)(br, verbose)
- if entries is None:
+ if entries is None or len(entries) == 0:
return
#List of entry
@@ -434,6 +428,9 @@ def main(args=sys.argv):
report(True)
parser.print_help()
return 1
+ if results is None or len(results) == 0:
+ print 'No result found for this search!'
+ return 0
for result in results:
print unicode(result).encode(preferred_encoding, 'replace')
covact = int(opts.covers)
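The rewritten query construction simply concatenates whichever search fields were supplied (example values invented):

    title, author, publisher, keywords = 'Dune', 'Herbert', None, None
    q = ' '.join([i for i in (title, author, publisher, keywords) if i is not None])
    print q   # -> 'Dune Herbert'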
From bc98b043fd4a7e7a09ab765c1d94f5782bda8676 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Sun, 21 Nov 2010 01:29:22 +0100
Subject: [PATCH 035/163] Fix for download cover regression
---
src/calibre/gui2/dialogs/metadata_single.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/calibre/gui2/dialogs/metadata_single.py b/src/calibre/gui2/dialogs/metadata_single.py
index 0b9b33868c..1eae761561 100644
--- a/src/calibre/gui2/dialogs/metadata_single.py
+++ b/src/calibre/gui2/dialogs/metadata_single.py
@@ -716,10 +716,10 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
self.title.setText(book.title)
self.authors.setText(authors_to_string(book.authors))
if book.author_sort: self.author_sort.setText(book.author_sort)
- if d.opt_overwrite_cover_image.isChecked() and book.has_cover:
- self.fetch_cover()
if book.publisher: self.publisher.setEditText(book.publisher)
if book.isbn: self.isbn.setText(book.isbn)
+ if d.opt_overwrite_cover_image.isChecked() and book.has_cover:
+ self.fetch_cover()
if book.pubdate:
d = book.pubdate
self.pubdate.setDate(QDate(d.year, d.month, d.day))
From 681c451238bbcf4d0f9e7c8102ef9e83de79e9ce Mon Sep 17 00:00:00 2001
From: Sengian
Date: Sun, 21 Nov 2010 09:14:44 +0100
Subject: [PATCH 036/163] Disable my plugins by default
---
src/calibre/customize/ui.py | 2 ++
1 file changed, 2 insertions(+)
diff --git a/src/calibre/customize/ui.py b/src/calibre/customize/ui.py
index 844269e453..e963a17df9 100644
--- a/src/calibre/customize/ui.py
+++ b/src/calibre/customize/ui.py
@@ -121,6 +121,8 @@ def enable_plugin(plugin_or_name):
default_disabled_plugins = set([
'Douban Books', 'Douban.com covers',
+ 'NiceBooks', 'NiceBooksCovers',
+ 'Fictionwise'
])
def is_disabled(plugin):
From c5cbaffd20b042150a4c654584bbc526e613f5f6 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Sun, 21 Nov 2010 09:19:59 +0100
Subject: [PATCH 037/163] Externalize the Fictionwise metadata plugin into fictionwise.py
---
src/calibre/customize/builtins.py | 1 +
src/calibre/ebooks/metadata/fetch.py | 18 ------------------
src/calibre/ebooks/metadata/fictionwise.py | 19 +++++++++++++++++++
3 files changed, 20 insertions(+), 18 deletions(-)
diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index ce5275d35e..4815375563 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -484,6 +484,7 @@ from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \
LibraryThing, Fictionwise
from calibre.ebooks.metadata.douban import DoubanBooks
from calibre.ebooks.metadata.nicebooks import NiceBooks
+from calibre.ebooks.metadata.fictionwise import Fictionwise
from calibre.ebooks.metadata.covers import OpenLibraryCovers, \
LibraryThingCovers, DoubanCovers
from calibre.ebooks.metadata.nicebooks import NiceBooksCovers
diff --git a/src/calibre/ebooks/metadata/fetch.py b/src/calibre/ebooks/metadata/fetch.py
index c9d6a74cb2..dedd251640 100644
--- a/src/calibre/ebooks/metadata/fetch.py
+++ b/src/calibre/ebooks/metadata/fetch.py
@@ -267,24 +267,6 @@ class LibraryThing(MetadataSource): # {{{
# }}}
-class Fictionwise(MetadataSource): # {{{
-
- author = 'Sengian'
- name = 'Fictionwise'
- description = _('Downloads metadata from Fictionwise')
-
- has_html_comments = True
-
- def fetch(self):
- from calibre.ebooks.metadata.fictionwise import search
- try:
- self.results = search(self.title, self.book_author, self.publisher,
- self.isbn, max_results=10, verbose=self.verbose)
- except Exception, e:
- self.exception = e
- self.tb = traceback.format_exc()
-
- # }}}
def result_index(source, result):
if not result.isbn:
diff --git a/src/calibre/ebooks/metadata/fictionwise.py b/src/calibre/ebooks/metadata/fictionwise.py
index 706d38b559..828ea31c3a 100644
--- a/src/calibre/ebooks/metadata/fictionwise.py
+++ b/src/calibre/ebooks/metadata/fictionwise.py
@@ -15,9 +15,28 @@ from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata import MetaInformation, check_isbn, \
authors_to_sort_string
from calibre.library.comments import sanitize_comments_html
+from calibre.ebooks.metadata.fetch import MetadataSource
from calibre.utils.config import OptionParser
from calibre.utils.date import parse_date, utcnow
+class Fictionwise(MetadataSource): # {{{
+
+ author = 'Sengian'
+ name = 'Fictionwise'
+ description = _('Downloads metadata from Fictionwise')
+
+ has_html_comments = True
+
+ def fetch(self):
+ try:
+ self.results = search(self.title, self.book_author, self.publisher,
+ self.isbn, max_results=10, verbose=self.verbose)
+ except Exception, e:
+ self.exception = e
+ self.tb = traceback.format_exc()
+
+ # }}}
+
def report(verbose):
if verbose:
From 9c30a416120d257e5bd9078408287683d150c191 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Sun, 21 Nov 2010 09:50:53 +0100
Subject: [PATCH 038/163] Correct Nicebooks max results problem
---
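The key change is the page-count arithmetic: under Python 2, min(nbresults, self.max_results)/10 is integer division, so any partial results page is truncated away before ceil() ever runs. A minimal sketch of the two expressions, with made-up result counts:
    # Python 2 semantics; the counts are illustrative only
    from math import ceil
    nbresults, max_results = 7, 5
    old = ceil(min(nbresults, max_results)/10)              # 0.0 -- 5/10 truncates to 0 first
    new = int(ceil(float(min(nbresults, max_results))/10))  # 1   -- the corrected expression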
src/calibre/ebooks/metadata/nicebooks.py | 45 +++++++++++-------------
1 file changed, 21 insertions(+), 24 deletions(-)
diff --git a/src/calibre/ebooks/metadata/nicebooks.py b/src/calibre/ebooks/metadata/nicebooks.py
index 98ecdf3625..e72d4b26ae 100644
--- a/src/calibre/ebooks/metadata/nicebooks.py
+++ b/src/calibre/ebooks/metadata/nicebooks.py
@@ -5,7 +5,6 @@ __docformat__ = 'restructuredtext en'
import sys, textwrap, re, traceback, socket
from urllib import urlencode
-from functools import partial
from math import ceil
from copy import deepcopy
@@ -147,7 +146,7 @@ class Query(object):
#direct hit
return [feed]
- nbpagetoquery = ceil(min(nbresults, self.max_results)/10)
+ nbpagetoquery = int(ceil(float(min(nbresults, self.max_results))/10))
pages =[feed]
if nbpagetoquery > 1:
for i in xrange(2, nbpagetoquery + 1):
@@ -193,11 +192,9 @@ class ResultList(list):
for x in author.getiterator('dt'):
if self.reauteur.match(x.text):
elt = x.getnext()
- i = 0
- while elt.tag <> 'dt' and i < 20:
+ while elt.tag == 'dd':
authortext.append(unicode(elt.text_content()))
elt = elt.getnext()
- i += 1
break
if len(authortext) == 1:
authortext = [self.reautclean.sub('', authortext[0])]
@@ -291,29 +288,32 @@ class ResultList(list):
return feed.xpath("//div[@id='container']")[0]
def populate(self, entries, browser, verbose=False):
- for x in entries:
+ #single entry
+ if len(entries) ==1:
try:
- entry = self.get_individual_metadata(browser, x, verbose)
+ entry = entries[0].xpath("//div[@id='container']")[0]
title = self.get_title(entry)
authors = self.get_authors(entry)
except Exception, e:
if verbose:
print 'Failed to get all details for an entry'
print e
- continue
+ return
self.append(self.fill_MI(entry, title, authors, verbose))
+ else:
+ #multiple entries
+ for x in entries:
+ try:
+ entry = self.get_individual_metadata(browser, x, verbose)
+ title = self.get_title(entry)
+ authors = self.get_authors(entry)
+ except Exception, e:
+ if verbose:
+ print 'Failed to get all details for an entry'
+ print e
+ continue
+ self.append(self.fill_MI(entry, title, authors, verbose))
- def populate_single(self, feed, verbose=False):
- try:
- entry = feed.xpath("//div[@id='container']")[0]
- title = self.get_title(entry)
- authors = self.get_authors(entry)
- except Exception, e:
- if verbose:
- print 'Failed to get all details for an entry'
- print e
- return
- self.append(self.fill_MI(entry, title, authors, verbose))
class NiceBooksError(Exception):
pass
@@ -372,13 +372,10 @@ def search(title=None, author=None, publisher=None, isbn=None,
if entries is None or len(entries) == 0:
return
-
+
#List of entry
ans = ResultList()
- if len(entries) > 1:
- ans.populate(entries, br, verbose)
- else:
- ans.populate_single(entries[0], verbose)
+ ans.populate(entries, br, verbose)
return ans
def check_for_cover(isbn):
From 3a37d7e78fa94dff29c86bde480e085463070f56 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Sun, 21 Nov 2010 10:24:56 +0100
Subject: [PATCH 039/163] Optimize metadata retrieval
---
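The refactor replaces four separate scans of the "Informations sur le livre" definition list (publisher, date, ISBN, language) with a single pass in get_book_info(). A rough sketch of the single-pass idea over hypothetical markup, using only lxml:
    from lxml import html
    dl = html.fromstring(
        "<dl><dt>ISBN</dt><dd>2-07-061275-8</dd>"
        "<dt>Langue</dt><dd>Francais</dd></dl>")
    info = {}
    for dt in dl.getiterator('dt'):          # one walk over the dt/dd pairs
        info[dt.text] = dt.getnext().text_content().strip()
    # info == {'ISBN': '2-07-061275-8', 'Langue': 'Francais'}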
src/calibre/ebooks/metadata/nicebooks.py | 65 +++++++++++++++++-------
1 file changed, 47 insertions(+), 18 deletions(-)
diff --git a/src/calibre/ebooks/metadata/nicebooks.py b/src/calibre/ebooks/metadata/nicebooks.py
index e72d4b26ae..f7cffa959b 100644
--- a/src/calibre/ebooks/metadata/nicebooks.py
+++ b/src/calibre/ebooks/metadata/nicebooks.py
@@ -181,13 +181,15 @@ class ResultList(list):
self.reautclean = re.compile(u'\s*\(.*\)\s*')
def get_title(self, entry):
- title = deepcopy(entry.find("div[@id='book-info']"))
+ # title = deepcopy(entry.find("div[@id='book-info']"))
+ title = deepcopy(entry)
title.remove(title.find("dl[@title='Informations sur le livre']"))
title = ' '.join([i.text_content() for i in title.iterchildren()])
return unicode(title.replace('\n', ''))
def get_authors(self, entry):
- author = entry.find("div[@id='book-info']/dl[@title='Informations sur le livre']")
+ # author = entry.find("div[@id='book-info']/dl[@title='Informations sur le livre']")
+ author = entry.find("dl[@title='Informations sur le livre']")
authortext = []
for x in author.getiterator('dt'):
if self.reauteur.match(x.text):
@@ -202,22 +204,46 @@ class ResultList(list):
def get_description(self, entry, verbose):
try:
- return 'RESUME:\n' + unicode(entry.xpath("//p[@id='book-description']")[0].text)
+ return u'RESUME:\n' + unicode(entry.getparent().xpath("//p[@id='book-description']")[0].text)
except:
report(verbose)
return None
-
+
+ def get_book_info(self, entry, mi):
+ entry = entry.find("dl[@title='Informations sur le livre']")
+ for x in entry.getiterator('dt'):
+ if x.text == 'ISBN':
+ isbntext = x.getnext().text_content().replace('-', '')
+ if check_isbn(isbntext):
+ mi.isbn = unicode(isbntext)
+ elif self.repub.match(x.text):
+ mi.publisher = unicode(x.getnext().text_content())
+ elif x.text == 'Langue':
+ mi.language = unicode(x.getnext().text_content())
+ elif x.text == 'Date de parution':
+ d = x.getnext().text_content()
+ try:
+ default = utcnow().replace(day=15)
+ d = replace_monthsfr(d)
+ d = parse_date(d, assume_utc=True, default=default)
+ mi.pubdate = d
+ except:
+ report(verbose)
+ return mi
+
def get_publisher(self, entry):
- publisher = entry.find("div[@id='book-info']/dl[@title='Informations sur le livre']")
+ # publisher = entry.find("div[@id='book-info']/dl[@title='Informations sur le livre']")
+ publisher = entry
publitext = None
for x in publisher.getiterator('dt'):
if self.repub.match(x.text):
publitext = x.getnext().text_content()
break
- return unicode(publitext).strip()
+ return unicode(publitext)
def get_date(self, entry, verbose):
- date = entry.find("div[@id='book-info']/dl[@title='Informations sur le livre']")
+ # date = entry.find("div[@id='book-info']/dl[@title='Informations sur le livre']")
+ date = entry
d = ''
for x in date.getiterator('dt'):
if x.text == 'Date de parution':
@@ -235,35 +261,37 @@ class ResultList(list):
return d
def get_ISBN(self, entry):
- isbn = entry.find("div[@id='book-info']/dl[@title='Informations sur le livre']")
+ # isbn = entry.find("div[@id='book-info']/dl[@title='Informations sur le livre']")
+ isbn = entry
isbntext = None
for x in isbn.getiterator('dt'):
if x.text == 'ISBN':
- isbntext = x.getnext().text_content()
+ isbntext = x.getnext().text_content().replace('-', '')
if not check_isbn(isbntext):
return None
- isbntext = isbntext.replace('-', '')
break
return unicode(isbntext)
def get_language(self, entry):
- language = entry.find("div[@id='book-info']/dl[@title='Informations sur le livre']")
+ # language = entry.find("div[@id='book-info']/dl[@title='Informations sur le livre']")
+ language = entry
langtext = None
for x in language.getiterator('dt'):
if x.text == 'Langue':
langtext = x.getnext().text_content()
break
- return unicode(langtext).strip()
+ return unicode(langtext)
def fill_MI(self, entry, title, authors, verbose):
mi = MetaInformation(title, authors)
- mi.comments = self.get_description(entry, verbose)
- mi.publisher = self.get_publisher(entry)
- mi.pubdate = self.get_date(entry, verbose)
- mi.isbn = self.get_ISBN(entry)
mi.author_sort = authors_to_sort_string(authors)
- mi.language = self.get_language(entry)
- return mi
+ mi.comments = self.get_description(entry, verbose)
+ # entry = entry.find("dl[@title='Informations sur le livre']")
+ # mi.publisher = self.get_publisher(entry)
+ # mi.pubdate = self.get_date(entry, verbose)
+ # mi.isbn = self.get_ISBN(entry)
+ # mi.language = self.get_language(entry)
+ return self.get_book_info(entry, mi)
def get_individual_metadata(self, browser, linkdata, verbose):
try:
@@ -292,6 +320,7 @@ class ResultList(list):
if len(entries) ==1:
try:
entry = entries[0].xpath("//div[@id='container']")[0]
+ entry = entry.find("div[@id='book-info']")
title = self.get_title(entry)
authors = self.get_authors(entry)
except Exception, e:
From 4887bac205622d0c6fe486278286b7eecbc30acc Mon Sep 17 00:00:00 2001
From: Sengian
Date: Sun, 21 Nov 2010 10:29:55 +0100
Subject: [PATCH 040/163] Bug fix: scope multi-entry results to book-info and remove the superseded field helpers
---
src/calibre/ebooks/metadata/nicebooks.py | 52 +-----------------------
1 file changed, 1 insertion(+), 51 deletions(-)
diff --git a/src/calibre/ebooks/metadata/nicebooks.py b/src/calibre/ebooks/metadata/nicebooks.py
index f7cffa959b..9a06bad998 100644
--- a/src/calibre/ebooks/metadata/nicebooks.py
+++ b/src/calibre/ebooks/metadata/nicebooks.py
@@ -230,57 +230,6 @@ class ResultList(list):
except:
report(verbose)
return mi
-
- def get_publisher(self, entry):
- # publisher = entry.find("div[@id='book-info']/dl[@title='Informations sur le livre']")
- publisher = entry
- publitext = None
- for x in publisher.getiterator('dt'):
- if self.repub.match(x.text):
- publitext = x.getnext().text_content()
- break
- return unicode(publitext)
-
- def get_date(self, entry, verbose):
- # date = entry.find("div[@id='book-info']/dl[@title='Informations sur le livre']")
- date = entry
- d = ''
- for x in date.getiterator('dt'):
- if x.text == 'Date de parution':
- d = x.getnext().text_content()
- break
- if len(d) == 0:
- return None
- try:
- default = utcnow().replace(day=15)
- d = replace_monthsfr(d)
- d = parse_date(d, assume_utc=True, default=default)
- except:
- report(verbose)
- d = None
- return d
-
- def get_ISBN(self, entry):
- # isbn = entry.find("div[@id='book-info']/dl[@title='Informations sur le livre']")
- isbn = entry
- isbntext = None
- for x in isbn.getiterator('dt'):
- if x.text == 'ISBN':
- isbntext = x.getnext().text_content().replace('-', '')
- if not check_isbn(isbntext):
- return None
- break
- return unicode(isbntext)
-
- def get_language(self, entry):
- # language = entry.find("div[@id='book-info']/dl[@title='Informations sur le livre']")
- language = entry
- langtext = None
- for x in language.getiterator('dt'):
- if x.text == 'Langue':
- langtext = x.getnext().text_content()
- break
- return unicode(langtext)
def fill_MI(self, entry, title, authors, verbose):
mi = MetaInformation(title, authors)
@@ -334,6 +283,7 @@ class ResultList(list):
for x in entries:
try:
entry = self.get_individual_metadata(browser, x, verbose)
+ entry = entry.find("div[@id='book-info']")
title = self.get_title(entry)
authors = self.get_authors(entry)
except Exception, e:
From 3490c73ad93fa9bd55fd0d9ed513ded5eb6ea1c9 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Sun, 21 Nov 2010 11:10:21 +0100
Subject: [PATCH 041/163] Optimisation of Nicebooks covers
---
src/calibre/ebooks/metadata/nicebooks.py | 19 ++++++-------------
1 file changed, 6 insertions(+), 13 deletions(-)
diff --git a/src/calibre/ebooks/metadata/nicebooks.py b/src/calibre/ebooks/metadata/nicebooks.py
index 9a06bad998..51858e4b77 100644
--- a/src/calibre/ebooks/metadata/nicebooks.py
+++ b/src/calibre/ebooks/metadata/nicebooks.py
@@ -266,7 +266,7 @@ class ResultList(list):
def populate(self, entries, browser, verbose=False):
#single entry
- if len(entries) ==1:
+ if len(entries) == 1 and not isinstance(entries[0], str):
try:
entry = entries[0].xpath("//div[@id='container']")[0]
entry = entry.find("div[@id='book-info']")
@@ -314,25 +314,20 @@ class Covers(object):
except:
return self
isbno = entry.get_element_by_id('book-info').find("dl[@title='Informations sur le livre']")
- isbntext = None
for x in isbno.getiterator('dt'):
- if x.text == 'ISBN':
- isbntext = x.getnext().text_content()
+ if x.text == 'ISBN' and check_isbn(x.getnext().text_content()):
+ self.isbnf = True
break
- if isbntext is not None:
- self.isbnf = True
return self
def check_cover(self):
- if self.urlimg:
- return True
- else:
- return False
+ return True if self.urlimg else False
def get_cover(self, browser, timeout = 5.):
try:
- return browser.open_novisit(self.urlimg, timeout=timeout).read(), \
+ cover, ext = browser.open_novisit(self.urlimg, timeout=timeout).read(), \
self.urlimg.rpartition('.')[-1]
+ return cover, ext if ext else 'jpg'
except Exception, err:
if isinstance(getattr(err, 'args', [None])[0], socket.timeout):
err = NiceBooksError(_('Nicebooks timed out. Try again later.'))
@@ -417,8 +412,6 @@ def main(args=sys.argv):
print textcover
elif covact == 2:
cover_data, ext = cover_from_isbn(result.isbn)
- if not ext:
- ext = 'jpg'
cpath = result.isbn
if len(opts.coverspath):
cpath = os.path.normpath(opts.coverspath + '/' + result.isbn)
From 251cde290283b6b2f29fed61ad638a9c5a504e72 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Tue, 7 Dec 2010 21:40:34 +0100
Subject: [PATCH 042/163] Remove unnecessary check
---
src/calibre/gui2/dialogs/metadata_single.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/calibre/gui2/dialogs/metadata_single.py b/src/calibre/gui2/dialogs/metadata_single.py
index 3205b1d23c..eb9ae71397 100644
--- a/src/calibre/gui2/dialogs/metadata_single.py
+++ b/src/calibre/gui2/dialogs/metadata_single.py
@@ -781,7 +781,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
if book.series_index is not None:
self.series_index.setValue(book.series_index)
if book.has_cover:
- if d.opt_auto_download_cover.isChecked() and book.has_cover:
+ if d.opt_auto_download_cover.isChecked():
self.fetch_cover()
else:
self.fetch_cover_button.setFocus(Qt.OtherFocusReason)
From 824f8b5a67fc354e1dd9dad7dc8dd1c183275295 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Tue, 7 Dec 2010 21:43:09 +0100
Subject: [PATCH 043/163] Use clean_ascii_chars in txt/processor
---
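This swaps the inline control-character scrubbing for the shared clean_ascii_chars() helper. A rough standalone equivalent of what the helper does with its default character list (a sketch with a hypothetical name, not calibre's actual implementation, which also accepts a custom charlist):
    import re
    def strip_control_chars(txt):
        # same C0 control characters the old inline code targeted
        chars = list(range(8)) + [0x0B, 0x0E, 0x0F] + list(range(0x10, 0x19))
        return re.compile(u'|'.join(map(unichr, chars))).sub('', txt)

    strip_control_chars(u'foo\x01bar')   # -> u'foobar'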
src/calibre/ebooks/txt/processor.py | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/src/calibre/ebooks/txt/processor.py b/src/calibre/ebooks/txt/processor.py
index dac1e34df7..50d8419110 100644
--- a/src/calibre/ebooks/txt/processor.py
+++ b/src/calibre/ebooks/txt/processor.py
@@ -9,6 +9,7 @@ import os, re
from calibre import prepare_string_for_xml, isbytestring
from calibre.ebooks.markdown import markdown
from calibre.ebooks.metadata.opf2 import OPFCreator
+from calibre.utils.cleantext import clean_ascii_chars
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember '
@@ -31,10 +32,8 @@ def convert_basic(txt, title='', epub_split_size_kb=0):
txt = re.sub('(?<=.)\s+$', '', txt)
# Remove excessive line breaks.
txt = re.sub('\n{3,}', '\n\n', txt)
- #remove ASCII invalid chars : 0 to 8 and 11-14 to 24
- chars = list(range(8)) + [0x0B, 0x0E, 0x0F] + list(range(0x10, 0x19))
- illegal_chars = re.compile(u'|'.join(map(unichr, chars)))
- txt = illegal_chars.sub('', txt)
+ #remove ASCII invalid chars
+ txt = clean_ascii_chars(txt)
#Takes care if there is no point to split
if epub_split_size_kb > 0:
if isinstance(txt, unicode):
From da4cdeb1d1763ef7b4fdf19a5538ba0439b5d97f Mon Sep 17 00:00:00 2001
From: Sengian
Date: Tue, 7 Dec 2010 21:50:10 +0100
Subject: [PATCH 044/163] Introduce Fictionwise as a disabled plugin
---
src/calibre/customize/builtins.py | 5 +++--
src/calibre/customize/ui.py | 3 ++-
2 files changed, 5 insertions(+), 3 deletions(-)
diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index 5f3aab142e..06da355d6a 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -484,6 +484,7 @@ from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \
LibraryThing
from calibre.ebooks.metadata.douban import DoubanBooks
from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers
+from calibre.ebooks.metadata.fictionwise import Fictionwise
from calibre.ebooks.metadata.covers import OpenLibraryCovers, \
LibraryThingCovers, DoubanCovers
from calibre.library.catalog import CSV_XML, EPUB_MOBI, BIBTEX
@@ -491,8 +492,8 @@ from calibre.ebooks.epub.fix.unmanifested import Unmanifested
from calibre.ebooks.epub.fix.epubcheck import Epubcheck
plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon,
- LibraryThing, DoubanBooks, NiceBooks, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
- Epubcheck, OpenLibraryCovers, LibraryThingCovers, DoubanCovers,
+ LibraryThing, DoubanBooks, NiceBooks, Fictionwise, CSV_XML, EPUB_MOBI, BIBTEX,
+ Unmanifested, Epubcheck, OpenLibraryCovers, LibraryThingCovers, DoubanCovers,
NiceBooksCovers]
plugins += [
ComicInput,
diff --git a/src/calibre/customize/ui.py b/src/calibre/customize/ui.py
index c360122842..2c9daed994 100644
--- a/src/calibre/customize/ui.py
+++ b/src/calibre/customize/ui.py
@@ -120,7 +120,8 @@ def enable_plugin(plugin_or_name):
config['enabled_plugins'] = ep
default_disabled_plugins = set([
- 'Douban Books', 'Douban.com covers', 'Nicebooks', 'Nicebooks covers'
+ 'Douban Books', 'Douban.com covers', 'Nicebooks', 'Nicebooks covers',
+ 'Fictionwise'
])
def is_disabled(plugin):
From 4d20351e8b583e883cdfa4695c987ed70fc7d6bc Mon Sep 17 00:00:00 2001
From: Sengian
Date: Wed, 8 Dec 2010 06:47:25 +0100
Subject: [PATCH 045/163] Add threading to nicebooks.py
---
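The new populate() path is a bounded producer/consumer: the producer starts one BrowserThread per result URL and pushes it onto a small Queue, so only a few downloads are in flight at once, while the consumer joins the threads in submission order and turns each page into a MetaInformation. A self-contained sketch of the pattern (Python 2; FetchThread and the URLs are placeholders, not the real BrowserThread):
    import threading
    from Queue import Queue

    class FetchThread(threading.Thread):
        # stand-in for BrowserThread: pretend to download one url
        def __init__(self, url):
            threading.Thread.__init__(self)
            self.url, self.result = url, None
        def run(self):
            self.result = 'metadata for %s' % self.url

    def producer(q, urls):
        for url in urls:
            t = FetchThread(url)
            t.start()
            q.put(t, True)              # blocks while the queue is full

    def consumer(q, total, out):
        while len(out) < total:
            t = q.get(True)
            t.join()                    # wait for that download, then collect it
            out.append(t.result)

    urls = ['/livres/1', '/livres/2', '/livres/3']
    q, out = Queue(2), []               # bounded queue, like the brcall parameter
    p = threading.Thread(target=producer, args=(q, urls))
    c = threading.Thread(target=consumer, args=(q, len(urls), out))
    p.start(); c.start(); p.join(); c.join()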
src/calibre/ebooks/metadata/nicebooks.py | 142 ++++++++++++++---------
1 file changed, 85 insertions(+), 57 deletions(-)
diff --git a/src/calibre/ebooks/metadata/nicebooks.py b/src/calibre/ebooks/metadata/nicebooks.py
index 8914e2d985..7beececd7e 100644
--- a/src/calibre/ebooks/metadata/nicebooks.py
+++ b/src/calibre/ebooks/metadata/nicebooks.py
@@ -3,7 +3,8 @@ __license__ = 'GPL 3'
__copyright__ = '2010, sengian '
__docformat__ = 'restructuredtext en'
-import sys, textwrap, re, traceback, socket
+import sys, textwrap, re, traceback, socket, threading
+from Queue import Queue
from urllib import urlencode
from math import ceil
from copy import deepcopy
@@ -23,7 +24,7 @@ from calibre.utils.config import OptionParser
class NiceBooks(MetadataSource):
name = 'Nicebooks'
- description = _('Downloads metadata from french Nicebooks')
+ description = _('Downloads metadata from French Nicebooks')
supported_platforms = ['windows', 'osx', 'linux']
author = 'Sengian'
version = (1, 0, 0)
@@ -78,10 +79,50 @@ class NiceBooksError(Exception):
class ISBNNotFound(NiceBooksError):
pass
+class BrowserThread(threading.Thread):
+
+ def __init__(self, url, verbose=False, timeout=10., ex=Exception, name='Meta'):
+ self.url = url
+ self.ex = ex
+ self.name = name
+ self.verbose = verbose
+ self.timeout = timeout
+ self.result = None
+ threading.Thread.__init__(self)
+
+ def get_result(self):
+ return self.result
+
+ def run(self):
+ try:
+ raw = browser().open_novisit(self.url, timeout=self.timeout).read()
+ except Exception, e:
+ report(self.verbose)
+ if callable(getattr(e, 'getcode', None)) and \
+ e.getcode() == 404:
+ self.result = None
+ if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
+ raise self.ex(_('%s timed out. Try again later.') % self.name)
+ raise self.ex(_('%s encountered an error.') % self.name)
+ if '404 - ' in raw:
+ report(self.verbose)
+ self.result = None
+ raw = xml_to_unicode(raw, strip_encoding_pats=True,
+ resolve_entities=True)[0]
+ try:
+ self.result = soupparser.fromstring(raw)
+ except:
+ try:
+ #remove ASCII invalid chars
+ self.result = soupparser.fromstring(clean_ascii_chars(raw))
+ except:
+ self.result = None
+
def report(verbose):
if verbose:
traceback.print_exc()
+
class Query(object):
BASE_URL = 'http://fr.nicebooks.com/'
@@ -224,68 +265,53 @@ class ResultList(list):
report(verbose)
return mi
- def fill_MI(self, entry, title, authors, verbose):
+ def fill_MI(self, data, verbose):
+ '''create and return an mi if possible, None otherwise'''
+ try:
+ entry = data.xpath("//div[@id='container']/div[@id='book-info']")[0]
+ title = self.get_title(entry)
+ authors = self.get_authors(entry)
+ except Exception, e:
+ if verbose:
+ print 'Failed to get all details for an entry'
+ print e
+ return None
mi = MetaInformation(title, authors)
mi.author_sort = authors_to_sort_string(authors)
mi.comments = self.get_description(entry, verbose)
return self.get_book_info(entry, mi, verbose)
- def get_individual_metadata(self, browser, linkdata, verbose):
- try:
- raw = browser.open_novisit(self.BASE_URL + linkdata).read()
- except Exception, e:
- report(verbose)
- if callable(getattr(e, 'getcode', None)) and \
- e.getcode() == 404:
- return
- if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
- raise NiceBooksError(_('Nicebooks timed out. Try again later.'))
- raise NiceBooksError(_('Nicebooks encountered an error.'))
- if '404 - ' in raw:
- report(verbose)
- return
- raw = xml_to_unicode(raw, strip_encoding_pats=True,
- resolve_entities=True)[0]
- try:
- feed = soupparser.fromstring(raw)
- except:
- try:
- #remove ASCII invalid chars
- feed = soupparser.fromstring(clean_ascii_chars(raw))
- except:
- return None
+ def producer(self, q, data, verbose=False):
+ for x in data:
+ thread = BrowserThread(self.BASE_URL+x, verbose=verbose, ex=NiceBooksError,
+ name='Nicebooks')
+ thread.start()
+ q.put(thread, True)
- # get results
- return feed.xpath("//div[@id='container']")[0]
+ def consumer(self, q, total_entries, verbose=False):
+ while len(self) < total_entries:
+ thread = q.get(True)
+ thread.join()
+ mi, order = thread.get_result()
+ if mi is None:
+ self.append(None)
+ self.append(self.fill_MI(mi, verbose))
- def populate(self, entries, browser, verbose=False):
- #single entry
+ def populate(self, entries, verbose=False, brcall=3):
if len(entries) == 1 and not isinstance(entries[0], str):
- try:
- entry = entries[0].xpath("//div[@id='container']")[0]
- entry = entry.find("div[@id='book-info']")
- title = self.get_title(entry)
- authors = self.get_authors(entry)
- except Exception, e:
- if verbose:
- print 'Failed to get all details for an entry'
- print e
- return
- self.append(self.fill_MI(entry, title, authors, verbose))
+ #single entry
+ mi = self.fill_MI(entries[0], verbose)
+ if mi:
+ self.append(mi)
else:
- #multiple entries
- for x in entries:
- try:
- entry = self.get_individual_metadata(browser, x, verbose)
- entry = entry.find("div[@id='book-info']")
- title = self.get_title(entry)
- authors = self.get_authors(entry)
- except Exception, e:
- if verbose:
- print 'Failed to get all details for an entry'
- print e
- continue
- self.append(self.fill_MI(entry, title, authors, verbose))
+ #multiple entries
+ q = Queue(brcall)
+ prod_thread = threading.Thread(target=self.producer, args=(q, entries, verbose))
+ cons_thread = threading.Thread(target=self.consumer, args=(q, len(entries), verbose))
+ prod_thread.start()
+ cons_thread.start()
+ prod_thread.join()
+ cons_thread.join()
class Covers(object):
@@ -328,14 +354,14 @@ def search(title=None, author=None, publisher=None, isbn=None,
max_results=5, verbose=False, keywords=None):
br = browser()
entries = Query(title=title, author=author, isbn=isbn, publisher=publisher,
- keywords=keywords, max_results=max_results)(br, verbose,timeout = 10.)
+ keywords=keywords, max_results=max_results)(br, verbose, timeout = 10.)
if entries is None or len(entries) == 0:
return None
#List of entry
ans = ResultList()
- ans.populate(entries, br, verbose)
+ ans.populate(entries, verbose)
return ans
def check_for_cover(isbn):
@@ -409,3 +435,5 @@ def main(args=sys.argv):
if __name__ == '__main__':
sys.exit(main())
+
+# calibre-debug -e "H:\Mes eBooks\Developpement\calibre\src\calibre\ebooks\metadata\nicebooks.py" -m 5 -a mankel >data.html
\ No newline at end of file
From 1610a739afb09ccb9d211234eafec5e635daf532 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Wed, 8 Dec 2010 20:47:47 +0100
Subject: [PATCH 046/163] Add threading to fictionwise.py, plus some cleanup
---
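Besides threading fictionwise.py the same way as nicebooks.py, this adds an unescape() helper to calibre.utils.cleantext (adapted from Fredrik Lundh's effbot recipe) and uses it in output_entry() to drop HTML entities. Expected behaviour on an illustrative string:
    unescape(u'caf&#233; &amp; th&#xe9;')            # -> u'caf\xe9 & th\xe9'
    unescape(u'caf&#233; &amp; th&#xe9;', rm=True)   # -> u'caf\xe9  th\xe9' (named entity dropped)
Note that with rm=True only named entities, and references that fail to decode, are replaced by rchar; numeric references such as &#233; return early as their decoded character.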
src/calibre/ebooks/metadata/fictionwise.py | 160 ++++++++++++---------
src/calibre/ebooks/metadata/nicebooks.py | 6 +-
src/calibre/utils/cleantext.py | 32 ++++-
3 files changed, 127 insertions(+), 71 deletions(-)
diff --git a/src/calibre/ebooks/metadata/fictionwise.py b/src/calibre/ebooks/metadata/fictionwise.py
index 1d6aceecdd..a06516c7dc 100644
--- a/src/calibre/ebooks/metadata/fictionwise.py
+++ b/src/calibre/ebooks/metadata/fictionwise.py
@@ -4,6 +4,7 @@ __copyright__ = '2010, sengian '
__docformat__ = 'restructuredtext en'
import sys, textwrap, re, traceback, socket
+from threading import Thread
from Queue import Queue
from urllib import urlencode
@@ -17,7 +18,7 @@ from calibre.library.comments import sanitize_comments_html
from calibre.ebooks.metadata.fetch import MetadataSource
from calibre.utils.config import OptionParser
from calibre.utils.date import parse_date, utcnow
-from calibre.utils.cleantext import clean_ascii_chars
+from calibre.utils.cleantext import clean_ascii_chars, unescape
class Fictionwise(MetadataSource): # {{{
@@ -40,7 +41,45 @@ class Fictionwise(MetadataSource): # {{{
class FictionwiseError(Exception):
pass
-
+class BrowserThread(Thread):
+
+ def __init__(self, url, verbose=False, timeout=10., ex=Exception, name='Meta'):
+ self.url = url
+ self.ex = ex
+ self.plugname = name
+ self.verbose = verbose
+ self.timeout = timeout
+ self.result = None
+ Thread.__init__(self)
+
+ def get_result(self):
+ return self.result
+
+ def run(self):
+ try:
+ raw = browser().open_novisit(self.url, timeout=self.timeout).read()
+ except Exception, e:
+ report(self.verbose)
+ if callable(getattr(e, 'getcode', None)) and \
+ e.getcode() == 404:
+ self.result = None
+ if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
+ raise self.ex(_('%s timed out. Try again later.') % self.plugname)
+ raise self.ex(_('%s encountered an error.') % self.plugname)
+ if '404 - ' in raw:
+ report(self.verbose)
+ self.result = None
+ raw = xml_to_unicode(raw, strip_encoding_pats=True,
+ resolve_entities=True)[0]
+ try:
+ self.result = soupparser.fromstring(raw)
+ except:
+ try:
+ #remove ASCII invalid chars
+ self.result = soupparser.fromstring(clean_ascii_chars(raw))
+ except:
+ self.result = None
+
def report(verbose):
if verbose:
@@ -180,10 +219,13 @@ class ResultList(list):
for elt in elts:
elt.drop_tree()
- def output_entry(self, entry, prettyout = True, htmlrm="\d+"):
+ def output_entry(self, entry, prettyout = True, rmhtmlchar=True):
out = tostring(entry, pretty_print=prettyout)
- #try to work around tostring to remove this encoding for exemle
- reclean = re.compile('(\n+|\t+|\r+|&#'+htmlrm+';)')
+ #remove html chars
+ if rmhtmlchar:
+ out = unescape(out, rm=True)
+ # Remove \n\t\r.
+ reclean = re.compile('(\n+|\t+|\r+)')
return reclean.sub('', out)
def get_title(self, entry):
@@ -211,7 +253,7 @@ class ResultList(list):
return float(1.25*sum(k*v for (k, v) in hval.iteritems())/sum(hval.itervalues()))
def get_description(self, entry):
- description = self.output_entry(entry.xpath('./p')[1],htmlrm="")
+ description = self.output_entry(entry.xpath('./p')[1],rmhtmlchar=False)
description = self.redesc.search(description)
if not description or not description.group("desc"):
return None
@@ -265,9 +307,24 @@ class ResultList(list):
isbns = [self.reisbn.sub('', x) for x in isbns if check_isbn(self.reisbn.sub('', x))]
return sorted(isbns, cmp=lambda x,y:cmp(len(x), len(y)))[-1]
- def fill_MI(self, entry, title, authors, ratings, verbose):
+ def fill_MI(self, data, verbose):
+ inv_tags ={'script': True, 'a': False, 'font': False, 'strong': False, 'b': False,
+ 'ul': False, 'span': False}
+ inv_xpath =('./table',)
+ try:
+ entry = data.xpath("//table[3]/tr/td[2]/table[1]/tr/td/font/table/tr/td")[0]
+ self.clean_entry(entry, invalid_tags=inv_tags, invalid_xpath=inv_xpath)
+ title = self.get_title(entry)
+ authors = self.get_authors(entry)
+ except Exception, e:
+ if verbose:
+ print _('Failed to get all details for an entry')
+ print e
+ return None
mi = MetaInformation(title, authors)
- mi.rating = ratings
+ ratings = entry.xpath("./p/table")
+ if len(ratings) >= 2:
+ mi.rating = self.get_rating(ratings[1], verbose)
mi.comments = self.get_description(entry)
mi.publisher = self.get_publisher(entry)
mi.tags = self.get_tags(entry)
@@ -276,67 +333,36 @@ class ResultList(list):
mi.author_sort = authors_to_sort_string(authors)
return mi
- def get_individual_metadata(self, browser, linkdata, verbose):
- try:
- raw = browser.open_novisit(self.BASE_URL + linkdata).read()
- except Exception, e:
- report(verbose)
- if callable(getattr(e, 'getcode', None)) and \
- e.getcode() == 404:
- return
- if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
- raise FictionwiseError(_('Fictionwise timed out. Try again later.'))
- raise FictionwiseError(_('Fictionwise encountered an error.'))
- if '404 - ' in raw:
- report(verbose)
- return
- raw = xml_to_unicode(raw, strip_encoding_pats=True,
- resolve_entities=True)[0]
- try:
- return soupparser.fromstring(raw)
- except:
- try:
- #remove ASCII invalid chars
- return soupparser.fromstring(clean_ascii_chars(raw))
- except:
- return None
+ def producer(self, q, data, verbose=False):
+ for x in data:
+ thread = BrowserThread(self.BASE_URL+x, verbose=verbose, ex=FictionwiseError,
+ name='Fictionwise')
+ thread.start()
+ q.put(thread, True)
- def populate(self, entries, browser, verbose=False):
- inv_tags ={'script': True, 'a': False, 'font': False, 'strong': False, 'b': False,
- 'ul': False, 'span': False}
- inv_xpath =('./table',)
- #single entry
+ def consumer(self, q, total_entries, verbose=False):
+ while len(self) < total_entries:
+ thread = q.get(True)
+ thread.join()
+ mi = thread.get_result()
+ if mi is None:
+ self.append(None)
+ else:
+ self.append(self.fill_MI(mi, verbose))
+
+ def populate(self, entries, verbose=False, brcall=3):
if len(entries) == 1 and not isinstance(entries[0], str):
- try:
- entry = entries.xpath("//table[3]/tr/td[2]/table[1]/tr/td/font/table/tr/td")
- self.clean_entry(entry, invalid_tags=inv_tags, invalid_xpath=inv_xpath)
- title = self.get_title(entry)
- #maybe strenghten the search
- ratings = self.get_rating(entry.xpath("./p/table")[1], verbose)
- authors = self.get_authors(entry)
- except Exception, e:
- if verbose:
- print _('Failed to get all details for an entry')
- print e
- return
- self.append(self.fill_MI(entry, title, authors, ratings, verbose))
+ #single entry
+ self.append(self.fill_MI(entries[0], verbose))
else:
#multiple entries
- for x in entries:
- try:
- entry = self.get_individual_metadata(browser, x, verbose)
- entry = entry.xpath("//table[3]/tr/td[2]/table[1]/tr/td/font/table/tr/td")[0]
- self.clean_entry(entry, invalid_tags=inv_tags, invalid_xpath=inv_xpath)
- title = self.get_title(entry)
- #maybe strenghten the search
- ratings = self.get_rating(entry.xpath("./p/table")[1], verbose)
- authors = self.get_authors(entry)
- except Exception, e:
- if verbose:
- print _('Failed to get all details for an entry')
- print e
- continue
- self.append(self.fill_MI(entry, title, authors, ratings, verbose))
+ q = Queue(brcall)
+ prod_thread = Thread(target=self.producer, args=(q, entries, verbose))
+ cons_thread = Thread(target=self.consumer, args=(q, len(entries), verbose))
+ prod_thread.start()
+ cons_thread.start()
+ prod_thread.join()
+ cons_thread.join()
def search(title=None, author=None, publisher=None, isbn=None,
@@ -349,7 +375,7 @@ def search(title=None, author=None, publisher=None, isbn=None,
#List of entry
ans = ResultList()
ans.populate(entries, br, verbose)
- return ans
+ return [x for x in ans if x is not None]
def option_parser():
@@ -391,3 +417,5 @@ def main(args=sys.argv):
if __name__ == '__main__':
sys.exit(main())
+
+# calibre-debug -e "H:\Mes eBooks\Developpement\calibre\src\calibre\ebooks\metadata\fictionwise.py" -m 5 -a gore -v>data.html
\ No newline at end of file
diff --git a/src/calibre/ebooks/metadata/nicebooks.py b/src/calibre/ebooks/metadata/nicebooks.py
index 580e645320..5bd360ed6c 100644
--- a/src/calibre/ebooks/metadata/nicebooks.py
+++ b/src/calibre/ebooks/metadata/nicebooks.py
@@ -302,9 +302,7 @@ class ResultList(list):
def populate(self, entries, verbose=False, brcall=3):
if len(entries) == 1 and not isinstance(entries[0], str):
#single entry
- mi = self.fill_MI(entries[0], verbose)
- if mi:
- self.append(mi)
+ self.append(self.fill_MI(entries[0], verbose))
else:
#multiple entries
q = Queue(brcall)
@@ -364,7 +362,7 @@ def search(title=None, author=None, publisher=None, isbn=None,
#List of entry
ans = ResultList()
ans.populate(entries, verbose)
- return [x for x in ans if x]
+ return [x for x in ans if x is not None]
def check_for_cover(isbn):
br = browser()
diff --git a/src/calibre/utils/cleantext.py b/src/calibre/utils/cleantext.py
index b4afe7576d..a27f74529e 100644
--- a/src/calibre/utils/cleantext.py
+++ b/src/calibre/utils/cleantext.py
@@ -3,7 +3,8 @@ __license__ = 'GPL 3'
__copyright__ = '2010, sengian '
__docformat__ = 'restructuredtext en'
-import re
+import re, htmlentitydefs
+from functools import partial
_ascii_pat = None
@@ -21,3 +22,32 @@ def clean_ascii_chars(txt, charlist=None):
pat = re.compile(u'|'.join(map(unichr, charlist)))
return pat.sub('', txt)
+##
+# Fredrik Lundh: http://effbot.org/zone/re-sub.htm#unescape-html
+# Removes HTML or XML character references and entities from a text string.
+#
+# @param text The HTML (or XML) source text.
+# @return The plain text, as a Unicode string, if necessary.
+
+def unescape(text, rm=False, rchar=u''):
+ def fixup(m, rm=rm, rchar=rchar):
+ text = m.group(0)
+ if text[:2] == "&#":
+ # character reference
+ try:
+ if text[:3] == "&#x":
+ return unichr(int(text[3:-1], 16))
+ else:
+ return unichr(int(text[2:-1]))
+ except ValueError:
+ pass
+ else:
+ # named entity
+ try:
+ text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])
+ except KeyError:
+ pass
+ if rm:
+ return rchar #replace by char
+ return text # leave as is
+ return re.sub("&#?\w+;", fixup, text)
\ No newline at end of file
From f766eb871c54fa249f2c0e6b71067ee9517b5de8 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Wed, 8 Dec 2010 22:50:57 +0100
Subject: [PATCH 047/163] Add threading to Amazon (still lagging like hell)
---
src/calibre/ebooks/metadata/amazonfr.py | 151 +++++++++++++++------
src/calibre/ebooks/metadata/fictionwise.py | 21 +--
src/calibre/ebooks/metadata/nicebooks.py | 8 +-
3 files changed, 128 insertions(+), 52 deletions(-)
diff --git a/src/calibre/ebooks/metadata/amazonfr.py b/src/calibre/ebooks/metadata/amazonfr.py
index 156fff3d75..6d8c2e407c 100644
--- a/src/calibre/ebooks/metadata/amazonfr.py
+++ b/src/calibre/ebooks/metadata/amazonfr.py
@@ -3,11 +3,12 @@ __license__ = 'GPL 3'
__copyright__ = '2010, sengian '
import sys, textwrap, re, traceback
+from threading import Thread
+from Queue import Queue
from urllib import urlencode
from math import ceil
-from lxml import html
-from lxml.html import soupparser
+from lxml.html import soupparser, tostring
from calibre.utils.date import parse_date, utcnow, replace_months
from calibre.utils.cleantext import clean_ascii_chars
@@ -116,6 +117,48 @@ def report(verbose):
if verbose:
traceback.print_exc()
+class AmazonError(Exception):
+ pass
+
+class BrowserThread(Thread):
+
+ def __init__(self, url, verbose=False, timeout=10., ex=Exception, name='Meta'):
+ self.url = url
+ self.ex = ex
+ self.plugname = name
+ self.verbose = verbose
+ self.timeout = timeout
+ self.result = None
+ Thread.__init__(self)
+
+ def get_result(self):
+ return self.result
+
+ def run(self):
+ try:
+ raw = browser().open_novisit(self.url, timeout=self.timeout).read()
+ except Exception, e:
+ report(self.verbose)
+ if callable(getattr(e, 'getcode', None)) and \
+ e.getcode() == 404:
+ self.result = None
+ if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
+ raise self.ex(_('%s timed out. Try again later.') % self.plugname)
+ raise self.ex(_('%s encountered an error.') % self.plugname)
+ if '404 - ' in raw:
+ report(self.verbose)
+ self.result = None
+ raw = xml_to_unicode(raw, strip_encoding_pats=True,
+ resolve_entities=True)[0]
+ try:
+ self.result = soupparser.fromstring(raw)
+ except:
+ try:
+ #remove ASCII invalid chars
+ self.result = soupparser.fromstring(clean_ascii_chars(raw))
+ except:
+ self.result = None
+
class Query(object):
@@ -189,7 +232,7 @@ class Query(object):
def __call__(self, browser, verbose, timeout = 5.):
if verbose:
- print 'Query:', self.urldata
+ print _('Query: %s') % self.urldata
try:
raw = browser.open_novisit(self.urldata, timeout=timeout).read()
@@ -197,10 +240,12 @@ class Query(object):
report(verbose)
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
- return
- raise
+ return None, self.urldata
+ if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
+ raise AmazonError(_('Amazon timed out. Try again later.'))
+ raise AmazonError(_('Amazon encountered an error.'))
if '404 - ' in raw:
- return
+ return None, self.urldata
raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0]
@@ -315,7 +360,7 @@ class ResultList(list):
inv_class = ('seeAll', 'emptyClear')
inv_tags ={'img': True, 'a': False}
self.clean_entry(description, invalid_tags=inv_tags, invalid_class=inv_class)
- description = html.tostring(description, method='html', encoding=unicode).strip()
+ description = tostring(description, method='html', encoding=unicode).strip()
# remove all attributes from tags
description = self.reattr.sub(r'<\1>', description)
# Remove the notice about text referring to out of print editions
@@ -327,7 +372,7 @@ class ResultList(list):
report(verbose)
return None
- def get_tags(self, entry, browser, verbose):
+ def get_tags(self, entry, verbose):
try:
tags = entry.get_element_by_id('tagContentHolder')
testptag = tags.find_class('see-all')
@@ -338,7 +383,7 @@ class ResultList(list):
if alink[0].get('class') == 'tgJsActive':
continue
link = self.baseurl + alink[0].get('href')
- entry = self.get_individual_metadata(browser, link, verbose)
+ entry = self.get_individual_metadata(link, verbose)
tags = entry.get_element_by_id('tagContentHolder')
break
tags = [a.text for a in tags.getiterator('a') if a.get('rel') == 'tag']
@@ -402,26 +447,41 @@ class ResultList(list):
mi.rating = float(ratings[0])/float(ratings[1]) * 5
return mi
- def fill_MI(self, entry, title, authors, browser, verbose):
+ def fill_MI(self, entry, verbose):
+ try:
+ title = self.get_title(entry)
+ authors = self.get_authors(entry)
+ except Exception, e:
+ if verbose:
+ print _('Failed to get all details for an entry')
+ print e
+ print _('URL who failed: %s') % x
+ report(verbose)
+ return None
mi = MetaInformation(title, authors)
mi.author_sort = authors_to_sort_string(authors)
- mi.comments = self.get_description(entry, verbose)
- mi = self.get_book_info(entry, mi, verbose)
- mi.tags = self.get_tags(entry, browser, verbose)
+ try:
+ mi.comments = self.get_description(entry, verbose)
+ mi = self.get_book_info(entry, mi, verbose)
+ mi.tags = self.get_tags(entry, verbose)
+ except:
+ pass
return mi
- def get_individual_metadata(self, browser, linkdata, verbose):
+ def get_individual_metadata(self, url, verbose):
try:
- raw = browser.open_novisit(linkdata).read()
+ raw = browser().open_novisit(url).read()
except Exception, e:
report(verbose)
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
- return
- raise
+ return None
+ if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
+ raise AmazonError(_('Amazon timed out. Try again later.'))
+ raise AmazonError(_('Amazon encountered an error.'))
if '404 - ' in raw:
report(verbose)
- return
+ return None
raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0]
try:
@@ -432,27 +492,34 @@ class ResultList(list):
return soupparser.fromstring(clean_ascii_chars(raw))
except:
report(verbose)
- return
+ return None
- def populate(self, entries, browser, verbose=False):
- for x in entries:
- try:
- entry = self.get_individual_metadata(browser, x, verbose)
- # clean results
- # inv_ids = ('divsinglecolumnminwidth', 'sims.purchase', 'AutoBuyXGetY', 'A9AdsMiddleBoxTop')
- # inv_class = ('buyingDetailsGrid', 'productImageGrid')
- # inv_tags ={'script': True, 'style': True, 'form': False}
- # self.clean_entry(entry, invalid_id=inv_ids)
- title = self.get_title(entry)
- authors = self.get_authors(entry)
- except Exception, e:
- if verbose:
- print 'Failed to get all details for an entry'
- print e
- print 'URL who failed:', x
- report(verbose)
- continue
- self.append(self.fill_MI(entry, title, authors, browser, verbose))
+ def producer(self, q, data, verbose=False):
+ for x in data:
+ thread = BrowserThread(x, verbose=verbose, ex=AmazonError,
+ name='Amazon')
+ thread.start()
+ q.put(thread, True)
+
+ def consumer(self, q, total_entries, verbose=False):
+ while len(self) < total_entries:
+ thread = q.get(True)
+ thread.join()
+ mi = thread.get_result()
+ if mi is None:
+ self.append(None)
+ else:
+ self.append(self.fill_MI(mi, verbose))
+
+ def populate(self, entries, verbose=False, brcall=5):
+ #multiple entries
+ q = Queue(brcall)
+ prod_thread = Thread(target=self.producer, args=(q, entries, verbose))
+ cons_thread = Thread(target=self.consumer, args=(q, len(entries), verbose))
+ prod_thread.start()
+ cons_thread.start()
+ prod_thread.join()
+ cons_thread.join()
def search(title=None, author=None, publisher=None, isbn=None,
@@ -466,8 +533,8 @@ def search(title=None, author=None, publisher=None, isbn=None,
#List of entry
ans = ResultList(baseurl, lang)
- ans.populate(entries, br, verbose)
- return ans
+ ans.populate(entries, verbose)
+ return [x for x in ans if x is not None]
def option_parser():
parser = OptionParser(textwrap.dedent(\
@@ -506,7 +573,7 @@ def main(args=sys.argv):
parser.print_help()
return 1
if results is None or len(results) == 0:
- print 'No result found for this search!'
+ print _('No result found for this search!')
return 0
for result in results:
print unicode(result).encode(preferred_encoding, 'replace')
@@ -514,3 +581,5 @@ def main(args=sys.argv):
if __name__ == '__main__':
sys.exit(main())
+
+# calibre-debug -e "H:\Mes eBooks\Developpement\calibre\src\calibre\ebooks\metadata\amazonfr.py" -m 5 -a gore -v>data.html
\ No newline at end of file
diff --git a/src/calibre/ebooks/metadata/fictionwise.py b/src/calibre/ebooks/metadata/fictionwise.py
index a06516c7dc..892e286810 100644
--- a/src/calibre/ebooks/metadata/fictionwise.py
+++ b/src/calibre/ebooks/metadata/fictionwise.py
@@ -80,11 +80,11 @@ class BrowserThread(Thread):
except:
self.result = None
-
def report(verbose):
if verbose:
traceback.print_exc()
+
class Query(object):
BASE_URL = 'http://www.fictionwise.com/servlet/mw'
@@ -322,15 +322,18 @@ class ResultList(list):
print e
return None
mi = MetaInformation(title, authors)
- ratings = entry.xpath("./p/table")
- if len(ratings) >= 2:
- mi.rating = self.get_rating(ratings[1], verbose)
- mi.comments = self.get_description(entry)
- mi.publisher = self.get_publisher(entry)
- mi.tags = self.get_tags(entry)
- mi.pubdate = self.get_date(entry, verbose)
- mi.isbn = self.get_ISBN(entry)
mi.author_sort = authors_to_sort_string(authors)
+ try:
+ ratings = entry.xpath("./p/table")
+ if len(ratings) >= 2:
+ mi.rating = self.get_rating(ratings[1], verbose)
+ mi.comments = self.get_description(entry)
+ mi.publisher = self.get_publisher(entry)
+ mi.tags = self.get_tags(entry)
+ mi.pubdate = self.get_date(entry, verbose)
+ mi.isbn = self.get_ISBN(entry)
+ except:
+ pass
return mi
def producer(self, q, data, verbose=False):
diff --git a/src/calibre/ebooks/metadata/nicebooks.py b/src/calibre/ebooks/metadata/nicebooks.py
index 5bd360ed6c..8911b31c08 100644
--- a/src/calibre/ebooks/metadata/nicebooks.py
+++ b/src/calibre/ebooks/metadata/nicebooks.py
@@ -279,8 +279,12 @@ class ResultList(list):
return None
mi = MetaInformation(title, authors)
mi.author_sort = authors_to_sort_string(authors)
- mi.comments = self.get_description(entry, verbose)
- return self.get_book_info(entry, mi, verbose)
+ try:
+ mi.comments = self.get_description(entry, verbose)
+ mi = self.get_book_info(entry, mi, verbose)
+ except:
+ pass
+ return mi
def producer(self, q, data, verbose=False):
for x in data:
From 8f7bc53128ca3b1d1b9e7fb4e607b4610f527a62 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Thu, 9 Dec 2010 00:13:08 +0100
Subject: [PATCH 048/163] Improve speed: first pass at minimizing browser
 creation calls
---
src/calibre/ebooks/metadata/amazonfr.py | 20 ++++++++++++--------
1 file changed, 12 insertions(+), 8 deletions(-)
diff --git a/src/calibre/ebooks/metadata/amazonfr.py b/src/calibre/ebooks/metadata/amazonfr.py
index 6d8c2e407c..eaab7001b7 100644
--- a/src/calibre/ebooks/metadata/amazonfr.py
+++ b/src/calibre/ebooks/metadata/amazonfr.py
@@ -129,14 +129,15 @@ class BrowserThread(Thread):
self.verbose = verbose
self.timeout = timeout
self.result = None
+ self.br = browser()
Thread.__init__(self)
def get_result(self):
- return self.result
+ return self.result, self.br
def run(self):
try:
- raw = browser().open_novisit(self.url, timeout=self.timeout).read()
+ raw = self.br.open_novisit(self.url, timeout=self.timeout).read()
except Exception, e:
report(self.verbose)
if callable(getattr(e, 'getcode', None)) and \
@@ -447,7 +448,7 @@ class ResultList(list):
mi.rating = float(ratings[0])/float(ratings[1]) * 5
return mi
- def fill_MI(self, entry, verbose):
+ def fill_MI(self, entry, br, verbose):
try:
title = self.get_title(entry)
authors = self.get_authors(entry)
@@ -463,14 +464,14 @@ class ResultList(list):
try:
mi.comments = self.get_description(entry, verbose)
mi = self.get_book_info(entry, mi, verbose)
- mi.tags = self.get_tags(entry, verbose)
+ mi.tags = self.get_tags(entry, br, verbose)
except:
pass
return mi
- def get_individual_metadata(self, url, verbose):
+ def get_individual_metadata(self, url, br, verbose):
try:
- raw = browser().open_novisit(url).read()
+ raw = br.open_novisit(url).read()
except Exception, e:
report(verbose)
if callable(getattr(e, 'getcode', None)) and \
@@ -505,11 +506,11 @@ class ResultList(list):
while len(self) < total_entries:
thread = q.get(True)
thread.join()
- mi = thread.get_result()
+ mi, br = thread.get_result()
if mi is None:
self.append(None)
else:
- self.append(self.fill_MI(mi, verbose))
+ self.append(self.fill_MI(mi, br, verbose))
def populate(self, entries, verbose=False, brcall=5):
#multiple entries
@@ -581,5 +582,8 @@ def main(args=sys.argv):
if __name__ == '__main__':
sys.exit(main())
+ # import cProfile
+ # sys.exit(cProfile.run("import calibre.ebooks.metadata.amazonfr; calibre.ebooks.metadata.amazonfr.main()"))
+ # sys.exit(cProfile.run("import calibre.ebooks.metadata.amazonfr; calibre.ebooks.metadata.amazonfr.main()", "profile_tmp"))
# calibre-debug -e "H:\Mes eBooks\Developpement\calibre\src\calibre\ebooks\metadata\amazonfr.py" -m 5 -a gore -v>data.html
\ No newline at end of file
From a74346498729e91e18f65b15bb536b04581f4a1e Mon Sep 17 00:00:00 2001
From: Sengian
Date: Sat, 11 Dec 2010 13:55:31 +0100
Subject: [PATCH 049/163] Minor modifications to Nicebooks/Fictionwise
---
src/calibre/ebooks/metadata/fictionwise.py | 5 +++--
src/calibre/ebooks/metadata/nicebooks.py | 1 +
2 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/src/calibre/ebooks/metadata/fictionwise.py b/src/calibre/ebooks/metadata/fictionwise.py
index 892e286810..efb19ca249 100644
--- a/src/calibre/ebooks/metadata/fictionwise.py
+++ b/src/calibre/ebooks/metadata/fictionwise.py
@@ -69,6 +69,7 @@ class BrowserThread(Thread):
if '404 - ' in raw:
report(self.verbose)
self.result = None
+ return None
raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0]
try:
@@ -137,12 +138,12 @@ class Query(object):
report(verbose)
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
- return
+ return None
if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
raise FictionwiseError(_('Fictionwise timed out. Try again later.'))
raise FictionwiseError(_('Fictionwise encountered an error.'))
if '404 - ' in raw:
- return
+ return None
raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0]
try:
diff --git a/src/calibre/ebooks/metadata/nicebooks.py b/src/calibre/ebooks/metadata/nicebooks.py
index 8911b31c08..cdf915c827 100644
--- a/src/calibre/ebooks/metadata/nicebooks.py
+++ b/src/calibre/ebooks/metadata/nicebooks.py
@@ -108,6 +108,7 @@ class BrowserThread(Thread):
if '404 - ' in raw:
report(self.verbose)
self.result = None
+ return None
raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0]
try:
From 8aa50c106e0c2f0db9c8ef294fa71e94173b3d2c Mon Sep 17 00:00:00 2001
From: Sengian
Date: Sat, 11 Dec 2010 13:57:06 +0100
Subject: [PATCH 050/163] Amazon threading
---
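This round shares a small pool of browser instances between the download threads via bounded Queues instead of giving every request its own browser(), and defers the extra tag-page fetches to a second pass. A simplified, self-contained sketch of the browser-pool part (Python 2; DummyBrowser and the URLs stand in for calibre's browser() and the real Amazon pages):
    import threading
    from Queue import Queue

    class DummyBrowser(object):
        def open_novisit(self, url, timeout=10.):
            return 'page for %s' % url      # placeholder for a real HTTP fetch

    def worker(pool, url, results, idx):
        br = pool.get(True)                 # borrow a browser; blocks if none is free
        try:
            results[idx] = br.open_novisit(url)
        finally:
            pool.put(br, True)              # always hand it back to the pool

    urls = ['/dp/1', '/dp/2', '/dp/3', '/dp/4']
    pool = Queue(2)
    for _ in xrange(2):
        pool.put(DummyBrowser(), True)      # two shared browsers serve four fetches
    results = [None] * len(urls)
    threads = [threading.Thread(target=worker, args=(pool, u, results, i))
               for i, u in enumerate(urls)]
    for t in threads: t.start()
    for t in threads: t.join()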
src/calibre/ebooks/metadata/amazonfr.py | 143 +++++++++++++-----------
1 file changed, 79 insertions(+), 64 deletions(-)
diff --git a/src/calibre/ebooks/metadata/amazonfr.py b/src/calibre/ebooks/metadata/amazonfr.py
index eaab7001b7..96bac89690 100644
--- a/src/calibre/ebooks/metadata/amazonfr.py
+++ b/src/calibre/ebooks/metadata/amazonfr.py
@@ -3,7 +3,7 @@ __license__ = 'GPL 3'
__copyright__ = '2010, sengian '
import sys, textwrap, re, traceback
-from threading import Thread
+from threading import Thread, Lock
from Queue import Queue
from urllib import urlencode
from math import ceil
@@ -122,9 +122,12 @@ class AmazonError(Exception):
class BrowserThread(Thread):
- def __init__(self, url, verbose=False, timeout=10., ex=Exception, name='Meta'):
+ def __init__(self, url, qbr, qsync, nb, verbose=False, timeout=10., ex=Exception, name='Meta'):
self.url = url
self.ex = ex
+ self.qbr = qbr
+ self.qsync = qsync
+ self.nb = nb
self.plugname = name
self.verbose = verbose
self.timeout = timeout
@@ -133,10 +136,11 @@ class BrowserThread(Thread):
Thread.__init__(self)
def get_result(self):
- return self.result, self.br
+ return self.result
def run(self):
try:
+ browser = self.qbr.get(True)
raw = self.br.open_novisit(self.url, timeout=self.timeout).read()
except Exception, e:
report(self.verbose)
@@ -146,9 +150,13 @@ class BrowserThread(Thread):
if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
raise self.ex(_('%s timed out. Try again later.') % self.plugname)
raise self.ex(_('%s encountered an error.') % self.plugname)
+ finally:
+ self.qbr.put(browser, True)
+
if '404 - ' in raw:
report(self.verbose)
self.result = None
+ return None
raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0]
try:
@@ -159,6 +167,8 @@ class BrowserThread(Thread):
self.result = soupparser.fromstring(clean_ascii_chars(raw))
except:
self.result = None
+ finally:
+ self.qsync.put(self.nb, True)
class Query(object):
@@ -174,7 +184,7 @@ class Query(object):
assert (max_results < 21)
self.max_results = int(max_results)
- self.renbres = re.compile(u'\s*(\d+)\s*')
+ self.renbres = re.compile(u'\s*([0-9.,]+)\s*')
q = { 'search-alias' : 'stripbooks' ,
'unfiltered' : '1',
@@ -262,6 +272,7 @@ class Query(object):
#nb of page
try:
nbresults = self.renbres.findall(feed.xpath("//*[@class='resultCount']")[0].text)
+ nbresults = [re.sub(r'[.,]', '', x) for x in nbresults]
except:
return None, self.urldata
@@ -294,11 +305,14 @@ class Query(object):
for i in x.xpath("//a/span[@class='srTitle']")])
return results[:self.max_results], self.baseurl
-class ResultList(list):
+class ResultList(object):
def __init__(self, baseurl, lang = 'all'):
self.baseurl = baseurl
self.lang = lang
+ self.thread = []
+ self.res = []
+ self.nbtag = 0
self.repub = re.compile(u'\((.*)\)')
self.rerat = re.compile(u'([0-9.]+)')
self.reattr = re.compile(r'<([a-zA-Z0-9]+)\s[^>]+>')
@@ -383,15 +397,12 @@ class ResultList(list):
if alink:
if alink[0].get('class') == 'tgJsActive':
continue
- link = self.baseurl + alink[0].get('href')
- entry = self.get_individual_metadata(link, verbose)
- tags = entry.get_element_by_id('tagContentHolder')
- break
+ return self.baseurl + alink[0].get('href'), True
tags = [a.text for a in tags.getiterator('a') if a.get('rel') == 'tag']
except:
report(verbose)
- tags = []
- return tags
+ tags = [], False
+ return tags, False
def get_book_info(self, entry, mi, verbose):
try:
@@ -429,9 +440,12 @@ class ResultList(list):
if check_isbn(isbn):
mi.isbn = unicode(isbn)
elif len(elt) > 1:
- isbn = elt[1].find('b').tail.replace('-', '').strip()
- if check_isbn(isbn):
- mi.isbn = unicode(isbn)
+ isbnone = elt[1].find('b').tail.replace('-', '').strip()
+ if check_isbn(isbnone):
+ mi.isbn = unicode(isbnone)
+ else:
+ #assume ASIN-> find a check for asin
+ mi.isbn = unicode(isbn)
#Langue
elt = filter(lambda x: self.relang.search(x.find('b').text), elts)
if elt:
@@ -448,7 +462,7 @@ class ResultList(list):
mi.rating = float(ratings[0])/float(ratings[1]) * 5
return mi
- def fill_MI(self, entry, br, verbose):
+ def fill_MI(self, entry, verbose):
try:
title = self.get_title(entry)
authors = self.get_authors(entry)
@@ -464,63 +478,65 @@ class ResultList(list):
try:
mi.comments = self.get_description(entry, verbose)
mi = self.get_book_info(entry, mi, verbose)
- mi.tags = self.get_tags(entry, br, verbose)
except:
pass
return mi
- def get_individual_metadata(self, url, br, verbose):
- try:
- raw = br.open_novisit(url).read()
- except Exception, e:
- report(verbose)
- if callable(getattr(e, 'getcode', None)) and \
- e.getcode() == 404:
- return None
- if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
- raise AmazonError(_('Amazon timed out. Try again later.'))
- raise AmazonError(_('Amazon encountered an error.'))
- if '404 - ' in raw:
- report(verbose)
- return None
- raw = xml_to_unicode(raw, strip_encoding_pats=True,
- resolve_entities=True)[0]
- try:
- return soupparser.fromstring(raw)
- except:
- try:
- #remove ASCII invalid chars
- return soupparser.fromstring(clean_ascii_chars(raw))
- except:
- report(verbose)
- return None
-
- def producer(self, q, data, verbose=False):
- for x in data:
- thread = BrowserThread(x, verbose=verbose, ex=AmazonError,
+ def producer(self, sync, data, br, verbose=False):
+ for i in xrange(len(data)):
+ thread = BrowserThread(data[i], br, sync, i, verbose=verbose, ex=AmazonError,
name='Amazon')
thread.start()
- q.put(thread, True)
+ self.thread.append(thread)
- def consumer(self, q, total_entries, verbose=False):
- while len(self) < total_entries:
- thread = q.get(True)
- thread.join()
- mi, br = thread.get_result()
- if mi is None:
- self.append(None)
- else:
- self.append(self.fill_MI(mi, br, verbose))
+ def consumer(self, sync, syncbis, br, total_entries, verbose=False):
+ i=0
+ while i < total_entries:
+ nb = int(sync.get(True))
+ entry = self.thread[nb].get_result()
+ i+=1
+ if entry is not None:
+ mi = self.fill_MI(entry, verbose)
+ if mi is not None:
+ mi.tags, atag = self.get_tags(entry, verbose)
+ self.res[nb] = mi
+ if atag:
+ threadbis = BrowserThread(mi.tags, br, syncbis, nb, verbose=verbose, ex=AmazonError,
+ name='Amazon')
+ self.thread[nb] = threadbis
+ self.nbtag +=1
+ threadbis.start()
- def populate(self, entries, verbose=False, brcall=5):
+ def populate(self, entries, ibr, verbose=False, brcall=3):
#multiple entries
- q = Queue(brcall)
- prod_thread = Thread(target=self.producer, args=(q, entries, verbose))
- cons_thread = Thread(target=self.consumer, args=(q, len(entries), verbose))
+ br = Queue(brcall)
+ cbr = Queue(brcall-1)
+
+ syncp = Queue(1)
+ syncc = Queue(len(entries))
+
+ for i in xrange(brcall-1):
+ br.put(browser(), True)
+ cbr.put(browser(), True)
+ br.put(ibr, True)
+
+ self.res = [None]*len(entries)
+
+ prod_thread = Thread(target=self.producer, args=(syncp, entries, br, verbose))
+ cons_thread = Thread(target=self.consumer, args=(syncp, syncc, cbr, len(entries), verbose))
prod_thread.start()
cons_thread.start()
prod_thread.join()
cons_thread.join()
+
+ #finish processing
+ for i in xrange(self.nbtag):
+ nb = int(syncc.get(True))
+ tags = self.thread[nb].get_result()
+ if tags is not None:
+ self.res[nb].tags = self.get_tags(tags, verbose)[0]
+
+ return self.res
def search(title=None, author=None, publisher=None, isbn=None,
@@ -534,8 +550,7 @@ def search(title=None, author=None, publisher=None, isbn=None,
#List of entry
ans = ResultList(baseurl, lang)
- ans.populate(entries, verbose)
- return [x for x in ans if x is not None]
+ return [x for x in ans.populate(entries, br, verbose) if x is not None]
def option_parser():
parser = OptionParser(textwrap.dedent(\
@@ -581,9 +596,9 @@ def main(args=sys.argv):
print
if __name__ == '__main__':
- sys.exit(main())
- # import cProfile
+ # sys.exit(main())
+ import cProfile
# sys.exit(cProfile.run("import calibre.ebooks.metadata.amazonfr; calibre.ebooks.metadata.amazonfr.main()"))
- # sys.exit(cProfile.run("import calibre.ebooks.metadata.amazonfr; calibre.ebooks.metadata.amazonfr.main()", "profile_tmp"))
+ sys.exit(cProfile.run("import calibre.ebooks.metadata.amazonfr; calibre.ebooks.metadata.amazonfr.main()", "profile_tmp_threading_1"))
# calibre-debug -e "H:\Mes eBooks\Developpement\calibre\src\calibre\ebooks\metadata\amazonfr.py" -m 5 -a gore -v>data.html
\ No newline at end of file
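The patch above swaps the sequential fetch for a producer/consumer scheme: one thread per result URL, a bounded Queue acting as a pool of shared browser objects, and a second Queue through which each worker reports its index so results can be kept in submission order. A minimal, self-contained sketch of that pattern, assuming plain urllib openers in place of calibre's browser() and a made-up fetch_all() helper (current-Python module names, not the plugin's actual code):

    import threading
    import queue
    import urllib.request

    def fetch_all(urls, pool_size=3, timeout=10.0):
        # Bounded pool of opener objects: a worker must check one out before
        # touching the network, which caps concurrent connections.
        pool = queue.Queue(pool_size)
        for _ in range(pool_size):
            pool.put(urllib.request.build_opener())

        done = queue.Queue()            # indices reported back as workers finish
        results = [None] * len(urls)    # results stay in submission order

        def worker(i, url):
            opener = pool.get(True)
            try:
                results[i] = opener.open(url, timeout=timeout).read()
            except Exception:
                results[i] = None
            finally:
                pool.put(opener, True)  # always return the opener to the pool
                done.put(i, True)

        for i, url in enumerate(urls):
            threading.Thread(target=worker, args=(i, url)).start()
        for _ in urls:
            done.get(True)              # block until every index has reported
        return results

Bounding the pool rather than the thread count lets every worker start immediately while still limiting how many requests are in flight at once.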
From 34c6caeeecfa2ea5d6f934c2e79e057155351854 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Sat, 11 Dec 2010 18:22:33 +0100
Subject: [PATCH 051/163] Remove threading (no gain)
---
src/calibre/ebooks/metadata/amazonfr.py | 150 ++++++------------------
1 file changed, 38 insertions(+), 112 deletions(-)
diff --git a/src/calibre/ebooks/metadata/amazonfr.py b/src/calibre/ebooks/metadata/amazonfr.py
index 96bac89690..3842977654 100644
--- a/src/calibre/ebooks/metadata/amazonfr.py
+++ b/src/calibre/ebooks/metadata/amazonfr.py
@@ -2,9 +2,7 @@ from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2010, sengian '
-import sys, textwrap, re, traceback
-from threading import Thread, Lock
-from Queue import Queue
+import sys, textwrap, re, traceback, socket
from urllib import urlencode
from math import ceil
@@ -108,10 +106,6 @@ class Amazon(MetadataSource):
self.exception = e
self.tb = traceback.format_exc()
- # @property
- # def string_customization_help(self):
- # return _('You can select here the language for metadata search with amazon.com')
-
def report(verbose):
if verbose:
@@ -120,56 +114,6 @@ def report(verbose):
class AmazonError(Exception):
pass
-class BrowserThread(Thread):
-
- def __init__(self, url, qbr, qsync, nb, verbose=False, timeout=10., ex=Exception, name='Meta'):
- self.url = url
- self.ex = ex
- self.qbr = qbr
- self.qsync = qsync
- self.nb = nb
- self.plugname = name
- self.verbose = verbose
- self.timeout = timeout
- self.result = None
- self.br = browser()
- Thread.__init__(self)
-
- def get_result(self):
- return self.result
-
- def run(self):
- try:
- browser = self.qbr.get(True)
- raw = self.br.open_novisit(self.url, timeout=self.timeout).read()
- except Exception, e:
- report(self.verbose)
- if callable(getattr(e, 'getcode', None)) and \
- e.getcode() == 404:
- self.result = None
- if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
- raise self.ex(_('%s timed out. Try again later.') % self.plugname)
- raise self.ex(_('%s encountered an error.') % self.plugname)
- finally:
- self.qbr.put(browser, True)
-
- if '404 - ' in raw:
- report(self.verbose)
- self.result = None
- return None
- raw = xml_to_unicode(raw, strip_encoding_pats=True,
- resolve_entities=True)[0]
- try:
- self.result = soupparser.fromstring(raw)
- except:
- try:
- #remove ASCII invalid chars
- self.result = soupparser.fromstring(clean_ascii_chars(raw))
- except:
- self.result = None
- finally:
- self.qsync.put(self.nb, True)
-
class Query(object):
@@ -305,14 +249,11 @@ class Query(object):
for i in x.xpath("//a/span[@class='srTitle']")])
return results[:self.max_results], self.baseurl
-class ResultList(object):
+class ResultList(list):
def __init__(self, baseurl, lang = 'all'):
self.baseurl = baseurl
self.lang = lang
- self.thread = []
- self.res = []
- self.nbtag = 0
self.repub = re.compile(u'\((.*)\)')
self.rerat = re.compile(u'([0-9.]+)')
self.reattr = re.compile(r'<([a-zA-Z0-9]+)\s[^>]+>')
@@ -482,61 +423,45 @@ class ResultList(object):
pass
return mi
- def producer(self, sync, data, br, verbose=False):
- for i in xrange(len(data)):
- thread = BrowserThread(data[i], br, sync, i, verbose=verbose, ex=AmazonError,
- name='Amazon')
- thread.start()
- self.thread.append(thread)
+ def get_individual_metadata(self, url, br, verbose):
+ try:
+ raw = br.open_novisit(url).read()
+ except Exception, e:
+ report(verbose)
+ if callable(getattr(e, 'getcode', None)) and \
+ e.getcode() == 404:
+ return None
+ if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
+ raise AmazonError(_('Amazon timed out. Try again later.'))
+ raise AmazonError(_('Amazon encountered an error.'))
+ if '404 - ' in raw:
+ report(verbose)
+ return None
+ raw = xml_to_unicode(raw, strip_encoding_pats=True,
+ resolve_entities=True)[0]
+ try:
+ return soupparser.fromstring(raw)
+ except:
+ try:
+ #remove ASCII invalid chars
+ return soupparser.fromstring(clean_ascii_chars(raw))
+ except:
+ report(verbose)
+ return None
- def consumer(self, sync, syncbis, br, total_entries, verbose=False):
- i=0
- while i < total_entries:
- nb = int(sync.get(True))
- entry = self.thread[nb].get_result()
- i+=1
+ def populate(self, entries, br, verbose=False):
+ #multiple entries
+ for x in entries:
+ entry = self.get_individual_metadata(x, br, verbose)
if entry is not None:
mi = self.fill_MI(entry, verbose)
if mi is not None:
mi.tags, atag = self.get_tags(entry, verbose)
- self.res[nb] = mi
if atag:
- threadbis = BrowserThread(mi.tags, br, syncbis, nb, verbose=verbose, ex=AmazonError,
- name='Amazon')
- self.thread[nb] = threadbis
- self.nbtag +=1
- threadbis.start()
-
- def populate(self, entries, ibr, verbose=False, brcall=3):
- #multiple entries
- br = Queue(brcall)
- cbr = Queue(brcall-1)
-
- syncp = Queue(1)
- syncc = Queue(len(entries))
-
- for i in xrange(brcall-1):
- br.put(browser(), True)
- cbr.put(browser(), True)
- br.put(ibr, True)
-
- self.res = [None]*len(entries)
-
- prod_thread = Thread(target=self.producer, args=(syncp, entries, br, verbose))
- cons_thread = Thread(target=self.consumer, args=(syncp, syncc, cbr, len(entries), verbose))
- prod_thread.start()
- cons_thread.start()
- prod_thread.join()
- cons_thread.join()
-
- #finish processing
- for i in xrange(self.nbtag):
- nb = int(syncc.get(True))
- tags = self.thread[nb].get_result()
- if tags is not None:
- self.res[nb].tags = self.get_tags(tags, verbose)[0]
-
- return self.res
+ tags = self.get_individual_metadata(mi.tags, br, verbose)
+ if tags is not None:
+ mi.tags = self.get_tags(tags, verbose)[0]
+ self.append(mi)
def search(title=None, author=None, publisher=None, isbn=None,
@@ -550,7 +475,8 @@ def search(title=None, author=None, publisher=None, isbn=None,
#List of entry
ans = ResultList(baseurl, lang)
- return [x for x in ans.populate(entries, br, verbose) if x is not None]
+ ans.populate(entries, br, verbose)
+ return [x for x in ans if x is not None]
def option_parser():
parser = OptionParser(textwrap.dedent(\
@@ -599,6 +525,6 @@ if __name__ == '__main__':
# sys.exit(main())
import cProfile
# sys.exit(cProfile.run("import calibre.ebooks.metadata.amazonfr; calibre.ebooks.metadata.amazonfr.main()"))
- sys.exit(cProfile.run("import calibre.ebooks.metadata.amazonfr; calibre.ebooks.metadata.amazonfr.main()", "profile_tmp_threading_1"))
+ sys.exit(cProfile.run("import calibre.ebooks.metadata.amazonfr; calibre.ebooks.metadata.amazonfr.main()", "profile_tmp_2"))
# calibre-debug -e "H:\Mes eBooks\Developpement\calibre\src\calibre\ebooks\metadata\amazonfr.py" -m 5 -a gore -v>data.html
\ No newline at end of file
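With the threads gone, each lookup goes back through a single get_individual_metadata() helper: fetch one page, treat a 404 (or a '404 - ' error page served with status 200) as a missing entry, and re-parse a cleaned copy when the raw HTML is too broken for the parser. A rough standard-library equivalent of that flow, with lxml.html and a control-character strip standing in for calibre's xml_to_unicode()/clean_ascii_chars(), and fetch_and_parse() as an illustrative name:

    import re
    import socket
    import urllib.request
    from urllib.error import HTTPError
    from lxml import html

    def fetch_and_parse(url, timeout=10.0):
        # Return a parsed lxml tree, or None when the page is simply missing.
        try:
            raw = urllib.request.urlopen(url, timeout=timeout).read()
        except HTTPError as e:
            if e.code == 404:
                return None
            raise
        except socket.timeout:
            raise RuntimeError('site timed out, try again later')
        if b'404 - ' in raw:            # error page delivered with a 200 status
            return None
        try:
            return html.fromstring(raw)
        except Exception:
            # second chance: drop ASCII control characters that upset the parser
            cleaned = re.sub(rb'[\x00-\x08\x0b\x0c\x0e-\x1f]', b'', raw)
            try:
                return html.fromstring(cleaned)
            except Exception:
                return None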
From d5bc18b5c2b3ab0bb2dfa86e65191b8ccf4c7a67 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Sat, 11 Dec 2010 22:07:35 +0100
Subject: [PATCH 052/163] Modify amazon to fetch social metadata and split it into 2 plugins
---
src/calibre/customize/builtins.py | 6 +-
.../metadata/{amazonfr.py => amazonbis.py} | 207 +++++++++++-------
src/calibre/ebooks/metadata/fetch.py | 30 +--
3 files changed, 143 insertions(+), 100 deletions(-)
rename src/calibre/ebooks/metadata/{amazonfr.py => amazonbis.py} (76%)
diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index 06da355d6a..4798c46516 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -480,10 +480,10 @@ from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, KOGAN, \
from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
from calibre.devices.kobo.driver import KOBO
-from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \
- LibraryThing
+from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, LibraryThing
from calibre.ebooks.metadata.douban import DoubanBooks
from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers
+from calibre.ebooks.metadata.amazonbis import Amazon, AmazonSocial
from calibre.ebooks.metadata.fictionwise import Fictionwise
from calibre.ebooks.metadata.covers import OpenLibraryCovers, \
LibraryThingCovers, DoubanCovers
@@ -491,7 +491,7 @@ from calibre.library.catalog import CSV_XML, EPUB_MOBI, BIBTEX
from calibre.ebooks.epub.fix.unmanifested import Unmanifested
from calibre.ebooks.epub.fix.epubcheck import Epubcheck
-plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon,
+plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon, AmazonSocial,
LibraryThing, DoubanBooks, NiceBooks, Fictionwise, CSV_XML, EPUB_MOBI, BIBTEX,
Unmanifested, Epubcheck, OpenLibraryCovers, LibraryThingCovers, DoubanCovers,
NiceBooksCovers]
diff --git a/src/calibre/ebooks/metadata/amazonfr.py b/src/calibre/ebooks/metadata/amazonbis.py
similarity index 76%
rename from src/calibre/ebooks/metadata/amazonfr.py
rename to src/calibre/ebooks/metadata/amazonbis.py
index 3842977654..a94883b003 100644
--- a/src/calibre/ebooks/metadata/amazonfr.py
+++ b/src/calibre/ebooks/metadata/amazonbis.py
@@ -19,73 +19,56 @@ from calibre.utils.config import OptionParser
from calibre.library.comments import sanitize_comments_html
-class AmazonFr(MetadataSource):
+# class AmazonFr(MetadataSource):
- name = 'Amazon French'
- description = _('Downloads metadata from amazon.fr')
- supported_platforms = ['windows', 'osx', 'linux']
- author = 'Sengian'
- version = (1, 0, 0)
- has_html_comments = True
+ # name = 'Amazon French'
+ # description = _('Downloads metadata from amazon.fr')
+ # supported_platforms = ['windows', 'osx', 'linux']
+ # author = 'Sengian'
+ # version = (1, 0, 0)
+ # has_html_comments = True
- def fetch(self):
- try:
- self.results = search(self.title, self.book_author, self.publisher,
- self.isbn, max_results=10, verbose=self.verbose, lang='fr')
- except Exception, e:
- self.exception = e
- self.tb = traceback.format_exc()
+ # def fetch(self):
+ # try:
+ # self.results = search(self.title, self.book_author, self.publisher,
+ # self.isbn, max_results=10, verbose=self.verbose, lang='fr')
+ # except Exception, e:
+ # self.exception = e
+ # self.tb = traceback.format_exc()
-class AmazonEs(MetadataSource):
+# class AmazonEs(MetadataSource):
- name = 'Amazon Spanish'
- description = _('Downloads metadata from amazon.com in spanish')
- supported_platforms = ['windows', 'osx', 'linux']
- author = 'Sengian'
- version = (1, 0, 0)
- has_html_comments = True
+ # name = 'Amazon Spanish'
+ # description = _('Downloads metadata from amazon.com in spanish')
+ # supported_platforms = ['windows', 'osx', 'linux']
+ # author = 'Sengian'
+ # version = (1, 0, 0)
+ # has_html_comments = True
- def fetch(self):
- try:
- self.results = search(self.title, self.book_author, self.publisher,
- self.isbn, max_results=10, verbose=self.verbose, lang='es')
- except Exception, e:
- self.exception = e
- self.tb = traceback.format_exc()
+ # def fetch(self):
+ # try:
+ # self.results = search(self.title, self.book_author, self.publisher,
+ # self.isbn, max_results=10, verbose=self.verbose, lang='es')
+ # except Exception, e:
+ # self.exception = e
+ # self.tb = traceback.format_exc()
-class AmazonEn(MetadataSource):
+# class AmazonDe(MetadataSource):
- name = 'Amazon English'
- description = _('Downloads metadata from amazon.com in english')
- supported_platforms = ['windows', 'osx', 'linux']
- author = 'Sengian'
- version = (1, 0, 0)
- has_html_comments = True
+ # name = 'Amazon German'
+ # description = _('Downloads metadata from amazon.de')
+ # supported_platforms = ['windows', 'osx', 'linux']
+ # author = 'Sengian'
+ # version = (1, 0, 0)
+ # has_html_comments = True
- def fetch(self):
- try:
- self.results = search(self.title, self.book_author, self.publisher,
- self.isbn, max_results=10, verbose=self.verbose, lang='en')
- except Exception, e:
- self.exception = e
- self.tb = traceback.format_exc()
-
-class AmazonDe(MetadataSource):
-
- name = 'Amazon German'
- description = _('Downloads metadata from amazon.de')
- supported_platforms = ['windows', 'osx', 'linux']
- author = 'Sengian'
- version = (1, 0, 0)
- has_html_comments = True
-
- def fetch(self):
- try:
- self.results = search(self.title, self.book_author, self.publisher,
- self.isbn, max_results=10, verbose=self.verbose, lang='de')
- except Exception, e:
- self.exception = e
- self.tb = traceback.format_exc()
+ # def fetch(self):
+ # try:
+ # self.results = search(self.title, self.book_author, self.publisher,
+ # self.isbn, max_results=10, verbose=self.verbose, lang='de')
+ # except Exception, e:
+ # self.exception = e
+ # self.tb = traceback.format_exc()
class Amazon(MetadataSource):
@@ -93,15 +76,31 @@ class Amazon(MetadataSource):
description = _('Downloads metadata from amazon.com')
supported_platforms = ['windows', 'osx', 'linux']
author = 'Kovid Goyal & Sengian'
- version = (1, 1, 0)
+ version = (1, 0, 0)
has_html_comments = True
def fetch(self):
- # if not self.site_customization:
- # return
try:
self.results = search(self.title, self.book_author, self.publisher,
- self.isbn, max_results=10, verbose=self.verbose, lang='all')
+ self.isbn, max_results=5, verbose=self.verbose, lang='all')
+ except Exception, e:
+ self.exception = e
+ self.tb = traceback.format_exc()
+
+class AmazonSocial(MetadataSource):
+
+ name = 'AmazonSocial'
+ metadata_type = 'social'
+ description = _('Downloads social metadata from amazon.com')
+ supported_platforms = ['windows', 'osx', 'linux']
+ author = 'Kovid Goyal & Sengian'
+ version = (1, 0, 1)
+ has_html_comments = True
+
+ def fetch(self):
+ try:
+ self.results = search(self.title, self.book_author, self.publisher,
+ self.isbn, max_results=5, verbose=self.verbose, lang='all')
except Exception, e:
self.exception = e
self.tb = traceback.format_exc()
@@ -450,7 +449,6 @@ class ResultList(list):
return None
def populate(self, entries, br, verbose=False):
- #multiple entries
for x in entries:
entry = self.get_individual_metadata(x, br, verbose)
if entry is not None:
@@ -471,13 +469,40 @@ def search(title=None, author=None, publisher=None, isbn=None,
keywords=keywords, max_results=max_results,rlang=lang)(br, verbose)
if entries is None or len(entries) == 0:
- return
+ return None
#List of entry
ans = ResultList(baseurl, lang)
ans.populate(entries, br, verbose)
return [x for x in ans if x is not None]
+def get_social_metadata(title, authors, publisher, isbn, verbose=False,
+ max_results=1, lang='all'):
+ mi = MetaInformation(title, authors)
+ if not isbn or not check_isbn(isbn):
+ return [mi]
+
+ amazresults = search(isbn=isbn, verbose=verbose,
+ max_results=max_results, lang='all')
+ if amazresults is None or amazresults[0] is None:
+ from calibre.ebooks.metadata.xisbn import xisbn
+ for i in xisbn.get_associated_isbns(isbn):
+ amazresults = search(isbn=i, verbose=verbose,
+ max_results=max_results, lang='all')
+ if amazresults is not None and amazresults[0] is not None:
+ break
+ if amazresults is None or amazresults[0] is None:
+ return [mi]
+
+ miaz = amazresults[0]
+ if miaz.rating is not None:
+ mi.rating = miaz.rating
+ if miaz.comments is not None:
+ mi.comments = miaz.comments
+ if miaz.tags is not None:
+ mi.tags = miaz.tags
+ return [mi]
+
def option_parser():
parser = OptionParser(textwrap.dedent(\
_('''\
@@ -490,41 +515,59 @@ def option_parser():
All & english & french & german & spanish
'''
)))
- parser.add_option('-t', '--title', help='Book title')
- parser.add_option('-a', '--author', help='Book author(s)')
- parser.add_option('-p', '--publisher', help='Book publisher')
- parser.add_option('-i', '--isbn', help='Book ISBN')
- parser.add_option('-k', '--keywords', help='Keywords')
+ parser.add_option('-t', '--title', help=_('Book title'))
+ parser.add_option('-a', '--author', help=_('Book author(s)'))
+ parser.add_option('-p', '--publisher', help=_('Book publisher'))
+ parser.add_option('-i', '--isbn', help=_('Book ISBN'))
+ parser.add_option('-k', '--keywords', help=_('Keywords'))
+ parser.add_option('-s', '--social', default=0, action='count',
+ help=_('Get social data only'))
parser.add_option('-m', '--max-results', default=10,
- help='Maximum number of results to fetch')
+ help=_('Maximum number of results to fetch'))
parser.add_option('-l', '--lang', default='all',
- help='Chosen language for metadata search (all, en, fr, es, de)')
+ help=_('Chosen language for metadata search (all, en, fr, es, de)'))
parser.add_option('-v', '--verbose', default=0, action='count',
- help='Be more verbose about errors')
+ help=_('Be more verbose about errors'))
return parser
def main(args=sys.argv):
parser = option_parser()
opts, args = parser.parse_args(args)
try:
- results = search(opts.title, opts.author, isbn=opts.isbn, publisher=opts.publisher,
- keywords=opts.keywords, verbose=opts.verbose, max_results=opts.max_results,
- lang=opts.lang)
+ if opts.social:
+ results = get_social_metadata(opts.title, opts.author,
+ opts.publisher, opts.isbn, verbose=opts.verbose, lang=opts.lang)
+ else:
+ results = search(opts.title, opts.author, isbn=opts.isbn,
+ publisher=opts.publisher, keywords=opts.keywords, verbose=opts.verbose,
+ max_results=opts.max_results, lang=opts.lang)
except AssertionError:
report(True)
parser.print_help()
return 1
- if results is None or len(results) == 0:
+ if results is None and len(results) == 0:
print _('No result found for this search!')
return 0
for result in results:
print unicode(result).encode(preferred_encoding, 'replace')
print
+
+ #test social
+ # '''Test xisbn'''
+ # print get_social_metadata('Learning Python', None, None, '8324616489')[0]
+ # print
+ # '''Test sophisticated comment formatting'''
+ # print get_social_metadata('Angels & Demons', None, None, '9781416580829')[0]
+ # print
+ # '''Random tests'''
+ # print get_social_metadata('Star Trek: Destiny: Mere Mortals', None, None, '9781416551720')[0]
+ # print
+ # print get_social_metadata('The Great Gatsby', None, None, '0743273567')[0]
if __name__ == '__main__':
- # sys.exit(main())
- import cProfile
- # sys.exit(cProfile.run("import calibre.ebooks.metadata.amazonfr; calibre.ebooks.metadata.amazonfr.main()"))
- sys.exit(cProfile.run("import calibre.ebooks.metadata.amazonfr; calibre.ebooks.metadata.amazonfr.main()", "profile_tmp_2"))
+ sys.exit(main())
+ # import cProfile
+ # sys.exit(cProfile.run("import calibre.ebooks.metadata.amazonbis; calibre.ebooks.metadata.amazonbis.main()"))
+ # sys.exit(cProfile.run("import calibre.ebooks.metadata.amazonbis; calibre.ebooks.metadata.amazonbis.main()", "profile_tmp_2"))
-# calibre-debug -e "H:\Mes eBooks\Developpement\calibre\src\calibre\ebooks\metadata\amazonfr.py" -m 5 -a gore -v>data.html
\ No newline at end of file
+# calibre-debug -e "H:\Mes eBooks\Developpement\calibre\src\calibre\ebooks\metadata\amazonbis.py" -m 5 -a gore -v>data.html
\ No newline at end of file
diff --git a/src/calibre/ebooks/metadata/fetch.py b/src/calibre/ebooks/metadata/fetch.py
index b797a477d6..f1bf88da84 100644
--- a/src/calibre/ebooks/metadata/fetch.py
+++ b/src/calibre/ebooks/metadata/fetch.py
@@ -226,24 +226,24 @@ class ISBNDB(MetadataSource): # {{{
# }}}
-class Amazon(MetadataSource): # {{{
+# class Amazon(MetadataSource): # {{{
- name = 'Amazon'
- metadata_type = 'social'
- description = _('Downloads social metadata from amazon.com')
+ # name = 'Amazon'
+ # metadata_type = 'social'
+ # description = _('Downloads social metadata from amazon.com')
- has_html_comments = True
+ # has_html_comments = True
- def fetch(self):
- if not self.isbn:
- return
- from calibre.ebooks.metadata.amazon import get_social_metadata
- try:
- self.results = get_social_metadata(self.title, self.book_author,
- self.publisher, self.isbn)
- except Exception, e:
- self.exception = e
- self.tb = traceback.format_exc()
+ # def fetch(self):
+ # if not self.isbn:
+ # return
+ # from calibre.ebooks.metadata.amazon import get_social_metadata
+ # try:
+ # self.results = get_social_metadata(self.title, self.book_author,
+ # self.publisher, self.isbn)
+ # except Exception, e:
+ # self.exception = e
+ # self.tb = traceback.format_exc()
# }}}
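The get_social_metadata() added by this patch searches Amazon by ISBN and, when that returns nothing, retries with the ISBNs that xisbn reports as associated editions before falling back to the bare title/author record. Reduced to its control flow, with search and associated_isbns as injected stand-ins for the plugin's search() and xisbn.get_associated_isbns() (names here are illustrative only):

    def first_hit_with_fallback(isbn, search, associated_isbns):
        # Try the supplied ISBN first, then every associated edition.
        results = search(isbn=isbn)
        if not results or results[0] is None:
            for other in associated_isbns(isbn):
                results = search(isbn=other)
                if results and results[0] is not None:
                    break
        if not results or results[0] is None:
            return None                 # caller keeps its bare MetaInformation
        return results[0]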
From b2004ad77bb3e1d7f6630f417740cc3cbd089cb1 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Sat, 11 Dec 2010 22:41:37 +0100
Subject: [PATCH 053/163] Remove threading from fictionwise
---
src/calibre/ebooks/metadata/fictionwise.py | 112 +++++++--------------
1 file changed, 38 insertions(+), 74 deletions(-)
diff --git a/src/calibre/ebooks/metadata/fictionwise.py b/src/calibre/ebooks/metadata/fictionwise.py
index efb19ca249..418a8ca771 100644
--- a/src/calibre/ebooks/metadata/fictionwise.py
+++ b/src/calibre/ebooks/metadata/fictionwise.py
@@ -4,8 +4,6 @@ __copyright__ = '2010, sengian '
__docformat__ = 'restructuredtext en'
import sys, textwrap, re, traceback, socket
-from threading import Thread
-from Queue import Queue
from urllib import urlencode
from lxml.html import soupparser, tostring
@@ -20,7 +18,7 @@ from calibre.utils.config import OptionParser
from calibre.utils.date import parse_date, utcnow
from calibre.utils.cleantext import clean_ascii_chars, unescape
-class Fictionwise(MetadataSource): # {{{
+class Fictionwise(MetadataSource):
author = 'Sengian'
name = 'Fictionwise'
@@ -36,51 +34,10 @@ class Fictionwise(MetadataSource): # {{{
self.exception = e
self.tb = traceback.format_exc()
- # }}}
class FictionwiseError(Exception):
pass
-class BrowserThread(Thread):
-
- def __init__(self, url, verbose=False, timeout=10., ex=Exception, name='Meta'):
- self.url = url
- self.ex = ex
- self.plugname = name
- self.verbose = verbose
- self.timeout = timeout
- self.result = None
- Thread.__init__(self)
-
- def get_result(self):
- return self.result
-
- def run(self):
- try:
- raw = browser().open_novisit(self.url, timeout=self.timeout).read()
- except Exception, e:
- report(self.verbose)
- if callable(getattr(e, 'getcode', None)) and \
- e.getcode() == 404:
- self.result = None
- if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
- raise self.ex(_('%s timed out. Try again later.') % self.plugname)
- raise self.ex(_('%s encountered an error.') % self.plugname)
- if '404 - ' in raw:
- report(self.verbose)
- self.result = None
- return None
- raw = xml_to_unicode(raw, strip_encoding_pats=True,
- resolve_entities=True)[0]
- try:
- self.result = soupparser.fromstring(raw)
- except:
- try:
- #remove ASCII invalid chars
- self.result = soupparser.fromstring(clean_ascii_chars(raw))
- except:
- self.result = None
-
def report(verbose):
if verbose:
traceback.print_exc()
@@ -161,15 +118,16 @@ class Query(object):
results = [i.xpath('descendant-or-self::a')[0].get('href') for i in results]
#return feed if no links ie normally a single book or nothing
if not results:
- results = [feed]
- return results
+ return [feed], False
+ return results, True
class ResultList(list):
BASE_URL = 'http://www.fictionwise.com'
COLOR_VALUES = {'BLUE': 4, 'GREEN': 3, 'YELLOW': 2, 'RED': 1, 'NA': 0}
- def __init__(self):
+ def __init__(self, islink):
+ self.islink = islink
self.retitle = re.compile(r'\[[^\[\]]+\]')
self.rechkauth = re.compile(r'.*book\s*by', re.I)
self.redesc = re.compile(r'book\s*description\s*:\s*(<br[^>]+>)*(?P<desc>.*)<br[^>]*>.{,15}publisher\s*:', re.I)
@@ -337,47 +295,53 @@ class ResultList(list):
pass
return mi
- def producer(self, q, data, verbose=False):
- for x in data:
- thread = BrowserThread(self.BASE_URL+x, verbose=verbose, ex=FictionwiseError,
- name='Fictionwise')
- thread.start()
- q.put(thread, True)
+ def get_individual_metadata(self, url, br, verbose):
+ try:
+ raw = br.open_novisit(url).read()
+ except Exception, e:
+ report(verbose)
+ if callable(getattr(e, 'getcode', None)) and \
+ e.getcode() == 404:
+ return None
+ if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
+ raise FictionwiseError(_('Fictionwise timed out. Try again later.'))
+ raise FictionwiseError(_('Fictionwise encountered an error.'))
+ if '404 - ' in raw:
+ report(verbose)
+ return None
+ raw = xml_to_unicode(raw, strip_encoding_pats=True,
+ resolve_entities=True)[0]
+ try:
+ return soupparser.fromstring(raw)
+ except:
+ try:
+ #remove ASCII invalid chars
+ return soupparser.fromstring(clean_ascii_chars(raw))
+ except:
+ report(verbose)
+ return None
- def consumer(self, q, total_entries, verbose=False):
- while len(self) < total_entries:
- thread = q.get(True)
- thread.join()
- mi = thread.get_result()
- if mi is None:
- self.append(None)
- else:
- self.append(self.fill_MI(mi, verbose))
-
- def populate(self, entries, verbose=False, brcall=3):
- if len(entries) == 1 and not isinstance(entries[0], str):
+ def populate(self, entries, br, verbose=False):
+ if not self.islink:
#single entry
self.append(self.fill_MI(entries[0], verbose))
else:
#multiple entries
- q = Queue(brcall)
- prod_thread = Thread(target=self.producer, args=(q, entries, verbose))
- cons_thread = Thread(target=self.consumer, args=(q, len(entries), verbose))
- prod_thread.start()
- cons_thread.start()
- prod_thread.join()
- cons_thread.join()
+ for x in entries:
+ entry = self.get_individual_metadata(self.BASE_URL+x, br, verbose)
+ if entry is not None:
+ self.append(self.fill_MI(entry, verbose))
def search(title=None, author=None, publisher=None, isbn=None,
min_viewability='none', verbose=False, max_results=5,
keywords=None):
br = browser()
- entries = Query(title=title, author=author, publisher=publisher,
+ entries, islink = Query(title=title, author=author, publisher=publisher,
keywords=keywords, max_results=max_results)(br, verbose, timeout = 15.)
#List of entry
- ans = ResultList()
+ ans = ResultList(islink)
ans.populate(entries, br, verbose)
return [x for x in ans if x is not None]
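Query() now hands back a flag next to its results: False means the search landed directly on a single, already-parsed book page; True means the results are links that still have to be fetched one by one. ResultList stores the flag and branches on it in populate(). The contract in isolation, with all four parameters as illustrative placeholders:

    def build_results(entries, islink, fetch_one, fill_mi):
        if not islink:
            # direct hit: entries holds one parsed page, no further requests needed
            return [fill_mi(entries[0])]
        out = []
        for link in entries:
            page = fetch_one(link)      # e.g. BASE_URL + relative link
            if page is not None:
                out.append(fill_mi(page))
        return out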
From 1d968f71b71bdbf01a7d7ef654dd953e6806a5cb Mon Sep 17 00:00:00 2001
From: Sengian
Date: Sat, 11 Dec 2010 23:19:25 +0100
Subject: [PATCH 054/163] Remove threading from fictionwise and nicebooks
---
src/calibre/ebooks/metadata/fictionwise.py | 1 +
src/calibre/ebooks/metadata/nicebooks.py | 112 ++++++++-------------
2 files changed, 41 insertions(+), 72 deletions(-)
diff --git a/src/calibre/ebooks/metadata/fictionwise.py b/src/calibre/ebooks/metadata/fictionwise.py
index 418a8ca771..914fa2b228 100644
--- a/src/calibre/ebooks/metadata/fictionwise.py
+++ b/src/calibre/ebooks/metadata/fictionwise.py
@@ -337,6 +337,7 @@ def search(title=None, author=None, publisher=None, isbn=None,
min_viewability='none', verbose=False, max_results=5,
keywords=None):
br = browser()
+ islink = False
entries, islink = Query(title=title, author=author, publisher=publisher,
keywords=keywords, max_results=max_results)(br, verbose, timeout = 15.)
diff --git a/src/calibre/ebooks/metadata/nicebooks.py b/src/calibre/ebooks/metadata/nicebooks.py
index cdf915c827..3886eae201 100644
--- a/src/calibre/ebooks/metadata/nicebooks.py
+++ b/src/calibre/ebooks/metadata/nicebooks.py
@@ -80,46 +80,6 @@ class NiceBooksError(Exception):
class ISBNNotFound(NiceBooksError):
pass
-class BrowserThread(Thread):
-
- def __init__(self, url, verbose=False, timeout=10., ex=Exception, name='Meta'):
- self.url = url
- self.ex = ex
- self.plugname = name
- self.verbose = verbose
- self.timeout = timeout
- self.result = None
- Thread.__init__(self)
-
- def get_result(self):
- return self.result
-
- def run(self):
- try:
- raw = browser().open_novisit(self.url, timeout=self.timeout).read()
- except Exception, e:
- report(self.verbose)
- if callable(getattr(e, 'getcode', None)) and \
- e.getcode() == 404:
- self.result = None
- if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
- raise self.ex(_('%s timed out. Try again later.') % self.plugname)
- raise self.ex(_('%s encountered an error.') % self.plugname)
- if '404 - ' in raw:
- report(self.verbose)
- self.result = None
- return None
- raw = xml_to_unicode(raw, strip_encoding_pats=True,
- resolve_entities=True)[0]
- try:
- self.result = soupparser.fromstring(raw)
- except:
- try:
- #remove ASCII invalid chars
- self.result = soupparser.fromstring(clean_ascii_chars(raw))
- except:
- self.result = None
-
def report(verbose):
if verbose:
traceback.print_exc()
@@ -156,7 +116,7 @@ class Query(object):
report(verbose)
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
- return
+ return None
if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
raise NiceBooksError(_('Nicebooks timed out. Try again later.'))
raise NiceBooksError(_('Nicebooks encountered an error.'))
@@ -178,7 +138,7 @@ class Query(object):
nbresults = int(feed.xpath("//div[@id='topbar']/b")[0].text)
except:
#direct hit
- return [feed]
+ return [feed], False
nbpagetoquery = int(ceil(float(min(nbresults, self.max_results))/10))
pages =[feed]
@@ -207,13 +167,14 @@ class Query(object):
for x in pages:
results.extend([i.find_class('title')[0].get('href') \
for i in x.xpath("//ul[@id='results']/li")])
- return results[:self.max_results]
+ return results[:self.max_results], True
class ResultList(list):
BASE_URL = 'http://fr.nicebooks.com'
- def __init__(self):
+ def __init__(self, islink):
+ self.islink = islink
self.repub = re.compile(u'\s*.diteur\s*', re.I)
self.reauteur = re.compile(u'\s*auteur.*', re.I)
self.reautclean = re.compile(u'\s*\(.*\)\s*')
@@ -287,36 +248,42 @@ class ResultList(list):
pass
return mi
- def producer(self, q, data, verbose=False):
- for x in data:
- thread = BrowserThread(self.BASE_URL+x, verbose=verbose, ex=NiceBooksError,
- name='Nicebooks')
- thread.start()
- q.put(thread, True)
+ def get_individual_metadata(self, url, br, verbose):
+ try:
+ raw = br.open_novisit(url).read()
+ except Exception, e:
+ report(verbose)
+ if callable(getattr(e, 'getcode', None)) and \
+ e.getcode() == 404:
+ return None
+ if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
+ raise NiceBooksError(_('NiceBooks timed out. Try again later.'))
+ raise NiceBooksError(_('NiceBooks encountered an error.'))
+ if '404 - ' in raw:
+ report(verbose)
+ return None
+ raw = xml_to_unicode(raw, strip_encoding_pats=True,
+ resolve_entities=True)[0]
+ try:
+ return soupparser.fromstring(raw)
+ except:
+ try:
+ #remove ASCII invalid chars
+ return soupparser.fromstring(clean_ascii_chars(raw))
+ except:
+ report(verbose)
+ return None
- def consumer(self, q, total_entries, verbose=False):
- while len(self) < total_entries:
- thread = q.get(True)
- thread.join()
- mi = thread.get_result()
- if mi is None:
- self.append(None)
- else:
- self.append(self.fill_MI(mi, verbose))
-
- def populate(self, entries, verbose=False, brcall=3):
- if len(entries) == 1 and not isinstance(entries[0], str):
+ def populate(self, entries, br, verbose=False):
+ if not self.islink:
#single entry
self.append(self.fill_MI(entries[0], verbose))
else:
#multiple entries
- q = Queue(brcall)
- prod_thread = Thread(target=self.producer, args=(q, entries, verbose))
- cons_thread = Thread(target=self.consumer, args=(q, len(entries), verbose))
- prod_thread.start()
- cons_thread.start()
- prod_thread.join()
- cons_thread.join()
+ for x in entries:
+ entry = self.get_individual_metadata(self.BASE_URL+x, br, verbose)
+ if entry is not None:
+ self.append(self.fill_MI(entry, verbose))
class Covers(object):
@@ -358,15 +325,16 @@ class Covers(object):
def search(title=None, author=None, publisher=None, isbn=None,
max_results=5, verbose=False, keywords=None):
br = browser()
- entries = Query(title=title, author=author, isbn=isbn, publisher=publisher,
+ islink = False
+ entries, islink = Query(title=title, author=author, isbn=isbn, publisher=publisher,
keywords=keywords, max_results=max_results)(br, verbose, timeout = 10.)
if entries is None or len(entries) == 0:
return None
#List of entry
- ans = ResultList()
- ans.populate(entries, verbose)
+ ans = ResultList(islink)
+ ans.populate(entries, br, verbose)
return [x for x in ans if x is not None]
def check_for_cover(isbn):
From 9a3933354ab261cc35fb2fc9ff8ad7a47b75b58f Mon Sep 17 00:00:00 2001
From: Sengian
Date: Sun, 12 Dec 2010 00:23:47 +0100
Subject: [PATCH 055/163] Minor fix to amazon social
---
src/calibre/ebooks/metadata/amazonbis.py | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/src/calibre/ebooks/metadata/amazonbis.py b/src/calibre/ebooks/metadata/amazonbis.py
index a94883b003..f86f00b94f 100644
--- a/src/calibre/ebooks/metadata/amazonbis.py
+++ b/src/calibre/ebooks/metadata/amazonbis.py
@@ -98,9 +98,11 @@ class AmazonSocial(MetadataSource):
has_html_comments = True
def fetch(self):
+ if not self.isbn:
+ return
try:
- self.results = search(self.title, self.book_author, self.publisher,
- self.isbn, max_results=5, verbose=self.verbose, lang='all')
+ self.results = get_social_metadata(self.title, self.book_author, self.publisher,
+ self.isbn, verbose=self.verbose, lang='all')[0]
except Exception, e:
self.exception = e
self.tb = traceback.format_exc()
From f5736c59316d98042266006394cc2fc8b65b0ad7 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Sun, 12 Dec 2010 00:45:08 +0100
Subject: [PATCH 056/163] ...
---
src/calibre/ebooks/metadata/nicebooks.py | 2 --
1 file changed, 2 deletions(-)
diff --git a/src/calibre/ebooks/metadata/nicebooks.py b/src/calibre/ebooks/metadata/nicebooks.py
index 3886eae201..c852a81873 100644
--- a/src/calibre/ebooks/metadata/nicebooks.py
+++ b/src/calibre/ebooks/metadata/nicebooks.py
@@ -4,8 +4,6 @@ __copyright__ = '2010, sengian '
__docformat__ = 'restructuredtext en'
import sys, textwrap, re, traceback, socket
-from threading import Thread
-from Queue import Queue
from urllib import urlencode
from math import ceil
from copy import deepcopy
From ae781ae61433d57ab14ddb6d033246105f70afd1 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Sun, 12 Dec 2010 03:19:23 +0100
Subject: [PATCH 057/163] Add localised site support to amazon social (fr, de)
---
src/calibre/ebooks/metadata/amazonbis.py | 90 ++++++++++++++++++------
1 file changed, 70 insertions(+), 20 deletions(-)
diff --git a/src/calibre/ebooks/metadata/amazonbis.py b/src/calibre/ebooks/metadata/amazonbis.py
index f86f00b94f..acd7f97c1e 100644
--- a/src/calibre/ebooks/metadata/amazonbis.py
+++ b/src/calibre/ebooks/metadata/amazonbis.py
@@ -3,6 +3,7 @@ __license__ = 'GPL 3'
__copyright__ = '2010, sengian '
import sys, textwrap, re, traceback, socket
+from threading import Thread
from urllib import urlencode
from math import ceil
@@ -10,6 +11,7 @@ from lxml.html import soupparser, tostring
from calibre.utils.date import parse_date, utcnow, replace_months
from calibre.utils.cleantext import clean_ascii_chars
+from calibre.utils.localization import get_lang
from calibre import browser, preferred_encoding
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata import MetaInformation, check_isbn, \
@@ -101,8 +103,36 @@ class AmazonSocial(MetadataSource):
if not self.isbn:
return
try:
- self.results = get_social_metadata(self.title, self.book_author, self.publisher,
+ lang = get_lang()
+ lang = lang[:2] if re.match(r'(fr.*|de.*)', lang) else 'all'
+ if lang == 'all':
+ self.results = get_social_metadata(self.title, self.book_author, self.publisher,
self.isbn, verbose=self.verbose, lang='all')[0]
+ else:
+ tmploc = ThreadwithResults(AmazonError, self.verbose, get_social_metadata, self.title,
+ self.book_author, self.publisher,self.isbn, verbose=self.verbose, lang=lang)
+ tmpnoloc = ThreadwithResults(AmazonError, self.verbose, get_social_metadata, self.title,
+ self.book_author, self.publisher, self.isbn, verbose=self.verbose, lang='all')
+ tmploc.start()
+ tmpnoloc.start()
+ tmploc.join()
+ tmpnoloc.join()
+ tmploc= tmploc.get_result()
+ if tmploc is not None:
+ tmploc = tmploc[0]
+ tmpnoloc= tmpnoloc.get_result()
+ if tmpnoloc is not None:
+ tmpnoloc = tmpnoloc[0]
+ print tmpnoloc
+
+ if tmploc is not None and tmpnoloc is not None:
+ if tmploc.rating is None:
+ tmploc.rating = tmpnoloc.rating
+ if tmploc.comments is not None:
+ tmploc.comments = tmpnoloc.comments
+ if tmploc.tags is None:
+ tmploc.tags = tmpnoloc.tags
+ self.results = tmploc
except Exception, e:
self.exception = e
self.tb = traceback.format_exc()
@@ -115,6 +145,25 @@ def report(verbose):
class AmazonError(Exception):
pass
+class ThreadwithResults(Thread):
+ def __init__(self, error, verb, func, *args, **kargs):
+ self.func = func
+ self.args = args
+ self.kargs = kargs
+ self.verbose = verb
+ self.ex = error
+ self.result = None
+ Thread.__init__(self)
+
+ def get_result(self):
+ return self.result
+
+ def run(self):
+ try:
+ self.result = self.func(*self.args, **self.kargs)
+ except Exception, e:
+ report(self.verbose)
+ raise self.ex(_('An error was encountered in the function threading'))
class Query(object):
@@ -123,10 +172,10 @@ class Query(object):
BASE_URL_DE = 'http://www.amazon.de'
def __init__(self, title=None, author=None, publisher=None, isbn=None, keywords=None,
- max_results=20, rlang='all'):
+ max_results=10, rlang='all'):
assert not(title is None and author is None and publisher is None \
and isbn is None and keywords is None)
- assert (max_results < 21)
+ assert (max_results < 11)
self.max_results = int(max_results)
self.renbres = re.compile(u'\s*([0-9.,]+)\s*')
@@ -151,17 +200,17 @@ class Query(object):
#many options available
}
- if rlang =='all':
+ if rlang =='all' or rlang =='en':
q['sort'] = 'relevanceexprank'
self.urldata = self.BASE_URL_ALL
- elif rlang =='es':
- q['sort'] = 'relevanceexprank'
- q['field-language'] = 'Spanish'
- self.urldata = self.BASE_URL_ALL
- elif rlang =='en':
- q['sort'] = 'relevanceexprank'
- q['field-language'] = 'English'
- self.urldata = self.BASE_URL_ALL
+ # elif rlang =='es':
+ # q['sort'] = 'relevanceexprank'
+ # q['field-language'] = 'Spanish'
+ # self.urldata = self.BASE_URL_ALL
+ # elif rlang =='en':
+ # q['sort'] = 'relevanceexprank'
+ # q['field-language'] = 'English'
+ # self.urldata = self.BASE_URL_ALL
elif rlang =='fr':
q['sort'] = 'relevancerank'
self.urldata = self.BASE_URL_FR
@@ -250,7 +299,7 @@ class Query(object):
for i in x.xpath("//a/span[@class='srTitle']")])
return results[:self.max_results], self.baseurl
-class ResultList(list):
+class ResultList(object):
def __init__(self, baseurl, lang = 'all'):
self.baseurl = baseurl
@@ -451,6 +500,7 @@ class ResultList(list):
return None
def populate(self, entries, br, verbose=False):
+ res = []
for x in entries:
entry = self.get_individual_metadata(x, br, verbose)
if entry is not None:
@@ -461,7 +511,8 @@ class ResultList(list):
tags = self.get_individual_metadata(mi.tags, br, verbose)
if tags is not None:
mi.tags = self.get_tags(tags, verbose)[0]
- self.append(mi)
+ res.append(mi)
+ return res
def search(title=None, author=None, publisher=None, isbn=None,
@@ -475,8 +526,7 @@ def search(title=None, author=None, publisher=None, isbn=None,
#List of entry
ans = ResultList(baseurl, lang)
- ans.populate(entries, br, verbose)
- return [x for x in ans if x is not None]
+ return [x for x in ans.populate(entries, br, verbose) if x is not None]
def get_social_metadata(title, authors, publisher, isbn, verbose=False,
max_results=1, lang='all'):
@@ -485,12 +535,12 @@ def get_social_metadata(title, authors, publisher, isbn, verbose=False,
return [mi]
amazresults = search(isbn=isbn, verbose=verbose,
- max_results=max_results, lang='all')
+ max_results=max_results, lang=lang)
if amazresults is None or amazresults[0] is None:
from calibre.ebooks.metadata.xisbn import xisbn
for i in xisbn.get_associated_isbns(isbn):
amazresults = search(isbn=i, verbose=verbose,
- max_results=max_results, lang='all')
+ max_results=max_results, lang=lang)
if amazresults is not None and amazresults[0] is not None:
break
if amazresults is None or amazresults[0] is None:
@@ -514,7 +564,7 @@ def option_parser():
ISBN, publisher or keywords. Will fetch a maximum of 10 matches,
so you should make your query as specific as possible.
You can chose the language for metadata retrieval:
- All & english & french & german & spanish
+ english & french & german
'''
)))
parser.add_option('-t', '--title', help=_('Book title'))
@@ -527,7 +577,7 @@ def option_parser():
parser.add_option('-m', '--max-results', default=10,
help=_('Maximum number of results to fetch'))
parser.add_option('-l', '--lang', default='all',
- help=_('Chosen language for metadata search (all, en, fr, es, de)'))
+ help=_('Chosen language for metadata search (en, fr, de)'))
parser.add_option('-v', '--verbose', default=0, action='count',
help=_('Be more verbose about errors'))
return parser
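The ThreadwithResults helper introduced here is simply a Thread that keeps its target's return value, so AmazonSocial.fetch can run the localised search and the amazon.com search in parallel and then merge the two. A simplified sketch of the same idea (current-Python spelling, exceptions collapsed into a None result, and search_localised_and_global() as a hypothetical wrapper around the plugin's search function):

    import threading

    class ThreadWithResult(threading.Thread):
        # Minimal stand-in for the patch's ThreadwithResults helper.
        def __init__(self, func, *args, **kwargs):
            super().__init__()
            self.func, self.args, self.kwargs = func, args, kwargs
            self.result = None

        def run(self):
            try:
                self.result = self.func(*self.args, **self.kwargs)
            except Exception:
                self.result = None

    def search_localised_and_global(search, lang, **query):
        local = ThreadWithResult(search, lang=lang, **query)
        world = ThreadWithResult(search, lang='all', **query)
        for t in (local, world):
            t.start()
        for t in (local, world):
            t.join()
        # prefer the localised answer; the patch's intent is then to copy over
        # any fields (rating, comments, tags) the localised result lacks
        return local.result if local.result is not None else world.result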
From 5c89b576e31b85e17cf14e85a72b1b876f87579c Mon Sep 17 00:00:00 2001
From: Sengian
Date: Sun, 12 Dec 2010 11:57:00 +0100
Subject: [PATCH 058/163] Fix threading in amazon
---
src/calibre/ebooks/metadata/amazonbis.py | 195 +++++++++++++----------
1 file changed, 109 insertions(+), 86 deletions(-)
diff --git a/src/calibre/ebooks/metadata/amazonbis.py b/src/calibre/ebooks/metadata/amazonbis.py
index acd7f97c1e..7060ca4cb5 100644
--- a/src/calibre/ebooks/metadata/amazonbis.py
+++ b/src/calibre/ebooks/metadata/amazonbis.py
@@ -4,6 +4,7 @@ __copyright__ = '2010, sengian '
import sys, textwrap, re, traceback, socket
from threading import Thread
+from Queue import Queue
from urllib import urlencode
from math import ceil
@@ -21,57 +22,6 @@ from calibre.utils.config import OptionParser
from calibre.library.comments import sanitize_comments_html
-# class AmazonFr(MetadataSource):
-
- # name = 'Amazon French'
- # description = _('Downloads metadata from amazon.fr')
- # supported_platforms = ['windows', 'osx', 'linux']
- # author = 'Sengian'
- # version = (1, 0, 0)
- # has_html_comments = True
-
- # def fetch(self):
- # try:
- # self.results = search(self.title, self.book_author, self.publisher,
- # self.isbn, max_results=10, verbose=self.verbose, lang='fr')
- # except Exception, e:
- # self.exception = e
- # self.tb = traceback.format_exc()
-
-# class AmazonEs(MetadataSource):
-
- # name = 'Amazon Spanish'
- # description = _('Downloads metadata from amazon.com in spanish')
- # supported_platforms = ['windows', 'osx', 'linux']
- # author = 'Sengian'
- # version = (1, 0, 0)
- # has_html_comments = True
-
- # def fetch(self):
- # try:
- # self.results = search(self.title, self.book_author, self.publisher,
- # self.isbn, max_results=10, verbose=self.verbose, lang='es')
- # except Exception, e:
- # self.exception = e
- # self.tb = traceback.format_exc()
-
-# class AmazonDe(MetadataSource):
-
- # name = 'Amazon German'
- # description = _('Downloads metadata from amazon.de')
- # supported_platforms = ['windows', 'osx', 'linux']
- # author = 'Sengian'
- # version = (1, 0, 0)
- # has_html_comments = True
-
- # def fetch(self):
- # try:
- # self.results = search(self.title, self.book_author, self.publisher,
- # self.isbn, max_results=10, verbose=self.verbose, lang='de')
- # except Exception, e:
- # self.exception = e
- # self.tb = traceback.format_exc()
-
class Amazon(MetadataSource):
name = 'Amazon'
@@ -83,8 +33,33 @@ class Amazon(MetadataSource):
def fetch(self):
try:
- self.results = search(self.title, self.book_author, self.publisher,
+ lang = get_lang()
+ lang = lang[:2] if re.match(r'(fr.*|de.*)', lang) else 'all'
+ if lang == 'all':
+ self.results = search(self.title, self.book_author, self.publisher,
self.isbn, max_results=5, verbose=self.verbose, lang='all')
+ else:
+ tmploc = ThreadwithResults(search, self.title, self.book_author,
+ self.publisher,self.isbn, max_results=5,
+ verbose=self.verbose, lang=lang)
+ tmpnoloc = ThreadwithResults(search, self.title, self.book_author,
+ self.publisher, self.isbn, max_results=5,
+ verbose=self.verbose, lang='all')
+ tmploc.start()
+ tmpnoloc.start()
+ tmploc.join()
+ tmpnoloc.join()
+ tmploc= tmploc.get_result()
+ tmpnoloc= tmpnoloc.get_result()
+
+ tempres = None
+ if tmpnoloc is not None:
+ tempres = tmpnoloc
+ if tmploc is not None:
+ tempres = tmploc
+ if tmpnoloc is not None:
+ tempres.extend(tmpnoloc)
+ self.results = tmpres
except Exception, e:
self.exception = e
self.tb = traceback.format_exc()
@@ -107,12 +82,12 @@ class AmazonSocial(MetadataSource):
lang = lang[:2] if re.match(r'(fr.*|de.*)', lang) else 'all'
if lang == 'all':
self.results = get_social_metadata(self.title, self.book_author, self.publisher,
- self.isbn, verbose=self.verbose, lang='all')[0]
+ self.isbn, verbose=self.verbose, lang='all')[0]
else:
- tmploc = ThreadwithResults(AmazonError, self.verbose, get_social_metadata, self.title,
- self.book_author, self.publisher,self.isbn, verbose=self.verbose, lang=lang)
- tmpnoloc = ThreadwithResults(AmazonError, self.verbose, get_social_metadata, self.title,
- self.book_author, self.publisher, self.isbn, verbose=self.verbose, lang='all')
+ tmploc = ThreadwithResults(get_social_metadata, self.title, self.book_author,
+ self.publisher,self.isbn, verbose=self.verbose, lang=lang)
+ tmpnoloc = ThreadwithResults(get_social_metadata, self.title, self.book_author,
+ self.publisher, self.isbn, verbose=self.verbose, lang='all')
tmploc.start()
tmpnoloc.start()
tmploc.join()
@@ -123,15 +98,13 @@ class AmazonSocial(MetadataSource):
tmpnoloc= tmpnoloc.get_result()
if tmpnoloc is not None:
tmpnoloc = tmpnoloc[0]
- print tmpnoloc
-
- if tmploc is not None and tmpnoloc is not None:
- if tmploc.rating is None:
- tmploc.rating = tmpnoloc.rating
- if tmploc.comments is not None:
- tmploc.comments = tmpnoloc.comments
- if tmploc.tags is None:
- tmploc.tags = tmpnoloc.tags
+ if tmpnoloc is not None:
+ if tmploc.rating is None:
+ tmploc.rating = tmpnoloc.rating
+ if tmploc.comments is not None:
+ tmploc.comments = tmpnoloc.comments
+ if tmploc.tags is None:
+ tmploc.tags = tmpnoloc.tags
self.results = tmploc
except Exception, e:
self.exception = e
@@ -146,12 +119,10 @@ class AmazonError(Exception):
pass
class ThreadwithResults(Thread):
- def __init__(self, error, verb, func, *args, **kargs):
+ def __init__(self, func, *args, **kargs):
self.func = func
self.args = args
self.kargs = kargs
- self.verbose = verb
- self.ex = error
self.result = None
Thread.__init__(self)
@@ -159,11 +130,8 @@ class ThreadwithResults(Thread):
return self.result
def run(self):
- try:
- self.result = self.func(*self.args, **self.kargs)
- except Exception, e:
- report(self.verbose)
- raise self.ex(_('An error was encountered in the function threading'))
+ self.result = self.func(*self.args, **self.kargs)
+
class Query(object):
@@ -172,10 +140,10 @@ class Query(object):
BASE_URL_DE = 'http://www.amazon.de'
def __init__(self, title=None, author=None, publisher=None, isbn=None, keywords=None,
- max_results=10, rlang='all'):
+ max_results=20, rlang='all'):
assert not(title is None and author is None and publisher is None \
and isbn is None and keywords is None)
- assert (max_results < 11)
+ assert (max_results < 21)
self.max_results = int(max_results)
self.renbres = re.compile(u'\s*([0-9.,]+)\s*')
@@ -304,6 +272,9 @@ class ResultList(object):
def __init__(self, baseurl, lang = 'all'):
self.baseurl = baseurl
self.lang = lang
+ self.thread = []
+ self.res = []
+ self.nbtag = 0
self.repub = re.compile(u'\((.*)\)')
self.rerat = re.compile(u'([0-9.]+)')
self.reattr = re.compile(r'<([a-zA-Z0-9]+)\s[^>]+>')
@@ -499,20 +470,72 @@ class ResultList(object):
report(verbose)
return None
- def populate(self, entries, br, verbose=False):
- res = []
- for x in entries:
- entry = self.get_individual_metadata(x, br, verbose)
+ def fetchdatathread(self, qbr, qsync, nb, url, verbose):
+ try:
+ browser = qbr.get(True)
+ entry = self.get_individual_metadata(url, browser, verbose)
+ except:
+ report(verbose)
+ entry = None
+ finally:
+ qbr.put(browser, True)
+ qsync.put(nb, True)
+ return entry
+
+ def producer(self, sync, urls, br, verbose=False):
+ for i in xrange(len(urls)):
+ thread = ThreadwithResults(self.fetchdatathread, br, sync,
+ i, urls[i], verbose)
+ thread.start()
+ self.thread.append(thread)
+
+ def consumer(self, sync, syncbis, br, total_entries, verbose=False):
+ i=0
+ while i < total_entries:
+ nb = int(sync.get(True))
+ self.thread[nb].join()
+ entry = self.thread[nb].get_result()
+ i+=1
if entry is not None:
mi = self.fill_MI(entry, verbose)
if mi is not None:
mi.tags, atag = self.get_tags(entry, verbose)
+ self.res[nb] = mi
if atag:
- tags = self.get_individual_metadata(mi.tags, br, verbose)
- if tags is not None:
- mi.tags = self.get_tags(tags, verbose)[0]
- res.append(mi)
- return res
+ threadbis = ThreadwithResults(self.fetchdatathread,
+ br, syncbis, nb, mi.tags, verbose)
+ self.thread[nb] = threadbis
+ self.nbtag +=1
+ threadbis.start()
+
+ def populate(self, entries, ibr, verbose=False, brcall=3):
+ br = Queue(brcall)
+ cbr = Queue(brcall-1)
+
+ syncp = Queue(1)
+ syncc = Queue(len(entries))
+
+ for i in xrange(brcall-1):
+ br.put(browser(), True)
+ cbr.put(browser(), True)
+ br.put(ibr, True)
+
+ self.res = [None]*len(entries)
+
+ prod_thread = Thread(target=self.producer, args=(syncp, entries, br, verbose))
+ cons_thread = Thread(target=self.consumer, args=(syncp, syncc, cbr, len(entries), verbose))
+ prod_thread.start()
+ cons_thread.start()
+ prod_thread.join()
+ cons_thread.join()
+
+ #finish processing
+ for i in xrange(self.nbtag):
+ nb = int(syncc.get(True))
+ tags = self.thread[nb].get_result()
+ if tags is not None:
+ self.res[nb].tags = self.get_tags(tags, verbose)[0]
+ return self.res
def search(title=None, author=None, publisher=None, isbn=None,
@@ -561,7 +584,7 @@ def option_parser():
%prog [options]
Fetch book metadata from Amazon. You must specify one of title, author,
- ISBN, publisher or keywords. Will fetch a maximum of 10 matches,
+ ISBN, publisher or keywords. Will fetch a maximum of 20 matches,
so you should make your query as specific as possible.
You can chose the language for metadata retrieval:
english & french & german
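Amazon.fetch now launches the localised and the generic search side by side, so it has to merge two result lists rather than two metadata objects. The rule the patch aims for boils down to: use the localised list first, append the amazon.com hits after it, and fall back to the amazon.com list alone when the localised search came back empty. As a small standalone sketch (merge_search_results is an illustrative name, not part of the plugin):

    def merge_search_results(localised, worldwide):
        if localised is None:
            return list(worldwide) if worldwide is not None else None
        merged = list(localised)
        if worldwide is not None:
            merged.extend(worldwide)    # amazon.com hits follow the localised ones
        return merged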
From 0a2b5d4c2381d12e8cf711b701408cffbf621593 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Sun, 12 Dec 2010 13:36:34 +0100
Subject: [PATCH 059/163] Add threading to nicebooks (some problems with author=mankell in the interface with multiple authors, not the plugin)
---
src/calibre/ebooks/metadata/amazonbis.py | 7 ++-
src/calibre/ebooks/metadata/nicebooks.py | 80 +++++++++++++++++++++---
2 files changed, 77 insertions(+), 10 deletions(-)
diff --git a/src/calibre/ebooks/metadata/amazonbis.py b/src/calibre/ebooks/metadata/amazonbis.py
index 7060ca4cb5..dd973ba3d8 100644
--- a/src/calibre/ebooks/metadata/amazonbis.py
+++ b/src/calibre/ebooks/metadata/amazonbis.py
@@ -186,7 +186,12 @@ class Query(object):
q['sort'] = 'relevancerank'
self.urldata = self.BASE_URL_DE
self.baseurl = self.urldata
-
+
+ if title == _('Unknown'):
+ title=None
+ if author == _('Unknown'):
+ author=None
+
if isbn is not None:
q['field-isbn'] = isbn.replace('-', '')
else:
diff --git a/src/calibre/ebooks/metadata/nicebooks.py b/src/calibre/ebooks/metadata/nicebooks.py
index c852a81873..3f4f24902c 100644
--- a/src/calibre/ebooks/metadata/nicebooks.py
+++ b/src/calibre/ebooks/metadata/nicebooks.py
@@ -4,6 +4,8 @@ __copyright__ = '2010, sengian '
__docformat__ = 'restructuredtext en'
import sys, textwrap, re, traceback, socket
+from threading import Thread
+from Queue import Queue
from urllib import urlencode
from math import ceil
from copy import deepcopy
@@ -39,7 +41,7 @@ class NiceBooks(MetadataSource):
class NiceBooksCovers(CoverDownload):
name = 'Nicebooks covers'
- description = _('Downloads covers from french Nicebooks')
+ description = _('Downloads covers from French Nicebooks')
supported_platforms = ['windows', 'osx', 'linux']
author = 'Sengian'
type = _('Cover download')
@@ -78,6 +80,20 @@ class NiceBooksError(Exception):
class ISBNNotFound(NiceBooksError):
pass
+class ThreadwithResults(Thread):
+ def __init__(self, func, *args, **kargs):
+ self.func = func
+ self.args = args
+ self.kargs = kargs
+ self.result = None
+ Thread.__init__(self)
+
+ def get_result(self):
+ return self.result
+
+ def run(self):
+ self.result = self.func(*self.args, **self.kargs)
+
def report(verbose):
if verbose:
traceback.print_exc()
@@ -97,6 +113,10 @@ class Query(object):
if isbn is not None:
q = isbn
else:
+ if title == _('Unknown'):
+ title=None
+ if author == _('Unknown'):
+ author=None
q = ' '.join([i for i in (title, author, publisher, keywords) \
if i is not None])
@@ -173,6 +193,7 @@ class ResultList(list):
def __init__(self, islink):
self.islink = islink
+ self.thread = []
self.repub = re.compile(u'\s*.diteur\s*', re.I)
self.reauteur = re.compile(u'\s*auteur.*', re.I)
self.reautclean = re.compile(u'\s*\(.*\)\s*')
@@ -227,7 +248,6 @@ class ResultList(list):
return mi
def fill_MI(self, data, verbose):
- '''create and return an mi if possible, None otherwise'''
try:
entry = data.xpath("//div[@id='container']/div[@id='book-info']")[0]
title = self.get_title(entry)
@@ -272,16 +292,58 @@ class ResultList(list):
report(verbose)
return None
- def populate(self, entries, br, verbose=False):
+ def fetchdatathread(self, qbr, qsync, nb, url, verbose):
+ try:
+ browser = qbr.get(True)
+ entry = self.get_individual_metadata(url, browser, verbose)
+ except:
+ report(verbose)
+ entry = None
+ finally:
+ qbr.put(browser, True)
+ qsync.put(nb, True)
+ return entry
+
+ def producer(self, sync, urls, br, verbose=False):
+ for i in xrange(len(urls)):
+ thread = ThreadwithResults(self.fetchdatathread, br, sync,
+ i, self.BASE_URL+urls[i], verbose)
+ thread.start()
+ self.thread.append(thread)
+
+ def consumer(self, sync, total_entries, verbose=False):
+ res=[None]*total_entries
+ i=0
+ while i < total_entries:
+ nb = int(sync.get(True))
+ self.thread[nb].join()
+ entry = self.thread[nb].get_result()
+ mi = None
+ i+=1
+ if entry is not None:
+ mi = self.fill_MI(entry, verbose)
+ res[nb]=mi
+ return res
+
+ def populate(self, entries, br, verbose=False, brcall=3):
if not self.islink:
#single entry
self.append(self.fill_MI(entries[0], verbose))
else:
#multiple entries
- for x in entries:
- entry = self.get_individual_metadata(self.BASE_URL+x, br, verbose)
- if entry is not None:
- self.append(self.fill_MI(entry, verbose))
+ pbr = Queue(brcall)
+ sync = Queue(1)
+ for i in xrange(brcall-1):
+ pbr.put(browser(), True)
+ pbr.put(br, True)
+
+ prod_thread = Thread(target=self.producer, args=(sync, entries, pbr, verbose))
+ cons_thread = ThreadwithResults(self.consumer, sync, len(entries), verbose)
+ prod_thread.start()
+ cons_thread.start()
+ prod_thread.join()
+ cons_thread.join()
+ self.extend(cons_thread.get_result())
class Covers(object):
@@ -321,7 +383,7 @@ class Covers(object):
def search(title=None, author=None, publisher=None, isbn=None,
- max_results=5, verbose=False, keywords=None):
+ max_results=10, verbose=False, keywords=None):
br = browser()
islink = False
entries, islink = Query(title=title, author=author, isbn=isbn, publisher=publisher,
@@ -407,4 +469,4 @@ def main(args=sys.argv):
if __name__ == '__main__':
sys.exit(main())
-# calibre-debug -e "H:\Mes eBooks\Developpement\calibre\src\calibre\ebooks\metadata\nicebooks.py" -m 5 -a mankel >data.html
\ No newline at end of file
+# calibre-debug -e "H:\Mes eBooks\Developpement\calibre\src\calibre\ebooks\metadata\nicebooks.py" -m 10 -a mankel >data.html
\ No newline at end of file
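
A note on the ThreadwithResults helper that this series copies into each metadata plugin: it is just a Thread subclass that remembers the return value of its target callable, so the caller can join() and then read the result. A minimal standalone sketch of the intended usage (Python 2, like the surrounding code; fetch_len is a made-up stand-in for a real worker such as get_individual_metadata):

from threading import Thread

class ThreadwithResults(Thread):
    def __init__(self, func, *args, **kargs):
        self.func = func
        self.args = args
        self.kargs = kargs
        self.result = None
        Thread.__init__(self)

    def get_result(self):
        # only meaningful once the thread has been join()ed
        return self.result

    def run(self):
        self.result = self.func(*self.args, **self.kargs)

def fetch_len(url):
    # made-up stand-in for the plugins' get_individual_metadata()
    return len(url)

t = ThreadwithResults(fetch_len, 'http://example.com')
t.start()
t.join()
print t.get_result()   # -> 18
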
From 43ecf8c40d8f447dbfcbcaf686fa353ab8e3a57e Mon Sep 17 00:00:00 2001
From: Sengian
Date: Sun, 12 Dec 2010 14:39:36 +0100
Subject: [PATCH 060/163] Add threading to fictionwise
---
src/calibre/ebooks/metadata/fictionwise.py | 76 ++++++++++++++++++++--
src/calibre/ebooks/metadata/nicebooks.py | 4 +-
2 files changed, 70 insertions(+), 10 deletions(-)
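
This patch ports to fictionwise.py the same producer/consumer scheme already added to nicebooks.py: a small pool of browsers sits in a Queue, the producer starts one worker thread per result URL, and the consumer collects results by slot index, so the output keeps the input order even though threads finish out of order. A simplified, self-contained sketch of the idea (Python 2; here the workers write straight into a shared slot list instead of going through ThreadwithResults, and the string 'browser-N' stands in for a mechanize browser):

from threading import Thread
from Queue import Queue

def fetch_one(pool, sync, res, nb, url):
    # borrow a browser from the pool, give it back when done,
    # then tell the consumer which slot is ready
    br = pool.get(True)
    try:
        res[nb] = '%s fetched with %s' % (url, br)
    finally:
        pool.put(br, True)
        sync.put(nb, True)

def populate(urls, brcall=3):
    pool = Queue(brcall)
    for i in xrange(brcall):
        pool.put('browser-%d' % i, True)   # the real code pools mechanize browsers
    sync = Queue(len(urls))
    res = [None] * len(urls)
    workers = []
    for i, url in enumerate(urls):          # producer: one thread per URL
        t = Thread(target=fetch_one, args=(pool, sync, res, i, url))
        t.start()
        workers.append(t)
    for _ in urls:                          # consumer: wait until every slot is filled
        sync.get(True)
    for t in workers:
        t.join()
    return res

print populate(['/book/1', '/book/2', '/book/3'])
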
diff --git a/src/calibre/ebooks/metadata/fictionwise.py b/src/calibre/ebooks/metadata/fictionwise.py
index 914fa2b228..909d186702 100644
--- a/src/calibre/ebooks/metadata/fictionwise.py
+++ b/src/calibre/ebooks/metadata/fictionwise.py
@@ -4,6 +4,8 @@ __copyright__ = '2010, sengian '
__docformat__ = 'restructuredtext en'
import sys, textwrap, re, traceback, socket
+from threading import Thread
+from Queue import Queue
from urllib import urlencode
from lxml.html import soupparser, tostring
@@ -23,7 +25,6 @@ class Fictionwise(MetadataSource):
author = 'Sengian'
name = 'Fictionwise'
description = _('Downloads metadata from Fictionwise')
-
has_html_comments = True
def fetch(self):
@@ -38,6 +39,20 @@ class Fictionwise(MetadataSource):
class FictionwiseError(Exception):
pass
+class ThreadwithResults(Thread):
+ def __init__(self, func, *args, **kargs):
+ self.func = func
+ self.args = args
+ self.kargs = kargs
+ self.result = None
+ Thread.__init__(self)
+
+ def get_result(self):
+ return self.result
+
+ def run(self):
+ self.result = self.func(*self.args, **self.kargs)
+
def report(verbose):
if verbose:
traceback.print_exc()
@@ -50,8 +65,13 @@ class Query(object):
def __init__(self, title=None, author=None, publisher=None, keywords=None, max_results=20):
assert not(title is None and author is None and publisher is None and keywords is None)
assert (max_results < 21)
-
+
+ if title == _('Unknown'):
+ title=None
+ if author == _('Unknown'):
+ author=None
self.max_results = int(max_results)
+
q = { 'template' : 'searchresults_adv.htm' ,
'searchtitle' : '',
'searchauthor' : '',
@@ -72,6 +92,7 @@ class Query(object):
#b.DateFirstPublished, b.FWPublishDate
'sortby' : 'b.SortTitle'
}
+
if title is not None:
q['searchtitle'] = title
if author is not None:
@@ -128,6 +149,7 @@ class ResultList(list):
def __init__(self, islink):
self.islink = islink
+ self.thread = []
self.retitle = re.compile(r'\[[^\[\]]+\]')
self.rechkauth = re.compile(r'.*book\s*by', re.I)
        self.redesc = re.compile(r'book\s*description\s*:\s*(<br[^>]+>)*(?P<desc>.*)<br[^>]*>.{,15}publisher\s*:', re.I)
@@ -321,16 +343,56 @@ class ResultList(list):
report(verbose)
return None
- def populate(self, entries, br, verbose=False):
+ def fetchdatathread(self, qbr, qsync, nb, url, verbose):
+ try:
+ browser = qbr.get(True)
+ entry = self.get_individual_metadata(url, browser, verbose)
+ except:
+ report(verbose)
+ entry = None
+ finally:
+ qbr.put(browser, True)
+ qsync.put(nb, True)
+ return entry
+
+ def producer(self, sync, urls, br, verbose=False):
+ for i in xrange(len(urls)):
+ thread = ThreadwithResults(self.fetchdatathread, br, sync,
+ i, self.BASE_URL+urls[i], verbose)
+ thread.start()
+ self.thread.append(thread)
+
+ def consumer(self, sync, total_entries, verbose=False):
+ res=[None]*total_entries
+ i=0
+ while i < total_entries:
+ nb = int(sync.get(True))
+ self.thread[nb].join()
+ entry = self.thread[nb].get_result()
+ i+=1
+ if entry is not None:
+ res[nb] = self.fill_MI(entry, verbose)
+ return res
+
+ def populate(self, entries, br, verbose=False, brcall=3):
if not self.islink:
#single entry
self.append(self.fill_MI(entries[0], verbose))
else:
#multiple entries
- for x in entries:
- entry = self.get_individual_metadata(self.BASE_URL+x, br, verbose)
- if entry is not None:
- self.append(self.fill_MI(entry, verbose))
+ pbr = Queue(brcall)
+ sync = Queue(1)
+ for i in xrange(brcall-1):
+ pbr.put(browser(), True)
+ pbr.put(br, True)
+
+ prod_thread = Thread(target=self.producer, args=(sync, entries, pbr, verbose))
+ cons_thread = ThreadwithResults(self.consumer, sync, len(entries), verbose)
+ prod_thread.start()
+ cons_thread.start()
+ prod_thread.join()
+ cons_thread.join()
+ self.extend(cons_thread.get_result())
def search(title=None, author=None, publisher=None, isbn=None,
diff --git a/src/calibre/ebooks/metadata/nicebooks.py b/src/calibre/ebooks/metadata/nicebooks.py
index 3f4f24902c..6cb7c9a6ae 100644
--- a/src/calibre/ebooks/metadata/nicebooks.py
+++ b/src/calibre/ebooks/metadata/nicebooks.py
@@ -318,11 +318,9 @@ class ResultList(list):
nb = int(sync.get(True))
self.thread[nb].join()
entry = self.thread[nb].get_result()
- mi = None
i+=1
if entry is not None:
- mi = self.fill_MI(entry, verbose)
- res[nb]=mi
+ res[nb] = self.fill_MI(entry, verbose)
return res
def populate(self, entries, br, verbose=False, brcall=3):
From d4e4c8b1564de4acc09850c1b66207fca3ca2741 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Sun, 12 Dec 2010 18:31:18 +0100
Subject: [PATCH 061/163] Replace the default Amazon metadata plugin
---
src/calibre/customize/builtins.py | 2 +-
src/calibre/ebooks/metadata/amazon.py | 741 +++++++++++++++++++----
src/calibre/ebooks/metadata/amazonbis.py | 653 --------------------
3 files changed, 633 insertions(+), 763 deletions(-)
delete mode 100644 src/calibre/ebooks/metadata/amazonbis.py
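
Besides swapping the AWS-based fetcher for page scraping, the new Query object picks the Amazon site and sort order from the requested language (fr/de get the localized store and 'relevancerank', everything else goes to amazon.com with 'relevanceexprank') and then urlencodes the advanced-search fields. A rough sketch of just that URL construction, trimmed to the essentials and runnable offline (the field names follow the diff; build_search_url is an illustrative name, and in the plugin the language comes from calibre.utils.localization.get_lang()):

from urllib import urlencode

def build_search_url(title=None, author=None, isbn=None, lang='all'):
    # pick the store and the sort key the way the new Query.__init__ does
    base = {'fr': 'http://www.amazon.fr',
            'de': 'http://www.amazon.de'}.get(lang, 'http://www.amazon.com')
    q = {'search-alias': 'stripbooks', 'unfiltered': '1',
         'sort': 'relevancerank' if lang in ('fr', 'de') else 'relevanceexprank'}
    if isbn is not None:
        q['field-isbn'] = isbn.replace('-', '')
    else:
        if title is not None:
            q['field-title'] = title
        if author is not None:
            q['field-author'] = author
    return base + '/gp/search/ref=sr_adv_b/?' + urlencode(q)

lang = 'fr'   # in the plugin this is derived from get_lang()
print build_search_url(title='Les Miserables', author='Hugo', lang=lang)
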
diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index 4798c46516..342d0e8456 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -483,7 +483,7 @@ from calibre.devices.kobo.driver import KOBO
from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, LibraryThing
from calibre.ebooks.metadata.douban import DoubanBooks
from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers
-from calibre.ebooks.metadata.amazonbis import Amazon, AmazonSocial
+from calibre.ebooks.metadata.amazon import Amazon, AmazonSocial
from calibre.ebooks.metadata.fictionwise import Fictionwise
from calibre.ebooks.metadata.covers import OpenLibraryCovers, \
LibraryThingCovers, DoubanCovers
diff --git a/src/calibre/ebooks/metadata/amazon.py b/src/calibre/ebooks/metadata/amazon.py
index cf96c9732c..1362349685 100644
--- a/src/calibre/ebooks/metadata/amazon.py
+++ b/src/calibre/ebooks/metadata/amazon.py
@@ -1,130 +1,653 @@
-#!/usr/bin/env python
-__license__ = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
-__docformat__ = 'restructuredtext en'
+from __future__ import with_statement
+__license__ = 'GPL 3'
+__copyright__ = '2010, sengian '
-'''
-Fetch metadata using Amazon AWS
-'''
-import sys, re
+import sys, textwrap, re, traceback, socket
+from threading import Thread
+from Queue import Queue
+from urllib import urlencode
+from math import ceil
-from lxml import html
-from lxml.html import soupparser
+from lxml.html import soupparser, tostring
-from calibre import browser
-from calibre.ebooks.metadata import check_isbn
-from calibre.ebooks.metadata.book.base import Metadata
+from calibre.utils.date import parse_date, utcnow, replace_months
+from calibre.utils.cleantext import clean_ascii_chars
+from calibre.utils.localization import get_lang
+from calibre import browser, preferred_encoding
from calibre.ebooks.chardet import xml_to_unicode
+from calibre.ebooks.metadata import MetaInformation, check_isbn, \
+ authors_to_sort_string
+from calibre.ebooks.metadata.fetch import MetadataSource
+from calibre.utils.config import OptionParser
from calibre.library.comments import sanitize_comments_html
-def find_asin(br, isbn):
- q = 'http://www.amazon.com/s?field-keywords='+isbn
- raw = br.open_novisit(q).read()
- raw = xml_to_unicode(raw, strip_encoding_pats=True,
- resolve_entities=True)[0]
- root = html.fromstring(raw)
- revs = root.xpath('//*[@class="asinReviewsSummary" and @name]')
- revs = [x.get('name') for x in revs]
- if revs:
- return revs[0]
-def to_asin(br, isbn):
- if len(isbn) == 13:
+class Amazon(MetadataSource):
+
+ name = 'Amazon'
+ description = _('Downloads metadata from amazon.com')
+ supported_platforms = ['windows', 'osx', 'linux']
+ author = 'Kovid Goyal & Sengian'
+ version = (1, 0, 0)
+ has_html_comments = True
+
+ def fetch(self):
try:
- asin = find_asin(br, isbn)
+ lang = get_lang()
+ lang = lang[:2] if re.match(r'(fr.*|de.*)', lang) else 'all'
+ if lang == 'all':
+ self.results = search(self.title, self.book_author, self.publisher,
+ self.isbn, max_results=10, verbose=self.verbose, lang='all')
+ else:
+ tmploc = ThreadwithResults(search, self.title, self.book_author,
+ self.publisher,self.isbn, max_results=5,
+ verbose=self.verbose, lang=lang)
+ tmpnoloc = ThreadwithResults(search, self.title, self.book_author,
+ self.publisher, self.isbn, max_results=5,
+ verbose=self.verbose, lang='all')
+ tmploc.start()
+ tmpnoloc.start()
+ tmploc.join()
+ tmpnoloc.join()
+ tmploc= tmploc.get_result()
+ tmpnoloc= tmpnoloc.get_result()
+
+ tempres = None
+ if tmpnoloc is not None:
+ tempres = tmpnoloc
+ if tmploc is not None:
+ tempres = tmploc
+ if tmpnoloc is not None:
+ tempres.extend(tmpnoloc)
+ self.results = tempres
+ except Exception, e:
+ self.exception = e
+ self.tb = traceback.format_exc()
+
+class AmazonSocial(MetadataSource):
+
+ name = 'AmazonSocial'
+ metadata_type = 'social'
+ description = _('Downloads social metadata from amazon.com')
+ supported_platforms = ['windows', 'osx', 'linux']
+ author = 'Kovid Goyal & Sengian'
+ version = (1, 0, 1)
+ has_html_comments = True
+
+ def fetch(self):
+ if not self.isbn:
+ return
+ try:
+ lang = get_lang()
+ lang = lang[:2] if re.match(r'(fr.*|de.*)', lang) else 'all'
+ if lang == 'all':
+ self.results = get_social_metadata(self.title, self.book_author, self.publisher,
+ self.isbn, verbose=self.verbose, lang='all')[0]
+ else:
+ tmploc = ThreadwithResults(get_social_metadata, self.title, self.book_author,
+ self.publisher,self.isbn, verbose=self.verbose, lang=lang)
+ tmpnoloc = ThreadwithResults(get_social_metadata, self.title, self.book_author,
+ self.publisher, self.isbn, verbose=self.verbose, lang='all')
+ tmploc.start()
+ tmpnoloc.start()
+ tmploc.join()
+ tmpnoloc.join()
+ tmploc= tmploc.get_result()
+ if tmploc is not None:
+ tmploc = tmploc[0]
+ tmpnoloc= tmpnoloc.get_result()
+ if tmpnoloc is not None:
+ tmpnoloc = tmpnoloc[0]
+ if tmpnoloc is not None:
+ if tmploc.rating is None:
+ tmploc.rating = tmpnoloc.rating
+ if tmploc.comments is not None:
+ tmploc.comments = tmpnoloc.comments
+ if tmploc.tags is None:
+ tmploc.tags = tmpnoloc.tags
+ self.results = tmploc
+ except Exception, e:
+ self.exception = e
+ self.tb = traceback.format_exc()
+
+
+def report(verbose):
+ if verbose:
+ traceback.print_exc()
+
+class AmazonError(Exception):
+ pass
+
+class ThreadwithResults(Thread):
+ def __init__(self, func, *args, **kargs):
+ self.func = func
+ self.args = args
+ self.kargs = kargs
+ self.result = None
+ Thread.__init__(self)
+
+ def get_result(self):
+ return self.result
+
+ def run(self):
+ self.result = self.func(*self.args, **self.kargs)
+
+
+class Query(object):
+
+ BASE_URL_ALL = 'http://www.amazon.com'
+ BASE_URL_FR = 'http://www.amazon.fr'
+ BASE_URL_DE = 'http://www.amazon.de'
+
+ def __init__(self, title=None, author=None, publisher=None, isbn=None, keywords=None,
+ max_results=20, rlang='all'):
+ assert not(title is None and author is None and publisher is None \
+ and isbn is None and keywords is None)
+ assert (max_results < 21)
+
+ self.max_results = int(max_results)
+ self.renbres = re.compile(u'\s*([0-9.,]+)\s*')
+
+ q = { 'search-alias' : 'stripbooks' ,
+ 'unfiltered' : '1',
+ 'field-keywords' : '',
+ 'field-author' : '',
+ 'field-title' : '',
+ 'field-isbn' : '',
+ 'field-publisher' : ''
+ #get to amazon detailed search page to get all options
+ # 'node' : '',
+ # 'field-binding' : '',
+ #before, during, after
+ # 'field-dateop' : '',
+ #month as number
+ # 'field-datemod' : '',
+ # 'field-dateyear' : '',
+ #french only
+ # 'field-collection' : '',
+ #many options available
+ }
+
+ if rlang =='all' or rlang =='en':
+ q['sort'] = 'relevanceexprank'
+ self.urldata = self.BASE_URL_ALL
+ # elif rlang =='es':
+ # q['sort'] = 'relevanceexprank'
+ # q['field-language'] = 'Spanish'
+ # self.urldata = self.BASE_URL_ALL
+ # elif rlang =='en':
+ # q['sort'] = 'relevanceexprank'
+ # q['field-language'] = 'English'
+ # self.urldata = self.BASE_URL_ALL
+ elif rlang =='fr':
+ q['sort'] = 'relevancerank'
+ self.urldata = self.BASE_URL_FR
+ elif rlang =='de':
+ q['sort'] = 'relevancerank'
+ self.urldata = self.BASE_URL_DE
+ self.baseurl = self.urldata
+
+ if title == _('Unknown'):
+ title=None
+ if author == _('Unknown'):
+ author=None
+
+ if isbn is not None:
+ q['field-isbn'] = isbn.replace('-', '')
+ else:
+ if title is not None:
+ q['field-title'] = title
+ if author is not None:
+ q['field-author'] = author
+ if publisher is not None:
+ q['field-publisher'] = publisher
+ if keywords is not None:
+ q['field-keywords'] = keywords
+
+ if isinstance(q, unicode):
+ q = q.encode('utf-8')
+ self.urldata += '/gp/search/ref=sr_adv_b/?' + urlencode(q)
+
+ def __call__(self, browser, verbose, timeout = 5.):
+ if verbose:
+ print _('Query: %s') % self.urldata
+
+ try:
+ raw = browser.open_novisit(self.urldata, timeout=timeout).read()
+ except Exception, e:
+ report(verbose)
+ if callable(getattr(e, 'getcode', None)) and \
+ e.getcode() == 404:
+ return None, self.urldata
+ if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
+ raise AmazonError(_('Amazon timed out. Try again later.'))
+ raise AmazonError(_('Amazon encountered an error.'))
+ if '404 - ' in raw:
+ return None, self.urldata
+ raw = xml_to_unicode(raw, strip_encoding_pats=True,
+ resolve_entities=True)[0]
+
+ try:
+ feed = soupparser.fromstring(raw)
except:
- import traceback
- traceback.print_exc()
- asin = None
- else:
- asin = isbn
- return asin
+ try:
+ #remove ASCII invalid chars
+ return soupparser.fromstring(clean_ascii_chars(raw))
+ except:
+ return None, self.urldata
-def get_social_metadata(title, authors, publisher, isbn):
- mi = Metadata(title, authors)
- if not isbn:
- return mi
- isbn = check_isbn(isbn)
- if not isbn:
- return mi
- br = browser()
- asin = to_asin(br, isbn)
- if asin and get_metadata(br, asin, mi):
- return mi
- from calibre.ebooks.metadata.xisbn import xisbn
- for i in xisbn.get_associated_isbns(isbn):
- asin = to_asin(br, i)
- if asin and get_metadata(br, asin, mi):
- return mi
- return mi
+ #nb of page
+ try:
+ nbresults = self.renbres.findall(feed.xpath("//*[@class='resultCount']")[0].text)
+ nbresults = [re.sub(r'[.,]', '', x) for x in nbresults]
+ except:
+ return None, self.urldata
-def get_metadata(br, asin, mi):
- q = 'http://amzn.com/'+asin
- try:
- raw = br.open_novisit(q).read()
- except Exception, e:
- if callable(getattr(e, 'getcode', None)) and \
- e.getcode() == 404:
- return False
- raise
- if '404 - ' in raw:
- return False
- raw = xml_to_unicode(raw, strip_encoding_pats=True,
- resolve_entities=True)[0]
- try:
- root = soupparser.fromstring(raw)
- except:
- return False
- ratings = root.xpath('//form[@id="handleBuy"]/descendant::*[@class="asinReviewsSummary"]')
- if ratings:
- pat = re.compile(r'([0-9.]+) out of (\d+) stars')
- r = ratings[0]
- for elem in r.xpath('descendant::*[@title]'):
- t = elem.get('title')
- m = pat.match(t)
- if m is not None:
+ pages =[feed]
+ if len(nbresults) > 1:
+ nbpagetoquery = int(ceil(float(min(int(nbresults[2]), self.max_results))/ int(nbresults[1])))
+ for i in xrange(2, nbpagetoquery + 1):
try:
- mi.rating = float(m.group(1))/float(m.group(2)) * 5
- break
+ urldata = self.urldata + '&page=' + str(i)
+ raw = browser.open_novisit(urldata, timeout=timeout).read()
+ except Exception, e:
+ continue
+ if '404 - ' in raw:
+ continue
+ raw = xml_to_unicode(raw, strip_encoding_pats=True,
+ resolve_entities=True)[0]
+ try:
+ feed = soupparser.fromstring(raw)
except:
- pass
+ try:
+ #remove ASCII invalid chars
+ return soupparser.fromstring(clean_ascii_chars(raw))
+ except:
+ continue
+ pages.append(feed)
- desc = root.xpath('//div[@id="productDescription"]/*[@class="content"]')
- if desc:
- desc = desc[0]
- for c in desc.xpath('descendant::*[@class="seeAll" or'
- ' @class="emptyClear" or @href]'):
- c.getparent().remove(c)
- desc = html.tostring(desc, method='html', encoding=unicode).strip()
- # remove all attributes from tags
- desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc)
- # Collapse whitespace
- #desc = re.sub('\n+', '\n', desc)
- #desc = re.sub(' +', ' ', desc)
- # Remove the notice about text referring to out of print editions
-    desc = re.sub(r'(?s)--This text ref.*?</div>', '', desc)
- # Remove comments
-    desc = re.sub(r'(?s)<!--.*?-->', '', desc)
- mi.comments = sanitize_comments_html(desc)
+ results = []
+ for x in pages:
+ results.extend([i.getparent().get('href') \
+ for i in x.xpath("//a/span[@class='srTitle']")])
+ return results[:self.max_results], self.baseurl
- return True
+class ResultList(object):
+ def __init__(self, baseurl, lang = 'all'):
+ self.baseurl = baseurl
+ self.lang = lang
+ self.thread = []
+ self.res = []
+ self.nbtag = 0
+ self.repub = re.compile(u'\((.*)\)')
+ self.rerat = re.compile(u'([0-9.]+)')
+ self.reattr = re.compile(r'<([a-zA-Z0-9]+)\s[^>]+>')
+        self.reoutp = re.compile(r'(?s)--This text ref.*?</div>')
+        self.recom = re.compile(r'(?s)<!--.*?-->')
+ self.republi = re.compile(u'(Editeur|Publisher|Verlag)', re.I)
+        self.reisbn = re.compile(u'(ISBN-10|ISBN-13|ASIN)', re.I)
+ self.relang = re.compile(u'(Language|Langue|Sprache)', re.I)
+ self.reratelt = re.compile(u'(Average\s*Customer\s*Review|Moyenne\s*des\s*commentaires\s*client|Durchschnittliche\s*Kundenbewertung)', re.I)
+ self.reprod = re.compile(u'(Product\s*Details|D.tails\s*sur\s*le\s*produit|Produktinformation)', re.I)
+
+ def strip_tags_etree(self, etreeobj, invalid_tags):
+ for (itag, rmv) in invalid_tags.iteritems():
+ if rmv:
+ for elts in etreeobj.getiterator(itag):
+ elts.drop_tree()
+ else:
+ for elts in etreeobj.getiterator(itag):
+ elts.drop_tag()
+
+ def clean_entry(self, entry, invalid_tags = {'script': True},
+ invalid_id = (), invalid_class=()):
+ #invalid_tags: remove tag and keep content if False else remove
+ #remove tags
+ if invalid_tags:
+ self.strip_tags_etree(entry, invalid_tags)
+ #remove id
+ if invalid_id:
+ for eltid in invalid_id:
+ elt = entry.get_element_by_id(eltid)
+ if elt is not None:
+ elt.drop_tree()
+ #remove class
+ if invalid_class:
+ for eltclass in invalid_class:
+ elts = entry.find_class(eltclass)
+ if elts is not None:
+ for elt in elts:
+ elt.drop_tree()
+
+ def get_title(self, entry):
+ title = entry.get_element_by_id('btAsinTitle')
+ if title is not None:
+ title = title.text
+ return unicode(title.replace('\n', '').strip())
+
+ def get_authors(self, entry):
+ author = entry.get_element_by_id('btAsinTitle')
+ while author.getparent().tag != 'div':
+ author = author.getparent()
+ author = author.getparent()
+ authortext = []
+ for x in author.getiterator('a'):
+ authortext.append(unicode(x.text_content().strip()))
+ return authortext
+
+ def get_description(self, entry, verbose):
+ try:
+ description = entry.get_element_by_id("productDescription").find("div[@class='content']")
+ inv_class = ('seeAll', 'emptyClear')
+ inv_tags ={'img': True, 'a': False}
+ self.clean_entry(description, invalid_tags=inv_tags, invalid_class=inv_class)
+ description = tostring(description, method='html', encoding=unicode).strip()
+ # remove all attributes from tags
+ description = self.reattr.sub(r'<\1>', description)
+ # Remove the notice about text referring to out of print editions
+ description = self.reoutp.sub('', description)
+ # Remove comments
+ description = self.recom.sub('', description)
+ return unicode(sanitize_comments_html(description))
+ except:
+ report(verbose)
+ return None
+
+ def get_tags(self, entry, verbose):
+ try:
+ tags = entry.get_element_by_id('tagContentHolder')
+ testptag = tags.find_class('see-all')
+ if testptag:
+ for x in testptag:
+ alink = x.xpath('descendant-or-self::a')
+ if alink:
+ if alink[0].get('class') == 'tgJsActive':
+ continue
+ return self.baseurl + alink[0].get('href'), True
+ tags = [a.text for a in tags.getiterator('a') if a.get('rel') == 'tag']
+ except:
+ report(verbose)
+ tags = [], False
+ return tags, False
+
+ def get_book_info(self, entry, mi, verbose):
+ try:
+ entry = entry.get_element_by_id('SalesRank').getparent()
+ except:
+ try:
+ for z in entry.getiterator('h2'):
+ if self.reprod.search(z.text_content()):
+ entry = z.getparent().find("div[@class='content']/ul")
+ break
+ except:
+ report(verbose)
+ return mi
+ elts = entry.findall('li')
+ #pub & date
+ elt = filter(lambda x: self.republi.search(x.find('b').text), elts)
+ if elt:
+ pub = elt[0].find('b').tail
+ mi.publisher = unicode(self.repub.sub('', pub).strip())
+ d = self.repub.search(pub)
+ if d is not None:
+ d = d.group(1)
+ try:
+ default = utcnow().replace(day=15)
+ if self.lang != 'all':
+ d = replace_months(d, self.lang)
+ d = parse_date(d, assume_utc=True, default=default)
+ mi.pubdate = d
+ except:
+ report(verbose)
+ #ISBN
+ elt = filter(lambda x: self.reisbn.search(x.find('b').text), elts)
+ if elt:
+ isbn = elt[0].find('b').tail.replace('-', '').strip()
+ if check_isbn(isbn):
+ mi.isbn = unicode(isbn)
+ elif len(elt) > 1:
+ isbnone = elt[1].find('b').tail.replace('-', '').strip()
+ if check_isbn(isbnone):
+ mi.isbn = unicode(isbnone)
+ else:
+ #assume ASIN-> find a check for asin
+ mi.isbn = unicode(isbn)
+ #Langue
+ elt = filter(lambda x: self.relang.search(x.find('b').text), elts)
+ if elt:
+ langue = elt[0].find('b').tail.strip()
+ if langue:
+ mi.language = unicode(langue)
+ #ratings
+ elt = filter(lambda x: self.reratelt.search(x.find('b').text), elts)
+ if elt:
+ ratings = elt[0].find_class('swSprite')
+ if ratings:
+ ratings = self.rerat.findall(ratings[0].get('title'))
+ if len(ratings) == 2:
+ mi.rating = float(ratings[0])/float(ratings[1]) * 5
+ return mi
+
+ def fill_MI(self, entry, verbose):
+ try:
+ title = self.get_title(entry)
+ authors = self.get_authors(entry)
+ except Exception, e:
+ if verbose:
+ print _('Failed to get all details for an entry')
+ print e
+ print _('URL who failed: %s') % x
+ report(verbose)
+ return None
+ mi = MetaInformation(title, authors)
+ mi.author_sort = authors_to_sort_string(authors)
+ try:
+ mi.comments = self.get_description(entry, verbose)
+ mi = self.get_book_info(entry, mi, verbose)
+ except:
+ pass
+ return mi
+
+ def get_individual_metadata(self, url, br, verbose):
+ try:
+ raw = br.open_novisit(url).read()
+ except Exception, e:
+ report(verbose)
+ if callable(getattr(e, 'getcode', None)) and \
+ e.getcode() == 404:
+ return None
+ if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
+ raise AmazonError(_('Amazon timed out. Try again later.'))
+ raise AmazonError(_('Amazon encountered an error.'))
+ if '404 - ' in raw:
+ report(verbose)
+ return None
+ raw = xml_to_unicode(raw, strip_encoding_pats=True,
+ resolve_entities=True)[0]
+ try:
+ return soupparser.fromstring(raw)
+ except:
+ try:
+ #remove ASCII invalid chars
+ return soupparser.fromstring(clean_ascii_chars(raw))
+ except:
+ report(verbose)
+ return None
+
+ def fetchdatathread(self, qbr, qsync, nb, url, verbose):
+ try:
+ browser = qbr.get(True)
+ entry = self.get_individual_metadata(url, browser, verbose)
+ except:
+ report(verbose)
+ entry = None
+ finally:
+ qbr.put(browser, True)
+ qsync.put(nb, True)
+ return entry
+
+ def producer(self, sync, urls, br, verbose=False):
+ for i in xrange(len(urls)):
+ thread = ThreadwithResults(self.fetchdatathread, br, sync,
+ i, urls[i], verbose)
+ thread.start()
+ self.thread.append(thread)
+
+ def consumer(self, sync, syncbis, br, total_entries, verbose=False):
+ i=0
+ while i < total_entries:
+ nb = int(sync.get(True))
+ self.thread[nb].join()
+ entry = self.thread[nb].get_result()
+ i+=1
+ if entry is not None:
+ mi = self.fill_MI(entry, verbose)
+ if mi is not None:
+ mi.tags, atag = self.get_tags(entry, verbose)
+ self.res[nb] = mi
+ if atag:
+ threadbis = ThreadwithResults(self.fetchdatathread,
+ br, syncbis, nb, mi.tags, verbose)
+ self.thread[nb] = threadbis
+ self.nbtag +=1
+ threadbis.start()
+
+ def populate(self, entries, ibr, verbose=False, brcall=3):
+ br = Queue(brcall)
+ cbr = Queue(brcall-1)
+
+ syncp = Queue(1)
+ syncc = Queue(len(entries))
+
+ for i in xrange(brcall-1):
+ br.put(browser(), True)
+ cbr.put(browser(), True)
+ br.put(ibr, True)
+
+ self.res = [None]*len(entries)
+
+ prod_thread = Thread(target=self.producer, args=(syncp, entries, br, verbose))
+ cons_thread = Thread(target=self.consumer, args=(syncp, syncc, cbr, len(entries), verbose))
+ prod_thread.start()
+ cons_thread.start()
+ prod_thread.join()
+ cons_thread.join()
+
+ #finish processing
+ for i in xrange(self.nbtag):
+ nb = int(syncc.get(True))
+ tags = self.thread[nb].get_result()
+ if tags is not None:
+ self.res[nb].tags = self.get_tags(tags, verbose)[0]
+ return self.res
+
+
+def search(title=None, author=None, publisher=None, isbn=None,
+ max_results=5, verbose=False, keywords=None, lang='all'):
+ br = browser()
+ entries, baseurl = Query(title=title, author=author, isbn=isbn, publisher=publisher,
+ keywords=keywords, max_results=max_results,rlang=lang)(br, verbose)
+
+ if entries is None or len(entries) == 0:
+ return None
+
+ #List of entry
+ ans = ResultList(baseurl, lang)
+ return [x for x in ans.populate(entries, br, verbose) if x is not None]
+
+def get_social_metadata(title, authors, publisher, isbn, verbose=False,
+ max_results=1, lang='all'):
+ mi = MetaInformation(title, authors)
+ if not isbn or not check_isbn(isbn):
+ return [mi]
+
+ amazresults = search(isbn=isbn, verbose=verbose,
+ max_results=max_results, lang=lang)
+ if amazresults is None or amazresults[0] is None:
+ from calibre.ebooks.metadata.xisbn import xisbn
+ for i in xisbn.get_associated_isbns(isbn):
+ amazresults = search(isbn=i, verbose=verbose,
+ max_results=max_results, lang=lang)
+ if amazresults is not None and amazresults[0] is not None:
+ break
+ if amazresults is None or amazresults[0] is None:
+ return [mi]
+
+ miaz = amazresults[0]
+ if miaz.rating is not None:
+ mi.rating = miaz.rating
+ if miaz.comments is not None:
+ mi.comments = miaz.comments
+ if miaz.tags is not None:
+ mi.tags = miaz.tags
+ return [mi]
+
+def option_parser():
+ parser = OptionParser(textwrap.dedent(\
+ _('''\
+ %prog [options]
+
+ Fetch book metadata from Amazon. You must specify one of title, author,
+ ISBN, publisher or keywords. Will fetch a maximum of 20 matches,
+ so you should make your query as specific as possible.
+        You can choose the language for metadata retrieval:
+        English, French or German
+ '''
+ )))
+ parser.add_option('-t', '--title', help=_('Book title'))
+ parser.add_option('-a', '--author', help=_('Book author(s)'))
+ parser.add_option('-p', '--publisher', help=_('Book publisher'))
+ parser.add_option('-i', '--isbn', help=_('Book ISBN'))
+ parser.add_option('-k', '--keywords', help=_('Keywords'))
+ parser.add_option('-s', '--social', default=0, action='count',
+ help=_('Get social data only'))
+ parser.add_option('-m', '--max-results', default=10,
+ help=_('Maximum number of results to fetch'))
+ parser.add_option('-l', '--lang', default='all',
+ help=_('Chosen language for metadata search (en, fr, de)'))
+ parser.add_option('-v', '--verbose', default=0, action='count',
+ help=_('Be more verbose about errors'))
+ return parser
def main(args=sys.argv):
- # Test xisbn
- print get_social_metadata('Learning Python', None, None, '8324616489')
- print
-
- # Test sophisticated comment formatting
- print get_social_metadata('Angels & Demons', None, None, '9781416580829')
- print
-
- # Random tests
- print get_social_metadata('Star Trek: Destiny: Mere Mortals', None, None, '9781416551720')
- print
- print get_social_metadata('The Great Gatsby', None, None, '0743273567')
-
- return 0
+ parser = option_parser()
+ opts, args = parser.parse_args(args)
+ try:
+ if opts.social:
+ results = get_social_metadata(opts.title, opts.author,
+ opts.publisher, opts.isbn, verbose=opts.verbose, lang=opts.lang)
+ else:
+ results = search(opts.title, opts.author, isbn=opts.isbn,
+ publisher=opts.publisher, keywords=opts.keywords, verbose=opts.verbose,
+ max_results=opts.max_results, lang=opts.lang)
+ except AssertionError:
+ report(True)
+ parser.print_help()
+ return 1
+    if results is None or len(results) == 0:
+ print _('No result found for this search!')
+ return 0
+ for result in results:
+ print unicode(result).encode(preferred_encoding, 'replace')
+ print
+
+ #test social
+ # '''Test xisbn'''
+ # print get_social_metadata('Learning Python', None, None, '8324616489')[0]
+ # print
+ # '''Test sophisticated comment formatting'''
+ # print get_social_metadata('Angels & Demons', None, None, '9781416580829')[0]
+ # print
+ # '''Random tests'''
+ # print get_social_metadata('Star Trek: Destiny: Mere Mortals', None, None, '9781416551720')[0]
+ # print
+ # print get_social_metadata('The Great Gatsby', None, None, '0743273567')[0]
if __name__ == '__main__':
sys.exit(main())
+ # import cProfile
+ # sys.exit(cProfile.run("import calibre.ebooks.metadata.amazonbis; calibre.ebooks.metadata.amazonbis.main()"))
+ # sys.exit(cProfile.run("import calibre.ebooks.metadata.amazonbis; calibre.ebooks.metadata.amazonbis.main()", "profile_tmp_2"))
+
+# calibre-debug -e "H:\Mes eBooks\Developpement\calibre\src\calibre\ebooks\metadata\amazonbis.py" -m 5 -a gore -v>data.html
\ No newline at end of file
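
The paging logic in the new Query.__call__ derives the number of result pages to request from the 'resultCount' banner (something like 'Showing 1 - 12 of 345 Results'): take the smaller of the total hit count and max_results, divide by the page size, round up, and then fetch '&page=N' for pages 2..N. A worked sketch of just that arithmetic (the banner numbers are made up):

from math import ceil

def pages_to_query(first, per_page, total, max_results):
    # nbresults in the diff is [first, per_page, total] parsed from the banner
    wanted = min(total, max_results)
    return int(ceil(float(wanted) / per_page))

# a banner of "1 - 12 of 345" with max_results=30 -> 3 pages (12 + 12 + 6 hits)
print pages_to_query(1, 12, 345, 30)   # -> 3
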
diff --git a/src/calibre/ebooks/metadata/amazonbis.py b/src/calibre/ebooks/metadata/amazonbis.py
deleted file mode 100644
index dd973ba3d8..0000000000
--- a/src/calibre/ebooks/metadata/amazonbis.py
+++ /dev/null
@@ -1,653 +0,0 @@
-from __future__ import with_statement
-__license__ = 'GPL 3'
-__copyright__ = '2010, sengian '
-
-import sys, textwrap, re, traceback, socket
-from threading import Thread
-from Queue import Queue
-from urllib import urlencode
-from math import ceil
-
-from lxml.html import soupparser, tostring
-
-from calibre.utils.date import parse_date, utcnow, replace_months
-from calibre.utils.cleantext import clean_ascii_chars
-from calibre.utils.localization import get_lang
-from calibre import browser, preferred_encoding
-from calibre.ebooks.chardet import xml_to_unicode
-from calibre.ebooks.metadata import MetaInformation, check_isbn, \
- authors_to_sort_string
-from calibre.ebooks.metadata.fetch import MetadataSource
-from calibre.utils.config import OptionParser
-from calibre.library.comments import sanitize_comments_html
-
-
-class Amazon(MetadataSource):
-
- name = 'Amazon'
- description = _('Downloads metadata from amazon.com')
- supported_platforms = ['windows', 'osx', 'linux']
- author = 'Kovid Goyal & Sengian'
- version = (1, 0, 0)
- has_html_comments = True
-
- def fetch(self):
- try:
- lang = get_lang()
- lang = lang[:2] if re.match(r'(fr.*|de.*)', lang) else 'all'
- if lang == 'all':
- self.results = search(self.title, self.book_author, self.publisher,
- self.isbn, max_results=5, verbose=self.verbose, lang='all')
- else:
- tmploc = ThreadwithResults(search, self.title, self.book_author,
- self.publisher,self.isbn, max_results=5,
- verbose=self.verbose, lang=lang)
- tmpnoloc = ThreadwithResults(search, self.title, self.book_author,
- self.publisher, self.isbn, max_results=5,
- verbose=self.verbose, lang='all')
- tmploc.start()
- tmpnoloc.start()
- tmploc.join()
- tmpnoloc.join()
- tmploc= tmploc.get_result()
- tmpnoloc= tmpnoloc.get_result()
-
- tempres = None
- if tmpnoloc is not None:
- tempres = tmpnoloc
- if tmploc is not None:
- tempres = tmploc
- if tmpnoloc is not None:
- tempres.extend(tmpnoloc)
- self.results = tmpres
- except Exception, e:
- self.exception = e
- self.tb = traceback.format_exc()
-
-class AmazonSocial(MetadataSource):
-
- name = 'AmazonSocial'
- metadata_type = 'social'
- description = _('Downloads social metadata from amazon.com')
- supported_platforms = ['windows', 'osx', 'linux']
- author = 'Kovid Goyal & Sengian'
- version = (1, 0, 1)
- has_html_comments = True
-
- def fetch(self):
- if not self.isbn:
- return
- try:
- lang = get_lang()
- lang = lang[:2] if re.match(r'(fr.*|de.*)', lang) else 'all'
- if lang == 'all':
- self.results = get_social_metadata(self.title, self.book_author, self.publisher,
- self.isbn, verbose=self.verbose, lang='all')[0]
- else:
- tmploc = ThreadwithResults(get_social_metadata, self.title, self.book_author,
- self.publisher,self.isbn, verbose=self.verbose, lang=lang)
- tmpnoloc = ThreadwithResults(get_social_metadata, self.title, self.book_author,
- self.publisher, self.isbn, verbose=self.verbose, lang='all')
- tmploc.start()
- tmpnoloc.start()
- tmploc.join()
- tmpnoloc.join()
- tmploc= tmploc.get_result()
- if tmploc is not None:
- tmploc = tmploc[0]
- tmpnoloc= tmpnoloc.get_result()
- if tmpnoloc is not None:
- tmpnoloc = tmpnoloc[0]
- if tmpnoloc is not None:
- if tmploc.rating is None:
- tmploc.rating = tmpnoloc.rating
- if tmploc.comments is not None:
- tmploc.comments = tmpnoloc.comments
- if tmploc.tags is None:
- tmploc.tags = tmpnoloc.tags
- self.results = tmploc
- except Exception, e:
- self.exception = e
- self.tb = traceback.format_exc()
-
-
-def report(verbose):
- if verbose:
- traceback.print_exc()
-
-class AmazonError(Exception):
- pass
-
-class ThreadwithResults(Thread):
- def __init__(self, func, *args, **kargs):
- self.func = func
- self.args = args
- self.kargs = kargs
- self.result = None
- Thread.__init__(self)
-
- def get_result(self):
- return self.result
-
- def run(self):
- self.result = self.func(*self.args, **self.kargs)
-
-
-class Query(object):
-
- BASE_URL_ALL = 'http://www.amazon.com'
- BASE_URL_FR = 'http://www.amazon.fr'
- BASE_URL_DE = 'http://www.amazon.de'
-
- def __init__(self, title=None, author=None, publisher=None, isbn=None, keywords=None,
- max_results=20, rlang='all'):
- assert not(title is None and author is None and publisher is None \
- and isbn is None and keywords is None)
- assert (max_results < 21)
-
- self.max_results = int(max_results)
- self.renbres = re.compile(u'\s*([0-9.,]+)\s*')
-
- q = { 'search-alias' : 'stripbooks' ,
- 'unfiltered' : '1',
- 'field-keywords' : '',
- 'field-author' : '',
- 'field-title' : '',
- 'field-isbn' : '',
- 'field-publisher' : ''
- #get to amazon detailed search page to get all options
- # 'node' : '',
- # 'field-binding' : '',
- #before, during, after
- # 'field-dateop' : '',
- #month as number
- # 'field-datemod' : '',
- # 'field-dateyear' : '',
- #french only
- # 'field-collection' : '',
- #many options available
- }
-
- if rlang =='all' or rlang =='en':
- q['sort'] = 'relevanceexprank'
- self.urldata = self.BASE_URL_ALL
- # elif rlang =='es':
- # q['sort'] = 'relevanceexprank'
- # q['field-language'] = 'Spanish'
- # self.urldata = self.BASE_URL_ALL
- # elif rlang =='en':
- # q['sort'] = 'relevanceexprank'
- # q['field-language'] = 'English'
- # self.urldata = self.BASE_URL_ALL
- elif rlang =='fr':
- q['sort'] = 'relevancerank'
- self.urldata = self.BASE_URL_FR
- elif rlang =='de':
- q['sort'] = 'relevancerank'
- self.urldata = self.BASE_URL_DE
- self.baseurl = self.urldata
-
- if title == _('Unknown'):
- title=None
- if author == _('Unknown'):
- author=None
-
- if isbn is not None:
- q['field-isbn'] = isbn.replace('-', '')
- else:
- if title is not None:
- q['field-title'] = title
- if author is not None:
- q['field-author'] = author
- if publisher is not None:
- q['field-publisher'] = publisher
- if keywords is not None:
- q['field-keywords'] = keywords
-
- if isinstance(q, unicode):
- q = q.encode('utf-8')
- self.urldata += '/gp/search/ref=sr_adv_b/?' + urlencode(q)
-
- def __call__(self, browser, verbose, timeout = 5.):
- if verbose:
- print _('Query: %s') % self.urldata
-
- try:
- raw = browser.open_novisit(self.urldata, timeout=timeout).read()
- except Exception, e:
- report(verbose)
- if callable(getattr(e, 'getcode', None)) and \
- e.getcode() == 404:
- return None, self.urldata
- if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
- raise AmazonError(_('Amazon timed out. Try again later.'))
- raise AmazonError(_('Amazon encountered an error.'))
- if '404 - ' in raw:
- return None, self.urldata
- raw = xml_to_unicode(raw, strip_encoding_pats=True,
- resolve_entities=True)[0]
-
- try:
- feed = soupparser.fromstring(raw)
- except:
- try:
- #remove ASCII invalid chars
- return soupparser.fromstring(clean_ascii_chars(raw))
- except:
- return None, self.urldata
-
- #nb of page
- try:
- nbresults = self.renbres.findall(feed.xpath("//*[@class='resultCount']")[0].text)
- nbresults = [re.sub(r'[.,]', '', x) for x in nbresults]
- except:
- return None, self.urldata
-
- pages =[feed]
- if len(nbresults) > 1:
- nbpagetoquery = int(ceil(float(min(int(nbresults[2]), self.max_results))/ int(nbresults[1])))
- for i in xrange(2, nbpagetoquery + 1):
- try:
- urldata = self.urldata + '&page=' + str(i)
- raw = browser.open_novisit(urldata, timeout=timeout).read()
- except Exception, e:
- continue
- if '404 - ' in raw:
- continue
- raw = xml_to_unicode(raw, strip_encoding_pats=True,
- resolve_entities=True)[0]
- try:
- feed = soupparser.fromstring(raw)
- except:
- try:
- #remove ASCII invalid chars
- return soupparser.fromstring(clean_ascii_chars(raw))
- except:
- continue
- pages.append(feed)
-
- results = []
- for x in pages:
- results.extend([i.getparent().get('href') \
- for i in x.xpath("//a/span[@class='srTitle']")])
- return results[:self.max_results], self.baseurl
-
-class ResultList(object):
-
- def __init__(self, baseurl, lang = 'all'):
- self.baseurl = baseurl
- self.lang = lang
- self.thread = []
- self.res = []
- self.nbtag = 0
- self.repub = re.compile(u'\((.*)\)')
- self.rerat = re.compile(u'([0-9.]+)')
- self.reattr = re.compile(r'<([a-zA-Z0-9]+)\s[^>]+>')
-        self.reoutp = re.compile(r'(?s)--This text ref.*?</div>')
-        self.recom = re.compile(r'(?s)<!--.*?-->')
- self.republi = re.compile(u'(Editeur|Publisher|Verlag)', re.I)
- self.reisbn = re.compile(u'(ISBN-10|ISBN-10|ASIN)', re.I)
- self.relang = re.compile(u'(Language|Langue|Sprache)', re.I)
- self.reratelt = re.compile(u'(Average\s*Customer\s*Review|Moyenne\s*des\s*commentaires\s*client|Durchschnittliche\s*Kundenbewertung)', re.I)
- self.reprod = re.compile(u'(Product\s*Details|D.tails\s*sur\s*le\s*produit|Produktinformation)', re.I)
-
- def strip_tags_etree(self, etreeobj, invalid_tags):
- for (itag, rmv) in invalid_tags.iteritems():
- if rmv:
- for elts in etreeobj.getiterator(itag):
- elts.drop_tree()
- else:
- for elts in etreeobj.getiterator(itag):
- elts.drop_tag()
-
- def clean_entry(self, entry, invalid_tags = {'script': True},
- invalid_id = (), invalid_class=()):
- #invalid_tags: remove tag and keep content if False else remove
- #remove tags
- if invalid_tags:
- self.strip_tags_etree(entry, invalid_tags)
- #remove id
- if invalid_id:
- for eltid in invalid_id:
- elt = entry.get_element_by_id(eltid)
- if elt is not None:
- elt.drop_tree()
- #remove class
- if invalid_class:
- for eltclass in invalid_class:
- elts = entry.find_class(eltclass)
- if elts is not None:
- for elt in elts:
- elt.drop_tree()
-
- def get_title(self, entry):
- title = entry.get_element_by_id('btAsinTitle')
- if title is not None:
- title = title.text
- return unicode(title.replace('\n', '').strip())
-
- def get_authors(self, entry):
- author = entry.get_element_by_id('btAsinTitle')
- while author.getparent().tag != 'div':
- author = author.getparent()
- author = author.getparent()
- authortext = []
- for x in author.getiterator('a'):
- authortext.append(unicode(x.text_content().strip()))
- return authortext
-
- def get_description(self, entry, verbose):
- try:
- description = entry.get_element_by_id("productDescription").find("div[@class='content']")
- inv_class = ('seeAll', 'emptyClear')
- inv_tags ={'img': True, 'a': False}
- self.clean_entry(description, invalid_tags=inv_tags, invalid_class=inv_class)
- description = tostring(description, method='html', encoding=unicode).strip()
- # remove all attributes from tags
- description = self.reattr.sub(r'<\1>', description)
- # Remove the notice about text referring to out of print editions
- description = self.reoutp.sub('', description)
- # Remove comments
- description = self.recom.sub('', description)
- return unicode(sanitize_comments_html(description))
- except:
- report(verbose)
- return None
-
- def get_tags(self, entry, verbose):
- try:
- tags = entry.get_element_by_id('tagContentHolder')
- testptag = tags.find_class('see-all')
- if testptag:
- for x in testptag:
- alink = x.xpath('descendant-or-self::a')
- if alink:
- if alink[0].get('class') == 'tgJsActive':
- continue
- return self.baseurl + alink[0].get('href'), True
- tags = [a.text for a in tags.getiterator('a') if a.get('rel') == 'tag']
- except:
- report(verbose)
- tags = [], False
- return tags, False
-
- def get_book_info(self, entry, mi, verbose):
- try:
- entry = entry.get_element_by_id('SalesRank').getparent()
- except:
- try:
- for z in entry.getiterator('h2'):
- if self.reprod.search(z.text_content()):
- entry = z.getparent().find("div[@class='content']/ul")
- break
- except:
- report(verbose)
- return mi
- elts = entry.findall('li')
- #pub & date
- elt = filter(lambda x: self.republi.search(x.find('b').text), elts)
- if elt:
- pub = elt[0].find('b').tail
- mi.publisher = unicode(self.repub.sub('', pub).strip())
- d = self.repub.search(pub)
- if d is not None:
- d = d.group(1)
- try:
- default = utcnow().replace(day=15)
- if self.lang != 'all':
- d = replace_months(d, self.lang)
- d = parse_date(d, assume_utc=True, default=default)
- mi.pubdate = d
- except:
- report(verbose)
- #ISBN
- elt = filter(lambda x: self.reisbn.search(x.find('b').text), elts)
- if elt:
- isbn = elt[0].find('b').tail.replace('-', '').strip()
- if check_isbn(isbn):
- mi.isbn = unicode(isbn)
- elif len(elt) > 1:
- isbnone = elt[1].find('b').tail.replace('-', '').strip()
- if check_isbn(isbnone):
- mi.isbn = unicode(isbnone)
- else:
- #assume ASIN-> find a check for asin
- mi.isbn = unicode(isbn)
- #Langue
- elt = filter(lambda x: self.relang.search(x.find('b').text), elts)
- if elt:
- langue = elt[0].find('b').tail.strip()
- if langue:
- mi.language = unicode(langue)
- #ratings
- elt = filter(lambda x: self.reratelt.search(x.find('b').text), elts)
- if elt:
- ratings = elt[0].find_class('swSprite')
- if ratings:
- ratings = self.rerat.findall(ratings[0].get('title'))
- if len(ratings) == 2:
- mi.rating = float(ratings[0])/float(ratings[1]) * 5
- return mi
-
- def fill_MI(self, entry, verbose):
- try:
- title = self.get_title(entry)
- authors = self.get_authors(entry)
- except Exception, e:
- if verbose:
- print _('Failed to get all details for an entry')
- print e
- print _('URL who failed: %s') % x
- report(verbose)
- return None
- mi = MetaInformation(title, authors)
- mi.author_sort = authors_to_sort_string(authors)
- try:
- mi.comments = self.get_description(entry, verbose)
- mi = self.get_book_info(entry, mi, verbose)
- except:
- pass
- return mi
-
- def get_individual_metadata(self, url, br, verbose):
- try:
- raw = br.open_novisit(url).read()
- except Exception, e:
- report(verbose)
- if callable(getattr(e, 'getcode', None)) and \
- e.getcode() == 404:
- return None
- if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
- raise AmazonError(_('Amazon timed out. Try again later.'))
- raise AmazonError(_('Amazon encountered an error.'))
- if '404 - ' in raw:
- report(verbose)
- return None
- raw = xml_to_unicode(raw, strip_encoding_pats=True,
- resolve_entities=True)[0]
- try:
- return soupparser.fromstring(raw)
- except:
- try:
- #remove ASCII invalid chars
- return soupparser.fromstring(clean_ascii_chars(raw))
- except:
- report(verbose)
- return None
-
- def fetchdatathread(self, qbr, qsync, nb, url, verbose):
- try:
- browser = qbr.get(True)
- entry = self.get_individual_metadata(url, browser, verbose)
- except:
- report(verbose)
- entry = None
- finally:
- qbr.put(browser, True)
- qsync.put(nb, True)
- return entry
-
- def producer(self, sync, urls, br, verbose=False):
- for i in xrange(len(urls)):
- thread = ThreadwithResults(self.fetchdatathread, br, sync,
- i, urls[i], verbose)
- thread.start()
- self.thread.append(thread)
-
- def consumer(self, sync, syncbis, br, total_entries, verbose=False):
- i=0
- while i < total_entries:
- nb = int(sync.get(True))
- self.thread[nb].join()
- entry = self.thread[nb].get_result()
- i+=1
- if entry is not None:
- mi = self.fill_MI(entry, verbose)
- if mi is not None:
- mi.tags, atag = self.get_tags(entry, verbose)
- self.res[nb] = mi
- if atag:
- threadbis = ThreadwithResults(self.fetchdatathread,
- br, syncbis, nb, mi.tags, verbose)
- self.thread[nb] = threadbis
- self.nbtag +=1
- threadbis.start()
-
- def populate(self, entries, ibr, verbose=False, brcall=3):
- br = Queue(brcall)
- cbr = Queue(brcall-1)
-
- syncp = Queue(1)
- syncc = Queue(len(entries))
-
- for i in xrange(brcall-1):
- br.put(browser(), True)
- cbr.put(browser(), True)
- br.put(ibr, True)
-
- self.res = [None]*len(entries)
-
- prod_thread = Thread(target=self.producer, args=(syncp, entries, br, verbose))
- cons_thread = Thread(target=self.consumer, args=(syncp, syncc, cbr, len(entries), verbose))
- prod_thread.start()
- cons_thread.start()
- prod_thread.join()
- cons_thread.join()
-
- #finish processing
- for i in xrange(self.nbtag):
- nb = int(syncc.get(True))
- tags = self.thread[nb].get_result()
- if tags is not None:
- self.res[nb].tags = self.get_tags(tags, verbose)[0]
- return self.res
-
-
-def search(title=None, author=None, publisher=None, isbn=None,
- max_results=5, verbose=False, keywords=None, lang='all'):
- br = browser()
- entries, baseurl = Query(title=title, author=author, isbn=isbn, publisher=publisher,
- keywords=keywords, max_results=max_results,rlang=lang)(br, verbose)
-
- if entries is None or len(entries) == 0:
- return None
-
- #List of entry
- ans = ResultList(baseurl, lang)
- return [x for x in ans.populate(entries, br, verbose) if x is not None]
-
-def get_social_metadata(title, authors, publisher, isbn, verbose=False,
- max_results=1, lang='all'):
- mi = MetaInformation(title, authors)
- if not isbn or not check_isbn(isbn):
- return [mi]
-
- amazresults = search(isbn=isbn, verbose=verbose,
- max_results=max_results, lang=lang)
- if amazresults is None or amazresults[0] is None:
- from calibre.ebooks.metadata.xisbn import xisbn
- for i in xisbn.get_associated_isbns(isbn):
- amazresults = search(isbn=i, verbose=verbose,
- max_results=max_results, lang=lang)
- if amazresults is not None and amazresults[0] is not None:
- break
- if amazresults is None or amazresults[0] is None:
- return [mi]
-
- miaz = amazresults[0]
- if miaz.rating is not None:
- mi.rating = miaz.rating
- if miaz.comments is not None:
- mi.comments = miaz.comments
- if miaz.tags is not None:
- mi.tags = miaz.tags
- return [mi]
-
-def option_parser():
- parser = OptionParser(textwrap.dedent(\
- _('''\
- %prog [options]
-
- Fetch book metadata from Amazon. You must specify one of title, author,
- ISBN, publisher or keywords. Will fetch a maximum of 20 matches,
- so you should make your query as specific as possible.
- You can chose the language for metadata retrieval:
- english & french & german
- '''
- )))
- parser.add_option('-t', '--title', help=_('Book title'))
- parser.add_option('-a', '--author', help=_('Book author(s)'))
- parser.add_option('-p', '--publisher', help=_('Book publisher'))
- parser.add_option('-i', '--isbn', help=_('Book ISBN'))
- parser.add_option('-k', '--keywords', help=_('Keywords'))
- parser.add_option('-s', '--social', default=0, action='count',
- help=_('Get social data only'))
- parser.add_option('-m', '--max-results', default=10,
- help=_('Maximum number of results to fetch'))
- parser.add_option('-l', '--lang', default='all',
- help=_('Chosen language for metadata search (en, fr, de)'))
- parser.add_option('-v', '--verbose', default=0, action='count',
- help=_('Be more verbose about errors'))
- return parser
-
-def main(args=sys.argv):
- parser = option_parser()
- opts, args = parser.parse_args(args)
- try:
- if opts.social:
- results = get_social_metadata(opts.title, opts.author,
- opts.publisher, opts.isbn, verbose=opts.verbose, lang=opts.lang)
- else:
- results = search(opts.title, opts.author, isbn=opts.isbn,
- publisher=opts.publisher, keywords=opts.keywords, verbose=opts.verbose,
- max_results=opts.max_results, lang=opts.lang)
- except AssertionError:
- report(True)
- parser.print_help()
- return 1
- if results is None and len(results) == 0:
- print _('No result found for this search!')
- return 0
- for result in results:
- print unicode(result).encode(preferred_encoding, 'replace')
- print
-
- #test social
- # '''Test xisbn'''
- # print get_social_metadata('Learning Python', None, None, '8324616489')[0]
- # print
- # '''Test sophisticated comment formatting'''
- # print get_social_metadata('Angels & Demons', None, None, '9781416580829')[0]
- # print
- # '''Random tests'''
- # print get_social_metadata('Star Trek: Destiny: Mere Mortals', None, None, '9781416551720')[0]
- # print
- # print get_social_metadata('The Great Gatsby', None, None, '0743273567')[0]
-
-if __name__ == '__main__':
- sys.exit(main())
- # import cProfile
- # sys.exit(cProfile.run("import calibre.ebooks.metadata.amazonbis; calibre.ebooks.metadata.amazonbis.main()"))
- # sys.exit(cProfile.run("import calibre.ebooks.metadata.amazonbis; calibre.ebooks.metadata.amazonbis.main()", "profile_tmp_2"))
-
-# calibre-debug -e "H:\Mes eBooks\Developpement\calibre\src\calibre\ebooks\metadata\amazonbis.py" -m 5 -a gore -v>data.html
\ No newline at end of file
From a54cbc1a91ea517d3457857d23691bd5d971c8f4 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Mon, 13 Dec 2010 00:50:53 +0100
Subject: [PATCH 062/163] First draft of the Google Books refactoring: add
threading
---
src/calibre/ebooks/metadata/google_books.py | 243 +++++++++++++++-----
1 file changed, 190 insertions(+), 53 deletions(-)
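
The heart of this refactoring replaces the old (entries, new_start) round-trip between search() and Query with a single Query.__call__ that keeps requesting the feed at an increasing start-index until it has min(total, max_results) entries. A simplified sketch of that loop, with the network fetch replaced by a fake feed so it runs offline (the real code goes through brcall() and lxml; FAKE_TOTAL and PAGE_SIZE are invented numbers):

FAKE_TOTAL = 95          # pretend the first feed reports 95 hits
PAGE_SIZE = 40           # the feed returns at most 40 entries per request

def fake_feed(start_index):
    # stand-in for brcall(): returns the entry ids of one feed page
    last = min(start_index + PAGE_SIZE - 1, FAKE_TOTAL)
    return list(range(start_index, last + 1))

def collect(max_results=100, start_index=1):
    entries = fake_feed(start_index)
    wanted = FAKE_TOTAL if FAKE_TOTAL < max_results else max_results
    while len(entries) < wanted:
        more = fake_feed(start_index + len(entries))
        if not more:
            break
        entries.extend(more)
    return entries[:wanted]

print len(collect())      # -> 95: three feed requests (40 + 40 + 15)
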
diff --git a/src/calibre/ebooks/metadata/google_books.py b/src/calibre/ebooks/metadata/google_books.py
index 2087b7c489..12d92ca5ae 100644
--- a/src/calibre/ebooks/metadata/google_books.py
+++ b/src/calibre/ebooks/metadata/google_books.py
@@ -3,7 +3,9 @@ __license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal '
__docformat__ = 'restructuredtext en'
-import sys, textwrap
+import sys, textwrap, traceback, socket
+from threading import Thread
+from Queue import Queue
from urllib import urlencode
from functools import partial
@@ -11,8 +13,10 @@ from lxml import etree
from calibre import browser, preferred_encoding
from calibre.ebooks.metadata import MetaInformation
+from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.config import OptionParser
from calibre.utils.date import parse_date, utcnow
+from calibre.utils.cleantext import clean_ascii_chars
NAMESPACES = {
'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',
@@ -35,9 +39,25 @@ subject = XPath('descendant::dc:subject')
description = XPath('descendant::dc:description')
language = XPath('descendant::dc:language')
+class GoogleBooksError(Exception):
+ pass
+
+class ThreadwithResults(Thread):
+ def __init__(self, func, *args, **kargs):
+ self.func = func
+ self.args = args
+ self.kargs = kargs
+ self.result = None
+ Thread.__init__(self)
+
+ def get_result(self):
+ return self.result
+
+ def run(self):
+ self.result = self.func(*self.args, **self.kargs)
+
def report(verbose):
if verbose:
- import traceback
traceback.print_exc()
@@ -46,48 +66,93 @@ class Query(object):
BASE_URL = 'http://books.google.com/books/feeds/volumes?'
def __init__(self, title=None, author=None, publisher=None, isbn=None,
- max_results=20, min_viewability='none', start_index=1):
+ max_results=40, min_viewability='none', start_index=1):
assert not(title is None and author is None and publisher is None and \
isbn is None)
- assert (max_results < 21)
+ assert (max_results < 41)
assert (min_viewability in ('none', 'partial', 'full'))
- q = ''
+ if title == _('Unknown'):
+ title=None
+ if author == _('Unknown'):
+ author=None
+ self.sindex = str(start_index)
+ self.maxresults = int(max_results)
+
+ q = []
if isbn is not None:
- q += 'isbn:'+isbn
+ q.append(('isbn:%s') % (isbn,))
else:
def build_term(prefix, parts):
- return ' '.join('in'+prefix + ':' + x for x in parts)
+ return ' '.join(('in%s:%s') % (prefix, x) for x in parts)
if title is not None:
- q += build_term('title', title.split())
+ q.append(build_term('title', title.split()))
if author is not None:
- q += ('+' if q else '')+build_term('author', author.split())
+ q.append(build_term('author', author.split()))
if publisher is not None:
- q += ('+' if q else '')+build_term('publisher', publisher.split())
-
+ q.append(build_term('publisher', publisher.split()))
+ q='+'.join(q)
+
if isinstance(q, unicode):
q = q.encode('utf-8')
- self.url = self.BASE_URL+urlencode({
+ self.urlbase = self.BASE_URL+urlencode({
'q':q,
'max-results':max_results,
- 'start-index':start_index,
'min-viewability':min_viewability,
- })
+ })+'&start-index='
- def __call__(self, browser, verbose):
+ def brcall(self, browser, url, verbose, timeout):
if verbose:
- print 'Query:', self.url
- feed = etree.fromstring(browser.open(self.url).read())
- #print etree.tostring(feed, pretty_print=True)
+ print _('Query: %s') % url
+
+ try:
+ raw = browser.open_novisit(url, timeout=timeout).read()
+ except Exception, e:
+ report(verbose)
+ if callable(getattr(e, 'getcode', None)) and \
+ e.getcode() == 404:
+ return None
+ attr = getattr(e, 'args', [None])
+ attr = attr if attr else [None]
+ if isinstance(attr[0], socket.timeout):
+ raise GoogleBooksError(_('GoogleBooks timed out. Try again later.'))
+ raise GoogleBooksError(_('GoogleBooks encountered an error.'))
+ if '404 - ' in raw:
+ return None
+ raw = xml_to_unicode(raw, strip_encoding_pats=True,
+ resolve_entities=True)[0]
+ try:
+ return etree.fromstring(raw)
+ except:
+ try:
+ #remove ASCII invalid chars (normally not needed)
+ return etree.fromstring(clean_ascii_chars(raw))
+ except:
+ return None
+
+ def __call__(self, browser, verbose, timeout = 5.):
+ #get a feed
+ url = self.urlbase+self.sindex
+ feed = self.brcall(browser, url, verbose, timeout)
+ if feed is None:
+ return None
+
+ # print etree.tostring(feed, pretty_print=True)
total = int(total_results(feed)[0].text)
+        nbresultstoget = total if total<self.maxresults else self.maxresults
        start = int(start_index(feed)[0].text)
        entries = entry(feed)
-        new_start = start + len(entries)
-        if new_start > total:
- new_start = 0
- return entries, new_start
-
+ while len(entries) < nbresultstoget:
+ url = self.urlbase+str(start+len(entries))
+ feed = self.brcall(browser, url, verbose, timeout)
+ if feed is None:
+ break
+ entries.extend(entry(feed))
+ return entries
class ResultList(list):
+ def __init__(self):
+ self.thread = []
def get_description(self, entry, verbose):
try:
@@ -164,44 +229,114 @@ class ResultList(list):
d = None
return d
- def populate(self, entries, browser, verbose=False):
- for x in entries:
+ def fill_MI(self, entry, data, verbose):
+ x = entry
+ try:
+ title = self.get_title(entry)
+ x = entry(data)[0]
+ except Exception, e:
+ if verbose:
+ print _('Failed to get all details for an entry')
+ print e
+ authors = self.get_authors(x)
+ mi = MetaInformation(title, authors)
+ mi.author_sort = self.get_author_sort(x, verbose)
+ mi.comments = self.get_description(x, verbose)
+ self.get_identifiers(x, mi)
+ mi.tags = self.get_tags(x, verbose)
+ mi.publisher = self.get_publisher(x, verbose)
+ mi.pubdate = self.get_date(x, verbose)
+ mi.language = self.get_language(x, verbose)
+ return mi
+
+ def get_individual_metadata(self, url, br, verbose):
+ if url is None:
+ return None
+ try:
+ raw = br.open_novisit(url).read()
+ except Exception, e:
+ report(verbose)
+ if callable(getattr(e, 'getcode', None)) and \
+ e.getcode() == 404:
+ return None
+ attr = getattr(e, 'args', [None])
+ attr = attr if attr else [None]
+ if isinstance(attr[0], socket.timeout):
+ raise GoogleBooksError(_('GoogleBooks timed out. Try again later.'))
+ raise GoogleBooksError(_('GoogleBooks encountered an error.'))
+ if '404 - ' in raw:
+ report(verbose)
+ return None
+ raw = xml_to_unicode(raw, strip_encoding_pats=True,
+ resolve_entities=True)[0]
+ try:
+ return etree.fromstring(raw)
+ except:
try:
- id_url = entry_id(x)[0].text
- title = self.get_title(x)
+ #remove ASCII invalid chars
+ return etree.fromstring(clean_ascii_chars(raw))
except:
report(verbose)
- mi = MetaInformation(title, self.get_authors(x))
+ return None
+
+ def fetchdatathread(self, qbr, qsync, nb, url, verbose):
+ try:
+ browser = qbr.get(True)
+ entry = self.get_individual_metadata(url, browser, verbose)
+ except:
+ report(verbose)
+ entry = None
+ finally:
+ qbr.put(browser, True)
+ qsync.put(nb, True)
+ return entry
+
+ def producer(self, sync, entries, br, verbose=False):
+ for i in xrange(len(entries)):
try:
- raw = browser.open(id_url).read()
- feed = etree.fromstring(raw)
- x = entry(feed)[0]
- except Exception, e:
- if verbose:
- print 'Failed to get all details for an entry'
- print e
- mi.author_sort = self.get_author_sort(x, verbose)
- mi.comments = self.get_description(x, verbose)
- self.get_identifiers(x, mi)
- mi.tags = self.get_tags(x, verbose)
- mi.publisher = self.get_publisher(x, verbose)
- mi.pubdate = self.get_date(x, verbose)
- mi.language = self.get_language(x, verbose)
- self.append(mi)
+ id_url = entry_id(entries[i])[0].text
+ except:
+ id_url = None
+ report(verbose)
+ thread = ThreadwithResults(self.fetchdatathread, br, sync,
+ i, id_url, verbose)
+ thread.start()
+ self.thread.append(thread)
+
+ def consumer(self, entries, sync, total_entries, verbose=False):
+ res=[None]*total_entries #remove?
+ i=0
+ while i < total_entries:
+ nb = int(sync.get(True))
+ self.thread[nb].join()
+ data = self.thread[nb].get_result()
+ res[nb] = self.fill_MI(entries[nb], data, verbose)
+ i+=1
+ return res
+
+ def populate(self, entries, br, verbose=False, brcall=3):
+ #multiple entries
+ pbr = Queue(brcall)
+ sync = Queue(1)
+ for i in xrange(brcall-1):
+ pbr.put(browser(), True)
+ pbr.put(br, True)
+
+ prod_thread = Thread(target=self.producer, args=(sync, entries, pbr, verbose))
+ cons_thread = ThreadwithResults(self.consumer, entries, sync, len(entries), verbose)
+ prod_thread.start()
+ cons_thread.start()
+ prod_thread.join()
+ cons_thread.join()
+ self.extend(cons_thread.get_result())
def search(title=None, author=None, publisher=None, isbn=None,
min_viewability='none', verbose=False, max_results=40):
br = browser()
- start, entries = 1, []
- while start > 0 and len(entries) <= max_results:
- new, start = Query(title=title, author=author, publisher=publisher,
- isbn=isbn, min_viewability=min_viewability)(br, verbose)
- if not new:
- break
- entries.extend(new)
-
- entries = entries[:max_results]
+ entries = Query(title=title, author=author, publisher=publisher,
+ isbn=isbn, max_results=max_results,
+ min_viewability=min_viewability)(br, verbose)
ans = ResultList()
ans.populate(entries, br, verbose)
@@ -214,7 +349,7 @@ def option_parser():
Fetch book metadata from Google. You must specify one of title, author,
publisher or ISBN. If you specify ISBN the others are ignored. Will
- fetch a maximum of 100 matches, so you should make your query as
+ fetch a maximum of 20 matches, so you should make your query as
specific as possible.
'''
))
@@ -244,3 +379,5 @@ def main(args=sys.argv):
if __name__ == '__main__':
sys.exit(main())
+
+# C:\Users\Pierre>calibre-debug -e "H:\Mes eBooks\Developpement\calibre\src\calibre\ebooks\metadata\google_books.py" -m 5 -a gore -v>data.html
\ No newline at end of file
From aa7630f392aa05ec97bfe525b644c78417817cc9 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Mon, 13 Dec 2010 08:59:20 +0100
Subject: [PATCH 063/163] Finish adding threading to google_books & minor
changes
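
The per-entry downloads are now issued from a producer thread and collected by a consumer, with a small pool of browser objects recycled through a Queue. The sketch below is only an illustration of that pattern, not the plugin code: fetch_one and make_mi are hypothetical stand-ins for get_individual_metadata and fill_MI, and a plain shared list replaces the branch's ThreadwithResults helper.

    from threading import Thread
    from Queue import Queue

    def populate_sketch(urls, fetch_one, make_mi, pool_size=3):
        browsers = Queue(pool_size)
        for _ in xrange(pool_size):
            browsers.put(object())             # stand-in for calibre's browser()
        sync = Queue(1)                        # workers announce completion here
        raw = [None]*len(urls)

        def worker(nb, url):
            br = browsers.get(True)
            try:
                raw[nb] = fetch_one(br, url)   # network fetch, may return None
            finally:
                browsers.put(br, True)         # recycle the browser
                sync.put(nb, True)             # report which slot finished

        threads = [Thread(target=worker, args=(i, u)) for i, u in enumerate(urls)]
        for t in threads:
            t.start()
        results = [None]*len(urls)
        for _ in urls:
            nb = sync.get(True)                # completions arrive in any order
            threads[nb].join()
            results[nb] = make_mi(raw[nb])     # build the MetaInformation
        return results
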
---
src/calibre/customize/builtins.py | 3 +-
src/calibre/ebooks/metadata/fetch.py | 24 +++---
src/calibre/ebooks/metadata/google_books.py | 92 ++++++++++++---------
3 files changed, 69 insertions(+), 50 deletions(-)
diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index 342d0e8456..9e34d33941 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -480,8 +480,9 @@ from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, KOGAN, \
from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
from calibre.devices.kobo.driver import KOBO
-from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, LibraryThing
+from calibre.ebooks.metadata.fetch import ISBNDB, LibraryThing
from calibre.ebooks.metadata.douban import DoubanBooks
+from calibre.ebooks.metadata.google_books import GoogleBooks
from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers
from calibre.ebooks.metadata.amazon import Amazon, AmazonSocial
from calibre.ebooks.metadata.fictionwise import Fictionwise
diff --git a/src/calibre/ebooks/metadata/fetch.py b/src/calibre/ebooks/metadata/fetch.py
index f1bf88da84..d6494de54d 100644
--- a/src/calibre/ebooks/metadata/fetch.py
+++ b/src/calibre/ebooks/metadata/fetch.py
@@ -172,20 +172,20 @@ class MetadataSource(Plugin): # {{{
# }}}
-class GoogleBooks(MetadataSource): # {{{
+# class GoogleBooks(MetadataSource): # {{{
- name = 'Google Books'
- description = _('Downloads metadata from Google Books')
+ # name = 'Google Books'
+ # description = _('Downloads metadata from Google Books')
- def fetch(self):
- from calibre.ebooks.metadata.google_books import search
- try:
- self.results = search(self.title, self.book_author, self.publisher,
- self.isbn, max_results=10,
- verbose=self.verbose)
- except Exception, e:
- self.exception = e
- self.tb = traceback.format_exc()
+ # def fetch(self):
+ # from calibre.ebooks.metadata.google_books import search
+ # try:
+ # self.results = search(self.title, self.book_author, self.publisher,
+ # self.isbn, max_results=10,
+ # verbose=self.verbose)
+ # except Exception, e:
+ # self.exception = e
+ # self.tb = traceback.format_exc()
# }}}
diff --git a/src/calibre/ebooks/metadata/google_books.py b/src/calibre/ebooks/metadata/google_books.py
index 12d92ca5ae..1eb5d11441 100644
--- a/src/calibre/ebooks/metadata/google_books.py
+++ b/src/calibre/ebooks/metadata/google_books.py
@@ -12,7 +12,9 @@ from functools import partial
from lxml import etree
from calibre import browser, preferred_encoding
-from calibre.ebooks.metadata import MetaInformation
+from calibre.ebooks.metadata import MetaInformation, check_isbn, \
+ authors_to_sort_string
+from calibre.ebooks.metadata.fetch import MetadataSource
from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.config import OptionParser
from calibre.utils.date import parse_date, utcnow
@@ -39,6 +41,22 @@ subject = XPath('descendant::dc:subject')
description = XPath('descendant::dc:description')
language = XPath('descendant::dc:language')
+
+class GoogleBooks(MetadataSource):
+
+ name = 'Google Books'
+ description = _('Downloads metadata from Google Books')
+ version = (1, 0, 1)
+
+ def fetch(self):
+ try:
+ self.results = search(self.title, self.book_author, self.publisher,
+ self.isbn, max_results=10, verbose=self.verbose)
+ except Exception, e:
+ self.exception = e
+ self.tb = traceback.format_exc()
+
+
class GoogleBooksError(Exception):
pass
@@ -158,7 +176,7 @@ class ResultList(list):
try:
desc = description(entry)
if desc:
- return 'SUMMARY:\n'+desc[0].text
+ return _('SUMMARY:\n %s') % desc[0].text
except:
report(verbose)
@@ -171,29 +189,27 @@ class ResultList(list):
report(verbose)
def get_title(self, entry):
- candidates = [x.text for x in title(entry)]
- return ': '.join(candidates)
+ return ': '.join([x.text for x in title(entry)])
def get_authors(self, entry):
m = creator(entry)
- if not m:
- m = []
- m = [x.text for x in m]
- return m
+ return [x.text for x in m] if m else []
def get_author_sort(self, entry, verbose):
for x in creator(entry):
- for key, val in x.attrib.items():
+ for key, val in x.attrib.iteritems():
if key.endswith('file-as'):
return val
def get_identifiers(self, entry, mi):
- isbns = []
- for x in identifier(entry):
- t = str(x.text).strip()
- if t[:5].upper() in ('ISBN:', 'LCCN:', 'OCLC:'):
- if t[:5].upper() == 'ISBN:':
- isbns.append(t[5:])
+ isbns = [str(x.text).strip() for x in identifier(entry)]
+ isbns = [t[5:] for t in isbns \
+ if t[:5].upper() == 'ISBN:' and check_isbn(t[5:])]
+ # for x in identifier(entry):
+ # t = str(x.text).strip()
+ # if t[:5].upper() in ('ISBN:', 'LCCN:', 'OCLC:'):
+ # if t[:5].upper() == 'ISBN:':
+ # isbns.append(t[5:])
if isbns:
mi.isbn = sorted(isbns, cmp=lambda x,y:cmp(len(x), len(y)))[-1]
@@ -211,28 +227,26 @@ class ResultList(list):
def get_publisher(self, entry, verbose):
try:
- pub = publisher(entry)[0].text
+ return publisher(entry)[0].text
except:
- pub = None
- return pub
+ return None
def get_date(self, entry, verbose):
try:
d = date(entry)
if d:
default = utcnow().replace(day=15)
- d = parse_date(d[0].text, assume_utc=True, default=default)
+ return parse_date(d[0].text, assume_utc=True, default=default)
else:
- d = None
+ return None
except:
report(verbose)
- d = None
- return d
+ return None
- def fill_MI(self, entry, data, verbose):
- x = entry
+ def fill_MI(self, ent, data, verbose):
+ x = ent
try:
- title = self.get_title(entry)
+ title = self.get_title(x)
x = entry(data)[0]
except Exception, e:
if verbose:
@@ -240,7 +254,9 @@ class ResultList(list):
print e
authors = self.get_authors(x)
mi = MetaInformation(title, authors)
- mi.author_sort = self.get_author_sort(x, verbose)
+ tmpautsort = self.get_author_sort(x, verbose)
+ mi.author_sort = tmpautsort if tmpautsort \
+ else authors_to_sort_string(authors)
mi.comments = self.get_description(x, verbose)
self.get_identifiers(x, mi)
mi.tags = self.get_tags(x, verbose)
@@ -315,7 +331,6 @@ class ResultList(list):
return res
def populate(self, entries, br, verbose=False, brcall=3):
- #multiple entries
pbr = Queue(brcall)
sync = Queue(1)
for i in xrange(brcall-1):
@@ -344,23 +359,23 @@ def search(title=None, author=None, publisher=None, isbn=None,
def option_parser():
parser = OptionParser(textwrap.dedent(
- '''\
+ _('''\
%prog [options]
Fetch book metadata from Google. You must specify one of title, author,
publisher or ISBN. If you specify ISBN the others are ignored. Will
- fetch a maximum of 20 matches, so you should make your query as
+ fetch a maximum of 40 matches, so you should make your query as
specific as possible.
'''
- ))
- parser.add_option('-t', '--title', help='Book title')
- parser.add_option('-a', '--author', help='Book author(s)')
- parser.add_option('-p', '--publisher', help='Book publisher')
- parser.add_option('-i', '--isbn', help='Book ISBN')
+ )))
+ parser.add_option('-t', '--title', help=_('Book title'))
+ parser.add_option('-a', '--author', help=_('Book author(s)'))
+ parser.add_option('-p', '--publisher', help=_('Book publisher'))
+ parser.add_option('-i', '--isbn', help=_('Book ISBN'))
parser.add_option('-m', '--max-results', default=10,
- help='Maximum number of results to fetch')
+ help=_('Maximum number of results to fetch'))
parser.add_option('-v', '--verbose', default=0, action='count',
- help='Be more verbose about errors')
+ help=_('Be more verbose about errors'))
return parser
def main(args=sys.argv):
@@ -373,6 +388,9 @@ def main(args=sys.argv):
report(True)
parser.print_help()
return 1
+ if results is None or len(results) == 0:
+ print _('No result found for this search!')
+ return 0
for result in results:
print unicode(result).encode(preferred_encoding)
print
@@ -380,4 +398,4 @@ def main(args=sys.argv):
if __name__ == '__main__':
sys.exit(main())
-# C:\Users\Pierre>calibre-debug -e "H:\Mes eBooks\Developpement\calibre\src\calibre\ebooks\metadata\google_books.py" -m 5 -a gore -v>data.html
\ No newline at end of file
+# calibre-debug -e "H:\Mes eBooks\Developpement\calibre\src\calibre\ebooks\metadata\google_books.py" -m 5 -a gore -v>data.html
\ No newline at end of file
From 6ca1bf64efffd353f9e934bf9649b0f3e92b75bd Mon Sep 17 00:00:00 2001
From: Sengian
Date: Mon, 13 Dec 2010 20:15:28 +0100
Subject: [PATCH 064/163] import modifications
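
The churn here is mostly moving seldom-used stdlib imports (traceback, socket, textwrap) down into the functions that need them, so importing the metadata plugins stays cheap. A minimal sketch of the idiom, with open_url standing in for the real browser call and a generic RuntimeError where the plugins raise their own error classes:

    def report(verbose):
        if verbose:
            import traceback              # loaded only when error reporting is on
            traceback.print_exc()

    def fetch(url, open_url, verbose=False):
        try:
            return open_url(url)
        except Exception, e:
            import socket                 # only needed to classify the failure
            report(verbose)
            args = getattr(e, 'args', None) or [None]
            if isinstance(args[0], socket.timeout):
                raise RuntimeError('request timed out, try again later')
            raise
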
---
src/calibre/ebooks/metadata/fetch.py | 38 ---------------------
src/calibre/ebooks/metadata/fictionwise.py | 9 +++--
src/calibre/ebooks/metadata/google_books.py | 13 +++++--
3 files changed, 18 insertions(+), 42 deletions(-)
diff --git a/src/calibre/ebooks/metadata/fetch.py b/src/calibre/ebooks/metadata/fetch.py
index d6494de54d..0c607b9bb7 100644
--- a/src/calibre/ebooks/metadata/fetch.py
+++ b/src/calibre/ebooks/metadata/fetch.py
@@ -172,23 +172,6 @@ class MetadataSource(Plugin): # {{{
# }}}
-# class GoogleBooks(MetadataSource): # {{{
-
- # name = 'Google Books'
- # description = _('Downloads metadata from Google Books')
-
- # def fetch(self):
- # from calibre.ebooks.metadata.google_books import search
- # try:
- # self.results = search(self.title, self.book_author, self.publisher,
- # self.isbn, max_results=10,
- # verbose=self.verbose)
- # except Exception, e:
- # self.exception = e
- # self.tb = traceback.format_exc()
-
- # }}}
-
class ISBNDB(MetadataSource): # {{{
name = 'IsbnDB'
@@ -226,27 +209,6 @@ class ISBNDB(MetadataSource): # {{{
# }}}
-# class Amazon(MetadataSource): # {{{
-
- # name = 'Amazon'
- # metadata_type = 'social'
- # description = _('Downloads social metadata from amazon.com')
-
- # has_html_comments = True
-
- # def fetch(self):
- # if not self.isbn:
- # return
- # from calibre.ebooks.metadata.amazon import get_social_metadata
- # try:
- # self.results = get_social_metadata(self.title, self.book_author,
- # self.publisher, self.isbn)
- # except Exception, e:
- # self.exception = e
- # self.tb = traceback.format_exc()
-
- # }}}
-
class LibraryThing(MetadataSource): # {{{
name = 'LibraryThing'
diff --git a/src/calibre/ebooks/metadata/fictionwise.py b/src/calibre/ebooks/metadata/fictionwise.py
index 909d186702..3ab960c846 100644
--- a/src/calibre/ebooks/metadata/fictionwise.py
+++ b/src/calibre/ebooks/metadata/fictionwise.py
@@ -3,7 +3,7 @@ __license__ = 'GPL 3'
__copyright__ = '2010, sengian '
__docformat__ = 'restructuredtext en'
-import sys, textwrap, re, traceback, socket
+import sys, re
from threading import Thread
from Queue import Queue
from urllib import urlencode
@@ -32,6 +32,7 @@ class Fictionwise(MetadataSource):
self.results = search(self.title, self.book_author, self.publisher,
self.isbn, max_results=10, verbose=self.verbose)
except Exception, e:
+ import traceback
self.exception = e
self.tb = traceback.format_exc()
@@ -55,6 +56,7 @@ class ThreadwithResults(Thread):
def report(verbose):
if verbose:
+ import traceback
traceback.print_exc()
@@ -108,11 +110,12 @@ class Query(object):
def __call__(self, browser, verbose, timeout = 5.):
if verbose:
- print _('Query: %s') % self.BASE_URL+self.urldata
+ print _('Query: %s POST: %s') % (self.BASE_URL, self.urldata)
try:
raw = browser.open_novisit(self.BASE_URL, self.urldata, timeout=timeout).read()
except Exception, e:
+ import socket
report(verbose)
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
@@ -321,6 +324,7 @@ class ResultList(list):
try:
raw = br.open_novisit(url).read()
except Exception, e:
+ import socket
report(verbose)
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
@@ -410,6 +414,7 @@ def search(title=None, author=None, publisher=None, isbn=None,
def option_parser():
+ import textwrap
parser = OptionParser(textwrap.dedent(\
_('''\
%prog [options]
diff --git a/src/calibre/ebooks/metadata/google_books.py b/src/calibre/ebooks/metadata/google_books.py
index 1eb5d11441..cac3cac7d0 100644
--- a/src/calibre/ebooks/metadata/google_books.py
+++ b/src/calibre/ebooks/metadata/google_books.py
@@ -3,7 +3,7 @@ __license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal '
__docformat__ = 'restructuredtext en'
-import sys, textwrap, traceback, socket
+import sys
from threading import Thread
from Queue import Queue
from urllib import urlencode
@@ -53,6 +53,7 @@ class GoogleBooks(MetadataSource):
self.results = search(self.title, self.book_author, self.publisher,
self.isbn, max_results=10, verbose=self.verbose)
except Exception, e:
+ import traceback
self.exception = e
self.tb = traceback.format_exc()
@@ -76,6 +77,7 @@ class ThreadwithResults(Thread):
def report(verbose):
if verbose:
+ import traceback
traceback.print_exc()
@@ -89,6 +91,7 @@ class Query(object):
isbn is None)
assert (max_results < 41)
assert (min_viewability in ('none', 'partial', 'full'))
+
if title == _('Unknown'):
title=None
if author == _('Unknown'):
@@ -125,6 +128,7 @@ class Query(object):
try:
raw = browser.open_novisit(url, timeout=timeout).read()
except Exception, e:
+ import socket
report(verbose)
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
@@ -271,6 +275,7 @@ class ResultList(list):
try:
raw = br.open_novisit(url).read()
except Exception, e:
+ import socket
report(verbose)
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
@@ -352,12 +357,16 @@ def search(title=None, author=None, publisher=None, isbn=None,
entries = Query(title=title, author=author, publisher=publisher,
isbn=isbn, max_results=max_results,
min_viewability=min_viewability)(br, verbose)
+
+ if entries is None or len(entries) == 0:
+ return None
ans = ResultList()
ans.populate(entries, br, verbose)
return ans
def option_parser():
+ import textwrap
parser = OptionParser(textwrap.dedent(
_('''\
%prog [options]
@@ -392,7 +401,7 @@ def main(args=sys.argv):
print _('No result found for this search!')
return 0
for result in results:
- print unicode(result).encode(preferred_encoding)
+ print unicode(result).encode(preferred_encoding, 'replace')
print
if __name__ == '__main__':
From d374b36e97559efc886a3e4733f54431b284be23 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Mon, 13 Dec 2010 21:14:38 +0100
Subject: [PATCH 065/163] ...
---
src/calibre/ebooks/metadata/fictionwise.py | 11 ++++-
src/calibre/ebooks/metadata/nicebooks.py | 57 ++++++++++++----------
2 files changed, 40 insertions(+), 28 deletions(-)
diff --git a/src/calibre/ebooks/metadata/fictionwise.py b/src/calibre/ebooks/metadata/fictionwise.py
index 3ab960c846..9eabcb2ca8 100644
--- a/src/calibre/ebooks/metadata/fictionwise.py
+++ b/src/calibre/ebooks/metadata/fictionwise.py
@@ -120,7 +120,9 @@ class Query(object):
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
return None
- if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
+ attr = getattr(e, 'args', [None])
+ attr = attr if attr else [None]
+ if isinstance(attr[0], socket.timeout):
raise FictionwiseError(_('Fictionwise timed out. Try again later.'))
raise FictionwiseError(_('Fictionwise encountered an error.'))
if '404 - ' in raw:
@@ -329,7 +331,9 @@ class ResultList(list):
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
return None
- if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
+ attr = getattr(e, 'args', [None])
+ attr = attr if attr else [None]
+ if isinstance(attr[0], socket.timeout):
raise FictionwiseError(_('Fictionwise timed out. Try again later.'))
raise FictionwiseError(_('Fictionwise encountered an error.'))
if '404 - ' in raw:
@@ -407,6 +411,9 @@ def search(title=None, author=None, publisher=None, isbn=None,
entries, islink = Query(title=title, author=author, publisher=publisher,
keywords=keywords, max_results=max_results)(br, verbose, timeout = 15.)
+ if entries is None or len(entries) == 0:
+ return None
+
#List of entry
ans = ResultList(islink)
ans.populate(entries, br, verbose)
diff --git a/src/calibre/ebooks/metadata/nicebooks.py b/src/calibre/ebooks/metadata/nicebooks.py
index 6cb7c9a6ae..cacb511563 100644
--- a/src/calibre/ebooks/metadata/nicebooks.py
+++ b/src/calibre/ebooks/metadata/nicebooks.py
@@ -3,7 +3,7 @@ __license__ = 'GPL 3'
__copyright__ = '2010, sengian '
__docformat__ = 'restructuredtext en'
-import sys, textwrap, re, traceback, socket
+import sys, re
from threading import Thread
from Queue import Queue
from urllib import urlencode
@@ -35,6 +35,7 @@ class NiceBooks(MetadataSource):
self.results = search(self.title, self.book_author, self.publisher,
self.isbn, max_results=10, verbose=self.verbose)
except Exception, e:
+ import traceback
self.exception = e
self.tb = traceback.format_exc()
@@ -70,6 +71,7 @@ class NiceBooksCovers(CoverDownload):
ext = 'jpg'
result_queue.put((True, cover_data, ext, self.name))
except Exception, e:
+ import traceback
result_queue.put((False, self.exception_to_string(e),
traceback.format_exc(), self.name))
@@ -96,6 +98,7 @@ class ThreadwithResults(Thread):
def report(verbose):
if verbose:
+ import traceback
traceback.print_exc()
@@ -124,18 +127,21 @@ class Query(object):
q = q.encode('utf-8')
self.urldata = 'search?' + urlencode({'q':q,'s':'Rechercher'})
- def __call__(self, browser, verbose, timeout = 5.):
+ def brcall(self, browser, url, verbose, timeout):
if verbose:
- print _('Query: %s') % self.BASE_URL+self.urldata
-
+ print _('Query: %s') % url
+
try:
- raw = browser.open_novisit(self.BASE_URL+self.urldata, timeout=timeout).read()
+ raw = browser.open_novisit(url, timeout=timeout).read()
except Exception, e:
+ import socket
report(verbose)
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
return None
- if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
+ attr = getattr(e, 'args', [None])
+ attr = attr if attr else [None]
+ if isinstance(attr[0], socket.timeout):
raise NiceBooksError(_('Nicebooks timed out. Try again later.'))
raise NiceBooksError(_('Nicebooks encountered an error.'))
if '404 - ' in raw:
@@ -143,14 +149,19 @@ class Query(object):
raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0]
try:
- feed = soupparser.fromstring(raw)
+ return soupparser.fromstring(raw)
except:
try:
#remove ASCII invalid chars
- feed = soupparser.fromstring(clean_ascii_chars(raw))
+ return soupparser.fromstring(clean_ascii_chars(raw))
except:
return None
+ def __call__(self, browser, verbose, timeout = 5.):
+ feed = self.brcall(browser, self.BASE_URL+self.urldata, verbose, timeout)
+ if feed is None:
+ return None
+
#nb of page to call
try:
nbresults = int(feed.xpath("//div[@id='topbar']/b")[0].text)
@@ -162,23 +173,10 @@ class Query(object):
pages =[feed]
if nbpagetoquery > 1:
for i in xrange(2, nbpagetoquery + 1):
- try:
- urldata = self.urldata + '&p=' + str(i)
- raw = browser.open_novisit(self.BASE_URL+urldata, timeout=timeout).read()
- except Exception, e:
+ urldata = self.urldata + '&p=' + str(i)
+ feed = self.brcall(browser, self.BASE_URL+urldata, verbose, timeout)
+ if feed is None:
continue
- if '404 - ' in raw:
- continue
- raw = xml_to_unicode(raw, strip_encoding_pats=True,
- resolve_entities=True)[0]
- try:
- feed = soupparser.fromstring(raw)
- except:
- try:
- #remove ASCII invalid chars
- feed = soupparser.fromstring(clean_ascii_chars(raw))
- except:
- continue
pages.append(feed)
results = []
@@ -270,11 +268,14 @@ class ResultList(list):
try:
raw = br.open_novisit(url).read()
except Exception, e:
+ import socket
report(verbose)
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
return None
- if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
+ attr = getattr(e, 'args', [None])
+ attr = attr if attr else [None]
+ if isinstance(attr[0], socket.timeout):
raise NiceBooksError(_('NiceBooks timed out. Try again later.'))
raise NiceBooksError(_('NiceBooks encountered an error.'))
if '404 - ' in raw:
@@ -372,7 +373,10 @@ class Covers(object):
self.urlimg.rpartition('.')[-1]
return cover, ext if ext else 'jpg'
except Exception, err:
- if isinstance(getattr(err, 'args', [None])[0], socket.timeout):
+ import socket
+ attr = getattr(err, 'args', [None])
+ attr = attr if attr else [None]
+ if isinstance(attr[0], socket.timeout):
raise NiceBooksError(_('Nicebooks timed out. Try again later.'))
if not len(self.urlimg):
if not self.isbnf:
@@ -407,6 +411,7 @@ def cover_from_isbn(isbn, timeout = 5.):
def option_parser():
+ import textwrap
parser = OptionParser(textwrap.dedent(\
_('''\
%prog [options]
From 81af8382d630175c34157effb2fd104577dba2e0 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Mon, 13 Dec 2010 23:24:12 +0100
Subject: [PATCH 066/163] cleaning
---
src/calibre/ebooks/metadata/amazon.py | 65 +++++++++++----------
src/calibre/ebooks/metadata/fictionwise.py | 5 +-
src/calibre/ebooks/metadata/google_books.py | 6 +-
3 files changed, 39 insertions(+), 37 deletions(-)
diff --git a/src/calibre/ebooks/metadata/amazon.py b/src/calibre/ebooks/metadata/amazon.py
index 1362349685..aec4fb313a 100644
--- a/src/calibre/ebooks/metadata/amazon.py
+++ b/src/calibre/ebooks/metadata/amazon.py
@@ -2,7 +2,7 @@ from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2010, sengian '
-import sys, textwrap, re, traceback, socket
+import sys, re
from threading import Thread
from Queue import Queue
from urllib import urlencode
@@ -61,6 +61,7 @@ class Amazon(MetadataSource):
tempres.extend(tmpnoloc)
self.results = tempres
except Exception, e:
+ import traceback
self.exception = e
self.tb = traceback.format_exc()
@@ -107,12 +108,14 @@ class AmazonSocial(MetadataSource):
tmploc.tags = tmpnoloc.tags
self.results = tmploc
except Exception, e:
+ import traceback
self.exception = e
self.tb = traceback.format_exc()
def report(verbose):
if verbose:
+ import traceback
traceback.print_exc()
class AmazonError(Exception):
@@ -208,33 +211,40 @@ class Query(object):
q = q.encode('utf-8')
self.urldata += '/gp/search/ref=sr_adv_b/?' + urlencode(q)
- def __call__(self, browser, verbose, timeout = 5.):
+ def brcall(self, browser, url, verbose, timeout):
if verbose:
- print _('Query: %s') % self.urldata
-
+ print _('Query: %s') % url
+
try:
- raw = browser.open_novisit(self.urldata, timeout=timeout).read()
+ raw = browser.open_novisit(url, timeout=timeout).read()
except Exception, e:
+ import socket
report(verbose)
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
- return None, self.urldata
- if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
- raise AmazonError(_('Amazon timed out. Try again later.'))
- raise AmazonError(_('Amazon encountered an error.'))
+ return None
+ attr = getattr(e, 'args', [None])
+ attr = attr if attr else [None]
+ if isinstance(attr[0], socket.timeout):
+ raise AmazonError(_('Amazon timed out. Try again later.'))
+ raise AmazonError(_('Amazon encountered an error.'))
if '404 - ' in raw:
- return None, self.urldata
+ return
raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0]
-
try:
- feed = soupparser.fromstring(raw)
+ return soupparser.fromstring(raw)
except:
try:
#remove ASCII invalid chars
return soupparser.fromstring(clean_ascii_chars(raw))
except:
- return None, self.urldata
+ return None
+
+ def __call__(self, browser, verbose, timeout = 5.):
+ feed = self.brcall(browser, self.urldata, verbose, timeout)
+ if feed is None:
+ return None, self.urldata
#nb of page
try:
@@ -247,23 +257,10 @@ class Query(object):
if len(nbresults) > 1:
nbpagetoquery = int(ceil(float(min(int(nbresults[2]), self.max_results))/ int(nbresults[1])))
for i in xrange(2, nbpagetoquery + 1):
- try:
- urldata = self.urldata + '&page=' + str(i)
- raw = browser.open_novisit(urldata, timeout=timeout).read()
- except Exception, e:
+ urldata = self.urldata + '&page=' + str(i)
+ feed = self.brcall(browser, urldata, verbose, timeout)
+ if feed is None:
continue
- if '404 - ' in raw:
- continue
- raw = xml_to_unicode(raw, strip_encoding_pats=True,
- resolve_entities=True)[0]
- try:
- feed = soupparser.fromstring(raw)
- except:
- try:
- #remove ASCII invalid chars
- return soupparser.fromstring(clean_ascii_chars(raw))
- except:
- continue
pages.append(feed)
results = []
@@ -453,11 +450,14 @@ class ResultList(object):
try:
raw = br.open_novisit(url).read()
except Exception, e:
+ import socket
report(verbose)
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
return None
- if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
+ attr = getattr(e, 'args', [None])
+ attr = attr if attr else [None]
+ if isinstance(attr[0], socket.timeout):
raise AmazonError(_('Amazon timed out. Try again later.'))
raise AmazonError(_('Amazon encountered an error.'))
if '404 - ' in raw:
@@ -584,6 +584,7 @@ def get_social_metadata(title, authors, publisher, isbn, verbose=False,
return [mi]
def option_parser():
+ import textwrap
parser = OptionParser(textwrap.dedent(\
_('''\
%prog [options]
@@ -648,6 +649,6 @@ if __name__ == '__main__':
sys.exit(main())
# import cProfile
# sys.exit(cProfile.run("import calibre.ebooks.metadata.amazonbis; calibre.ebooks.metadata.amazonbis.main()"))
- # sys.exit(cProfile.run("import calibre.ebooks.metadata.amazonbis; calibre.ebooks.metadata.amazonbis.main()", "profile_tmp_2"))
+ # sys.exit(cProfile.run("import calibre.ebooks.metadata.amazonbis; calibre.ebooks.metadata.amazonbis.main()", "profile"))
-# calibre-debug -e "H:\Mes eBooks\Developpement\calibre\src\calibre\ebooks\metadata\amazonbis.py" -m 5 -a gore -v>data.html
\ No newline at end of file
+# calibre-debug -e "H:\Mes eBooks\Developpement\calibre\src\calibre\ebooks\metadata\amazon.py" -m 5 -a gore -v>data.html
\ No newline at end of file
diff --git a/src/calibre/ebooks/metadata/fictionwise.py b/src/calibre/ebooks/metadata/fictionwise.py
index 9eabcb2ca8..a50bb2ce04 100644
--- a/src/calibre/ebooks/metadata/fictionwise.py
+++ b/src/calibre/ebooks/metadata/fictionwise.py
@@ -14,11 +14,12 @@ from calibre import browser, preferred_encoding
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata import MetaInformation, check_isbn, \
authors_to_sort_string
-from calibre.library.comments import sanitize_comments_html
from calibre.ebooks.metadata.fetch import MetadataSource
+from calibre.library.comments import sanitize_comments_html
from calibre.utils.config import OptionParser
-from calibre.utils.date import parse_date, utcnow
from calibre.utils.cleantext import clean_ascii_chars, unescape
+from calibre.utils.date import parse_date, utcnow
+
class Fictionwise(MetadataSource):
diff --git a/src/calibre/ebooks/metadata/google_books.py b/src/calibre/ebooks/metadata/google_books.py
index cac3cac7d0..765bb4a255 100644
--- a/src/calibre/ebooks/metadata/google_books.py
+++ b/src/calibre/ebooks/metadata/google_books.py
@@ -1,6 +1,6 @@
from __future__ import with_statement
__license__ = 'GPL 3'
-__copyright__ = '2009, Kovid Goyal '
+__copyright__ = '2009, Kovid Goyal , 2010, sengian '
__docformat__ = 'restructuredtext en'
import sys
@@ -12,13 +12,13 @@ from functools import partial
from lxml import etree
from calibre import browser, preferred_encoding
+from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata import MetaInformation, check_isbn, \
authors_to_sort_string
from calibre.ebooks.metadata.fetch import MetadataSource
-from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.config import OptionParser
-from calibre.utils.date import parse_date, utcnow
from calibre.utils.cleantext import clean_ascii_chars
+from calibre.utils.date import parse_date, utcnow
NAMESPACES = {
'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',
From 99921673d62dd26305a47fed9f35c332aee3a1aa Mon Sep 17 00:00:00 2001
From: Sengian
Date: Tue, 14 Dec 2010 00:34:25 +0100
Subject: [PATCH 067/163] Threading optimisation (last one, I hope): now faster
than light, at least for Pratchett's books on Amazon
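
The speed-up comes from dropping the per-thread bookkeeping: each worker now pushes an (index, payload) tuple straight onto the synchronisation queue, so the consumer just drains the queue instead of joining ThreadwithResults objects, and for amazon.py a third stage folds the separately fetched tag pages back in. A condensed sketch of the three-stage shape, assuming hypothetical fetch_page, build_mi and parse_tags helpers:

    from threading import Thread
    from Queue import Queue

    def three_stage(urls, fetch_page, build_mi, parse_tags):
        syncc, syncf = Queue(1), Queue(1)
        results = [None]*len(urls)

        def fetcher(sync, nb, url):
            sync.put((nb, fetch_page(url)), True)   # (index, payload) hand-off

        for i, url in enumerate(urls):              # stage 1: producer
            Thread(target=fetcher, args=(syncc, i, url)).start()

        pending_tags = 0
        for _ in urls:                              # stage 2: consumer
            nb, page = syncc.get(True)
            if page is None:
                continue
            mi, tag_url = build_mi(page)            # tag_url may be None
            results[nb] = mi
            if tag_url is not None:
                pending_tags += 1
                Thread(target=fetcher, args=(syncf, nb, tag_url)).start()

        for _ in xrange(pending_tags):              # stage 3: fold the tags in
            nb, page = syncf.get(True)
            if page is not None and results[nb] is not None:
                results[nb].tags = parse_tags(page)
        return results
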
---
src/calibre/ebooks/metadata/amazon.py | 74 +++++++++------------
src/calibre/ebooks/metadata/fictionwise.py | 36 ++--------
src/calibre/ebooks/metadata/google_books.py | 40 +++--------
src/calibre/ebooks/metadata/nicebooks.py | 36 ++--------
4 files changed, 53 insertions(+), 133 deletions(-)
diff --git a/src/calibre/ebooks/metadata/amazon.py b/src/calibre/ebooks/metadata/amazon.py
index aec4fb313a..6eb106c862 100644
--- a/src/calibre/ebooks/metadata/amazon.py
+++ b/src/calibre/ebooks/metadata/amazon.py
@@ -121,20 +121,6 @@ def report(verbose):
class AmazonError(Exception):
pass
-class ThreadwithResults(Thread):
- def __init__(self, func, *args, **kargs):
- self.func = func
- self.args = args
- self.kargs = kargs
- self.result = None
- Thread.__init__(self)
-
- def get_result(self):
- return self.result
-
- def run(self):
- self.result = self.func(*self.args, **self.kargs)
-
class Query(object):
@@ -269,14 +255,11 @@ class Query(object):
for i in x.xpath("//a/span[@class='srTitle']")])
return results[:self.max_results], self.baseurl
-class ResultList(object):
+class ResultList(list):
def __init__(self, baseurl, lang = 'all'):
self.baseurl = baseurl
self.lang = lang
- self.thread = []
- self.res = []
- self.nbtag = 0
self.repub = re.compile(u'\((.*)\)')
self.rerat = re.compile(u'([0-9.]+)')
self.reattr = re.compile(r'<([a-zA-Z0-9]+)\s[^>]+>')
@@ -484,63 +467,65 @@ class ResultList(object):
entry = None
finally:
qbr.put(browser, True)
- qsync.put(nb, True)
- return entry
+ qsync.put((nb, entry), True)
def producer(self, sync, urls, br, verbose=False):
for i in xrange(len(urls)):
- thread = ThreadwithResults(self.fetchdatathread, br, sync,
- i, urls[i], verbose)
+ thread = Thread(target=self.fetchdatathread,
+ args=(br, sync, i, urls[i], verbose))
thread.start()
- self.thread.append(thread)
def consumer(self, sync, syncbis, br, total_entries, verbose=False):
i=0
+ self.extend([None]*total_entries)
while i < total_entries:
- nb = int(sync.get(True))
- self.thread[nb].join()
- entry = self.thread[nb].get_result()
+ rq = sync.get(True)
+ nb = int(rq[0])
+ entry = rq[1]
i+=1
if entry is not None:
mi = self.fill_MI(entry, verbose)
if mi is not None:
mi.tags, atag = self.get_tags(entry, verbose)
- self.res[nb] = mi
+ self[nb] = mi
if atag:
- threadbis = ThreadwithResults(self.fetchdatathread,
- br, syncbis, nb, mi.tags, verbose)
- self.thread[nb] = threadbis
- self.nbtag +=1
- threadbis.start()
+ thread = Thread(target=self.fetchdatathread,
+ args=(br, syncbis, nb, mi.tags, verbose))
+ thread.start()
+ else:
+ syncbis.put((nb, None), True)
+
+ def final(self, sync, total_entries, verbose):
+ i=0
+ while i < total_entries:
+ rq = sync.get(True)
+ nb = int(rq[0])
+ tags = rq[1]
+ i+=1
+ if tags is not None:
+ self[nb].tags = self.get_tags(tags, verbose)[0]
def populate(self, entries, ibr, verbose=False, brcall=3):
br = Queue(brcall)
cbr = Queue(brcall-1)
syncp = Queue(1)
- syncc = Queue(len(entries))
+ syncc = Queue(1)
for i in xrange(brcall-1):
br.put(browser(), True)
cbr.put(browser(), True)
br.put(ibr, True)
- self.res = [None]*len(entries)
-
prod_thread = Thread(target=self.producer, args=(syncp, entries, br, verbose))
cons_thread = Thread(target=self.consumer, args=(syncp, syncc, cbr, len(entries), verbose))
+ fin_thread = Thread(target=self.final, args=(syncc, len(entries), verbose))
prod_thread.start()
cons_thread.start()
+ fin_thread.start()
prod_thread.join()
cons_thread.join()
-
- #finish processing
- for i in xrange(self.nbtag):
- nb = int(syncc.get(True))
- tags = self.thread[nb].get_result()
- if tags is not None:
- self.res[nb].tags = self.get_tags(tags, verbose)[0]
- return self.res
+ fin_thread.join()
def search(title=None, author=None, publisher=None, isbn=None,
@@ -554,7 +539,8 @@ def search(title=None, author=None, publisher=None, isbn=None,
#List of entry
ans = ResultList(baseurl, lang)
- return [x for x in ans.populate(entries, br, verbose) if x is not None]
+ ans.populate(entries, br, verbose)
+ return [x for x in ans if x is not None]
def get_social_metadata(title, authors, publisher, isbn, verbose=False,
max_results=1, lang='all'):
diff --git a/src/calibre/ebooks/metadata/fictionwise.py b/src/calibre/ebooks/metadata/fictionwise.py
index a50bb2ce04..48dac131cc 100644
--- a/src/calibre/ebooks/metadata/fictionwise.py
+++ b/src/calibre/ebooks/metadata/fictionwise.py
@@ -41,20 +41,6 @@ class Fictionwise(MetadataSource):
class FictionwiseError(Exception):
pass
-class ThreadwithResults(Thread):
- def __init__(self, func, *args, **kargs):
- self.func = func
- self.args = args
- self.kargs = kargs
- self.result = None
- Thread.__init__(self)
-
- def get_result(self):
- return self.result
-
- def run(self):
- self.result = self.func(*self.args, **self.kargs)
-
def report(verbose):
if verbose:
import traceback
@@ -155,7 +141,6 @@ class ResultList(list):
def __init__(self, islink):
self.islink = islink
- self.thread = []
self.retitle = re.compile(r'\[[^\[\]]+\]')
self.rechkauth = re.compile(r'.*book\s*by', re.I)
self.redesc = re.compile(r'book\s*description\s*:\s*(<[^>]+>)*(?P<desc>.*)<[^>]*>.{,15}publisher\s*:', re.I)
@@ -361,27 +346,21 @@ class ResultList(list):
entry = None
finally:
qbr.put(browser, True)
- qsync.put(nb, True)
- return entry
+ qsync.put((nb, entry), True)
def producer(self, sync, urls, br, verbose=False):
for i in xrange(len(urls)):
- thread = ThreadwithResults(self.fetchdatathread, br, sync,
- i, self.BASE_URL+urls[i], verbose)
+ thread = Thread(target=self.fetchdatathread,
+ args=(br, sync, i, self.BASE_URL+urls[i], verbose))
thread.start()
- self.thread.append(thread)
def consumer(self, sync, total_entries, verbose=False):
- res=[None]*total_entries
+ self.extend([None]*total_entries)
i=0
while i < total_entries:
- nb = int(sync.get(True))
- self.thread[nb].join()
- entry = self.thread[nb].get_result()
+ rq = sync.get(True)
+ self[int(rq[0])] = self.fill_MI(rq[1], verbose)
i+=1
- if entry is not None:
- res[nb] = self.fill_MI(entry, verbose)
- return res
def populate(self, entries, br, verbose=False, brcall=3):
if not self.islink:
@@ -396,12 +375,11 @@ class ResultList(list):
pbr.put(br, True)
prod_thread = Thread(target=self.producer, args=(sync, entries, pbr, verbose))
- cons_thread = ThreadwithResults(self.consumer, sync, len(entries), verbose)
+ cons_thread = Thread(target=self.consumer, args=(sync, len(entries), verbose))
prod_thread.start()
cons_thread.start()
prod_thread.join()
cons_thread.join()
- self.extend(cons_thread.get_result())
def search(title=None, author=None, publisher=None, isbn=None,
diff --git a/src/calibre/ebooks/metadata/google_books.py b/src/calibre/ebooks/metadata/google_books.py
index 765bb4a255..fd18f080a0 100644
--- a/src/calibre/ebooks/metadata/google_books.py
+++ b/src/calibre/ebooks/metadata/google_books.py
@@ -61,20 +61,6 @@ class GoogleBooks(MetadataSource):
class GoogleBooksError(Exception):
pass
-class ThreadwithResults(Thread):
- def __init__(self, func, *args, **kargs):
- self.func = func
- self.args = args
- self.kargs = kargs
- self.result = None
- Thread.__init__(self)
-
- def get_result(self):
- return self.result
-
- def run(self):
- self.result = self.func(*self.args, **self.kargs)
-
def report(verbose):
if verbose:
import traceback
@@ -173,8 +159,6 @@ class Query(object):
return entries
class ResultList(list):
- def __init__(self):
- self.thread = []
def get_description(self, entry, verbose):
try:
@@ -206,8 +190,7 @@ class ResultList(list):
return val
def get_identifiers(self, entry, mi):
- isbns = [str(x.text).strip() for x in identifier(entry)]
- isbns = [t[5:] for t in isbns \
+ isbns = [t[5:] for t in [str(x.text).strip() for x in identifier(entry)] \
if t[:5].upper() == 'ISBN:' and check_isbn(t[5:])]
# for x in identifier(entry):
# t = str(x.text).strip()
@@ -309,8 +292,7 @@ class ResultList(list):
entry = None
finally:
qbr.put(browser, True)
- qsync.put(nb, True)
- return entry
+ qsync.put((nb, entry), True)
def producer(self, sync, entries, br, verbose=False):
for i in xrange(len(entries)):
@@ -319,21 +301,18 @@ class ResultList(list):
except:
id_url = None
report(verbose)
- thread = ThreadwithResults(self.fetchdatathread, br, sync,
- i, id_url, verbose)
+ thread = Thread(target=self.fetchdatathread,
+ args=(br, sync, i, id_url, verbose))
thread.start()
- self.thread.append(thread)
def consumer(self, entries, sync, total_entries, verbose=False):
- res=[None]*total_entries #remove?
+ self.extend([None]*total_entries)
i=0
while i < total_entries:
- nb = int(sync.get(True))
- self.thread[nb].join()
- data = self.thread[nb].get_result()
- res[nb] = self.fill_MI(entries[nb], data, verbose)
+ rq = sync.get(True)
+ nb = int(rq[0])
+ self[nb] = self.fill_MI(entries[nb], rq[1], verbose)
i+=1
- return res
def populate(self, entries, br, verbose=False, brcall=3):
pbr = Queue(brcall)
@@ -343,12 +322,11 @@ class ResultList(list):
pbr.put(br, True)
prod_thread = Thread(target=self.producer, args=(sync, entries, pbr, verbose))
- cons_thread = ThreadwithResults(self.consumer, entries, sync, len(entries), verbose)
+ cons_thread = Thread(target=self.consumer, args=(entries, sync, len(entries), verbose))
prod_thread.start()
cons_thread.start()
prod_thread.join()
cons_thread.join()
- self.extend(cons_thread.get_result())
def search(title=None, author=None, publisher=None, isbn=None,
diff --git a/src/calibre/ebooks/metadata/nicebooks.py b/src/calibre/ebooks/metadata/nicebooks.py
index cacb511563..1ff5f7fc6b 100644
--- a/src/calibre/ebooks/metadata/nicebooks.py
+++ b/src/calibre/ebooks/metadata/nicebooks.py
@@ -82,20 +82,6 @@ class NiceBooksError(Exception):
class ISBNNotFound(NiceBooksError):
pass
-class ThreadwithResults(Thread):
- def __init__(self, func, *args, **kargs):
- self.func = func
- self.args = args
- self.kargs = kargs
- self.result = None
- Thread.__init__(self)
-
- def get_result(self):
- return self.result
-
- def run(self):
- self.result = self.func(*self.args, **self.kargs)
-
def report(verbose):
if verbose:
import traceback
@@ -191,7 +177,6 @@ class ResultList(list):
def __init__(self, islink):
self.islink = islink
- self.thread = []
self.repub = re.compile(u'\s*.diteur\s*', re.I)
self.reauteur = re.compile(u'\s*auteur.*', re.I)
self.reautclean = re.compile(u'\s*\(.*\)\s*')
@@ -302,27 +287,21 @@ class ResultList(list):
entry = None
finally:
qbr.put(browser, True)
- qsync.put(nb, True)
- return entry
+ qsync.put((nb, entry), True)
def producer(self, sync, urls, br, verbose=False):
for i in xrange(len(urls)):
- thread = ThreadwithResults(self.fetchdatathread, br, sync,
- i, self.BASE_URL+urls[i], verbose)
+ thread = Thread(target=self.fetchdatathread,
+ args=(br, sync, i, self.BASE_URL+urls[i], verbose))
thread.start()
- self.thread.append(thread)
def consumer(self, sync, total_entries, verbose=False):
- res=[None]*total_entries
+ self.extend([None]*total_entries)
i=0
while i < total_entries:
- nb = int(sync.get(True))
- self.thread[nb].join()
- entry = self.thread[nb].get_result()
+ rq = sync.get(True)
+ self[int(rq[0])] = self.fill_MI(rq[1], verbose)
i+=1
- if entry is not None:
- res[nb] = self.fill_MI(entry, verbose)
- return res
def populate(self, entries, br, verbose=False, brcall=3):
if not self.islink:
@@ -337,12 +316,11 @@ class ResultList(list):
pbr.put(br, True)
prod_thread = Thread(target=self.producer, args=(sync, entries, pbr, verbose))
- cons_thread = ThreadwithResults(self.consumer, sync, len(entries), verbose)
+ cons_thread = Thread(target=self.consumer, args=(sync, len(entries), verbose))
prod_thread.start()
cons_thread.start()
prod_thread.join()
cons_thread.join()
- self.extend(cons_thread.get_result())
class Covers(object):
From 08eb0e1a59309f0749e19f6898201d260703c4c4 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Wed, 15 Dec 2010 01:07:40 +0100
Subject: [PATCH 068/163] Minor modifications
---
src/calibre/ebooks/metadata/fictionwise.py | 2 +-
src/calibre/ebooks/metadata/google_books.py | 11 ++++++-----
src/calibre/ebooks/metadata/nicebooks.py | 2 +-
3 files changed, 8 insertions(+), 7 deletions(-)
diff --git a/src/calibre/ebooks/metadata/fictionwise.py b/src/calibre/ebooks/metadata/fictionwise.py
index 48dac131cc..96638a1788 100644
--- a/src/calibre/ebooks/metadata/fictionwise.py
+++ b/src/calibre/ebooks/metadata/fictionwise.py
@@ -233,7 +233,7 @@ class ResultList(list):
description = self.reimg.sub('', description.group("desc"))
description = self.recomment.sub('', description)
description = self.resanitize.sub('', sanitize_comments_html(description))
- return _('SUMMARY:\n %s') % re.sub(r'\n\s+','\n', description)
+ return _('SUMMARY:\n%s') % re.sub(r'\n\s+','\n', description)
def get_publisher(self, entry):
publisher = self.output_entry(entry.xpath('./p')[1])
diff --git a/src/calibre/ebooks/metadata/google_books.py b/src/calibre/ebooks/metadata/google_books.py
index fd18f080a0..41b2edfefb 100644
--- a/src/calibre/ebooks/metadata/google_books.py
+++ b/src/calibre/ebooks/metadata/google_books.py
@@ -146,7 +146,7 @@ class Query(object):
# print etree.tostring(feed, pretty_print=True)
total = int(total_results(feed)[0].text)
- nbresultstoget = total if total < self.max_results else self.max_results
Date: Wed, 15 Dec 2010 09:10:37 +0100
Subject: [PATCH 069/163] Refactoring of the isbndb plugin + add get_language
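
The rewritten plugin walks isbndb's paged books.xml feed, bumping page_number until enough BookData entries have been collected. Roughly, with fetch_feed standing in for the browser call plus lxml parse, and index1 fixed to 'combined' for brevity (the real Query also handles 'isbn' and 'full'):

    from urllib import urlencode

    def isbndb_entries(fetch_feed, key, query, max_results=10):
        base = 'http://isbndb.com/api/books.xml?' + urlencode({
            'access_key': key, 'index1': 'combined', 'value1': query,
            'results': 'subjects,authors,texts,details',
        }) + '&page_number='
        feed = fetch_feed(base + '1')
        if feed is None:
            return None
        total = int(feed.find('BookList').get('total_results'))
        wanted = min(total, max_results)
        entries = feed.xpath('./BookList/BookData')
        page = 2
        while len(entries) < wanted:           # keep paging until we have enough
            feed = fetch_feed(base + str(page))
            page += 1
            if feed is None:
                break
            entries.extend(feed.xpath('./BookList/BookData'))
        return entries[:wanted]
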
---
src/calibre/customize/builtins.py | 3 +-
src/calibre/ebooks/metadata/fetch.py | 62 ++---
src/calibre/ebooks/metadata/isbndb.py | 343 ++++++++++++++++++--------
3 files changed, 270 insertions(+), 138 deletions(-)
diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index 9e34d33941..f95c29a718 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -480,7 +480,8 @@ from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, KOGAN, \
from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
from calibre.devices.kobo.driver import KOBO
-from calibre.ebooks.metadata.fetch import ISBNDB, LibraryThing
+from calibre.ebooks.metadata.fetch import LibraryThing
+from calibre.ebooks.metadata.isbndb import ISBNDB
from calibre.ebooks.metadata.douban import DoubanBooks
from calibre.ebooks.metadata.google_books import GoogleBooks
from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers
diff --git a/src/calibre/ebooks/metadata/fetch.py b/src/calibre/ebooks/metadata/fetch.py
index 0c607b9bb7..3bf4c22afe 100644
--- a/src/calibre/ebooks/metadata/fetch.py
+++ b/src/calibre/ebooks/metadata/fetch.py
@@ -172,40 +172,40 @@ class MetadataSource(Plugin): # {{{
# }}}
-class ISBNDB(MetadataSource): # {{{
+# class ISBNDB(MetadataSource): # {{{
- name = 'IsbnDB'
- description = _('Downloads metadata from isbndb.com')
+ # name = 'IsbnDB'
+ # description = _('Downloads metadata from isbndb.com')
- def fetch(self):
- if not self.site_customization:
- return
- from calibre.ebooks.metadata.isbndb import option_parser, create_books
- args = ['isbndb']
- if self.isbn:
- args.extend(['--isbn', self.isbn])
- else:
- if self.title:
- args.extend(['--title', self.title])
- if self.book_author:
- args.extend(['--author', self.book_author])
- if self.publisher:
- args.extend(['--publisher', self.publisher])
- if self.verbose:
- args.extend(['--verbose'])
- args.append(self.site_customization) # IsbnDb key
- try:
- opts, args = option_parser().parse_args(args)
- self.results = create_books(opts, args)
- except Exception, e:
- self.exception = e
- self.tb = traceback.format_exc()
+ # def fetch(self):
+ # if not self.site_customization:
+ # return
+ # from calibre.ebooks.metadata.isbndb import option_parser, create_books
+ # args = ['isbndb']
+ # if self.isbn:
+ # args.extend(['--isbn', self.isbn])
+ # else:
+ # if self.title:
+ # args.extend(['--title', self.title])
+ # if self.book_author:
+ # args.extend(['--author', self.book_author])
+ # if self.publisher:
+ # args.extend(['--publisher', self.publisher])
+ # if self.verbose:
+ # args.extend(['--verbose'])
+ # args.append(self.site_customization) # IsbnDb key
+ # try:
+ # opts, args = option_parser().parse_args(args)
+ # self.results = create_books(opts, args)
+ # except Exception, e:
+ # self.exception = e
+ # self.tb = traceback.format_exc()
- @property
- def string_customization_help(self):
- ans = _('To use isbndb.com you must sign up for a %sfree account%s '
- 'and enter your access key below.')
- return '<br>'+ans%('<a href="http://isbndb.com">', '</a>')
+ # @property
+ # def string_customization_help(self):
+ # ans = _('To use isbndb.com you must sign up for a %sfree account%s '
+ # 'and enter your access key below.')
+ # return '<br>'+ans%('<a href="http://isbndb.com">', '</a>')
# }}}
diff --git a/src/calibre/ebooks/metadata/isbndb.py b/src/calibre/ebooks/metadata/isbndb.py
index 9169227326..330755fe35 100644
--- a/src/calibre/ebooks/metadata/isbndb.py
+++ b/src/calibre/ebooks/metadata/isbndb.py
@@ -5,115 +5,247 @@ Interface to isbndb.com. My key HLLXQX2A.
'''
import sys, re
-from urllib import quote
+from urllib import urlencode
+from lxml import etree
+
+from calibre import browser, preferred_encoding
+from calibre.ebooks.chardet import xml_to_unicode
+from calibre.ebooks.metadata.fetch import MetadataSource
+from calibre.ebooks.metadata import MetaInformation, authors_to_sort_string
+from calibre.utils.cleantext import clean_ascii_chars
from calibre.utils.config import OptionParser
-from calibre.ebooks.metadata.book.base import Metadata
-from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
-from calibre import browser
-BASE_URL = 'http://isbndb.com/api/books.xml?access_key=%(key)s&page_number=1&results=subjects,authors,texts&'
+
+class ISBNDB(MetadataSource):
+
+ name = 'IsbnDB'
+ description = _('Downloads metadata from isbndb.com')
+ version = (1, 0, 1)
+
+ def fetch(self):
+ if not self.site_customization:
+ return
+ try:
+ self.results = search(self.title, self.book_author, self.publisher, self.isbn,
+ max_results=10, verbose=self.verbose, key=self.site_customization)
+ except Exception, e:
+ import traceback
+ self.exception = e
+ self.tb = traceback.format_exc()
+
+ @property
+ def string_customization_help(self):
+ ans = _('To use isbndb.com you must sign up for a %sfree account%s '
+ 'and enter your access key below.')
+ return '<br>'+ans%('<a href="http://isbndb.com">', '</a>')
+
class ISBNDBError(Exception):
pass
-def fetch_metadata(url, max=100, timeout=5.):
- books = []
- page_number = 1
- total_results = sys.maxint
- br = browser()
- while len(books) < total_results and max > 0:
+def report(verbose):
+ if verbose:
+ import traceback
+ traceback.print_exc()
+
+
+class Query(object):
+
+ BASE_URL = 'http://isbndb.com/api/books.xml?'
+
+ def __init__(self, key, title=None, author=None, publisher=None, isbn=None,
+ keywords=None, max_results=40):
+ assert not(title is None and author is None and publisher is None and \
+ isbn is None and keywords is None)
+ assert (max_results < 41)
+
+ if title == _('Unknown'):
+ title=None
+ if author == _('Unknown'):
+ author=None
+ self.maxresults = int(max_results)
+
+ if isbn is not None:
+ q = isbn
+ i = 'isbn'
+ elif keywords is not None:
+ q = ' '.join([e for e in (title, author, publisher, keywords) \
+ if e is not None ])
+ q = q.strip()
+ i = 'full'
+ else:
+ q = ' '.join([e for e in (title, author, publisher) \
+ if e is not None ])
+ q = q.strip()
+ if len(q) == 0:
+ raise ISBNDBError(_('You must specify at least one of author, title or publisher'))
+ i = 'combined'
+
+ if isinstance(q, unicode):
+ q = q.encode('utf-8')
+ self.url = self.BASE_URL+urlencode({
+ 'value1':q,
+ 'results':'subjects,authors,texts,details',
+ 'access_key':key,
+ 'index1':i,
+ })+'&page_number='
+
+ def brcall(self, browser, url, verbose, timeout):
+ if verbose:
+ print _('Query: %s') % url
+
try:
- raw = br.open(url, timeout=timeout).read()
- except Exception, err:
- raise ISBNDBError('Could not fetch ISBNDB metadata. Error: '+str(err))
- soup = BeautifulStoneSoup(raw,
- convertEntities=BeautifulStoneSoup.XML_ENTITIES)
- book_list = soup.find('booklist')
- if book_list is None:
- errmsg = soup.find('errormessage').string
- raise ISBNDBError('Error fetching metadata: '+errmsg)
- total_results = int(book_list['total_results'])
- page_number += 1
- np = '&page_number=%s&'%page_number
- url = re.sub(r'\&page_number=\d+\&', np, url)
- books.extend(book_list.findAll('bookdata'))
- max -= 1
- return books
-
-
-class ISBNDBMetadata(Metadata):
-
- def __init__(self, book):
- Metadata.__init__(self, None)
-
- def tostring(e):
- if not hasattr(e, 'string'):
+ raw = browser.open_novisit(url, timeout=timeout).read()
+ except Exception, e:
+ import socket
+ report(verbose)
+ if callable(getattr(e, 'getcode', None)) and \
+ e.getcode() == 404:
+ return None
+ attr = getattr(e, 'args', [None])
+ attr = attr if attr else [None]
+ if isinstance(attr[0], socket.timeout):
+ raise ISBNDBError(_('ISBNDB timed out. Try again later.'))
+ raise ISBNDBError(_('ISBNDB encountered an error.'))
+ if '404 - ' in raw:
+ return None
+ raw = xml_to_unicode(raw, strip_encoding_pats=True,
+ resolve_entities=True)[0]
+ try:
+ return etree.fromstring(raw)
+ except:
+ try:
+ #remove ASCII invalid chars (normally not needed)
+ return etree.fromstring(clean_ascii_chars(raw))
+ except:
return None
- ans = e.string
- if ans is not None:
- ans = unicode(ans).strip()
- if not ans:
- ans = None
- return ans
- self.isbn = unicode(book.get('isbn13', book.get('isbn')))
- title = tostring(book.find('titlelong'))
+ def __call__(self, browser, verbose, timeout = 5.):
+ url = self.url+str(1)
+ feed = self.brcall(browser, url, verbose, timeout)
+ if feed is None:
+ return None
+
+ # print etree.tostring(feed, pretty_print=True)
+ total = int(feed.find('BookList').get('total_results'))
+ nbresultstoget = total if total < self.maxresults else self.maxresults
+ entries = feed.xpath("./BookList/BookData")
+ i=2
+ while len(entries) < nbresultstoget:
+ url = self.url+str(i)
+ feed = self.brcall(browser, url, verbose, timeout)
+ i+=1
+ if feed is None:
+ break
+ entries.extend(feed.xpath("./BookList/BookData"))
+ return entries[:nbresultstoget]
+
+class ResultList(list):
+
+ def get_description(self, entry, verbose):
+ try:
+ desc = entry.find('Summary')
+ if desc:
+ return _(u'SUMMARY:\n%s') % self.output_entry(desc)
+ except:
+ report(verbose)
+
+ def get_language(self, entry, verbose):
+ try:
+ return entry.find('Details').get('language')
+ except:
+ report(verbose)
+
+ def get_title(self, entry):
+ title = entry.find('TitleLong')
if not title:
- title = tostring(book.find('title'))
- self.title = title
- self.title = unicode(self.title).strip()
+ title = entry.find('Title')
+ return self.output_entry(title)
+
+ def get_authors(self, entry):
authors = []
- au = tostring(book.find('authorstext'))
- if au:
- au = au.strip()
- temp = au.split(',')
+ au = entry.find('AuthorsText')
+ if au is not None:
+ au = self.output_entry(au)
+ temp = au.split(u',')
for au in temp:
if not au: continue
- authors.extend([a.strip() for a in au.split('&')])
- if authors:
- self.authors = authors
+ authors.extend([a.strip() for a in au.split(u'&')])
+ return authors
+
+ def get_author_sort(self, entry, verbose):
try:
- self.author_sort = tostring(book.find('authors').find('person'))
- if self.authors and self.author_sort == self.authors[0]:
- self.author_sort = None
+ return self.output_entry(entry.find('Authors').find('Person'))
except:
- pass
- self.publisher = tostring(book.find('publishertext'))
+ report(verbose)
+ return None
- summ = tostring(book.find('summary'))
- if summ:
- self.comments = 'SUMMARY:\n'+summ
+ def get_isbn(self, entry, verbose):
+ try:
+ return unicode(entry.get('isbn13', entry.get('isbn')))
+ except:
+ report(verbose)
+
+ def get_publisher(self, entry, verbose):
+ try:
+ return self.output_entry(entry.find('PublisherText'))
+ except:
+ report(verbose)
+ return None
+
+ def output_entry(self, entry):
+ out = etree.tostring(entry, encoding=unicode, method="text")
+ return out.strip()
+
+ def populate(self, entries, verbose):
+ for x in entries:
+ try:
+ title = self.get_title(x)
+ authors = self.get_authors(x)
+ except Exception, e:
+ if verbose:
+ print _('Failed to get all details for an entry')
+ print e
+ continue
+ mi = MetaInformation(title, authors)
+ tmpautsort = self.get_author_sort(x, verbose)
+ mi.author_sort = tmpautsort if tmpautsort is not None \
+ else authors_to_sort_string(authors)
+ mi.comments = self.get_description(x, verbose)
+ mi.isbn = self.get_isbn(x, verbose)
+ mi.publisher = self.get_publisher(x, verbose)
+ mi.language = self.get_language(x, verbose)
+ self.append(mi)
-def build_isbn(base_url, opts):
- return base_url + 'index1=isbn&value1='+opts.isbn
+def search(title=None, author=None, publisher=None, isbn=None,
+ max_results=10, verbose=False, keywords=None, key=None):
+ br = browser()
+ entries = Query(key, title=title, author=author, isbn=isbn, publisher=publisher,
+ keywords=keywords, max_results=max_results)(br, verbose, timeout = 10.)
-def build_combined(base_url, opts):
- query = ' '.join([e for e in (opts.title, opts.author, opts.publisher) \
- if e is not None ])
- query = query.strip()
- if len(query) == 0:
- raise ISBNDBError('You must specify at least one of --author, --title or --publisher')
-
- query = re.sub(r'\s+', '+', query)
- if isinstance(query, unicode):
- query = query.encode('utf-8')
- return base_url+'index1=combined&value1='+quote(query, '+')
+ if entries is None or len(entries) == 0:
+ return None
+ #List of entry
+ ans = ResultList()
+ ans.populate(entries, verbose)
+ return list(dict((book.isbn, book) for book in ans).values())
def option_parser():
- parser = OptionParser(usage=\
-_('''
-%prog [options] key
+ import textwrap
+ parser = OptionParser(textwrap.dedent(\
+ _('''\
+ %prog [options] key
-Fetch metadata for books from isndb.com. You can specify either the
-books ISBN ID or its title and author. If you specify the title and author,
-then more than one book may be returned.
+ Fetch metadata for books from isbndb.com. You can specify either the
+ books ISBN ID or its title and author. If you specify the title and author,
+ then more than one book may be returned.
-key is the account key you generate after signing up for a free account from isbndb.com.
+ key is the account key you generate after signing up for a free account from isbndb.com.
-'''))
+ ''')))
parser.add_option('-i', '--isbn', default=None, dest='isbn',
help=_('The ISBN ID of the book you want metadata for.'))
parser.add_option('-a', '--author', dest='author',
@@ -122,38 +254,37 @@ key is the account key you generate after signing up for a free account from isb
default=None, help=_('The title of the book to search for.'))
parser.add_option('-p', '--publisher', default=None, dest='publisher',
help=_('The publisher of the book to search for.'))
- parser.add_option('-v', '--verbose', default=False,
- action='store_true', help=_('Verbose processing'))
-
+ parser.add_option('-k', '--keywords', help=_('Keywords to search for.'))
+ parser.add_option('-m', '--max-results', default=10,
+ help=_('Maximum number of results to fetch'))
+ parser.add_option('-v', '--verbose', default=0, action='count',
+ help=_('Be more verbose about errors'))
return parser
-
-def create_books(opts, args, timeout=5.):
- base_url = BASE_URL%dict(key=args[1])
- if opts.isbn is not None:
- url = build_isbn(base_url, opts)
- else:
- url = build_combined(base_url, opts)
-
- if opts.verbose:
- print ('ISBNDB query: '+url)
-
- tans = [ISBNDBMetadata(book) for book in fetch_metadata(url, timeout=timeout)]
- #remove duplicates ISBN
- return list(dict((book.isbn, book) for book in tans).values())
-
def main(args=sys.argv):
parser = option_parser()
opts, args = parser.parse_args(args)
if len(args) != 2:
parser.print_help()
- print ('You must supply the isbndb.com key')
+ print
+ print _('You must supply the isbndb.com key')
return 1
-
- for book in create_books(opts, args):
- print unicode(book).encode('utf-8')
-
+ try:
+ results = search(opts.title, opts.author, opts.publisher, opts.isbn, key=args[1],
+ keywords=opts.keywords, verbose=opts.verbose, max_results=opts.max_results)
+ except AssertionError:
+ report(True)
+ parser.print_help()
+ return 1
+ if results is None or len(results) == 0:
+ print _('No result found for this search!')
+ return 0
+ for result in results:
+ print unicode(result).encode(preferred_encoding, 'replace')
+ print
return 0
if __name__ == '__main__':
sys.exit(main())
+
+# calibre-debug -e "H:\Mes eBooks\Developpement\calibre\src\calibre\ebooks\metadata\isbndb-bis.py" -m 5 -a gore -v PWEK5WY4>data.html
\ No newline at end of file
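
A minimal sketch, outside the patch itself, of the deduplication idiom search() ends with: keying a dict by ISBN keeps a single result per ISBN. The Book class below is a stand-in for the MetaInformation objects the real code builds.

    class Book(object):
        def __init__(self, isbn, title):
            self.isbn, self.title = isbn, title

    def dedup_by_isbn(books):
        # later entries overwrite earlier ones with the same ISBN
        return list(dict((book.isbn, book) for book in books).values())

    books = [Book('9780316037846', 'A'), Book('9780316037846', 'A again'),
             Book('9780141439600', 'B')]
    assert len(dedup_by_isbn(books)) == 2
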
From 0b0619916aa676822bec1cd3228fe67bf794a552 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Sun, 19 Dec 2010 11:15:04 +0100
Subject: [PATCH 070/163] Amazon bug fix + fetch error handling
---
src/calibre/ebooks/metadata/amazon.py | 13 +++++++++++++
src/calibre/ebooks/metadata/fetch.py | 4 ++--
src/calibre/ebooks/metadata/isbndb.py | 3 ++-
3 files changed, 17 insertions(+), 3 deletions(-)
diff --git a/src/calibre/ebooks/metadata/amazon.py b/src/calibre/ebooks/metadata/amazon.py
index 6eb106c862..c617a2beaf 100644
--- a/src/calibre/ebooks/metadata/amazon.py
+++ b/src/calibre/ebooks/metadata/amazon.py
@@ -121,6 +121,19 @@ def report(verbose):
class AmazonError(Exception):
pass
+class ThreadwithResults(Thread):
+ def __init__(self, func, *args, **kargs):
+ self.func = func
+ self.args = args
+ self.kargs = kargs
+ self.result = None
+ Thread.__init__(self)
+
+ def get_result(self):
+ return self.result
+
+ def run(self):
+ self.result = self.func(*self.args, **self.kargs)
class Query(object):
diff --git a/src/calibre/ebooks/metadata/fetch.py b/src/calibre/ebooks/metadata/fetch.py
index a7709f88b4..dbf0db7bfe 100644
--- a/src/calibre/ebooks/metadata/fetch.py
+++ b/src/calibre/ebooks/metadata/fetch.py
@@ -299,8 +299,8 @@ def search(title=None, author=None, publisher=None, isbn=None, isbndb_key=None,
with MetadataSources(fetchers) as manager:
manager(title, author, publisher, isbn, verbose)
manager.join()
-
- results = list(fetchers[0].results) if fetchers else []
+
+ results = list(fetchers[0].results) if fetchers[0].results else []
for fetcher in fetchers[1:]:
merge_results(results, fetcher.results)
diff --git a/src/calibre/ebooks/metadata/isbndb.py b/src/calibre/ebooks/metadata/isbndb.py
index b1a69e37c0..787d70eb51 100644
--- a/src/calibre/ebooks/metadata/isbndb.py
+++ b/src/calibre/ebooks/metadata/isbndb.py
@@ -25,7 +25,7 @@ class ISBNDB(MetadataSource):
def fetch(self):
if not self.site_customization:
- return
+ return None
try:
self.results = search(self.title, self.book_author, self.publisher, self.isbn,
max_results=10, verbose=self.verbose, key=self.site_customization)
@@ -231,6 +231,7 @@ def search(title=None, author=None, publisher=None, isbn=None,
#List of entry
ans = ResultList()
ans.populate(entries, verbose)
+ ans = [x for x in ans if x is not None]
return list(dict((book.isbn, book) for book in ans).values())
def option_parser():
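
A minimal usage sketch for the ThreadwithResults helper added to amazon.py above: it is a Thread that remembers the wrapped callable's return value so the caller can join() and then read it. The fetch_one worker below is hypothetical, not a calibre function.

    from threading import Thread

    class ThreadwithResults(Thread):
        def __init__(self, func, *args, **kargs):
            self.func = func
            self.args = args
            self.kargs = kargs
            self.result = None
            Thread.__init__(self)

        def get_result(self):
            return self.result

        def run(self):
            self.result = self.func(*self.args, **self.kargs)

    def fetch_one(query, timeout=10.0):   # hypothetical worker
        return 'results for %r (timeout=%s)' % (query, timeout)

    t = ThreadwithResults(fetch_one, 'dune', timeout=5.0)
    t.start()
    t.join()                 # wait for the worker to finish
    answer = t.get_result()  # the return value captured by run()
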
From 3c60c677158ece7422696d82af857a7aceb844a8 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Sun, 19 Dec 2010 15:32:30 +0100
Subject: [PATCH 071/163] Wrong copy-paste
---
src/calibre/ebooks/metadata/amazon.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/calibre/ebooks/metadata/amazon.py b/src/calibre/ebooks/metadata/amazon.py
index c617a2beaf..941c80ac62 100644
--- a/src/calibre/ebooks/metadata/amazon.py
+++ b/src/calibre/ebooks/metadata/amazon.py
@@ -225,8 +225,8 @@ class Query(object):
attr = getattr(e, 'args', [None])
attr = attr if attr else [None]
if isinstance(attr[0], socket.timeout):
- raise NiceBooksError(_('Nicebooks timed out. Try again later.'))
- raise NiceBooksError(_('Nicebooks encountered an error.'))
+ raise AmazonError(_('Amazon timed out. Try again later.'))
+ raise AmazonError(_('Amazon encountered an error.'))
if '404 - ' in raw:
return
raw = xml_to_unicode(raw, strip_encoding_pats=True,
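
For context, a sketch of the error-translation pattern the corrected lines sit in: the underlying socket timeout usually arrives as the first element of the caught exception's args, which is how the code distinguishes a timeout from any other network failure. This is a simplified stand-alone version, not the exact calibre code.

    import socket

    class AmazonError(Exception):
        pass

    def translate_network_error(e):
        attr = getattr(e, 'args', [None])
        attr = attr if attr else [None]
        if isinstance(attr[0], socket.timeout):
            raise AmazonError('Amazon timed out. Try again later.')
        raise AmazonError('Amazon encountered an error.')
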
From 1cc42192a7b0ffa2ecca80faa19039dead70f28d Mon Sep 17 00:00:00 2001
From: Sengian
Date: Sun, 2 Jan 2011 21:55:01 +0100
Subject: [PATCH 072/163] ...
---
src/calibre/ebooks/metadata/amazon.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/calibre/ebooks/metadata/amazon.py b/src/calibre/ebooks/metadata/amazon.py
index 941c80ac62..cc7a4c9d34 100644
--- a/src/calibre/ebooks/metadata/amazon.py
+++ b/src/calibre/ebooks/metadata/amazon.py
@@ -650,4 +650,4 @@ if __name__ == '__main__':
# sys.exit(cProfile.run("import calibre.ebooks.metadata.amazonbis; calibre.ebooks.metadata.amazonbis.main()"))
# sys.exit(cProfile.run("import calibre.ebooks.metadata.amazonbis; calibre.ebooks.metadata.amazonbis.main()", "profile"))
-# calibre-debug -e "H:\Mes eBooks\Developpement\calibre\src\calibre\ebooks\metadata\amazon.py" -m 5 -a gore -v>data.html
\ No newline at end of file
+# calibre-debug -e "D:\Mes eBooks\Developpement\calibre\src\calibre\ebooks\metadata\amazon.py" -m 5 -a gore -v>data.html
\ No newline at end of file
From 6391251cb782e038b8c71e9276cb449c2ff2fec5 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Sun, 2 Jan 2011 22:16:32 +0100
Subject: [PATCH 073/163] BIB catalog now supports custom fields
---
src/calibre/gui2/catalog/catalog_bibtex.py | 14 +++++++++-----
1 file changed, 9 insertions(+), 5 deletions(-)
diff --git a/src/calibre/gui2/catalog/catalog_bibtex.py b/src/calibre/gui2/catalog/catalog_bibtex.py
index 5030cf6ec8..f66b63bd58 100644
--- a/src/calibre/gui2/catalog/catalog_bibtex.py
+++ b/src/calibre/gui2/catalog/catalog_bibtex.py
@@ -9,6 +9,7 @@ __docformat__ = 'restructuredtext en'
from calibre.gui2 import gprefs
from calibre.gui2.catalog.catalog_bibtex_ui import Ui_Form
+from calibre.library import db as db_
from PyQt4.Qt import QWidget, QListWidgetItem
class PluginWidget(QWidget, Ui_Form):
@@ -28,11 +29,14 @@ class PluginWidget(QWidget, Ui_Form):
QWidget.__init__(self, parent)
self.setupUi(self)
from calibre.library.catalog import FIELDS
- self.all_fields = []
- for x in FIELDS :
- if x != 'all':
- self.all_fields.append(x)
- QListWidgetItem(x, self.db_fields)
+
+ self.all_fields = [x for x in FIELDS if x != 'all']
+ #add custom columns
+ db = db_()
+ self.all_fields.extend([x for x in sorted(db.custom_field_keys())])
+ #populate
+ for x in self.all_fields:
+ QListWidgetItem(x, self.db_fields)
def initialize(self, name, db): #not working properly to update
self.name = name
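
A small sketch of what the widget change above does with fields: the standard FIELDS list (minus 'all') is extended with the library's custom column lookup names. FakeDB stands in for the calibre database handle returned by calibre.library.db().

    FIELDS = ['all', 'title', 'authors', 'publisher', 'isbn']   # stand-in for calibre's FIELDS

    class FakeDB(object):
        def custom_field_keys(self):
            return ['#genre', '#read']

    db = FakeDB()
    all_fields = [x for x in FIELDS if x != 'all']
    all_fields.extend(sorted(db.custom_field_keys()))   # custom columns go last, sorted
    assert all_fields == ['title', 'authors', 'publisher', 'isbn', '#genre', '#read']
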
From 24f24109603649f96985e55c9cb812e6b5d98fc2 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Mon, 3 Jan 2011 22:17:31 +0100
Subject: [PATCH 074/163] fetch: add translations
---
src/calibre/ebooks/metadata/fetch.py | 22 +++++++++++-----------
1 file changed, 11 insertions(+), 11 deletions(-)
diff --git a/src/calibre/ebooks/metadata/fetch.py b/src/calibre/ebooks/metadata/fetch.py
index dbf0db7bfe..2adde5d6a3 100644
--- a/src/calibre/ebooks/metadata/fetch.py
+++ b/src/calibre/ebooks/metadata/fetch.py
@@ -168,7 +168,7 @@ class MetadataSource(Plugin): # {{{
customize_plugin(self, sc)
def customization_help(self):
- return 'This plugin can only be customized using the GUI'
+ return _('This plugin can only be customized using the GUI')
# }}}
@@ -433,7 +433,7 @@ def get_social_metadata(mi, verbose=0):
def option_parser():
- parser = OptionParser(textwrap.dedent(
+ parser = OptionParser(textwrap.dedent(_(
'''\
%prog [options]
@@ -441,19 +441,19 @@ def option_parser():
of title, author, publisher or ISBN. If you specify ISBN, the others
are ignored.
'''
- ))
- parser.add_option('-t', '--title', help='Book title')
- parser.add_option('-a', '--author', help='Book author(s)')
- parser.add_option('-p', '--publisher', help='Book publisher')
- parser.add_option('-i', '--isbn', help='Book ISBN')
+ )))
+ parser.add_option('-t', '--title', help=_('Book title'))
+ parser.add_option('-a', '--author', help=_('Book author(s)'))
+ parser.add_option('-p', '--publisher', help=_('Book publisher'))
+ parser.add_option('-i', '--isbn', help=_('Book ISBN'))
parser.add_option('-m', '--max-results', default=10,
- help='Maximum number of results to fetch')
+ help=_('Maximum number of results to fetch'))
parser.add_option('-k', '--isbndb-key',
- help=('The access key for your ISBNDB.com account. '
+ help=_('The access key for your ISBNDB.com account. '
'Only needed if you want to search isbndb.com '
'and you haven\'t customized the IsbnDB plugin.'))
parser.add_option('-v', '--verbose', default=0, action='count',
- help='Be more verbose about errors')
+ help=_('Be more verbose about errors'))
return parser
def main(args=sys.argv):
@@ -469,7 +469,7 @@ def main(args=sys.argv):
for name, exception, tb in exceptions+social_exceptions:
if exception is not None:
- print 'WARNING: Fetching from', name, 'failed with error:'
+ print _('WARNING: Fetching from %s failed with error:') % (name)
print exception
print tb
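
A compact sketch of the translation pattern this patch applies everywhere: user-visible strings are wrapped in _() so gettext can substitute a translation, including the dedented usage text passed to OptionParser. Here gettext.gettext stands in for the _ that calibre installs globally.

    import gettext, textwrap
    from optparse import OptionParser

    _ = gettext.gettext   # stand-in for calibre's globally installed _

    def option_parser():
        parser = OptionParser(textwrap.dedent(_('''\
            %prog [options]

            Fetch book metadata from the command line.
            ''')))
        parser.add_option('-t', '--title', help=_('Book title'))
        parser.add_option('-v', '--verbose', default=0, action='count',
                          help=_('Be more verbose about errors'))
        return parser
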
From 61f8f592a81eb8d031905b7ac7dab3c6a5eaa2f4 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Wed, 5 Jan 2011 01:00:36 +0100
Subject: [PATCH 075/163] ...
---
src/calibre/ebooks/rtf2xml/ParseRtf.py | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/src/calibre/ebooks/rtf2xml/ParseRtf.py b/src/calibre/ebooks/rtf2xml/ParseRtf.py
index 4b0bb41d42..33dc585579 100755
--- a/src/calibre/ebooks/rtf2xml/ParseRtf.py
+++ b/src/calibre/ebooks/rtf2xml/ParseRtf.py
@@ -193,6 +193,14 @@ class ParseRtf:
copy_obj.set_dir(self.__debug_dir)
copy_obj.remove_files()
copy_obj.copy_file(self.__temp_file, "original_file")
+ #Check to see if the file is correct ascii
+ check_encoding_obj = check_encoding.CheckEncoding(
+ bug_handler = RtfInvalidCodeException,
+ )
+ if check_encoding_obj.check_encoding(self.__file):
+ file_name = self.__file if isinstance(self.__file, str) else self.__file.encode('utf-8')
+ msg = _('File %s does not appear to be ascii.\n') % file_name
+ raise InvalidRtfException, msg
# Function to check if bracket are well handled
if self.__debug_dir or self.__run_level > 2:
self.__check_brack_obj = check_brackets.CheckBrackets\
@@ -230,13 +238,6 @@ class ParseRtf:
os.remove(self.__temp_file)
except OSError:
pass
- #Check to see if the file is correct ascii
- check_encoding_obj = check_encoding.CheckEncoding(
- bug_handler = RtfInvalidCodeException,
- )
- if check_encoding_obj.check_encoding(self.__file):
- sys.stderr.write(_('File "%s" does not appear to be ascii.\n') \
- % self.__file if isinstance(self.__file, str) else self.__file.encode('utf-8'))
raise InvalidRtfException, msg
delete_info_obj = delete_info.DeleteInfo(
in_file = self.__temp_file,
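
A stand-alone sketch of the check this hunk moves: the encoding test passes when every line of the file decodes with the given codec, and the caller raises InvalidRtfException when it does not. This is a simplified re-statement, not calibre's CheckEncoding class.

    def file_fails_to_decode(path, encoding='us-ascii'):
        # True means the file could NOT be decoded with the given codec.
        with open(path, 'rb') as f:
            for line in f:
                try:
                    line.decode(encoding)
                except UnicodeDecodeError:
                    return True
        return False
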
From 66b1713e8040648381e56031ab5b486d1ae908d8 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Wed, 5 Jan 2011 08:07:10 +0100
Subject: [PATCH 076/163] Fix regression breaking handling of sub and sup in RTF
input
---
resources/templates/rtf.xsl | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/resources/templates/rtf.xsl b/resources/templates/rtf.xsl
index ea1fc71172..6db1c0388d 100644
--- a/resources/templates/rtf.xsl
+++ b/resources/templates/rtf.xsl
@@ -287,7 +287,7 @@
]
-
+
@@ -297,7 +297,7 @@
-
+
From b857f8608f12c29557d6c42e2be3a908f9338e54 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Wed, 5 Jan 2011 08:09:28 +0100
Subject: [PATCH 077/163] Add debugging options for RTF input.py
---
src/calibre/ebooks/rtf/input.py | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py
index fdd501495b..19f944bbb5 100644
--- a/src/calibre/ebooks/rtf/input.py
+++ b/src/calibre/ebooks/rtf/input.py
@@ -115,6 +115,10 @@ class RTFInput(InputFormatPlugin):
# Write or do not write paragraphs. Default is 0.
empty_paragraphs = 1,
+
+ #debug
+ # deb_dir = "D:\\Mes eBooks\\Developpement\\debug\\rtfdebug",
+ # run_level = 3
)
parser.parse_rtf()
ans = open('out.xml').read()
@@ -256,9 +260,8 @@ class RTFInput(InputFormatPlugin):
raise ValueError(_('This RTF file has a feature calibre does not '
'support. Convert it to HTML first and then try it.\n%s')%e)
- '''dataxml = open('dataxml.xml', 'w')
- dataxml.write(xml)
- dataxml.close'''
+ # with open('dataxml.xml', 'w') as dataxml:
+ # dataxml.write(xml)
d = glob.glob(os.path.join('*_rtf_pict_dir', 'picts.rtf'))
if d:
From 5784256e022b700adfeaa99959389aab96868e5b Mon Sep 17 00:00:00 2001
From: Sengian
Date: Wed, 5 Jan 2011 22:39:26 +0100
Subject: [PATCH 078/163] Check that tokens are correctly ASCII-encoded
---
src/calibre/ebooks/rtf2xml/process_tokens.py | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/src/calibre/ebooks/rtf2xml/process_tokens.py b/src/calibre/ebooks/rtf2xml/process_tokens.py
index 13ce495e67..2c603ea28d 100755
--- a/src/calibre/ebooks/rtf2xml/process_tokens.py
+++ b/src/calibre/ebooks/rtf2xml/process_tokens.py
@@ -779,12 +779,16 @@ class ProcessTokens:
msg =_('Invalid RTF: document doesn\'t start with \\rtf \n')
raise self.__exception_handler, msg
- ##token = self.evaluate_token(token)
the_index = token.find('\\ ')
if token is not None and the_index > -1:
- msg ='Invalid RTF: token "\\ " not valid.\n'
+ msg =_('Invalid RTF: token "\\ " not valid.\n')
raise self.__exception_handler, msg
elif token[:1] == "\\":
+ try:
+ token.decode('us-ascii')
+ except UnicodeError, msg:
+ msg = _('Invalid RTF: Tokens not ascii encoded.\n%s') % str(msg)
+ raise self.__exception_handler, msg
line = self.process_cw(token)
if line is not None:
write_obj.write(line)
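
A tiny sketch of the added token check: a control word must decode as US-ASCII, otherwise the tokenizer treats the RTF as invalid. Tokens are byte strings in the Python 2 pipeline; the same check works on bytes objects elsewhere.

    def token_is_ascii(token):
        try:
            token.decode('us-ascii')
            return True
        except UnicodeError:
            return False

    assert token_is_ascii(b'\\ansicpg1252')
    assert not token_is_ascii(b'\\caf\xe9')   # non-ASCII byte -> invalid
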
From bb50018eb35e367d4da05bf3b29d43d2ca2bdc95 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Wed, 5 Jan 2011 23:47:14 +0100
Subject: [PATCH 079/163] Clean default encoding
---
.../ebooks/rtf2xml/default_encoding.py | 153 ++++++++++++------
src/calibre/ebooks/rtf2xml/process_tokens.py | 6 +-
2 files changed, 109 insertions(+), 50 deletions(-)
diff --git a/src/calibre/ebooks/rtf2xml/default_encoding.py b/src/calibre/ebooks/rtf2xml/default_encoding.py
index b932b465d0..0268c29f75 100755
--- a/src/calibre/ebooks/rtf2xml/default_encoding.py
+++ b/src/calibre/ebooks/rtf2xml/default_encoding.py
@@ -1,61 +1,118 @@
#########################################################################
# #
-# #
# copyright 2002 Paul Henry Tremblay #
# #
-# This program is distributed in the hope that it will be useful, #
-# but WITHOUT ANY WARRANTY; without even the implied warranty of #
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
-# General Public License for more details. #
-# #
-# You should have received a copy of the GNU General Public License #
-# along with this program; if not, write to the Free Software #
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA #
-# 02111-1307 USA #
-# #
-# #
#########################################################################
+
+'''
+Codepages as to RTF 1.9.1:
+ 437 United States IBM
+ 708 Arabic (ASMO 708)
+ 709 Arabic (ASMO 449+, BCON V4)
+ 710 Arabic (transparent Arabic)
+ 711 Arabic (Nafitha Enhanced)
+ 720 Arabic (transparent ASMO)
+ 819 Windows 3.1 (United States and Western Europe)
+ 850 IBM multilingual
+ 852 Eastern European
+ 860 Portuguese
+ 862 Hebrew
+ 863 French Canadian
+ 864 Arabic
+ 865 Norwegian
+ 866 Soviet Union
+ 874 Thai
+ 932 Japanese
+ 936 Simplified Chinese
+ 949 Korean
+ 950 Traditional Chinese
+ 1250 Eastern European
+ 1251 Cyrillic
+ 1252 Western European
+ 1253 Greek
+ 1254 Turkish
+ 1255 Hebrew
+ 1256 Arabic
+ 1257 Baltic
+ 1258 Vietnamese
+ 1361 Johab
+ 10000 MAC Roman
+ 10001 MAC Japan
+ 10004 MAC Arabic
+ 10005 MAC Hebrew
+ 10006 MAC Greek
+ 10007 MAC Cyrillic
+ 10029 MAC Latin2
+ 10081 MAC Turkish
+ 57002 Devanagari
+ 57003 Bengali
+ 57004 Tamil
+ 57005 Telugu
+ 57006 Assamese
+ 57007 Oriya
+ 57008 Kannada
+ 57009 Malayalam
+ 57010 Gujarati
+ 57011 Punjabi
+'''
+
class DefaultEncoding:
"""
Find the default encoding for the doc
"""
def __init__(self, in_file, bug_handler, run_level = 1,):
- """
- Required:
- 'file'
- Returns:
- nothing
- """
self.__file = in_file
self.__bug_handler = bug_handler
+ self.__platform = 'Windows'
+ self.__default_num = 'not-defined'
+ self.__code_page = '1252'
+ self.__datafetched = False
+
def find_default_encoding(self):
- platform = 'Windows'
- default_num = 'not-defined'
- code_page = 'ansicpg1252'
- read_obj = open(self.__file, 'r')
- line_to_read = 1
- while line_to_read:
- line_to_read = read_obj.readline()
- line = line_to_read
- self.__token_info = line[:16]
- if self.__token_info == 'mi nt
'footnote' : ('nt', 'footnote__', self.default_func),
'ftnalt' : ('nt', 'type______ an
- 'tc' : ('an', 'toc_______', self.default_func),
+ 'tc' : ('an', 'toc_______', self.default_func),
'bkmkstt' : ('an', 'book-mk-st', self.default_func),
- 'bkmkstart' : ('an', 'book-mk-st', self.default_func),
+ 'bkmkstart' : ('an', 'book-mk-st', self.default_func),
'bkmkend' : ('an', 'book-mk-en', self.default_func),
'xe' : ('an', 'index-mark', self.default_func),
'rxe' : ('an', 'place_____', self.default_func),
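
The hunk above is truncated in this copy, but the intent of the DefaultEncoding rewrite is to read the declared code page out of the RTF header instead of hard-coding ansicpg1252. A rough stand-alone sketch of that idea (the real class also tracks the platform and default font number):

    import re

    def find_default_codepage(rtf_bytes):
        head = rtf_bytes[:4096]                      # the declarations sit in the header
        m = re.search(br'\\ansicpg(\d+)', head)
        if m is not None:
            return m.group(1).decode('ascii')
        if b'\\mac' in head:                         # crude platform hint
            return 'mac_roman'
        return '1252'                                # RTF default

    assert find_default_codepage(b'{\\rtf1\\ansi\\ansicpg1252\\deff0') == '1252'
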
From d0655c4d9abe48185831d4eb7eb9dccfb8b88488 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Wed, 5 Jan 2011 23:57:34 +0100
Subject: [PATCH 080/163] ...
---
src/calibre/ebooks/rtf2xml/default_encoding.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/calibre/ebooks/rtf2xml/default_encoding.py b/src/calibre/ebooks/rtf2xml/default_encoding.py
index 0268c29f75..f89f54ada8 100755
--- a/src/calibre/ebooks/rtf2xml/default_encoding.py
+++ b/src/calibre/ebooks/rtf2xml/default_encoding.py
@@ -72,7 +72,7 @@ class DefaultEncoding:
if not self.__datafetched:
self._encoding()
self.__datafetched = True
- if self.__platform = 'Macintosh':
+ if self.__platform == 'Macintosh':
code_page = self.__code_page
else
code_page = 'ansicpg' + self.__code_page
@@ -111,8 +111,8 @@ class DefaultEncoding:
#cw
Date: Thu, 6 Jan 2011 00:01:24 +0100
Subject: [PATCH 081/163] Move the encoding check
---
src/calibre/ebooks/rtf2xml/ParseRtf.py | 24 +++++++++++--------
.../ebooks/rtf2xml/default_encoding.py | 4 ++--
2 files changed, 16 insertions(+), 12 deletions(-)
diff --git a/src/calibre/ebooks/rtf2xml/ParseRtf.py b/src/calibre/ebooks/rtf2xml/ParseRtf.py
index 33dc585579..fdd17e3f78 100755
--- a/src/calibre/ebooks/rtf2xml/ParseRtf.py
+++ b/src/calibre/ebooks/rtf2xml/ParseRtf.py
@@ -193,21 +193,13 @@ class ParseRtf:
copy_obj.set_dir(self.__debug_dir)
copy_obj.remove_files()
copy_obj.copy_file(self.__temp_file, "original_file")
- #Check to see if the file is correct ascii
- check_encoding_obj = check_encoding.CheckEncoding(
- bug_handler = RtfInvalidCodeException,
- )
- if check_encoding_obj.check_encoding(self.__file):
- file_name = self.__file if isinstance(self.__file, str) else self.__file.encode('utf-8')
- msg = _('File %s does not appear to be ascii.\n') % file_name
- raise InvalidRtfException, msg
# Function to check if bracket are well handled
if self.__debug_dir or self.__run_level > 2:
self.__check_brack_obj = check_brackets.CheckBrackets\
(file = self.__temp_file,
bug_handler = RtfInvalidCodeException,
)
- # convert Macintosh and Windows line endings to Unix line endings
+ #convert Macintosh and Windows line endings to Unix line endings
#why do this if you don't wb after?
line_obj = line_endings.FixLineEndings(
in_file = self.__temp_file,
@@ -238,7 +230,19 @@ class ParseRtf:
os.remove(self.__temp_file)
except OSError:
pass
- raise InvalidRtfException, msg
+ #Check to see if the file is correctly encoded
+ check_encoding_obj = check_encoding.CheckEncoding(
+ bug_handler = RtfInvalidCodeException,
+ )
+ if check_encoding_obj.check_encoding(self.__file, 'cp1252') and \
+ check_encoding_obj.check_encoding(self.__file, 'cp437') and \
+ check_encoding_obj.check_encoding(self.__file, 'cp850') and \
+ check_encoding_obj.check_encoding(self.__file, 'mac_roman'):
+ file_name = self.__file if isinstance(self.__file, str) \
+ else self.__file.encode('utf-8')
+ msg = _('File %s does not appear to be correctly encoded.\n') % file_name
+ raise InvalidRtfException, msg
+
delete_info_obj = delete_info.DeleteInfo(
in_file = self.__temp_file,
copy = self.__copy,
diff --git a/src/calibre/ebooks/rtf2xml/default_encoding.py b/src/calibre/ebooks/rtf2xml/default_encoding.py
index f89f54ada8..a5c2ab9561 100755
--- a/src/calibre/ebooks/rtf2xml/default_encoding.py
+++ b/src/calibre/ebooks/rtf2xml/default_encoding.py
@@ -74,9 +74,9 @@ class DefaultEncoding:
self.__datafetched = True
if self.__platform == 'Macintosh':
code_page = self.__code_page
- else
+ else:
code_page = 'ansicpg' + self.__code_page
- return platform, code_page, self.__default_num
+ return self.__platform, code_page, self.__default_num
def get_codepage(self):
if not self.__datafetched:
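
A sketch of how the detected code page feeds the verification step introduced here: map the number onto a Python codec name (mac_roman is the one non-numeric case) and check that the file actually decodes with it. Simplified and stand-alone; the real code goes through CheckEncoding.

    def codepage_to_codec(code_page):
        return code_page if code_page == 'mac_roman' else 'cp' + code_page

    def looks_correctly_encoded(path, code_page):
        codec = codepage_to_codec(code_page)       # e.g. '1252' -> 'cp1252'
        with open(path, 'rb') as f:
            try:
                f.read().decode(codec)
                return True
            except (UnicodeDecodeError, LookupError):
                return False
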
From 9e31d706693e4875ea36f80601b015e812d4a862 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Thu, 6 Jan 2011 09:00:49 +0100
Subject: [PATCH 082/163] Activate RTF debug
---
src/calibre/ebooks/rtf/input.py | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py
index 19f944bbb5..05c851a075 100644
--- a/src/calibre/ebooks/rtf/input.py
+++ b/src/calibre/ebooks/rtf/input.py
@@ -117,8 +117,8 @@ class RTFInput(InputFormatPlugin):
empty_paragraphs = 1,
#debug
- # deb_dir = "D:\\Mes eBooks\\Developpement\\debug\\rtfdebug",
- # run_level = 3
+ deb_dir = "D:\\Mes eBooks\\Developpement\\debug\\rtfdebug",
+ run_level = 3
)
parser.parse_rtf()
ans = open('out.xml').read()
@@ -260,8 +260,8 @@ class RTFInput(InputFormatPlugin):
raise ValueError(_('This RTF file has a feature calibre does not '
'support. Convert it to HTML first and then try it.\n%s')%e)
- # with open('dataxml.xml', 'w') as dataxml:
- # dataxml.write(xml)
+ with open('dataxml.xml', 'w') as dataxml:
+ dataxml.write(xml)
d = glob.glob(os.path.join('*_rtf_pict_dir', 'picts.rtf'))
if d:
From 7ecf2f1e9c974aac94e6dfc260e582f8746f5fe8 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Thu, 6 Jan 2011 09:01:13 +0100
Subject: [PATCH 083/163] Spelling fix
---
src/calibre/ebooks/rtf2xml/convert_to_tags.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/calibre/ebooks/rtf2xml/convert_to_tags.py b/src/calibre/ebooks/rtf2xml/convert_to_tags.py
index ab54c0cbc3..c2244b784a 100755
--- a/src/calibre/ebooks/rtf2xml/convert_to_tags.py
+++ b/src/calibre/ebooks/rtf2xml/convert_to_tags.py
@@ -88,7 +88,7 @@ class ConvertToTags:
def __open_att_func(self, line):
"""
Process lines for open tags that have attributes.
- The important infor is between [17:-1]. Take this info and split it
+ The important info is between [17:-1]. Take this info and split it
with the delimeter '<'. The first token in this group is the element
name. The rest are attributes, separated fromt their values by '>'. So
read each token one at a time, and split them by '>'.
From bbaecb400726cd2b19d6820bbc6ddf83a86fb7e3 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Thu, 6 Jan 2011 22:25:12 +0100
Subject: [PATCH 084/163] Allow the encoding check to look directly in the RTF &
 improve the code checking for invalid chars
---
src/calibre/ebooks/rtf2xml/ParseRtf.py | 17 +++--
.../ebooks/rtf2xml/default_encoding.py | 71 ++++++++++++-------
2 files changed, 59 insertions(+), 29 deletions(-)
diff --git a/src/calibre/ebooks/rtf2xml/ParseRtf.py b/src/calibre/ebooks/rtf2xml/ParseRtf.py
index fdd17e3f78..05a4847ce5 100755
--- a/src/calibre/ebooks/rtf2xml/ParseRtf.py
+++ b/src/calibre/ebooks/rtf2xml/ParseRtf.py
@@ -230,14 +230,21 @@ class ParseRtf:
os.remove(self.__temp_file)
except OSError:
pass
- #Check to see if the file is correctly encoded
+ #Check to see if the file is correctly encoded
+ encode_obj = default_encoding.DefaultEncoding(
+ in_file = self.__temp_file,
+ run_level = self.__run_level,
+ bug_handler = RtfInvalidCodeException,
+ check_raw = True,
+ )
+ platform, code_page, default_font_num = encode_obj.find_default_encoding()
check_encoding_obj = check_encoding.CheckEncoding(
bug_handler = RtfInvalidCodeException,
)
- if check_encoding_obj.check_encoding(self.__file, 'cp1252') and \
- check_encoding_obj.check_encoding(self.__file, 'cp437') and \
- check_encoding_obj.check_encoding(self.__file, 'cp850') and \
- check_encoding_obj.check_encoding(self.__file, 'mac_roman'):
+ enc = encode_obj.get_codepage()
+ if enc != 'mac_roman':
+ enc = 'cp' + enc
+ if check_encoding_obj.check_encoding(self.__file, enc):
file_name = self.__file if isinstance(self.__file, str) \
else self.__file.encode('utf-8')
msg = _('File %s does not appear to be correctly encoded.\n') % file_name
diff --git a/src/calibre/ebooks/rtf2xml/default_encoding.py b/src/calibre/ebooks/rtf2xml/default_encoding.py
index a5c2ab9561..a4eeac9663 100755
--- a/src/calibre/ebooks/rtf2xml/default_encoding.py
+++ b/src/calibre/ebooks/rtf2xml/default_encoding.py
@@ -55,18 +55,20 @@ Codepages as to RTF 1.9.1:
57010 Gujarati
57011 Punjabi
'''
+import re
class DefaultEncoding:
"""
Find the default encoding for the doc
"""
- def __init__(self, in_file, bug_handler, run_level = 1,):
+ def __init__(self, in_file, bug_handler, run_level = 1, check_raw = False):
self.__file = in_file
self.__bug_handler = bug_handler
self.__platform = 'Windows'
self.__default_num = 'not-defined'
self.__code_page = '1252'
self.__datafetched = False
+ self.__fetchraw = check_raw
def find_default_encoding(self):
if not self.__datafetched:
@@ -92,27 +94,48 @@ class DefaultEncoding:
def _encoding(self):
with open(self.__file, 'r') as read_obj:
- for line in read_obj:
- self.__token_info = line[:16]
- if self.__token_info == 'mi
Date: Thu, 6 Jan 2011 22:53:38 +0100
Subject: [PATCH 085/163] Update get_char_map
---
src/calibre/ebooks/rtf2xml/get_char_map.py | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/src/calibre/ebooks/rtf2xml/get_char_map.py b/src/calibre/ebooks/rtf2xml/get_char_map.py
index db307b19d6..18e27b2fe7 100755
--- a/src/calibre/ebooks/rtf2xml/get_char_map.py
+++ b/src/calibre/ebooks/rtf2xml/get_char_map.py
@@ -43,16 +43,16 @@ class GetCharMap:
def get_char_map(self, map):
if map == 'ansicpg0':
map = 'ansicpg1250'
- found_map = 0
+ found_map = False
map_dict = {}
self.__char_file.seek(0)
- for line in self.__char_file.readlines():
+ for line in self.__char_file:
if not line.strip(): continue
begin_element = '<%s>' % map;
end_element = '%s>' % map
if not found_map:
if begin_element in line:
- found_map = 1
+ found_map = True
else:
if end_element in line:
break
@@ -62,8 +62,7 @@ class GetCharMap:
if not found_map:
- msg = 'no map found\n'
- msg += 'map is "%s"\n'%(map,)
+ msg = _('no map found\nmap is "%s"\n') %(map,)
raise self.__bug_handler, msg
return map_dict
From b2187360ecec9ddab30e79c48a26f340d1a12911 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Fri, 7 Jan 2011 07:36:20 +0100
Subject: [PATCH 086/163] Various little modifications in rtf2xml
---
src/calibre/ebooks/rtf2xml/ParseRtf.py | 3 +-
src/calibre/ebooks/rtf2xml/hex_2_utf8.py | 98 +++++++++++---------
src/calibre/ebooks/rtf2xml/process_tokens.py | 4 +-
3 files changed, 59 insertions(+), 46 deletions(-)
diff --git a/src/calibre/ebooks/rtf2xml/ParseRtf.py b/src/calibre/ebooks/rtf2xml/ParseRtf.py
index 05a4847ce5..901188a000 100755
--- a/src/calibre/ebooks/rtf2xml/ParseRtf.py
+++ b/src/calibre/ebooks/rtf2xml/ParseRtf.py
@@ -326,6 +326,7 @@ class ParseRtf:
invalid_rtf_handler = InvalidRtfException,
)
hex2utf_obj.convert_hex_2_utf8()
+ # raise RtfInvalidCodeException, 'stop'
self.__bracket_match('hex_2_utf_preamble')
fonts_obj = fonts.Fonts(
in_file = self.__temp_file,
@@ -381,7 +382,7 @@ class ParseRtf:
msg += 'self.__run_level is "%s"\n' % self.__run_level
raise RtfInvalidCodeException, msg
if self.__run_level > 1:
- sys.stderr.write('File could be older RTF...\n')
+ sys.stderr.write(_('File could be older RTF...\n'))
if found_destination:
if self.__run_level > 1:
sys.stderr.write(_(
diff --git a/src/calibre/ebooks/rtf2xml/hex_2_utf8.py b/src/calibre/ebooks/rtf2xml/hex_2_utf8.py
index d67dce30d2..750d0c9180 100755
--- a/src/calibre/ebooks/rtf2xml/hex_2_utf8.py
+++ b/src/calibre/ebooks/rtf2xml/hex_2_utf8.py
@@ -54,10 +54,10 @@ class Hex2Utf8:
'convert_to_caps'--wether to convert caps to utf-8
Returns:
nothing
- """
+ """
self.__file = in_file
self.__copy = copy
- if area_to_convert != 'preamble' and area_to_convert != 'body':
+ if area_to_convert not in ('preamble', 'body'):
msg = (
'Developer error! Wrong flag.\n'
'in module "hex_2_utf8.py\n'
@@ -79,7 +79,8 @@ class Hex2Utf8:
self.__write_to = tempfile.mktemp()
self.__bug_handler = bug_handler
self.__invalid_rtf_handler = invalid_rtf_handler
- def update_values( self,
+
+ def update_values(self,
file,
area_to_convert,
char_file,
@@ -132,6 +133,7 @@ class Hex2Utf8:
# self.__convert_symbol = 0
# self.__convert_wingdings = 0
# self.__convert_zapf = 0
+
def __initiate_values(self):
"""
Required:
@@ -191,6 +193,7 @@ class Hex2Utf8:
'body' : self.__body_func,
'mi
Date: Fri, 7 Jan 2011 08:07:39 +0100
Subject: [PATCH 087/163] Handle non-ASCII charsets in RTF if declared as a
codepage
---
src/calibre/ebooks/rtf2xml/ParseRtf.py | 2 +-
src/calibre/ebooks/rtf2xml/check_encoding.py | 1 +
src/calibre/ebooks/rtf2xml/convert_to_tags.py | 50 ++++++++++++++-----
.../ebooks/rtf2xml/default_encoding.py | 3 +-
4 files changed, 41 insertions(+), 15 deletions(-)
diff --git a/src/calibre/ebooks/rtf2xml/ParseRtf.py b/src/calibre/ebooks/rtf2xml/ParseRtf.py
index 901188a000..f9036989b0 100755
--- a/src/calibre/ebooks/rtf2xml/ParseRtf.py
+++ b/src/calibre/ebooks/rtf2xml/ParseRtf.py
@@ -326,7 +326,6 @@ class ParseRtf:
invalid_rtf_handler = InvalidRtfException,
)
hex2utf_obj.convert_hex_2_utf8()
- # raise RtfInvalidCodeException, 'stop'
self.__bracket_match('hex_2_utf_preamble')
fonts_obj = fonts.Fonts(
in_file = self.__temp_file,
@@ -523,6 +522,7 @@ class ParseRtf:
indent = self.__indent,
run_level = self.__run_level,
no_dtd = self.__no_dtd,
+ encoding = encode_obj.get_codepage(),
bug_handler = RtfInvalidCodeException,
)
tags_obj.convert_to_tags()
diff --git a/src/calibre/ebooks/rtf2xml/check_encoding.py b/src/calibre/ebooks/rtf2xml/check_encoding.py
index 4503cbf98a..ae512fa68a 100755
--- a/src/calibre/ebooks/rtf2xml/check_encoding.py
+++ b/src/calibre/ebooks/rtf2xml/check_encoding.py
@@ -1,5 +1,6 @@
#!/usr/bin/env python
import sys
+
class CheckEncoding:
def __init__(self, bug_handler):
diff --git a/src/calibre/ebooks/rtf2xml/convert_to_tags.py b/src/calibre/ebooks/rtf2xml/convert_to_tags.py
index c2244b784a..6563d2e982 100755
--- a/src/calibre/ebooks/rtf2xml/convert_to_tags.py
+++ b/src/calibre/ebooks/rtf2xml/convert_to_tags.py
@@ -1,6 +1,9 @@
import os, tempfile
-from calibre.ebooks.rtf2xml import copy
+
+from calibre.ebooks.rtf2xml import copy, check_encoding
+
public_dtd = 'rtf2xml1.0.dtd'
+
class ConvertToTags:
"""
Convert file to XML
@@ -10,6 +13,7 @@ class ConvertToTags:
bug_handler,
dtd_path,
no_dtd,
+ encoding,
indent = None,
copy = None,
run_level = 1,
@@ -29,9 +33,14 @@ class ConvertToTags:
self.__copy = copy
self.__dtd_path = dtd_path
self.__no_dtd = no_dtd
+ if encoding != 'mac_roman':
+ self.__encoding = 'cp' + encoding
+ else:
+ self.__encoding = 'mac_roman'
self.__indent = indent
self.__run_level = run_level
self.__write_to = tempfile.mktemp()
+
def __initiate_values(self):
"""
Set values, including those for the dictionary.
@@ -61,6 +70,7 @@ class ConvertToTags:
'tx' % info)
+
def __empty_func(self, line):
"""
Print out empty tag and newlines when needed.
@@ -85,6 +96,7 @@ class ConvertToTags:
self.__write_new_line()
if info in self.__two_new_line:
self.__write_extra_new_line()
+
def __open_att_func(self, line):
"""
Process lines for open tags that have attributes.
@@ -119,6 +131,7 @@ class ConvertToTags:
self.__write_new_line()
if element_name in self.__two_new_line:
self.__write_extra_new_line()
+
def __empty_att_func(self, line):
"""
Same as the __open_att_func, except a '/' is placed at the end of the tag.
@@ -143,6 +156,7 @@ class ConvertToTags:
self.__write_new_line()
if element_name in self.__two_new_line:
self.__write_extra_new_line()
+
def __close_func(self, line):
"""
Print out the closed tag and new lines, if appropriate.
@@ -156,6 +170,7 @@ class ConvertToTags:
self.__write_new_line()
if info in self.__two_new_line:
self.__write_extra_new_line()
+
def __text_func(self, line):
"""
Simply print out the information between [17:-1]
@@ -163,6 +178,7 @@ class ConvertToTags:
#tx')
+ #keep maximum compatibility with previous version
+ check_encoding_obj = check_encoding.CheckEncoding(
+ bug_handler = self.__bug_handler,
+ )
+ if not check_encoding_obj.check_encoding(self.__file):
+ self.__write_obj.write('')
+ elif not check_encoding_obj.check_encoding(self.__file, self.__encoding):
+ self.__write_obj.write('' % self.__encoding)
+ else:
+ self.__write_obj.write('')
+ sys.stderr.write(_('Bad RTF encoding, revert to US-ASCII chars and hope for the best'))
self.__new_line = 0
self.__write_new_line()
if self.__no_dtd:
@@ -207,6 +236,7 @@ class ConvertToTags:
)
self.__new_line = 0
self.__write_new_line()
+
def convert_to_tags(self):
"""
Read in the file one line at a time. Get the important info, between
@@ -222,18 +252,14 @@ class ConvertToTags:
an empty tag function.
"""
self.__initiate_values()
- read_obj = open(self.__file, 'r')
self.__write_obj = open(self.__write_to, 'w')
self.__write_dec()
- line_to_read = 1
- while line_to_read:
- line_to_read = read_obj.readline()
- line = line_to_read
- self.__token_info = line[:16]
- action = self.__state_dict.get(self.__token_info)
- if action != None:
- action(line)
- read_obj.close()
+ with open(self.__file, 'r') as read_obj:
+ for line in read_obj:
+ self.__token_info = line[:16]
+ action = self.__state_dict.get(self.__token_info)
+ if action is not None:
+ action(line)
self.__write_obj.close()
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy:
diff --git a/src/calibre/ebooks/rtf2xml/default_encoding.py b/src/calibre/ebooks/rtf2xml/default_encoding.py
index a4eeac9663..e145a8a75e 100755
--- a/src/calibre/ebooks/rtf2xml/default_encoding.py
+++ b/src/calibre/ebooks/rtf2xml/default_encoding.py
@@ -132,8 +132,7 @@ class DefaultEncoding:
self.__code_page = '850'
# if __name__ == '__main__':
- # from calibre.ebooks.rtf2xml import default_encoding
- # encode_obj = default_encoding.DefaultEncoding(
+ # encode_obj = DefaultEncoding(
# in_file = sys.argv[1],
# bug_handler = Exception,
# check_raw = True,
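
The literal strings in the __write_dec hunk above were lost in this copy of the patch, so the following is a reconstructed sketch of the declaration logic it describes: prefer a plain US-ASCII declaration, fall back to the detected code page, and as a last resort write US-ASCII anyway and warn. The declaration text itself is illustrative, not quoted from the patch.

    def write_declaration(write_obj, path, encoding, fails_to_decode):
        # fails_to_decode(path, codec) -> True when the file does not decode with codec
        if not fails_to_decode(path, 'us-ascii'):
            write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
        elif not fails_to_decode(path, encoding):
            write_obj.write('<?xml version="1.0" encoding="%s" ?>' % encoding)
        else:
            # bad encoding: revert to US-ASCII and hope for the best
            write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
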
From 24cb5514f08c8bf63d59b35f5fd980d126dd49b0 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Fri, 7 Jan 2011 08:15:59 +0100
Subject: [PATCH 088/163] ...
---
src/calibre/ebooks/rtf2xml/check_encoding.py | 11 ++++++-----
src/calibre/ebooks/rtf2xml/convert_to_tags.py | 2 +-
2 files changed, 7 insertions(+), 6 deletions(-)
diff --git a/src/calibre/ebooks/rtf2xml/check_encoding.py b/src/calibre/ebooks/rtf2xml/check_encoding.py
index ae512fa68a..7a7b842db6 100755
--- a/src/calibre/ebooks/rtf2xml/check_encoding.py
+++ b/src/calibre/ebooks/rtf2xml/check_encoding.py
@@ -16,7 +16,7 @@ class CheckEncoding:
sys.stderr.write(_('line: %s char: %s\n') % (line_num, char_position))
sys.stderr.write(str(msg) + '\n')
- def check_encoding(self, path, encoding='us-ascii'):
+ def check_encoding(self, path, encoding='us-ascii', verbose = True):
line_num = 0
with open(path, 'r') as read_obj:
for line in read_obj:
@@ -24,10 +24,11 @@ class CheckEncoding:
try:
line.decode(encoding)
except UnicodeError:
- if len(line) < 1000:
- self.__get_position_error(line, encoding, line_num)
- else:
- sys.stderr.write(_('line: %d has bad encoding\n') % line_num)
+ if verbose:
+ if len(line) < 1000:
+ self.__get_position_error(line, encoding, line_num)
+ else:
+ sys.stderr.write(_('line: %d has bad encoding\n') % line_num)
return True
return False
diff --git a/src/calibre/ebooks/rtf2xml/convert_to_tags.py b/src/calibre/ebooks/rtf2xml/convert_to_tags.py
index 6563d2e982..67689eb2d1 100755
--- a/src/calibre/ebooks/rtf2xml/convert_to_tags.py
+++ b/src/calibre/ebooks/rtf2xml/convert_to_tags.py
@@ -210,7 +210,7 @@ class ConvertToTags:
check_encoding_obj = check_encoding.CheckEncoding(
bug_handler = self.__bug_handler,
)
- if not check_encoding_obj.check_encoding(self.__file):
+ if not check_encoding_obj.check_encoding(self.__file, verbose = False):
self.__write_obj.write('')
elif not check_encoding_obj.check_encoding(self.__file, self.__encoding):
self.__write_obj.write('' % self.__encoding)
From 56bb15d6ff48a36bbe39660631278ab60c246721 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Fri, 7 Jan 2011 22:12:49 +0100
Subject: [PATCH 089/163] Various RTF minor changes
---
src/calibre/ebooks/rtf2xml/ParseRtf.py | 1 -
src/calibre/ebooks/rtf2xml/delete_info.py | 113 ++++++++++---------
src/calibre/ebooks/rtf2xml/process_tokens.py | 22 ++--
src/calibre/ebooks/rtf2xml/tokenize.py | 2 +-
4 files changed, 69 insertions(+), 69 deletions(-)
diff --git a/src/calibre/ebooks/rtf2xml/ParseRtf.py b/src/calibre/ebooks/rtf2xml/ParseRtf.py
index f9036989b0..e994513c68 100755
--- a/src/calibre/ebooks/rtf2xml/ParseRtf.py
+++ b/src/calibre/ebooks/rtf2xml/ParseRtf.py
@@ -249,7 +249,6 @@ class ParseRtf:
else self.__file.encode('utf-8')
msg = _('File %s does not appear to be correctly encoded.\n') % file_name
raise InvalidRtfException, msg
-
delete_info_obj = delete_info.DeleteInfo(
in_file = self.__temp_file,
copy = self.__copy,
diff --git a/src/calibre/ebooks/rtf2xml/delete_info.py b/src/calibre/ebooks/rtf2xml/delete_info.py
index f79caa3aae..3c93e028b8 100755
--- a/src/calibre/ebooks/rtf2xml/delete_info.py
+++ b/src/calibre/ebooks/rtf2xml/delete_info.py
@@ -16,7 +16,9 @@
# #
#########################################################################
import sys, os, tempfile
+
from calibre.ebooks.rtf2xml import copy
+
class DeleteInfo:
"""Delelet unecessary destination groups"""
def __init__(self,
@@ -29,17 +31,18 @@ class DeleteInfo:
self.__bug_handler = bug_handler
self.__copy = copy
self.__write_to = tempfile.mktemp()
- self.__bracket_count=0
+ self.__bracket_count= 0
self.__ob_count = 0
self.__cb_count = 0
- self.__after_asterisk = 0
+ self.__after_asterisk = False
self.__delete = 0
self.__initiate_allow()
self.__ob = 0
self.__write_cb = 0
self.__run_level = run_level
- self.__found_delete = 0
- self.__list = 0
+ self.__found_delete = False
+ self.__list = False
+
def __initiate_allow(self):
"""
Initiate a list of destination groups which should be printed out.
@@ -69,6 +72,7 @@ class DeleteInfo:
'delete' : self.__delete_func,
'list' : self.__list_func,
}
+
def __default_func(self,line):
"""Handle lines when in no special state. Look for an asterisk to
begin a special state. Otherwise, print out line."""
@@ -81,13 +85,14 @@ class DeleteInfo:
if self.__ob:
self.__write_obj.write(self.__ob)
self.__ob = line
- return 0
+ return False
else:
# write previous bracket, since didn't fine asterisk
if self.__ob:
self.__write_obj.write(self.__ob)
self.__ob = 0
- return 1
+ return True
+
def __delete_func(self,line):
"""Handle lines when in delete state. Don't print out lines
unless the state has ended."""
@@ -95,13 +100,14 @@ class DeleteInfo:
self.__state = 'default'
if self.__write_cb:
self.__write_cb = 0
- return 1
- return 0
+ return True
+ return False
+
def __asterisk_func(self,line):
"""
Determine whether to delete info in group
Note on self.__cb flag.
- If you find that you are in a delete group, and the preivous
+ If you find that you are in a delete group, and the previous
token in not an open bracket (self.__ob = 0), that means
that the delete group is nested inside another acceptable
detination group. In this case, you have alrady written
@@ -110,21 +116,21 @@ class DeleteInfo:
"""
# Test for {\*}, in which case don't enter
# delete state
- self.__after_asterisk = 0 # only enter this function once
- self.__found_delete = 1
+ self.__after_asterisk = False # only enter this function once
+ self.__found_delete = True
if self.__token_info == 'cb 3:
msg = 'flag problem\n'
raise self.__bug_handler, msg
- return 1
+ return True
elif self.__token_info in self.__allowable :
if self.__ob:
self.__write_obj.write(self.__ob)
@@ -132,7 +138,7 @@ class DeleteInfo:
self.__state = 'default'
else:
pass
- return 1
+ return True
elif self.__token_info == 'cw 5:
- msg = 'After an asterisk, and found neither an allowable or non-allowble token\n'
- msg += 'token is "%s"\n' % self.__token_info
+ msg = _('After an asterisk, and found neither an allowable or non-allowble token\n\
+ token is "%s"\n') % self.__token_info
raise self.__bug_handler
if not self.__ob:
self.__write_cb = 1
self.__ob = 0
self.__state = 'delete'
self.__cb_count = 0
- return 0
+ return False
+
def __found_list_func(self, line):
"""
print out control words in this group
"""
self.__state = 'list'
+
def __list_func(self, line):
"""
Check to see if the group has ended.
- Return 1 for all control words.
- Return 0 otherwise.
+ Return True for all control words.
+ Return False otherwise.
"""
if self.__delete_count == self.__cb_count and self.__token_info ==\
'cb 3:
msg = _('No type for "%s" in self.__number_type_dict\n')
raise self.__bug_handler
@@ -634,7 +634,7 @@ class ProcessTokens:
if not lang_name:
lang_name = "not defined"
if self.__run_level > 3:
- msg = 'No entry for number "%s"' % num
+ msg = _('No entry for number "%s"') % num
raise self.__bug_handler, msg
return 'cw<%s<%sfalse<%s\n' % (token, token)
else:
- msg = 'boolean should have some value module process tokens\n'
- msg += 'token is ' + token + "\n"
- msg += "'" + num + "'" + "\n"
+ msg = _("boolean should have some value module process tokens\ntoken is %s\n'%s'\n") % (token, num)
raise self.__bug_handler, msg
def __no_sup_sub_func(self, pre, token, num):
@@ -702,11 +700,9 @@ class ProcessTokens:
numerator = float(re.search('[0-9.\-]+', numerator).group())
except TypeError, msg:
if self.__run_level > 3:
- msg = 'no number to process?\n'
- msg += 'this indicates that the token '
- msg += ' \(\\li\) should have a number and does not\n'
- msg += 'numerator is "%s"\n' % numerator
- msg += 'denominator is "%s"\n' % denominator
+ msg = _('No number to process?\nthis indicates that the token \(\\li\) \
+ should have a number and does not\nnumerator is \
+ "%s"\ndenominator is "%s"\n') % (numerator, denominator)
raise self.__bug_handler, msg
if 5 > self.__return_code:
self.__return_code = 5
@@ -720,17 +716,17 @@ class ProcessTokens:
def split_let_num(self, token):
match_obj = re.search(self.__num_exp,token)
- if match_obj != None:
+ if match_obj is not None:
first = match_obj.group(1)
second = match_obj.group(2)
if not second:
if self.__run_level > 3:
- msg = "token is '%s' \n" % token
+ msg = _("token is '%s' \n") % token
raise self.__bug_handler, msg
return first, 0
else:
if self.__run_level > 3:
- msg = "token is '%s' \n" % token
+ msg = _("token is '%s' \n") % token
raise self.__bug_handler
return token, 0
return first, second
diff --git a/src/calibre/ebooks/rtf2xml/tokenize.py b/src/calibre/ebooks/rtf2xml/tokenize.py
index d60909a610..de66415f0c 100755
--- a/src/calibre/ebooks/rtf2xml/tokenize.py
+++ b/src/calibre/ebooks/rtf2xml/tokenize.py
@@ -117,7 +117,7 @@ class Tokenize:
input_file = self.__utf_ud.sub("\\{\\uc0 \g<1>\\}", input_file)
#remove \n in bin data
input_file = self.__bin_exp.sub(lambda x: \
- x.group().replace('\n', '') +'\n', input_file)
+ x.group().replace('\n', '') + '\n', input_file)
#split
tokens = re.split(self.__splitexp, input_file)
#remove empty tokens and \n
From be93bd120ab46c8bfe8959ca1e4186b7992c6fff Mon Sep 17 00:00:00 2001
From: Sengian
Date: Fri, 7 Jan 2011 23:29:35 +0100
Subject: [PATCH 090/163] Clean picture handling (TODO: update for new RTF)
---
src/calibre/ebooks/rtf2xml/pict.py | 108 ++++++++++++-----------------
1 file changed, 43 insertions(+), 65 deletions(-)
diff --git a/src/calibre/ebooks/rtf2xml/pict.py b/src/calibre/ebooks/rtf2xml/pict.py
index 3a1044520e..be2cd9e600 100755
--- a/src/calibre/ebooks/rtf2xml/pict.py
+++ b/src/calibre/ebooks/rtf2xml/pict.py
@@ -16,7 +16,9 @@
# #
#########################################################################
import sys, os, tempfile
+
from calibre.ebooks.rtf2xml import copy
+
class Pict:
"""Process graphic information"""
def __init__(self,
@@ -36,13 +38,11 @@ class Pict:
self.__ob_count = 0
self.__cb_count = 0
self.__pict_count = 0
- self.__in_pict = 0
- self.__already_found_pict = 0
+ self.__in_pict = False
+ self.__already_found_pict = False
self.__orig_file = orig_file
self.__initiate_pict_dict()
self.__out_file = out_file
- # this is left over
- self.__no_ask = 1
def __initiate_pict_dict(self):
self.__pict_dict = {
@@ -71,57 +71,43 @@ class Pict:
self.__out_file))
else:
dir_name = os.path.dirname(self.__orig_file)
- # self.__output_to_file_func()
self.__dir_name = base_name + "_rtf_pict_dir/"
self.__dir_name = os.path.join(dir_name, self.__dir_name)
if not os.path.isdir(self.__dir_name):
try:
os.mkdir(self.__dir_name)
except OSError, msg:
- msg = str(msg)
- msg += "Couldn't make directory '%s':\n" % (self.__dir_name)
+ msg = _("%sCouldn't make directory '%s':\n") % (str(msg), self.__dir_name)
raise self.__bug_handler
else:
- if self.__no_ask:
- user_response = 'r'
- else:
- msg = 'Do you want to remove all files in %s?\n' % self.__dir_name
- msg += 'Type "r" to remove.\n'
- msg += 'Type any other key to keep files in place.\n'
- sys.stderr.write(msg)
- user_response = raw_input()
- if user_response == 'r':
- if self.__run_level > 1:
- sys.stderr.write('Removing files from old pict directory...\n')
- all_files = os.listdir(self.__dir_name)
- for the_file in all_files:
- the_file = os.path.join(self.__dir_name, the_file)
- try:
- os.remove(the_file)
- except OSError:
- pass
- if self.__run_level > 1:
- sys.stderr.write('Files removed.\n')
+ if self.__run_level > 1:
+ sys.stderr.write(_('Removing files from old pict directory...\n'))
+ all_files = os.listdir(self.__dir_name)
+ for the_file in all_files:
+ the_file = os.path.join(self.__dir_name, the_file)
+ try:
+ os.remove(the_file)
+ except OSError:
+ pass
+ if self.__run_level > 1:
+ sys.stderr.write(_('Files removed.\n'))
def __create_pict_file(self):
"""Create a file for all the pict data to be written to.
"""
self.__pict_file = os.path.join(self.__dir_name, 'picts.rtf')
- write_pic_obj = open(self.__pict_file, 'w')
- write_pic_obj.close()
self.__write_pic_obj = open(self.__pict_file, 'a')
def __in_pict_func(self, line):
if self.__cb_count == self.__pict_br_count:
- self.__in_pict = 0
+ self.__in_pict = False
self.__write_pic_obj.write("}\n")
- return 1
+ return True
else:
action = self.__pict_dict.get(self.__token_info)
if action:
- line = action(line)
- self.__write_pic_obj.write(line)
- return 0
+ self.__write_pic_obj.write(action(line))
+ return False
def __default(self, line, write_obj):
"""Determine if each token marks the beginning of pict data.
@@ -142,50 +128,42 @@ class Pict:
write_obj.write('mi
Date: Fri, 7 Jan 2011 23:36:42 +0100
Subject: [PATCH 091/163] Clean RTF combine borders
---
src/calibre/ebooks/rtf2xml/combine_borders.py | 35 ++++++++++---------
1 file changed, 18 insertions(+), 17 deletions(-)
diff --git a/src/calibre/ebooks/rtf2xml/combine_borders.py b/src/calibre/ebooks/rtf2xml/combine_borders.py
index 71cd822e30..c0b7185c9b 100755
--- a/src/calibre/ebooks/rtf2xml/combine_borders.py
+++ b/src/calibre/ebooks/rtf2xml/combine_borders.py
@@ -16,7 +16,9 @@
# #
#########################################################################
import os, tempfile
+
from calibre.ebooks.rtf2xml import copy
+
class CombineBorders:
"""Combine borders in RTF tokens to make later processing easier"""
def __init__(self,
@@ -32,28 +34,31 @@ class CombineBorders:
self.__state = 'default'
self.__bord_pos = 'default'
self.__bord_att = []
+
def found_bd(self, line):
#cw
Date: Sat, 8 Jan 2011 08:35:59 +0100
Subject: [PATCH 092/163] Clean RTF footnote handling
---
src/calibre/ebooks/rtf2xml/footnote.py | 84 ++++++++++++++------------
1 file changed, 47 insertions(+), 37 deletions(-)
diff --git a/src/calibre/ebooks/rtf2xml/footnote.py b/src/calibre/ebooks/rtf2xml/footnote.py
index a596ca73f6..0027348cde 100755
--- a/src/calibre/ebooks/rtf2xml/footnote.py
+++ b/src/calibre/ebooks/rtf2xml/footnote.py
@@ -16,7 +16,9 @@
# #
#########################################################################
import os, tempfile
+
from calibre.ebooks.rtf2xml import copy
+
class Footnote:
"""
Two public methods are available. The first separates all of the
@@ -35,6 +37,7 @@ class Footnote:
self.__copy = copy
self.__write_to = tempfile.mktemp()
self.__found_a_footnote = 0
+
def __first_line_func(self, line):
"""
Print the tag info for footnotes. Check whether footnote is an
@@ -47,6 +50,7 @@ class Footnote:
self.__write_to_foot_obj.write(
'mi%s\n' % self.__footnote_count)
self.__first_line = 0
+
def __in_footnote_func(self, line):
"""Handle all tokens that are part of footnote"""
if self.__first_line:
@@ -68,6 +72,7 @@ class Footnote:
'mi
Date: Sun, 9 Jan 2011 14:47:23 +0100
Subject: [PATCH 093/163] Add metadata to info in RTF metadata plugin
---
src/calibre/ebooks/metadata/rtf.py | 86 ++++++++++++++++++++++--------
1 file changed, 63 insertions(+), 23 deletions(-)
diff --git a/src/calibre/ebooks/metadata/rtf.py b/src/calibre/ebooks/metadata/rtf.py
index ad41125575..bb6392af6d 100644
--- a/src/calibre/ebooks/metadata/rtf.py
+++ b/src/calibre/ebooks/metadata/rtf.py
@@ -11,6 +11,8 @@ title_pat = re.compile(r'\{\\info.*?\{\\title(.*?)(? 6:
- md += '}'
+ md.append(r'{\subject %s}'%(comment,))
+ if options.publisher:
+ publisher = options.publisher.encode('ascii', 'ignore')
+ md.append(r'{\manager %s}'%(publisher,))
+ if options.tags:
+ tags = u', '.join(options.tags)
+ tags = tags.encode('ascii', 'ignore')
+ md.append(r'{\keywords %s}'%(tags,))
+ if len(md) > 1:
+ md.append('}')
stream.seek(0)
src = stream.read()
- ans = src[:6] + md + src[6:]
+ ans = src[:6] + ''.join(md) + src[6:]
stream.seek(0)
stream.write(ans)
@@ -149,14 +171,15 @@ def set_metadata(stream, options):
index = src.rindex('}')
return src[:index] + r'{\ '[:-1] + name + ' ' + val + '}}'
src, pos = get_document_info(stream)
- if not src:
+ print 'I was thre'
+ if src is not None:
create_metadata(stream, options)
else:
olen = len(src)
base_pat = r'\{\\name(.*?)(?
Date: Sun, 9 Jan 2011 19:26:39 +0100
Subject: [PATCH 094/163] Correct a bug with file opening and convert to 'with' statements
---
src/calibre/ebooks/rtf2xml/combine_borders.py | 2 +-
src/calibre/ebooks/rtf2xml/footnote.py | 66 +++++++------------
2 files changed, 26 insertions(+), 42 deletions(-)
diff --git a/src/calibre/ebooks/rtf2xml/combine_borders.py b/src/calibre/ebooks/rtf2xml/combine_borders.py
index c0b7185c9b..a0bc77e7ad 100755
--- a/src/calibre/ebooks/rtf2xml/combine_borders.py
+++ b/src/calibre/ebooks/rtf2xml/combine_borders.py
@@ -78,7 +78,7 @@ class CombineBorders:
self.add_to_border_desc(line)
def combine_borders(self):
- with open(self.__file, 'r') as read_obj,
+ with open(self.__file, 'r') as read_obj, \
open(self.__write_to, 'w') as write_obj:
for line in read_obj:
self.__first_five = line[0:5]
diff --git a/src/calibre/ebooks/rtf2xml/footnote.py b/src/calibre/ebooks/rtf2xml/footnote.py
index 0027348cde..c1ffb18ada 100755
--- a/src/calibre/ebooks/rtf2xml/footnote.py
+++ b/src/calibre/ebooks/rtf2xml/footnote.py
@@ -119,14 +119,11 @@ class Footnote:
bottom of the main file.
"""
self.__initiate_sep_values()
- self.__write_obj = open(self.__write_to, 'w')
- with open(self.__file) as read_obj:
- self.__footnote_holder = tempfile.mktemp()
- self.__write_to_foot_obj = open(self.__footnote_holder, 'w')
- line_to_read = 1
- while line_to_read:
- line_to_read = read_obj.readline()
- line = line_to_read
+ self.__footnote_holder = tempfile.mktemp()
+ with open(self.__file) as read_obj, \
+ open(self.__write_to, 'w') as self.__write_obj, \
+ open(self.__footnote_holder, 'w') as self.__write_to_foot_obj:
+ for line in read_obj:
self.__token_info = line[:16]
# keep track of opening and closing brackets
if self.__token_info == 'ob
Date: Sun, 9 Jan 2011 20:46:15 +0100
Subject: [PATCH 095/163] Add pict.rtf when debugging + simplify image extraction
 in RTFInput
---
src/calibre/ebooks/rtf/input.py | 51 ++++++++++++++++++++----------
src/calibre/ebooks/rtf2xml/pict.py | 4 ++-
2 files changed, 37 insertions(+), 18 deletions(-)
diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py
index 05c851a075..545c1fb3c8 100644
--- a/src/calibre/ebooks/rtf/input.py
+++ b/src/calibre/ebooks/rtf/input.py
@@ -127,35 +127,52 @@ class RTFInput(InputFormatPlugin):
def extract_images(self, picts):
self.log('Extracting images...')
-
- count = 0
+
raw = open(picts, 'rb').read()
- starts = []
- for match in re.finditer(r'\{\\pict([^}]+)\}', raw):
- starts.append(match.start(1))
-
+ picts = filter(len, re.findall(r'\{\\pict([^}]+)\}', raw))
+ hex = re.compile(r'[^a-zA-Z0-9]')
+ encs = [hex.sub('', pict) for pict in picts]
+
+ count = 0
imap = {}
-
- for start in starts:
- pos, bc = start, 1
- while bc > 0:
- if raw[pos] == '}': bc -= 1
- elif raw[pos] == '{': bc += 1
- pos += 1
- pict = raw[start:pos+1]
- enc = re.sub(r'[^a-zA-Z0-9]', '', pict)
+ for enc in encs:
if len(enc) % 2 == 1:
enc = enc[:-1]
data = enc.decode('hex')
count += 1
- name = (('%4d'%count).replace(' ', '0'))+'.wmf'
+ name = '%04d.wmf' % count
open(name, 'wb').write(data)
imap[count] = name
#open(name+'.hex', 'wb').write(enc)
return self.convert_images(imap)
+ # count = 0
+ # raw = open(picts, 'rb').read()
+ # starts = []
+ # for match in re.finditer(r'\{\\pict([^}]+)\}', raw):
+ # starts.append(match.start(1))
+
+ # imap = {}
+ # for start in starts:
+ # pos, bc = start, 1
+ # while bc > 0:
+ # if raw[pos] == '}': bc -= 1
+ # elif raw[pos] == '{': bc += 1
+ # pos += 1
+ # pict = raw[start:pos+1]
+ # enc = re.sub(r'[^a-zA-Z0-9]', '', pict)
+ # if len(enc) % 2 == 1:
+ # enc = enc[:-1]
+ # data = enc.decode('hex')
+ # count += 1
+ # name = (('%4d'%count).replace(' ', '0'))+'.wmf'
+ # open(name, 'wb').write(data)
+ # imap[count] = name
+ # #open(name+'.hex', 'wb').write(enc)
+ # return self.convert_images(imap)
+
def convert_images(self, imap):
- for count, val in imap.items():
+ for count, val in imap.iteritems():
try:
imap[count] = self.convert_image(val)
except:
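
A small sketch of the decode-and-write step behind the simplified extraction above: once the hex payload of a {\pict} group has been isolated, it is unhexlified and written out under the zero-padded '%04d.wmf' name the patch switches to. The helper assumes hex_data already contains only hex digits.

    import binascii

    def write_wmf(count, hex_data):
        if len(hex_data) % 2 == 1:       # unhexlify needs an even number of digits
            hex_data = hex_data[:-1]
        data = binascii.unhexlify(hex_data)
        name = '%04d.wmf' % count
        with open(name, 'wb') as f:
            f.write(data)
        return name

    # e.g. write_wmf(1, 'd7cdc69a...') -> '0001.wmf'
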
diff --git a/src/calibre/ebooks/rtf2xml/pict.py b/src/calibre/ebooks/rtf2xml/pict.py
index be2cd9e600..a6cc2deade 100755
--- a/src/calibre/ebooks/rtf2xml/pict.py
+++ b/src/calibre/ebooks/rtf2xml/pict.py
@@ -146,7 +146,8 @@ class Pict:
def process_pict(self):
self.__make_dir()
- with open(self.__file) as read_obj, open(self.__write_to, 'w') as write_obj:
+ with open(self.__file) as read_obj, \
+ open(self.__write_to, 'w') as write_obj:
for line in read_obj:
self.__token_info = line[:16]
if self.__token_info == 'ob
Date: Sun, 9 Jan 2011 21:16:00 +0100
Subject: [PATCH 096/163] Remove category field in rtf metadata plugin
---
src/calibre/ebooks/metadata/rtf.py | 21 ++-------------------
1 file changed, 2 insertions(+), 19 deletions(-)
diff --git a/src/calibre/ebooks/metadata/rtf.py b/src/calibre/ebooks/metadata/rtf.py
index bb6392af6d..f88250e72a 100644
--- a/src/calibre/ebooks/metadata/rtf.py
+++ b/src/calibre/ebooks/metadata/rtf.py
@@ -10,7 +10,6 @@ from calibre.ebooks.metadata import MetaInformation, string_to_authors
title_pat = re.compile(r'\{\\info.*?\{\\title(.*?)(?
Date: Sun, 9 Jan 2011 22:35:33 +0100
Subject: [PATCH 097/163] Replace keywords with category in RTF metadata
---
src/calibre/ebooks/metadata/rtf.py | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/src/calibre/ebooks/metadata/rtf.py b/src/calibre/ebooks/metadata/rtf.py
index f88250e72a..3e316ee430 100644
--- a/src/calibre/ebooks/metadata/rtf.py
+++ b/src/calibre/ebooks/metadata/rtf.py
@@ -10,7 +10,7 @@ from calibre.ebooks.metadata import MetaInformation, string_to_authors
title_pat = re.compile(r'\{\\info.*?\{\\title(.*?)(? 1:
md.append('}')
stream.seek(0)
@@ -198,11 +198,11 @@ def set_metadata(stream, options):
if tags is not None:
tags = ', '.join(tags)
tags = tags.encode('ascii', 'ignore')
- pat = re.compile(base_pat.replace('name', 'keywords'), re.DOTALL)
+ pat = re.compile(base_pat.replace('name', 'category'), re.DOTALL)
if pat.search(src):
- src = pat.sub(r'{\\keywords ' + tags + r'}', src)
+ src = pat.sub(r'{\\category ' + tags + r'}', src)
else:
- src = add_metadata_item(src, 'keywords', tags)
+ src = add_metadata_item(src, 'category', tags)
publisher = options.publisher
if publisher is not None:
publisher = publisher.encode('ascii', 'replace')
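
Patch 097 switches the tags field from the \keywords destination to \category inside the RTF \info group, substituting in place when the group exists and appending it otherwise. The base_pat used by calibre is not visible in this hunk, so the patterns in the sketch below are illustrative assumptions; tags is assumed to be plain ASCII without backslashes or braces:

import re

def set_category(src, tags):
    # assumed pattern for an existing {\category ...} group
    pat = re.compile(r'\{\\category(.*?)\}', re.DOTALL)
    if pat.search(src):
        return pat.sub(r'{\\category ' + tags + r'}', src)
    # otherwise add a new group right after the opening of {\info ...}
    info_pat = re.compile(r'\{\\info')
    return info_pat.sub(r'{\\info {\\category ' + tags + '}', src, 1)
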
From 6e831360a29d9f9e181f238a5565bff3fb7dc253 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Mon, 10 Jan 2011 21:27:45 +0100
Subject: [PATCH 098/163] Fix 0 case in rtf cp + case when there are no pictures
 in a file
---
src/calibre/ebooks/rtf2xml/default_encoding.py | 6 ++++--
src/calibre/ebooks/rtf2xml/pict.py | 5 ++++-
2 files changed, 8 insertions(+), 3 deletions(-)
diff --git a/src/calibre/ebooks/rtf2xml/default_encoding.py b/src/calibre/ebooks/rtf2xml/default_encoding.py
index e145a8a75e..c7e030e48b 100755
--- a/src/calibre/ebooks/rtf2xml/default_encoding.py
+++ b/src/calibre/ebooks/rtf2xml/default_encoding.py
@@ -101,7 +101,7 @@ class DefaultEncoding:
break
if self.__token_info == 'cw
Date: Mon, 10 Jan 2011 23:52:34 +0100
Subject: [PATCH 099/163] Improve hard line break handling in RTF
---
src/calibre/ebooks/rtf2xml/inline.py | 8 +-------
src/calibre/ebooks/rtf2xml/process_tokens.py | 5 ++++-
2 files changed, 5 insertions(+), 8 deletions(-)
diff --git a/src/calibre/ebooks/rtf2xml/inline.py b/src/calibre/ebooks/rtf2xml/inline.py
index 5ca1cd0783..55e6ed1dbb 100755
--- a/src/calibre/ebooks/rtf2xml/inline.py
+++ b/src/calibre/ebooks/rtf2xml/inline.py
@@ -51,7 +51,6 @@ class Inline:
'tx ml
'*' : ('ml', 'asterisk__', self.default_func),
':' : ('ml', 'colon_____', self.default_func),
@@ -77,7 +78,6 @@ class ProcessTokens:
'backslash' : ('nu', '\\', self.text_func),
'ob' : ('nu', '{', self.text_func),
'cb' : ('nu', '}', self.text_func),
- 'line' : ('nu', 'hard-lineb', self.default_func), #calibre
#'line' : ('nu', ' ', self.text_func), calibre
# paragraph formatting => pf
'page' : ('pf', 'page-break', self.default_func),
@@ -605,6 +605,9 @@ class ProcessTokens:
def ms_sub_func(self, pre, token, num):
return 'tx
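
Patches 099 and 107 both adjust how the \line control word is routed through ProcessTokens, whose core is a dispatch table mapping each RTF control word to a (group, normalized name, handler) triple; the handler decides which token line to emit. A stripped-down sketch of that table-driven dispatch (the emitted line format is abbreviated here and the class is a stand-in, not calibre's ProcessTokens):

class TokenDispatcher(object):

    def __init__(self):
        # control word : (group, normalized name, handler)
        self.dict_token = {
            'par'  : ('pf', 'par-end___', self.default_func),
            'page' : ('pf', 'page-break', self.default_func),
            'line' : ('mi', 'hardline-break', self.default_func),
        }

    def default_func(self, pre, token, num):
        if num is None:
            num = 'true'
        return 'cw<%s<%s<nu<%s\n' % (pre, token, num)

    def process(self, word, num=None):
        entry = self.dict_token.get(word)
        if entry is None:
            return None                 # unknown control word
        pre, token, func = entry
        return func(pre, token, num)

# TokenDispatcher().process('line') -> 'cw<mi<hardline-break<nu<true\n'
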
Date: Tue, 11 Jan 2011 00:00:20 +0100
Subject: [PATCH 100/163] Clean rtf2xml inline
---
src/calibre/ebooks/rtf2xml/inline.py | 78 +++++++++++++++-------------
1 file changed, 43 insertions(+), 35 deletions(-)
diff --git a/src/calibre/ebooks/rtf2xml/inline.py b/src/calibre/ebooks/rtf2xml/inline.py
index 55e6ed1dbb..83c383fa1f 100755
--- a/src/calibre/ebooks/rtf2xml/inline.py
+++ b/src/calibre/ebooks/rtf2xml/inline.py
@@ -1,5 +1,7 @@
import sys, os, tempfile
+
from calibre.ebooks.rtf2xml import copy
+
"""
States.
1. default
@@ -36,6 +38,7 @@ class Inline:
self.__copy = copy
self.__run_level = run_level
self.__write_to = tempfile.mktemp()
+
def __initiate_values(self):
"""
Initiate all values.
@@ -81,12 +84,12 @@ class Inline:
self.__in_para = 0 # not in paragraph
self.__char_dict = {
# character info => ci
- 'annotation' : 'annotation',
+ 'annotation' : 'annotation',
'blue______' : 'blue',
'bold______' : 'bold',
- 'caps______' : 'caps',
- 'char-style' : 'character-style',
- 'dbl-strike' : 'double-strike-through',
+ 'caps______' : 'caps',
+ 'char-style' : 'character-style',
+ 'dbl-strike' : 'double-strike-through',
'emboss____' : 'emboss',
'engrave___' : 'engrave',
'font-color' : 'font-color',
@@ -94,7 +97,7 @@ class Inline:
'font-size_' : 'font-size',
'font-style' : 'font-style',
'font-up___' : 'superscript',
- 'footnot-mk' : 'footnote-marker',
+ 'footnot-mk' : 'footnote-marker',
'green_____' : 'green',
'hidden____' : 'hidden',
'italics___' : 'italics',
@@ -105,9 +108,10 @@ class Inline:
'strike-thr' : 'strike-through',
'subscript_' : 'subscript',
'superscrip' : 'superscript',
- 'underlined' : 'underlined',
+ 'underlined' : 'underlined',
}
self.__caps_list = ['false']
+
def __set_list_func(self, line):
"""
Requires:
@@ -126,6 +130,7 @@ class Inline:
self.__place = 'in_list'
self.__inline_list = self.__list_inline_list
self.__groups_in_waiting = self.__groups_in_waiting_list
+
def __default_func(self, line):
"""
Requires:
@@ -138,8 +143,8 @@ class Inline:
action = self.__default_dict.get(self.__token_info)
if action:
action(line)
- if self.__token_info != 'cw 3:
- msg = 'self.__inline_list is %s\n' % self.__inline_list
+ msg = _('self.__inline_list is %s\n') % self.__inline_list
raise self.__bug_handler, msg
self.__write_obj.write('error\n')
self.__groups_in_waiting[0] = 0
@@ -308,6 +317,7 @@ class Inline:
self.__write_obj.write('<%s>%s' % (the_key, the_dict[the_key]))
self.__write_obj.write('\n')
self.__groups_in_waiting[0] = 0
+
def __end_para_func(self, line):
"""
Requires:
@@ -336,6 +346,7 @@ class Inline:
self.__write_obj.write('mi%s' % (the_key, the_dict[the_key]))
self.__write_obj.write('\n')
self.__groups_in_waiting[0] = 0
+
def __found_field_func(self, line):
"""
Just a default function to make sure I don't prematurely exit
default state
"""
pass
+
def form_tags(self):
"""
Requires:
@@ -380,32 +393,27 @@ class Inline:
the state.
"""
self.__initiate_values()
- read_obj = open(self.__file, 'r')
- self.__write_obj = open(self.__write_to, 'w')
- line_to_read = 1
- while line_to_read:
- line_to_read = read_obj.readline()
- line = line_to_read
- token = line[0:-1]
- self.__token_info = ''
- if token == 'tx
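
Patch 100 is largely mechanical: readline()-driven while loops are replaced by direct iteration over the file object, and explicit open()/close() pairs by context managers, so the temporary files are closed even if a handler raises. The before/after idiom in isolation (the copy-through body stands in for the real token handling):

# old style, as removed by the patch
def rewrite_old(src, dst):
    read_obj = open(src, 'r')
    write_obj = open(dst, 'w')
    line_to_read = 1
    while line_to_read:
        line_to_read = read_obj.readline()
        write_obj.write(line_to_read)
    read_obj.close()
    write_obj.close()

# new style (Python 2.7+ for two managers in one statement)
def rewrite_new(src, dst):
    with open(src, 'r') as read_obj, open(dst, 'w') as write_obj:
        for line in read_obj:
            write_obj.write(line)
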
Date: Tue, 11 Jan 2011 00:04:41 +0100
Subject: [PATCH 101/163] Use calibre function to clean lower ASCII chars in
 rtf2xml
---
.../ebooks/rtf2xml/replace_illegals.py | 20 ++++++++-----------
1 file changed, 8 insertions(+), 12 deletions(-)
diff --git a/src/calibre/ebooks/rtf2xml/replace_illegals.py b/src/calibre/ebooks/rtf2xml/replace_illegals.py
index 901cdd289d..9c5c1ef0e9 100755
--- a/src/calibre/ebooks/rtf2xml/replace_illegals.py
+++ b/src/calibre/ebooks/rtf2xml/replace_illegals.py
@@ -16,7 +16,10 @@
# #
#########################################################################
import os, tempfile
+
from calibre.ebooks.rtf2xml import copy
+from calibre.utils.cleantext import clean_ascii_chars
+
class ReplaceIllegals:
"""
reaplace illegal lower ascii characters
@@ -30,21 +33,14 @@ class ReplaceIllegals:
self.__copy = copy
self.__run_level = run_level
self.__write_to = tempfile.mktemp()
+
def replace_illegals(self):
"""
"""
- nums = [0, 1, 2, 3, 4, 5, 6, 7, 8, 11, 13, 14, 15, 16, 17, 18, 19]
- read_obj = open(self.__file, 'r')
- write_obj = open(self.__write_to, 'w')
- line_to_read = 1
- while line_to_read:
- line_to_read = read_obj.readline()
- line = line_to_read
- for num in nums:
- line = line.replace(chr(num), '')
- write_obj.write(line)
- read_obj.close()
- write_obj.close()
+ with open(self.__file, 'r') as read_obj, \
+ open(self.__write_to, 'w') as write_obj:
+ for line in read_obj:
+ write_obj.write(clean_ascii_chars(line))
copy_obj = copy.Copy()
if self.__copy:
copy_obj.copy_file(self.__write_to, "replace_illegals.data")
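
Patch 101 delegates control-character stripping to calibre.utils.cleantext.clean_ascii_chars instead of the hand-rolled chr(num) loop it removes. A rough equivalent of what that loop did, as one regex (the real calibre helper may cover a slightly different character set):

import re

# codepoints removed by the old loop: 0-8, 11, 13-19
_ILLEGAL = re.compile('[\x00-\x08\x0b\x0d-\x13]')

def clean_ascii_chars(txt):
    return _ILLEGAL.sub('', txt)
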
From 7ea92e2c672d3e4ef315cd98c9466254304c466d Mon Sep 17 00:00:00 2001
From: Sengian
Date: Tue, 11 Jan 2011 00:30:24 +0100
Subject: [PATCH 102/163] ...
---
src/calibre/ebooks/rtf2xml/inline.py | 4 ++--
src/calibre/ebooks/rtf2xml/process_tokens.py | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/calibre/ebooks/rtf2xml/inline.py b/src/calibre/ebooks/rtf2xml/inline.py
index 83c383fa1f..8949cf35ad 100755
--- a/src/calibre/ebooks/rtf2xml/inline.py
+++ b/src/calibre/ebooks/rtf2xml/inline.py
@@ -143,7 +143,7 @@ class Inline:
action = self.__default_dict.get(self.__token_info)
if action:
action(line)
- self.__write_obj.write(line)
+ self.__write_obj.write(line)
def __found_open_bracket_func(self, line):
"""
@@ -410,7 +410,7 @@ class Inline:
self.__token_info = line[:16]
self.__set_list_func(line)
action = self.__state_dict.get(self.__state)
- if action == None:
+ if action is None:
sys.stderr.write(_('No matching state in module inline_for_lists.py\n'))
sys.stderr.write(self.__state + '\n')
action(line)
diff --git a/src/calibre/ebooks/rtf2xml/process_tokens.py b/src/calibre/ebooks/rtf2xml/process_tokens.py
index 9f26bb295b..1033ebc583 100755
--- a/src/calibre/ebooks/rtf2xml/process_tokens.py
+++ b/src/calibre/ebooks/rtf2xml/process_tokens.py
@@ -52,7 +52,7 @@ class ProcessTokens:
self.__return_code = 0
self.dict_token={
# unicode
- 'mshex' : ('nu', '__________', self.__ms_hex_func),
+ 'mshex' : ('nu', '__________', self.__ms_hex_func),
# brackets
'{' : ('nu', '{', self.ob_func),
'}' : ('nu', '}', self.cb_func),
From 056f52c84361ce265602541344184d764f92d28d Mon Sep 17 00:00:00 2001
From: Sengian
Date: Tue, 11 Jan 2011 13:41:23 +0100
Subject: [PATCH 103/163] Integrate rtf2xml debug process into calibre
---
src/calibre/ebooks/rtf/input.py | 27 ++++++++++++++++++---------
1 file changed, 18 insertions(+), 9 deletions(-)
diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py
index 422105e5b3..3f9eda374f 100644
--- a/src/calibre/ebooks/rtf/input.py
+++ b/src/calibre/ebooks/rtf/input.py
@@ -77,7 +77,18 @@ class RTFInput(InputFormatPlugin):
def generate_xml(self, stream):
from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf
- ofile = 'out.xml'
+ debug_dir = getattr(self.opts, 'debug_pipeline', None)
+ run_lev = 1
+ if debug_dir is not None:
+ try:
+ debug_dir = os.path.abspath(os.path.normpath(debug_dir + u'/rtfdebug/'))
+ os.makedirs(debug_dir)
+ run_lev = 6
+ except OSError, ( errno, strerror ):
+ print strerror
+ print errno
+ debug_dir = None
+ ofile = 'dataxml.xml'
parser = ParseRtf(
in_file = stream,
out_file = ofile,
@@ -117,12 +128,13 @@ class RTFInput(InputFormatPlugin):
empty_paragraphs = 1,
#debug
- deb_dir = "D:\\Mes eBooks\\Developpement\\debug\\rtfdebug",
- run_level = 3
+ deb_dir = debug_dir,
+ run_level = run_lev,
)
parser.parse_rtf()
- ans = open('out.xml').read()
- os.remove('out.xml')
+ ans = open('dataxml.xml').read()
+ if debug_dir is None:
+ os.remove('dataxml.xml')
return ans
def extract_images(self, picts):
@@ -213,7 +225,7 @@ class RTFInput(InputFormatPlugin):
css += '\n'+'\n'.join(font_size_classes)
css += '\n' +'\n'.join(color_classes)
- for cls, val in border_styles.items():
+ for cls, val in border_styles.iteritems():
css += '\n\n.%s {\n%s\n}'%(cls, val)
with open('styles.css', 'ab') as f:
@@ -277,9 +289,6 @@ class RTFInput(InputFormatPlugin):
raise ValueError(_('This RTF file has a feature calibre does not '
'support. Convert it to HTML first and then try it.\n%s')%e)
- with open('dataxml.xml', 'w') as dataxml:
- dataxml.write(xml)
-
d = glob.glob(os.path.join('*_rtf_pict_dir', 'picts.rtf'))
if d:
imap = {}
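
Patches 103, 104 and 106 converge on a simple gate: if the conversion pipeline was given a debug directory, create an rtfdebug/ working directory and raise rtf2xml's run_level so it keeps its intermediate .data files; otherwise run quietly at level 1. The gate in isolation (option names follow the patches; the surrounding plugin machinery is assumed):

import os

def rtf_debug_options(opts):
    run_lev, debug_dir = 1, None
    if getattr(opts, 'debug_pipeline', None) is not None:
        try:
            os.mkdir('rtfdebug')
            debug_dir = 'rtfdebug'
            run_lev = 4      # rtf2xml keeps its intermediate files from here on
        except OSError:
            pass             # directory already present or not creatable
    return run_lev, debug_dir
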
From 576cac2b98cec1fd620d14bffd341f6cd62bdb44 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Tue, 11 Jan 2011 18:20:31 +0100
Subject: [PATCH 104/163] Modify rtf2xml debug parameters
---
src/calibre/ebooks/rtf/input.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py
index 3f9eda374f..c07764c744 100644
--- a/src/calibre/ebooks/rtf/input.py
+++ b/src/calibre/ebooks/rtf/input.py
@@ -81,9 +81,9 @@ class RTFInput(InputFormatPlugin):
run_lev = 1
if debug_dir is not None:
try:
- debug_dir = os.path.abspath(os.path.normpath(debug_dir + u'/rtfdebug/'))
- os.makedirs(debug_dir)
- run_lev = 6
+ debug_dir = os.path.normpath('rtfdebug/')
+ os.mkdir(debug_dir)
+ run_lev = 4
except OSError, ( errno, strerror ):
print strerror
print errno
From 9ed1e9419081a984161c20b44dcce7940fc8a072 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Tue, 11 Jan 2011 18:39:55 +0100
Subject: [PATCH 105/163] Modify delete_info in rtf2xml
---
src/calibre/ebooks/rtf/input.py | 2 -
src/calibre/ebooks/rtf2xml/delete_info.py | 49 +++++++++++------------
2 files changed, 23 insertions(+), 28 deletions(-)
diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py
index c07764c744..981a930d54 100644
--- a/src/calibre/ebooks/rtf/input.py
+++ b/src/calibre/ebooks/rtf/input.py
@@ -133,8 +133,6 @@ class RTFInput(InputFormatPlugin):
)
parser.parse_rtf()
ans = open('dataxml.xml').read()
- if debug_dir is None:
- os.remove('dataxml.xml')
return ans
def extract_images(self, picts):
diff --git a/src/calibre/ebooks/rtf2xml/delete_info.py b/src/calibre/ebooks/rtf2xml/delete_info.py
index 3c93e028b8..3a7442addc 100755
--- a/src/calibre/ebooks/rtf2xml/delete_info.py
+++ b/src/calibre/ebooks/rtf2xml/delete_info.py
@@ -34,14 +34,14 @@ class DeleteInfo:
self.__bracket_count= 0
self.__ob_count = 0
self.__cb_count = 0
- self.__after_asterisk = False
- self.__delete = 0
+ # self.__after_asterisk = False
+ # self.__delete = 0
self.__initiate_allow()
self.__ob = 0
- self.__write_cb = 0
+ self.__write_cb = False
self.__run_level = run_level
self.__found_delete = False
- self.__list = False
+ # self.__list = False
def __initiate_allow(self):
"""
@@ -69,7 +69,7 @@ class DeleteInfo:
self.__state_dict = {
'default' : self.__default_func,
'after_asterisk' : self.__asterisk_func,
- 'delete' : self.__delete_func,
+ 'delete' : self.__delete_func,
'list' : self.__list_func,
}
@@ -99,7 +99,7 @@ class DeleteInfo:
if self.__delete_count == self.__cb_count:
self.__state = 'default'
if self.__write_cb:
- self.__write_cb = 0
+ self.__write_cb = True
return True
return False
@@ -116,7 +116,7 @@ class DeleteInfo:
"""
# Test for {\*}, in which case don't enter
# delete state
- self.__after_asterisk = False # only enter this function once
+ # self.__after_asterisk = False # only enter this function once
self.__found_delete = True
if self.__token_info == 'cb 3:
- msg = 'flag problem\n'
+ msg = _('flag problem\n')
raise self.__bug_handler, msg
return True
elif self.__token_info in self.__allowable :
@@ -144,18 +144,18 @@ class DeleteInfo:
self.__found_list_func(line)
elif self.__token_info in self.__not_allowable:
if not self.__ob:
- self.__write_cb = 1
+ self.__write_cb = False
self.__ob = 0
self.__state = 'delete'
self.__cb_count = 0
return False
else:
if self.__run_level > 5:
- msg = _('After an asterisk, and found neither an allowable or non-allowble token\n\
+ msg = _('After an asterisk, and found neither an allowable or non-allowable token\n\
token is "%s"\n') % self.__token_info
- raise self.__bug_handler
+ raise self.__bug_handler, msg
if not self.__ob:
- self.__write_cb = 1
+ self.__write_cb = True
self.__ob = 0
self.__state = 'delete'
self.__cb_count = 0
@@ -177,7 +177,7 @@ class DeleteInfo:
'cb
Date: Tue, 11 Jan 2011 20:28:13 +0100
Subject: [PATCH 106/163] Modify rtf2xml debug parameters (2)
---
src/calibre/ebooks/rtf/input.py | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py
index 981a930d54..915ca55fc1 100644
--- a/src/calibre/ebooks/rtf/input.py
+++ b/src/calibre/ebooks/rtf/input.py
@@ -77,18 +77,19 @@ class RTFInput(InputFormatPlugin):
def generate_xml(self, stream):
from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf
- debug_dir = getattr(self.opts, 'debug_pipeline', None)
+ ofile = 'dataxml.xml'
run_lev = 1
- if debug_dir is not None:
+ if hasattr(self.opts, 'debug_pipeline'):
try:
- debug_dir = os.path.normpath('rtfdebug/')
+ debug_dir = 'rtfdebug'
os.mkdir(debug_dir)
run_lev = 4
except OSError, ( errno, strerror ):
print strerror
print errno
debug_dir = None
- ofile = 'dataxml.xml'
+ else:
+ debug_dir = None
parser = ParseRtf(
in_file = stream,
out_file = ofile,
From 10c2e603e29dd7305a25704f4b7711a85cca7af4 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Sat, 15 Jan 2011 13:21:13 +0100
Subject: [PATCH 107/163] Correct handling of \ as \par for old RTF
---
src/calibre/ebooks/rtf/input.py | 1 -
src/calibre/ebooks/rtf2xml/ParseRtf.py | 11 ++++---
.../ebooks/rtf2xml/default_encoding.py | 1 +
src/calibre/ebooks/rtf2xml/process_tokens.py | 4 +--
src/calibre/ebooks/rtf2xml/tokenize.py | 33 +++++++++++++++----
5 files changed, 36 insertions(+), 14 deletions(-)
diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py
index 5907bf6b55..a6b8c86e79 100644
--- a/src/calibre/ebooks/rtf/input.py
+++ b/src/calibre/ebooks/rtf/input.py
@@ -312,7 +312,6 @@ class RTFInput(InputFormatPlugin):
try:
xml = self.generate_xml(stream.name)
except RtfInvalidCodeException, e:
- raise
raise ValueError(_('This RTF file has a feature calibre does not '
'support. Convert it to HTML first and then try it.\n%s')%e)
diff --git a/src/calibre/ebooks/rtf2xml/ParseRtf.py b/src/calibre/ebooks/rtf2xml/ParseRtf.py
index 902ad09c30..73f8f04e1c 100755
--- a/src/calibre/ebooks/rtf2xml/ParseRtf.py
+++ b/src/calibre/ebooks/rtf2xml/ParseRtf.py
@@ -226,10 +226,6 @@ class ParseRtf:
try:
return_value = process_tokens_obj.process_tokens()
except InvalidRtfException, msg:
- try:
- os.remove(self.__temp_file)
- except OSError:
- pass
#Check to see if the file is correctly encoded
encode_obj = default_encoding.DefaultEncoding(
in_file = self.__temp_file,
@@ -244,11 +240,16 @@ class ParseRtf:
enc = encode_obj.get_codepage()
if enc != 'mac_roman':
enc = 'cp' + enc
+ msg = 'Exception in token processing'
if check_encoding_obj.check_encoding(self.__file, enc):
file_name = self.__file if isinstance(self.__file, str) \
else self.__file.encode('utf-8')
msg = 'File %s does not appear to be correctly encoded.\n' % file_name
- raise InvalidRtfException, msg
+ try:
+ os.remove(self.__temp_file)
+ except OSError:
+ pass
+ raise InvalidRtfException, msg
delete_info_obj = delete_info.DeleteInfo(
in_file = self.__temp_file,
copy = self.__copy,
diff --git a/src/calibre/ebooks/rtf2xml/default_encoding.py b/src/calibre/ebooks/rtf2xml/default_encoding.py
index aec33943a9..53887e0d90 100755
--- a/src/calibre/ebooks/rtf2xml/default_encoding.py
+++ b/src/calibre/ebooks/rtf2xml/default_encoding.py
@@ -3,6 +3,7 @@
# copyright 2002 Paul Henry Tremblay #
# #
#########################################################################
+
'''
Codepages as to RTF 1.9.1:
437 United States IBM
diff --git a/src/calibre/ebooks/rtf2xml/process_tokens.py b/src/calibre/ebooks/rtf2xml/process_tokens.py
index ff4fbe110c..5066843976 100755
--- a/src/calibre/ebooks/rtf2xml/process_tokens.py
+++ b/src/calibre/ebooks/rtf2xml/process_tokens.py
@@ -70,7 +70,7 @@ class ProcessTokens:
';' : ('mc', ';', self.ms_sub_func),
# this must be wrong
'-' : ('mc', '-', self.ms_sub_func),
- 'line' : ('mi', 'hardline-break', self.hardline_func), #calibre
+ 'line' : ('mi', 'hardline-break', self.direct_conv_func), #calibre
# misc => ml
'*' : ('ml', 'asterisk__', self.default_func),
':' : ('ml', 'colon_____', self.default_func),
@@ -605,7 +605,7 @@ class ProcessTokens:
def ms_sub_func(self, pre, token, num):
return 'tx ", input_file)
input_file = self.__utf_ud.sub("\\{\\uc0 \g<1>\\}", input_file)
#remove \n in bin data
@@ -127,7 +131,7 @@ class Tokenize:
# this is for older RTF
#line = re.sub(self.__par_exp, '\\par ', line)
#return filter(lambda x: len(x) > 0, \
- #(self.__remove_line.sub('', x) for x in tokens))
+ #(self.__remove_line.sub('', x) for x in tokens))
def __compile_expressions(self):
SIMPLE_RPL = {
@@ -153,8 +157,6 @@ class Tokenize:
# put a backslash in front of to eliminate special cases and
# make processing easier
"}": "\\}",
- # this is for older RTF
- r'\\$': '\\par ',
}
self.__replace_spchar = MReplace(SIMPLE_RPL)
#add ;? in case of char following \u
@@ -168,10 +170,12 @@ class Tokenize:
#why keep backslash whereas \is replaced before?
#remove \n from endline char
self.__splitexp = re.compile(r"(\\[{}]|\n|\\[^\s\\{}&]+(?:[ \t\r\f\v])?)")
+ #this is for old RTF
+ self.__par_exp = re.compile(r'\\\n+')
+ # self.__par_exp = re.compile(r'\\$')
#self.__bin_exp = re.compile(r"\\bin(-?\d{1,8}) {0,1}")
#self.__utf_exp = re.compile(r"^\\u(-?\d{3,6})")
#self.__splitexp = re.compile(r"(\\[\\{}]|{|}|\n|\\[^\s\\{}&]+(?:\s)?)")
- #self.__par_exp = re.compile(r'\\$')
#self.__remove_line = re.compile(r'\n+')
#self.__mixed_exp = re.compile(r"(\\[a-zA-Z]+\d+)(\D+)")
##self.num_exp = re.compile(r"(\*|:|[a-zA-Z]+)(.*)")
@@ -199,7 +203,24 @@ class Tokenize:
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy:
copy_obj.copy_file(self.__write_to, "tokenize.data")
+ # if self.__out_file:
+ # self.__file = self.__out_file
copy_obj.rename(self.__write_to, self.__file)
os.remove(self.__write_to)
- #self.__special_tokens = [ '_', '~', "'", '{', '}' ]
\ No newline at end of file
+ #self.__special_tokens = [ '_', '~', "'", '{', '}' ]
+
+# import sys
+# def main(args=sys.argv):
+ # if len(args) < 1:
+ # print 'No file'
+ # return
+ # file = 'data_tokens.txt'
+ # if len(args) == 3:
+ # file = args[2]
+ # to = Tokenize(args[1], Exception, out_file = file)
+ # to.tokenize()
+
+
+# if __name__ == '__main__':
+ # sys.exit(main())
\ No newline at end of file
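
Patch 107 moves the old-RTF quirk, where a bare backslash at the end of a line means \par, out of the MReplace table and into a dedicated regex applied while tokenizing. A minimal sketch combining that normalization with the token split from tokenize.py (both regexes are copied from the hunks above; the filtering is simplified):

import re

par_exp  = re.compile(r'\\\n+')   # backslash followed by newline(s): old-RTF \par
splitexp = re.compile(r"(\\[{}]|\n|\\[^\s\\{}&]+(?:[ \t\r\f\v])?)")

def tokenize(rtf_text):
    rtf_text = par_exp.sub(r'\\par ', rtf_text)   # emit a literal \par
    # re.split with a capturing group keeps the control words themselves
    return [tok for tok in splitexp.split(rtf_text) if tok.strip()]

# tokenize('one\\\ntwo') -> ['one', '\\par ', 'two']
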
From 93ef1699dfd732596ad9f10f08aff7aed43eaa21 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Sat, 15 Jan 2011 16:11:28 +0100
Subject: [PATCH 108/163] Modify mac-roman encoding, now mapped to codepage 10000
---
src/calibre/ebooks/rtf2xml/ParseRtf.py | 4 +-
.../ebooks/rtf2xml/default_encoding.py | 57 +++++++++++--------
src/calibre/ebooks/rtf2xml/process_tokens.py | 1 -
3 files changed, 33 insertions(+), 29 deletions(-)
diff --git a/src/calibre/ebooks/rtf2xml/ParseRtf.py b/src/calibre/ebooks/rtf2xml/ParseRtf.py
index 73f8f04e1c..442f5f4ac3 100755
--- a/src/calibre/ebooks/rtf2xml/ParseRtf.py
+++ b/src/calibre/ebooks/rtf2xml/ParseRtf.py
@@ -237,9 +237,7 @@ class ParseRtf:
check_encoding_obj = check_encoding.CheckEncoding(
bug_handler = RtfInvalidCodeException,
)
- enc = encode_obj.get_codepage()
- if enc != 'mac_roman':
- enc = 'cp' + enc
+ enc = 'cp' + encode_obj.get_codepage()
msg = 'Exception in token processing'
if check_encoding_obj.check_encoding(self.__file, enc):
file_name = self.__file if isinstance(self.__file, str) \
diff --git a/src/calibre/ebooks/rtf2xml/default_encoding.py b/src/calibre/ebooks/rtf2xml/default_encoding.py
index 53887e0d90..31122318b6 100755
--- a/src/calibre/ebooks/rtf2xml/default_encoding.py
+++ b/src/calibre/ebooks/rtf2xml/default_encoding.py
@@ -74,9 +74,6 @@ class DefaultEncoding:
if not self.__datafetched:
self._encoding()
self.__datafetched = True
- if self.__platform == 'Macintosh':
- code_page = self.__code_page
- else:
code_page = 'ansicpg' + self.__code_page
return self.__platform, code_page, self.__default_num
@@ -94,49 +91,59 @@ class DefaultEncoding:
def _encoding(self):
with open(self.__file, 'r') as read_obj:
+ cpfound = False
if not self.__fetchraw:
for line in read_obj:
self.__token_info = line[:16]
if self.__token_info == 'mi pf
'page' : ('pf', 'page-break', self.default_func),
'par' : ('pf', 'par-end___', self.default_func),
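
Patch 108 drops the Macintosh special case when reporting the document codepage, always returning an ansicpg-style number, with 10000 standing for Mac Roman; patch 121 later in the series converts 'cp10000' back to Python's mac_roman codec before the encoding check. The net mapping as a small helper (a sketch, not calibre's code):

import codecs

def rtf_codepage_to_codec(code_page):
    # code_page is the numeric part of ansicpgN, as a string
    codec = 'cp' + code_page
    if codec == 'cp10000':       # RTF's Macintosh codepage
        codec = 'mac_roman'
    codecs.lookup(codec)         # raises LookupError for an unknown codec
    return codec

# rtf_codepage_to_codec('1252')  -> 'cp1252'
# rtf_codepage_to_codec('10000') -> 'mac_roman'
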
From 55616a4e2d8c525463e6c440f7e4112ac0782f5f Mon Sep 17 00:00:00 2001
From: Sengian
Date: Sat, 15 Jan 2011 20:51:39 +0100
Subject: [PATCH 109/163] Update info handling to RTF rev 1.9.1; TODO: integrate
 \userprops
---
src/calibre/ebooks/rtf2xml/info.py | 69 ++++++++++++++------
src/calibre/ebooks/rtf2xml/process_tokens.py | 16 ++++-
2 files changed, 62 insertions(+), 23 deletions(-)
diff --git a/src/calibre/ebooks/rtf2xml/info.py b/src/calibre/ebooks/rtf2xml/info.py
index ad0fb8ec06..9f2905f31b 100755
--- a/src/calibre/ebooks/rtf2xml/info.py
+++ b/src/calibre/ebooks/rtf2xml/info.py
@@ -16,7 +16,9 @@
# #
#########################################################################
import sys, os, tempfile
+
from calibre.ebooks.rtf2xml import copy
+
class Info:
"""
Make tags for document-information
@@ -42,6 +44,7 @@ class Info:
self.__copy = copy
self.__run_level = run_level
self.__write_to = tempfile.mktemp()
+
def __initiate_values(self):
"""
Initiate all values.
@@ -58,27 +61,49 @@ class Info:
self.__info_table_dict = {
'cw33\n
+
def __collect_tokens_func(self, line):
"""
Requires:
@@ -194,18 +224,19 @@ class Info:
att = line[6:16]
value = line[20:-1]
att_changed = self.__token_dict.get(att)
- if att_changed == None:
+ if att_changed is None:
if self.__run_level > 3:
- msg = 'no dictionary match for %s\n' % att
+ msg = 'No dictionary match for %s\n' % att
raise self.__bug_handler, msg
else:
self.__text_string += '<%s>%s' % (att_changed, value)
+
def __single_field_func(self, line, tag):
value = line[20:-1]
self.__write_obj.write(
- 'mi%s\n' % (tag, tag, value)
+ 'mi%s\n' % (tag, tag, value)
)
+
def __after_info_table_func(self, line):
"""
Requires:
@@ -217,6 +248,7 @@ class Info:
the file.
"""
self.__write_obj.write(line)
+
def fix_info(self):
"""
Requires:
@@ -234,20 +266,15 @@ class Info:
information table, simply write the line to the output file.
"""
self.__initiate_values()
- read_obj = open(self.__file, 'r')
- self.__write_obj = open(self.__write_to, 'w')
- line_to_read = 1
- while line_to_read:
- line_to_read = read_obj.readline()
- line = line_to_read
- self.__token_info = line[:16]
- action = self.__state_dict.get(self.__state)
- if action == None:
- sys.stderr.write('no no matching state in module styles.py\n')
- sys.stderr.write(self.__state + '\n')
- action(line)
- read_obj.close()
- self.__write_obj.close()
+ with open(self.__file, 'r') as read_obj:
+ with open(self.__write_to, 'wb') as self.__write_obj:
+ for line in read_obj:
+ self.__token_info = line[:16]
+ action = self.__state_dict.get(self.__state)
+ if action is None:
+ sys.stderr.write('No matching state in module styles.py\n')
+ sys.stderr.write(self.__state + '\n')
+ action(line)
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy:
copy_obj.copy_file(self.__write_to, "info.data")
diff --git a/src/calibre/ebooks/rtf2xml/process_tokens.py b/src/calibre/ebooks/rtf2xml/process_tokens.py
index 6ff0519dc2..56e61d2b60 100755
--- a/src/calibre/ebooks/rtf2xml/process_tokens.py
+++ b/src/calibre/ebooks/rtf2xml/process_tokens.py
@@ -230,11 +230,15 @@ class ProcessTokens:
'trhdr' : ('tb', 'row-header', self.default_func),
# preamble => pr
# document information => di
+ # TODO integrate \userprops
'info' : ('di', 'doc-info__', self.default_func),
+ 'title' : ('di', 'title_____', self.default_func),
'author' : ('di', 'author____', self.default_func),
'operator' : ('di', 'operator__', self.default_func),
- 'title' : ('di', 'title_____', self.default_func),
+ 'manager' : ('di', 'manager___', self.default_func),
+ 'company' : ('di', 'company___', self.default_func),
'keywords' : ('di', 'keywords__', self.default_func),
+ 'category' : ('di', 'category__', self.default_func),
'doccomm' : ('di', 'doc-notes_', self.default_func),
'comment' : ('di', 'doc-notes_', self.default_func),
'subject' : ('di', 'subject___', self.default_func),
@@ -243,11 +247,19 @@ class ProcessTokens:
'mo' : ('di', 'month_____', self.default_func),
'dy' : ('di', 'day_______', self.default_func),
'min' : ('di', 'minute____', self.default_func),
+ 'sec' : ('di', 'second____', self.default_func),
'revtim' : ('di', 'revis-time', self.default_func),
+ 'edmins' : ('di', 'edit-time_', self.default_func),
+ 'printim' : ('di', 'print-time', self.default_func),
+ 'buptim' : ('di', 'backuptime', self.default_func),
'nofwords' : ('di', 'num-of-wor', self.default_func),
'nofchars' : ('di', 'num-of-chr', self.default_func),
+ 'nofcharsws' : ('di', 'numofchrws', self.default_func),
'nofpages' : ('di', 'num-of-pag', self.default_func),
- 'edmins' : ('di', 'edit-time_', self.default_func),
+ 'version' : ('di', 'version___', self.default_func),
+ 'vern' : ('di', 'intern-ver', self.default_func),
+ 'hlinkbase' : ('di', 'linkbase__', self.default_func),
+ 'id' : ('di', 'internalID', self.default_func),
# headers and footers => hf
'headerf' : ('hf', 'head-first', self.default_func),
'headerl' : ('hf', 'head-left_', self.default_func),
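
Patch 109 widens the document-information (di) section of the control-word table to the RTF 1.9.1 set: title, author, operator, manager, company, keywords, category, subject, the timestamp groups, word/character/page counts, version fields and hlinkbase. As a reduced illustration of what those destinations carry, a regex-based reader for a flat \info group (a simplification; rtf2xml itself works on its token stream, not on raw RTF):

import re

INFO_FIELDS = ('title', 'author', 'operator', 'manager', 'company',
               'keywords', 'category', 'subject', 'doccomm')

def read_info_group(rtf_text):
    meta = {}
    for field in INFO_FIELDS:
        m = re.search(r'\{\\' + field + r'\s+([^}]*)\}', rtf_text)
        if m:
            meta[field] = m.group(1).strip()
    return meta

# read_info_group(r'{\info {\title A Tale}{\category Fiction}}')
# -> {'title': 'A Tale', 'category': 'Fiction'}
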
From 77ce7b9c7697cb960ff410b50ad66652e0ce14ec Mon Sep 17 00:00:00 2001
From: Sengian
Date: Sat, 15 Jan 2011 21:38:22 +0100
Subject: [PATCH 110/163] Handling of company tag in info
---
src/calibre/ebooks/rtf2xml/delete_info.py | 34 ++++++++---------------
1 file changed, 12 insertions(+), 22 deletions(-)
diff --git a/src/calibre/ebooks/rtf2xml/delete_info.py b/src/calibre/ebooks/rtf2xml/delete_info.py
index 3ffff7d73a..b3b5bdcad7 100755
--- a/src/calibre/ebooks/rtf2xml/delete_info.py
+++ b/src/calibre/ebooks/rtf2xml/delete_info.py
@@ -20,7 +20,7 @@ import sys, os, tempfile
from calibre.ebooks.rtf2xml import copy
class DeleteInfo:
- """Delelet unecessary destination groups"""
+ """Delete unecessary destination groups"""
def __init__(self,
in_file ,
bug_handler,
@@ -31,17 +31,14 @@ class DeleteInfo:
self.__bug_handler = bug_handler
self.__copy = copy
self.__write_to = tempfile.mktemp()
+ self.__run_level = run_level
+ self.__initiate_allow()
self.__bracket_count= 0
self.__ob_count = 0
self.__cb_count = 0
- # self.__after_asterisk = False
- # self.__delete = 0
- self.__initiate_allow()
self.__ob = 0
self.__write_cb = False
- self.__run_level = run_level
self.__found_delete = False
- # self.__list = False
def __initiate_allow(self):
"""
@@ -57,6 +54,8 @@ class DeleteInfo:
'cw 3:
- msg = 'flag problem\n'
+ msg = 'Flag problem\n'
raise self.__bug_handler, msg
return True
elif self.__token_info in self.__allowable :
@@ -173,8 +171,8 @@ class DeleteInfo:
Return True for all control words.
Return False otherwise.
"""
- if self.__delete_count == self.__cb_count and self.__token_info ==\
- 'cb
Date: Sat, 15 Jan 2011 21:51:37 +0100
Subject: [PATCH 111/163] Remove empty tags in info
---
src/calibre/ebooks/rtf2xml/info.py | 15 +++++++++------
1 file changed, 9 insertions(+), 6 deletions(-)
diff --git a/src/calibre/ebooks/rtf2xml/info.py b/src/calibre/ebooks/rtf2xml/info.py
index 9f2905f31b..f5f1c5851c 100755
--- a/src/calibre/ebooks/rtf2xml/info.py
+++ b/src/calibre/ebooks/rtf2xml/info.py
@@ -15,7 +15,7 @@
# #
# #
#########################################################################
-import sys, os, tempfile
+import sys, os, tempfile, re
from calibre.ebooks.rtf2xml import copy
@@ -51,6 +51,7 @@ class Info:
"""
self.__text_string = ''
self.__state = 'before_info_table'
+ self.rmspace = re.compile(r'\s+')
self.__state_dict = {
'before_info_table': self.__before_info_table_func,
'after_info_table': self.__after_info_table_func,
@@ -167,11 +168,13 @@ class Info:
"""
if self.__token_info == 'mi
Date: Sun, 16 Jan 2011 00:47:01 +0100
Subject: [PATCH 112/163] Handle improper \*\csN in body without braces
---
src/calibre/ebooks/rtf2xml/ParseRtf.py | 2 +-
src/calibre/ebooks/rtf2xml/delete_info.py | 5 ++-
src/calibre/ebooks/rtf2xml/fields_small.py | 52 +++++++++++++---------
src/calibre/ebooks/rtf2xml/tokenize.py | 4 +-
4 files changed, 39 insertions(+), 24 deletions(-)
diff --git a/src/calibre/ebooks/rtf2xml/ParseRtf.py b/src/calibre/ebooks/rtf2xml/ParseRtf.py
index 442f5f4ac3..a28b6f81da 100755
--- a/src/calibre/ebooks/rtf2xml/ParseRtf.py
+++ b/src/calibre/ebooks/rtf2xml/ParseRtf.py
@@ -375,7 +375,7 @@ class ParseRtf:
old_rtf = old_rtf_obj.check_if_old_rtf()
if old_rtf:
if self.__run_level > 5:
- msg = 'older RTF\n'
+ msg = 'Older RTF\n'
msg += 'self.__run_level is "%s"\n' % self.__run_level
raise RtfInvalidCodeException, msg
if self.__run_level > 1:
diff --git a/src/calibre/ebooks/rtf2xml/delete_info.py b/src/calibre/ebooks/rtf2xml/delete_info.py
index b3b5bdcad7..80d2a2b2bd 100755
--- a/src/calibre/ebooks/rtf2xml/delete_info.py
+++ b/src/calibre/ebooks/rtf2xml/delete_info.py
@@ -48,6 +48,7 @@ class DeleteInfo:
'cw%s' % sub_entry
my_changed_string += '\n'
return my_changed_string
+
def __index_see_func(self, my_string):
in_see = 0
bracket_count = 0
@@ -226,6 +235,7 @@ file.
in_see = 1
changed_string += '%s\n' % line
return changed_string, see_string
+
def __index_bookmark_func(self, my_string):
"""
Requries:
@@ -262,6 +272,7 @@ file.
in_bookmark = 1
index_string += '%s\n' % line
return index_string, bookmark_string
+
def __index__format_func(self, my_string):
italics = 0
bold =0
@@ -273,6 +284,7 @@ file.
if token_info == 'cw%s' % main_entry
my_changed_string += '\n'
return my_changed_string
+
def __parse_bookmark_for_toc(self, my_string):
"""
Requires:
@@ -353,6 +366,7 @@ file.
in_bookmark = 1
toc_string += '%s\n' % line
return toc_string, book_start_string, book_end_string
+
def __parse_bookmark_func(self, my_string, type):
"""
Requires:
@@ -367,6 +381,7 @@ file.
my_changed_string = ('mi%s'
'%snone\n' % (type, my_string))
return my_changed_string
+
def __found_toc_index_func(self, line, tag):
"""
Requires:
@@ -382,6 +397,7 @@ file.
self.__cb_count = 0
self.__state = 'toc_index'
self.__tag = tag
+
def __toc_index_func(self, line):
"""
Requires:
@@ -409,6 +425,7 @@ file.
self.__write_obj.write(line)
else:
self.__text_string += line
+
def fix_fields(self):
"""
Requires:
@@ -423,24 +440,19 @@ file.
bookmark.
"""
self.__initiate_values()
- read_obj = open(self.__file)
- self.__write_obj = open(self.__write_to, 'w')
- line_to_read = '1'
- while line_to_read:
- line_to_read = read_obj.readline()
- line = line_to_read
- self.__token_info = line[:16]
- if self.__token_info == 'ob", input_file)
input_file = self.__ms_hex_exp.sub("\\mshex0\g<1> ", input_file)
input_file = self.__utf_ud.sub("\\{\\uc0 \g<1>\\}", input_file)
#remove \n in bin data
@@ -172,6 +172,8 @@ class Tokenize:
self.__splitexp = re.compile(r"(\\[{}]|\n|\\[^\s\\{}&]+(?:[ \t\r\f\v])?)")
#this is for old RTF
self.__par_exp = re.compile(r'\\\n+')
+ #handle improper cs char-style with \* before without {
+ self.__cs_ast = re.compile(r'\\\*([\n ]*\\cs\d+[\n \\]+)')
# self.__par_exp = re.compile(r'\\$')
#self.__bin_exp = re.compile(r"\\bin(-?\d{1,8}) {0,1}")
#self.__utf_exp = re.compile(r"^\\u(-?\d{3,6})")
From c81f75f6f6f02e68cf77485aebad44dd28a7594e Mon Sep 17 00:00:00 2001
From: Sengian
Date: Sun, 16 Jan 2011 12:05:00 +0100
Subject: [PATCH 113/163] Partial fix for blank line in RTFInput
---
resources/templates/rtf.xsl | 9 ++++-----
src/calibre/ebooks/rtf/input.py | 33 +++++----------------------------
2 files changed, 9 insertions(+), 33 deletions(-)
diff --git a/resources/templates/rtf.xsl b/resources/templates/rtf.xsl
index 6db1c0388d..58536186d9 100644
--- a/resources/templates/rtf.xsl
+++ b/resources/templates/rtf.xsl
@@ -220,7 +220,7 @@
-
+
padding-left:
@@ -260,11 +260,11 @@
text-decoration:underline;
-
+
text-align: justify;
@@ -314,7 +314,6 @@
-
diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py
index a6b8c86e79..bf7d11c7ed 100644
--- a/src/calibre/ebooks/rtf/input.py
+++ b/src/calibre/ebooks/rtf/input.py
@@ -78,13 +78,14 @@ class RTFInput(InputFormatPlugin):
def generate_xml(self, stream):
from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf
ofile = 'dataxml.xml'
- run_lev, debug_dir = 1, None
+ run_lev, debug_dir, indent_out = 1, None, 0
#just to check if the debug process is lauched, no need of this directory in fact
if getattr(self.opts, 'debug_pipeline', None) is not None:
try:
os.mkdir('rtfdebug')
debug_dir = 'rtfdebug'
run_lev = 4
+ indent_out = 1
except:
pass
parser = ParseRtf(
@@ -108,7 +109,7 @@ class RTFInput(InputFormatPlugin):
# Indent resulting XML.
# Default is 0 (no indent).
- indent = 1,
+ indent = indent_out,
# Form lists from RTF. Default is 1.
form_lists = 1,
@@ -157,34 +158,10 @@ class RTFInput(InputFormatPlugin):
with open(name, 'wb') as f:
f.write(data)
imap[count] = name
- #open(name+'.hex', 'wb').write(enc)
+ # with open(name+'.hex', 'wb') as f:
+ # f.write(enc)
return self.convert_images(imap)
- # count = 0
- # raw = open(picts, 'rb').read()
- # starts = []
- # for match in re.finditer(r'\{\\pict([^}]+)\}', raw):
- # starts.append(match.start(1))
-
- # imap = {}
- # for start in starts:
- # pos, bc = start, 1
- # while bc > 0:
- # if raw[pos] == '}': bc -= 1
- # elif raw[pos] == '{': bc += 1
- # pos += 1
- # pict = raw[start:pos+1]
- # enc = re.sub(r'[^a-zA-Z0-9]', '', pict)
- # if len(enc) % 2 == 1:
- # enc = enc[:-1]
- # data = enc.decode('hex')
- # count += 1
- # name = (('%4d'%count).replace(' ', '0'))+'.wmf'
- # open(name, 'wb').write(data)
- # imap[count] = name
- # #open(name+'.hex', 'wb').write(enc)
- # return self.convert_images(imap)
-
def convert_images(self, imap):
self.default_img = None
for count, val in imap.iteritems():
From 92870ad5b862014357a0d993645df57a0515bd0c Mon Sep 17 00:00:00 2001
From: Sengian
Date: Tue, 18 Jan 2011 23:22:50 +0100
Subject: [PATCH 114/163] Add comment with test cmd
---
src/calibre/ebooks/rtf/input.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py
index d8301b7120..6361cb7fdb 100644
--- a/src/calibre/ebooks/rtf/input.py
+++ b/src/calibre/ebooks/rtf/input.py
@@ -338,3 +338,4 @@ class RTFInput(InputFormatPlugin):
opf.render(open('metadata.opf', 'wb'))
return os.path.abspath('metadata.opf')
+#ebook-convert "bad.rtf" test.epub -v -d "D:\Mes eBooks\Developpement\debug"
\ No newline at end of file
From 270d5c41f2ef7c9e63dc3756f59f4ba46131037e Mon Sep 17 00:00:00 2001
From: Sengian
Date: Mon, 24 Jan 2011 21:55:04 +0100
Subject: [PATCH 115/163] Correct edit-time field bug
---
src/calibre/ebooks/rtf2xml/info.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/calibre/ebooks/rtf2xml/info.py b/src/calibre/ebooks/rtf2xml/info.py
index f5f1c5851c..55cb54b93a 100755
--- a/src/calibre/ebooks/rtf2xml/info.py
+++ b/src/calibre/ebooks/rtf2xml/info.py
@@ -73,7 +73,6 @@ class Info:
'cw
Date: Mon, 24 Jan 2011 23:32:36 +0100
Subject: [PATCH 116/163] RTF hex_2_utf8 cleaning
---
src/calibre/ebooks/rtf2xml/hex_2_utf8.py | 65 +++++++++++-------------
1 file changed, 29 insertions(+), 36 deletions(-)
diff --git a/src/calibre/ebooks/rtf2xml/hex_2_utf8.py b/src/calibre/ebooks/rtf2xml/hex_2_utf8.py
index 0d17f2da99..38f21fd10b 100755
--- a/src/calibre/ebooks/rtf2xml/hex_2_utf8.py
+++ b/src/calibre/ebooks/rtf2xml/hex_2_utf8.py
@@ -115,7 +115,7 @@ class Hex2Utf8:
"""
self.__file=file
self.__copy = copy
- if area_to_convert != 'preamble' and area_to_convert != 'body':
+ if area_to_convert not in ('preamble', 'body'):
msg = (
'in module "hex_2_utf8.py\n'
'"area_to_convert" must be "body" or "preamble"\n'
@@ -143,18 +143,19 @@ class Hex2Utf8:
Set values, including those for the dictionaries.
The file that contains the maps is broken down into many different
sets. For example, for the Symbol font, there is the standard part for
- hexidecimal numbers, and the the part for Microsoft charcters. Read
+ hexidecimal numbers, and the part for Microsoft characters. Read
each part in, and then combine them.
"""
# the default encoding system, the lower map for characters 0 through
# 128, and the encoding system for Microsoft characters.
- # New on 2004-05-8: the self.__char_map is not in diretory with other
+ # New on 2004-05-8: the self.__char_map is not in directory with other
# modules
self.__char_file = cStringIO.StringIO(char_set)
char_map_obj = get_char_map.GetCharMap(
char_file = self.__char_file,
bug_handler = self.__bug_handler,
)
+ print self.__default_char_map
up_128_dict = char_map_obj.get_char_map(map=self.__default_char_map)
bt_128_dict = char_map_obj.get_char_map(map = 'bottom_128')
ms_standard_dict = char_map_obj.get_char_map(map = 'ms_standard')
@@ -195,7 +196,6 @@ class Hex2Utf8:
'body' : self.__body_func,
'mi 1:
self.__caps_list.pop()
else:
- sys.stderr.write('Module is hex_2_utf8\n')
- sys.stderr.write('method is __end_caps_func\n')
- sys.stderr.write('caps list should be more than one?\n') #self.__in_caps not set
+ sys.stderr.write('Module is hex_2_utf8\n'
+ 'method is __end_caps_func\n'
+ 'caps list should be more than one?\n') #self.__in_caps not set
def __text_func(self, line):
"""
@@ -493,8 +488,7 @@ class Hex2Utf8:
hex_num = '\'%s' % hex_num
converted = self.__current_dict.get(hex_num)
if converted is None:
- sys.stderr.write('module is hex_2_ut8\n')
- sys.stderr.write('method is __text_func\n')
+ sys.stderr.write('module is hex_2_ut8\nmethod is __text_func\n')
sys.stderr.write('no hex value for "%s"\n' % hex_num)
else:
the_string += converted
@@ -550,16 +544,15 @@ class Hex2Utf8:
def __convert_body(self):
self.__state = 'body'
with open(self.__file, 'r') as read_obj:
- self.__write_obj = open(self.__write_to, 'w')
- for line in read_obj:
- self.__token_info = line[:16]
- action = self.__body_state_dict.get(self.__state)
- if action is None:
- sys.stderr.write('error no state found in hex_2_utf8',
- self.__state
- )
- action(line)
- self.__write_obj.close()
+ with open(self.__write_to, 'w') as self.__write_obj:
+ for line in read_obj:
+ self.__token_info = line[:16]
+ action = self.__body_state_dict.get(self.__state)
+ if action is None:
+ sys.stderr.write('error no state found in hex_2_utf8',
+ self.__state
+ )
+ action(line)
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy:
copy_obj.copy_file(self.__write_to, "body_utf_convert.data")
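
Patch 116 tidies hex_2_utf8.py, whose job is to turn RTF \'xx hex escapes into characters through a per-font character map built from calibre's bundled char_set data. A toy version of the conversion step, using a Python codec directly instead of that map (Python 2 byte/unicode semantics):

import re

HEX_ESC = re.compile(r"\\'([0-9a-fA-F]{2})")

def hex_to_unicode(line, encoding='cp1252'):
    def repl(match):
        return chr(int(match.group(1), 16)).decode(encoding)
    return HEX_ESC.sub(repl, line)

# hex_to_unicode(r"caf\'e9") -> u'caf\xe9'
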
From 05a90f1bcb5139c7a331a93c323feb8122921dd5 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Tue, 25 Jan 2011 00:48:32 +0100
Subject: [PATCH 117/163] ...
---
src/calibre/ebooks/rtf2xml/get_char_map.py | 3 ---
src/calibre/ebooks/rtf2xml/hex_2_utf8.py | 1 -
src/calibre/ebooks/rtf2xml/process_tokens.py | 4 ++--
3 files changed, 2 insertions(+), 6 deletions(-)
diff --git a/src/calibre/ebooks/rtf2xml/get_char_map.py b/src/calibre/ebooks/rtf2xml/get_char_map.py
index fb3ef28b4f..cb118b0df8 100755
--- a/src/calibre/ebooks/rtf2xml/get_char_map.py
+++ b/src/calibre/ebooks/rtf2xml/get_char_map.py
@@ -30,8 +30,6 @@ class GetCharMap:
'char_file'--the file with the mappings
-
-
Returns:
nothing
@@ -62,7 +60,6 @@ class GetCharMap:
fields[1].replace('\\colon', ':')
map_dict[fields[1]] = fields[3]
-
if not found_map:
msg = 'no map found\nmap is "%s"\n'%(map,)
raise self.__bug_handler, msg
diff --git a/src/calibre/ebooks/rtf2xml/hex_2_utf8.py b/src/calibre/ebooks/rtf2xml/hex_2_utf8.py
index 38f21fd10b..7b8e148661 100755
--- a/src/calibre/ebooks/rtf2xml/hex_2_utf8.py
+++ b/src/calibre/ebooks/rtf2xml/hex_2_utf8.py
@@ -155,7 +155,6 @@ class Hex2Utf8:
char_file = self.__char_file,
bug_handler = self.__bug_handler,
)
- print self.__default_char_map
up_128_dict = char_map_obj.get_char_map(map=self.__default_char_map)
bt_128_dict = char_map_obj.get_char_map(map = 'bottom_128')
ms_standard_dict = char_map_obj.get_char_map(map = 'ms_standard')
diff --git a/src/calibre/ebooks/rtf2xml/process_tokens.py b/src/calibre/ebooks/rtf2xml/process_tokens.py
index 56e61d2b60..1edf69b32d 100755
--- a/src/calibre/ebooks/rtf2xml/process_tokens.py
+++ b/src/calibre/ebooks/rtf2xml/process_tokens.py
@@ -757,7 +757,7 @@ class ProcessTokens:
def process_cw(self, token):
"""Change the value of the control word by determining what dictionary
it belongs to"""
- special = [ '*', ':', '}', '{', '~', '_', '-', ';' ]
+ special = [ '*', ':', '}', '{', '~', '_', '-', ';' ]
##if token != "{" or token != "}":
token = token[1:] # strip off leading \
token = token.replace(" ", "")
@@ -793,7 +793,7 @@ class ProcessTokens:
raise self.__exception_handler, msg
the_index = token.find('\\ ')
- if token is not None and the_index > -1:
+ if token is not None and the_index > -1:
msg = 'Invalid RTF: token "\\ " not valid.\n'
raise self.__exception_handler, msg
elif token[:1] == "\\":
From 026772d016c47ebdf2d99c9161363944c6db9382 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Thu, 27 Jan 2011 23:56:51 +0100
Subject: [PATCH 118/163] Add possibility of file path export with formats in
 bibtex catalog
---
src/calibre/gui2/catalog/catalog_bibtex.py | 9 +--
src/calibre/gui2/catalog/catalog_bibtex.ui | 9 ++-
src/calibre/library/catalog.py | 74 ++++++++++++++--------
3 files changed, 61 insertions(+), 31 deletions(-)
diff --git a/src/calibre/gui2/catalog/catalog_bibtex.py b/src/calibre/gui2/catalog/catalog_bibtex.py
index a24f6b0f95..ebfcc6e546 100644
--- a/src/calibre/gui2/catalog/catalog_bibtex.py
+++ b/src/calibre/gui2/catalog/catalog_bibtex.py
@@ -9,7 +9,6 @@ __docformat__ = 'restructuredtext en'
from calibre.gui2 import gprefs
from calibre.gui2.catalog.catalog_bibtex_ui import Ui_Form
-from calibre.library import db as db_
from PyQt4.Qt import QWidget, QListWidgetItem
class PluginWidget(QWidget, Ui_Form):
@@ -20,7 +19,9 @@ class PluginWidget(QWidget, Ui_Form):
('bib_entry', 0), #mixed
('bibfile_enc', 0), #utf-8
('bibfile_enctag', 0), #strict
- ('impcit', True) ]
+ ('impcit', True),
+ ('addfiles', False),
+ ]
sync_enabled = False
formats = set(['bib'])
@@ -50,7 +51,7 @@ class PluginWidget(QWidget, Ui_Form):
opt_value = gprefs.get(self.name + '_' + opt[0], opt[1])
if opt[0] in ['bibfile_enc', 'bibfile_enctag', 'bib_entry']:
getattr(self, opt[0]).setCurrentIndex(opt_value)
- elif opt[0] == 'impcit' :
+ elif opt[0] in ['impcit', 'addfiles'] :
getattr(self, opt[0]).setChecked(opt_value)
else:
getattr(self, opt[0]).setText(opt_value)
@@ -77,7 +78,7 @@ class PluginWidget(QWidget, Ui_Form):
for opt in self.OPTION_FIELDS:
if opt[0] in ['bibfile_enc', 'bibfile_enctag', 'bib_entry']:
opt_value = getattr(self,opt[0]).currentIndex()
- elif opt[0] == 'impcit' :
+ elif opt[0] in ['impcit', 'addfiles'] :
opt_value = getattr(self, opt[0]).isChecked()
else :
opt_value = unicode(getattr(self, opt[0]).text())
diff --git a/src/calibre/gui2/catalog/catalog_bibtex.ui b/src/calibre/gui2/catalog/catalog_bibtex.ui
index 7f4920655d..8712d40148 100644
--- a/src/calibre/gui2/catalog/catalog_bibtex.ui
+++ b/src/calibre/gui2/catalog/catalog_bibtex.ui
@@ -47,7 +47,7 @@
-
+
@@ -141,6 +141,13 @@
+
+
+ Add files path with formats?
+
+
+
+ Expression to form the BibTeX citation tag:
diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py
index f0e4778de4..e20eebc517 100644
--- a/src/calibre/library/catalog.py
+++ b/src/calibre/library/catalog.py
@@ -24,10 +24,9 @@ from calibre.utils.logging import default_log as log
from calibre.utils.zipfile import ZipFile, ZipInfo
from calibre.utils.magick.draw import thumbnail
-FIELDS = ['all', 'author_sort', 'authors', 'comments',
- 'cover', 'formats', 'id', 'isbn', 'ondevice', 'pubdate', 'publisher', 'rating',
- 'series_index', 'series', 'size', 'tags', 'timestamp', 'title',
- 'uuid']
+FIELDS = ['all', 'title', 'author_sort', 'authors', 'comments',
+ 'cover', 'formats','id', 'isbn', 'ondevice', 'pubdate', 'publisher',
+ 'rating', 'series_index', 'series', 'size', 'tags', 'timestamp', 'uuid']
#Allowed fields for template
TEMPLATE_ALLOWED_FIELDS = [ 'author_sort', 'authors', 'id', 'isbn', 'pubdate',
@@ -252,6 +251,15 @@ class BIBTEX(CatalogPlugin): # {{{
"Default: '%default'\n"
"Applies to: BIBTEX output format")),
+ Option('--add-files-path',
+ default = 'True',
+ dest = 'addfiles',
+ action = None,
+ help = _('Create a file entry if formats is selected for BibTeX entries.\n'
+ 'Boolean value: True, False\n'
+ "Default: '%default'\n"
+ "Applies to: BIBTEX output format")),
+
Option('--citation-template',
default = '{authors}{id}',
dest = 'bib_cit',
@@ -298,7 +306,7 @@ class BIBTEX(CatalogPlugin): # {{{
from calibre.utils.bibtex import BibTeX
def create_bibtex_entry(entry, fields, mode, template_citation,
- bibtexdict, citation_bibtex = True):
+ bibtexdict, citation_bibtex=True, calibre_files=True):
#Bibtex doesn't like UTF-8 but keep unicode until writing
#Define starting chain or if book valid strict and not book return a Fail string
@@ -360,8 +368,13 @@ class BIBTEX(CatalogPlugin): # {{{
bibtex_entry.append(u'isbn = "%s"' % re.sub(u'[\D]', u'', item))
elif field == 'formats' :
- item = u', '.join([format.rpartition('.')[2].lower() for format in item])
- bibtex_entry.append(u'formats = "%s"' % item)
+ #Add file path if format is selected
+ formats = [format.rpartition('.')[2].lower() for format in item]
+ bibtex_entry.append(u'formats = "%s"' % u', '.join(formats))
+ if calibre_files:
+ files = [u':%s:%s' % (format, format.rpartition('.')[2].upper())\
+ for format in item]
+ bibtex_entry.append(u'files = "%s"' % u', '.join(files))
elif field == 'series_index' :
bibtex_entry.append(u'volume = "%s"' % int(item))
@@ -510,32 +523,41 @@ class BIBTEX(CatalogPlugin): # {{{
citation_bibtex= True
else :
citation_bibtex= opts.impcit
+
+ #Check add file entry and go to default in case of bad CLI
+ if isinstance(opts.addfiles, (StringType, UnicodeType)) :
+ if opts.addfiles == 'False' :
+ addfiles_bibtex = False
+ elif opts.addfiles == 'True' :
+ addfiles_bibtex = True
+ else :
+ log(" WARNING: incorrect --add-files-path, revert to default")
+ addfiles_bibtex= True
+ else :
+ addfiles_bibtex = opts.addfiles
#Preprocess for error and light correction
template_citation = preprocess_template(opts.bib_cit)
#Open output and write entries
- outfile = codecs.open(path_to_output, 'w', bibfile_enc, bibfile_enctag)
+ with codecs.open(path_to_output, 'w', bibfile_enc, bibfile_enctag)\
+ as outfile:
+ #File header
+ nb_entries = len(data)
+ #check in book strict if all is ok else throw a warning into log
+ if bib_entry == 'book' :
+ nb_books = len(filter(check_entry_book_valid, data))
+ if nb_books < nb_entries :
+ log(" WARNING: only %d entries in %d are book compatible" % (nb_books, nb_entries))
+ nb_entries = nb_books
- #File header
- nb_entries = len(data)
+ outfile.write(u'%%%Calibre catalog\n%%%{0} entries in catalog\n\n'.format(nb_entries))
+ outfile.write(u'@preamble{"This catalog of %d entries was generated by calibre on %s"}\n\n'
+ % (nb_entries, nowf().strftime("%A, %d. %B %Y %H:%M").decode(preferred_encoding)))
- #check in book strict if all is ok else throw a warning into log
- if bib_entry == 'book' :
- nb_books = len(filter(check_entry_book_valid, data))
- if nb_books < nb_entries :
- log(" WARNING: only %d entries in %d are book compatible" % (nb_books, nb_entries))
- nb_entries = nb_books
-
- outfile.write(u'%%%Calibre catalog\n%%%{0} entries in catalog\n\n'.format(nb_entries))
- outfile.write(u'@preamble{"This catalog of %d entries was generated by calibre on %s"}\n\n'
- % (nb_entries, nowf().strftime("%A, %d. %B %Y %H:%M").decode(preferred_encoding)))
-
- for entry in data:
- outfile.write(create_bibtex_entry(entry, fields, bib_entry, template_citation,
- bibtexc, citation_bibtex))
-
- outfile.close()
+ for entry in data:
+ outfile.write(create_bibtex_entry(entry, fields, bib_entry, template_citation,
+ bibtexc, citation_bibtex, addfiles_bibtex))
# }}}
class EPUB_MOBI(CatalogPlugin):
From ea86886f16ca5dd89f3eca0f250d42fe711951fb Mon Sep 17 00:00:00 2001
From: Sengian
Date: Fri, 28 Jan 2011 00:05:35 +0100
Subject: [PATCH 119/163] BibTeX catalog on-device modifications
---
src/calibre/library/catalog.py | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py
index e20eebc517..2b95d0a5be 100644
--- a/src/calibre/library/catalog.py
+++ b/src/calibre/library/catalog.py
@@ -340,7 +340,7 @@ class BIBTEX(CatalogPlugin): # {{{
if field == 'authors' :
bibtex_entry.append(u'author = "%s"' % bibtexdict.bibtex_author_format(item))
- elif field in ['title', 'publisher', 'cover', 'uuid',
+ elif field in ['title', 'publisher', 'cover', 'uuid', 'ondevice',
'author_sort', 'series'] :
bibtex_entry.append(u'%s = "%s"' % (field, bibtexdict.utf8ToBibtex(item)))
@@ -374,7 +374,7 @@ class BIBTEX(CatalogPlugin): # {{{
if calibre_files:
files = [u':%s:%s' % (format, format.rpartition('.')[2].upper())\
for format in item]
- bibtex_entry.append(u'files = "%s"' % u', '.join(files))
+ bibtex_entry.append(u'file = "%s"' % u', '.join(files))
elif field == 'series_index' :
bibtex_entry.append(u'volume = "%s"' % int(item))
@@ -470,6 +470,8 @@ class BIBTEX(CatalogPlugin): # {{{
if opts.verbose:
opts_dict = vars(opts)
log("%s(): Generating %s" % (self.name,self.fmt))
+ if opts.connected_device['is_device_connected']:
+ log(" connected_device: %s" % opts.connected_device['name'])
if opts_dict['search_text']:
log(" --search='%s'" % opts_dict['search_text'])
@@ -544,6 +546,7 @@ class BIBTEX(CatalogPlugin): # {{{
as outfile:
#File header
nb_entries = len(data)
+
#check in book strict if all is ok else throw a warning into log
if bib_entry == 'book' :
nb_books = len(filter(check_entry_book_valid, data))
@@ -551,6 +554,11 @@ class BIBTEX(CatalogPlugin): # {{{
log(" WARNING: only %d entries in %d are book compatible" % (nb_books, nb_entries))
nb_entries = nb_books
+ # If connected device, add 'On Device' values to data
+ if opts.connected_device['is_device_connected'] and 'ondevice' in fields:
+ for entry in data:
+ entry['ondevice'] = db.catalog_plugin_on_device_temp_mapping[entry['id']]['ondevice']
+
outfile.write(u'%%%Calibre catalog\n%%%{0} entries in catalog\n\n'.format(nb_entries))
outfile.write(u'@preamble{"This catalog of %d entries was generated by calibre on %s"}\n\n'
% (nb_entries, nowf().strftime("%A, %d. %B %Y %H:%M").decode(preferred_encoding)))
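
Patches 118 and 119 add an optional JabRef-style file field to each BibTeX entry, built from the book's format paths, and fill the ondevice field when a reader is connected. The field construction in isolation (paths are illustrative; the surrounding entry assembly is calibre's):

def bibtex_file_field(format_paths):
    # ':<path>:<TYPE>' entries, comma separated, as JabRef expects
    entries = [u':%s:%s' % (path, path.rpartition('.')[2].upper())
               for path in format_paths]
    return u'file = "%s"' % u', '.join(entries)

# bibtex_file_field(['/lib/Book.epub', '/lib/Book.mobi'])
# -> u'file = ":/lib/Book.epub:EPUB, :/lib/Book.mobi:MOBI"'
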
From 23f2fc62021ed8b1e9b2dca6f7d409affdeebe0a Mon Sep 17 00:00:00 2001
From: Sengian
Date: Sun, 30 Jan 2011 11:05:07 +0100
Subject: [PATCH 120/163] Minor modifications to catalog
---
src/calibre/library/catalog.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py
index 084c238f00..8b88e44407 100644
--- a/src/calibre/library/catalog.py
+++ b/src/calibre/library/catalog.py
@@ -232,6 +232,7 @@ class BIBTEX(CatalogPlugin): # {{{
help = _('The fields to output when cataloging books in the '
'database. Should be a comma-separated list of fields.\n'
'Available fields: %s.\n'
+ 'plus user-created custom fields.\n'
'Example: %s=title,authors,tags\n'
"Default: '%%default'\n"
"Applies to: BIBTEX output format")%(', '.join(FIELDS),
@@ -269,7 +270,7 @@ class BIBTEX(CatalogPlugin): # {{{
dest = 'bib_cit',
action = None,
help = _('The template for citation creation from database fields.\n'
- ' Should be a template with {} enclosed fields.\n'
+ 'Should be a template with {} enclosed fields.\n'
'Available fields: %s.\n'
"Default: '%%default'\n"
"Applies to: BIBTEX output format")%', '.join(TEMPLATE_ALLOWED_FIELDS)),
From ed4da14df07a4c61a21bfe09c542aa4802863a9d Mon Sep 17 00:00:00 2001
From: Sengian
Date: Mon, 31 Jan 2011 08:29:42 +0100
Subject: [PATCH 121/163] Correct problems with tag splitting in RTFParser,
some encoding refactoring & move all encodings to UTF-8 or US-ASCII for lxml
---
src/calibre/ebooks/rtf/input.py | 23 ++------
src/calibre/ebooks/rtf2xml/ParseRtf.py | 2 +
src/calibre/ebooks/rtf2xml/colors.py | 54 +++++++++++--------
src/calibre/ebooks/rtf2xml/convert_to_tags.py | 38 ++++++++-----
.../ebooks/rtf2xml/default_encoding.py | 4 ++
src/calibre/ebooks/rtf2xml/fonts.py | 36 +++++++------
src/calibre/ebooks/rtf2xml/get_char_map.py | 2 +-
src/calibre/ebooks/rtf2xml/tokenize.py | 24 +++++----
8 files changed, 101 insertions(+), 82 deletions(-)
diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py
index 6361cb7fdb..caa35a9eda 100644
--- a/src/calibre/ebooks/rtf/input.py
+++ b/src/calibre/ebooks/rtf/input.py
@@ -85,6 +85,7 @@ class RTFInput(InputFormatPlugin):
debug_dir = 'rtfdebug'
run_lev = 4
indent_out = 1
+ self.log('Running RTFParser in debug mode')
except:
pass
parser = ParseRtf(
@@ -233,22 +234,6 @@ class RTFInput(InputFormatPlugin):
with open('styles.css', 'ab') as f:
f.write(css)
- # def preprocess(self, fname):
- # self.log('\tPreprocessing to convert unicode characters')
- # try:
- # data = open(fname, 'rb').read()
- # from calibre.ebooks.rtf.preprocess import RtfTokenizer, RtfTokenParser
- # tokenizer = RtfTokenizer(data)
- # tokens = RtfTokenParser(tokenizer.tokens)
- # data = tokens.toRTF()
- # fname = 'preprocessed.rtf'
- # with open(fname, 'wb') as f:
- # f.write(data)
- # except:
- # self.log.exception(
- # 'Failed to preprocess RTF to convert unicode sequences, ignoring...')
- # return fname
-
def convert_borders(self, doc):
border_styles = []
style_map = {}
@@ -283,8 +268,6 @@ class RTFInput(InputFormatPlugin):
self.opts = options
self.log = log
self.log('Converting RTF to XML...')
- #Name of the preprocesssed RTF file
- # fname = self.preprocess(stream.name)
try:
xml = self.generate_xml(stream.name)
except RtfInvalidCodeException, e:
@@ -338,4 +321,6 @@ class RTFInput(InputFormatPlugin):
opf.render(open('metadata.opf', 'wb'))
return os.path.abspath('metadata.opf')
-#ebook-convert "bad.rtf" test.epub -v -d "D:\Mes eBooks\Developpement\debug"
\ No newline at end of file
+#ebook-convert "bad.rtf" test.epub -v -d "D:\Mes eBooks\Developpement\debug"
+# os.makedirs('D:\\Mes eBooks\\Developpement\\rtfdebug')
+# debug_dir = 'D:\\Mes eBooks\\Developpement\\rtfdebug'
\ No newline at end of file
diff --git a/src/calibre/ebooks/rtf2xml/ParseRtf.py b/src/calibre/ebooks/rtf2xml/ParseRtf.py
index a28b6f81da..56e18fe74d 100755
--- a/src/calibre/ebooks/rtf2xml/ParseRtf.py
+++ b/src/calibre/ebooks/rtf2xml/ParseRtf.py
@@ -238,6 +238,8 @@ class ParseRtf:
bug_handler = RtfInvalidCodeException,
)
enc = 'cp' + encode_obj.get_codepage()
+ if enc == 'cp10000':
+ enc = 'mac_roman'
msg = 'Exception in token processing'
if check_encoding_obj.check_encoding(self.__file, enc):
file_name = self.__file if isinstance(self.__file, str) \
diff --git a/src/calibre/ebooks/rtf2xml/colors.py b/src/calibre/ebooks/rtf2xml/colors.py
index d81b293bbf..eba03547c8 100755
--- a/src/calibre/ebooks/rtf2xml/colors.py
+++ b/src/calibre/ebooks/rtf2xml/colors.py
@@ -15,8 +15,10 @@
# #
# #
#########################################################################
-import sys, os, tempfile, re
+import sys, os, tempfile, re
+
from calibre.ebooks.rtf2xml import copy
+
class Colors:
"""
Change lines with color info from color numbers to the actual color names.
@@ -40,8 +42,10 @@ class Colors:
self.__file = in_file
self.__copy = copy
self.__bug_handler = bug_handler
+ self.__line = 0
self.__write_to = tempfile.mktemp()
self.__run_level = run_level
+
def __initiate_values(self):
"""
Initiate all values.
@@ -61,6 +65,7 @@ class Colors:
self.__color_num = 1
self.__line_color_exp = re.compile(r'bdr-color_:(\d+)')
# cw 3:
- msg = 'no value in self.__color_dict for key %s\n' % num
- raise self.__bug_hanlder, msg
- if hex_num == None:
+ if hex_num is None:
hex_num = '0'
+ if self.__run_level > 5:
+                msg = 'no value in self.__color_dict ' \
+                    'for key %s at line %d\n' % (num, self.__line)
+ raise self.__bug_handler, msg
return hex_num
+
def __do_nothing_func(self, line):
"""
Bad RTF will have text in the color table
"""
pass
+
def convert_colors(self):
"""
Requires:
@@ -226,20 +238,16 @@ class Colors:
info, and substitute the number with the hex number.
"""
self.__initiate_values()
- read_obj = open(self.__file, 'r')
- self.__write_obj = open(self.__write_to, 'w')
- line_to_read = 1
- while line_to_read:
- line_to_read = read_obj.readline()
- line = line_to_read
- self.__token_info = line[:16]
- action = self.__state_dict.get(self.__state)
- if action == None:
- sys.stderr.write('no no matching state in module fonts.py\n')
- sys.stderr.write(self.__state + '\n')
- action(line)
- read_obj.close()
- self.__write_obj.close()
+ with open(self.__file, 'r') as read_obj:
+ with open(self.__write_to, 'w') as self.__write_obj:
+ for line in read_obj:
+ self.__line+=1
+ self.__token_info = line[:16]
+ action = self.__state_dict.get(self.__state)
+ if action is None:
+                        sys.stderr.write('no matching state in module colors.py\n')
+ sys.stderr.write(self.__state + '\n')
+ action(line)
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy:
copy_obj.copy_file(self.__write_to, "color.data")
diff --git a/src/calibre/ebooks/rtf2xml/convert_to_tags.py b/src/calibre/ebooks/rtf2xml/convert_to_tags.py
index 6927537474..1abc672f85 100755
--- a/src/calibre/ebooks/rtf2xml/convert_to_tags.py
+++ b/src/calibre/ebooks/rtf2xml/convert_to_tags.py
@@ -33,13 +33,13 @@ class ConvertToTags:
self.__copy = copy
self.__dtd_path = dtd_path
self.__no_dtd = no_dtd
- if encoding != 'mac_roman':
- self.__encoding = 'cp' + encoding
- else:
+ self.__encoding = 'cp' + encoding
+ if encoding == 'mac_roman':
self.__encoding = 'mac_roman'
self.__indent = indent
self.__run_level = run_level
self.__write_to = tempfile.mktemp()
+ self.__convert_utf = False
def __initiate_values(self):
"""
@@ -213,7 +213,8 @@ class ConvertToTags:
if not check_encoding_obj.check_encoding(self.__file, verbose=False):
             self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
         elif not check_encoding_obj.check_encoding(self.__file, self.__encoding):
-            self.__write_obj.write('<?xml version="1.0" encoding="%s" ?>' % self.__encoding)
+            self.__write_obj.write('<?xml version="1.0" encoding="UTF-8" ?>')
+            self.__convert_utf = True
         else:
             self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
             sys.stderr.write('Bad RTF encoding, revert to US-ASCII chars and'
@@ -253,15 +254,28 @@ class ConvertToTags:
an empty tag function.
"""
self.__initiate_values()
- self.__write_obj = open(self.__write_to, 'w')
- self.__write_dec()
- with open(self.__file, 'r') as read_obj:
- for line in read_obj:
- self.__token_info = line[:16]
- action = self.__state_dict.get(self.__token_info)
- if action is not None:
- action(line)
+ with open(self.__write_to, 'w') as self.__write_obj:
+ self.__write_dec()
+ with open(self.__file, 'r') as read_obj:
+ for line in read_obj:
+ self.__token_info = line[:16]
+ action = self.__state_dict.get(self.__token_info)
+ if action is not None:
+ action(line)
self.__write_obj.close()
+ #convert all encodings to UTF8 to avoid unsupported encodings in lxml
+ if self.__convert_utf:
+ copy_obj = copy.Copy(bug_handler = self.__bug_handler)
+ copy_obj.rename(self.__write_to, self.__file)
+ with open(self.__file, 'r') as read_obj:
+ with open(self.__write_to, 'w') as write_obj:
+ file = read_obj.read()
+ try:
+ file = file.decode(self.__encoding)
+ write_obj.write(file.encode('utf-8'))
+ except:
+ sys.stderr.write('Conversion to UTF-8 is not possible,'
+ ' encoding should be very carefully checked')
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy:
copy_obj.copy_file(self.__write_to, "convert_to_tags.data")
diff --git a/src/calibre/ebooks/rtf2xml/default_encoding.py b/src/calibre/ebooks/rtf2xml/default_encoding.py
index 3ddfbcd321..c0a43db800 100755
--- a/src/calibre/ebooks/rtf2xml/default_encoding.py
+++ b/src/calibre/ebooks/rtf2xml/default_encoding.py
@@ -75,12 +75,16 @@ class DefaultEncoding:
self._encoding()
self.__datafetched = True
code_page = 'ansicpg' + self.__code_page
+ if self.__code_page == '10000':
+ self.__code_page = 'mac_roman'
return self.__platform, code_page, self.__default_num
def get_codepage(self):
if not self.__datafetched:
self._encoding()
self.__datafetched = True
+ if self.__code_page == '10000':
+ self.__code_page = 'mac_roman'
return self.__code_page
def get_platform(self):
diff --git a/src/calibre/ebooks/rtf2xml/fonts.py b/src/calibre/ebooks/rtf2xml/fonts.py
index b85717ce48..45ed3c1957 100755
--- a/src/calibre/ebooks/rtf2xml/fonts.py
+++ b/src/calibre/ebooks/rtf2xml/fonts.py
@@ -16,7 +16,9 @@
# #
#########################################################################
import sys, os, tempfile
+
from calibre.ebooks.rtf2xml import copy
+
class Fonts:
"""
Change lines with font info from font numbers to the actual font names.
@@ -45,6 +47,7 @@ class Fonts:
self.__default_font_num = default_font_num
self.__write_to = tempfile.mktemp()
self.__run_level = run_level
+
def __initiate_values(self):
"""
Initiate all values.
@@ -67,6 +70,7 @@ class Fonts:
self.__font_table = {}
# individual font written
self.__wrote_ind_font = 0
+
def __default_func(self, line):
"""
Requires:
@@ -79,6 +83,7 @@ class Fonts:
if self.__token_info == 'miTimes0\n' )
+ 'Times0\n')
+
def __after_font_table_func(self, line):
"""
Required:
@@ -169,7 +177,7 @@ class Fonts:
if self.__token_info == 'cw 3:
msg = 'no value for %s in self.__font_table\n' % font_num
raise self.__bug_handler, msg
@@ -182,6 +190,7 @@ class Fonts:
)
else:
self.__write_obj.write(line)
+
def convert_fonts(self):
"""
Required:
@@ -197,20 +206,15 @@ class Fonts:
info. Substitute a font name for a font number.
"""
self.__initiate_values()
- read_obj = open(self.__file, 'r')
- self.__write_obj = open(self.__write_to, 'w')
- line_to_read = 1
- while line_to_read:
- line_to_read = read_obj.readline()
- line = line_to_read
- self.__token_info = line[:16]
- action = self.__state_dict.get(self.__state)
- if action == None:
- sys.stderr.write('no no matching state in module fonts.py\n')
- sys.stderr.write(self.__state + '\n')
- action(line)
- read_obj.close()
- self.__write_obj.close()
+ with open(self.__file, 'r') as read_obj:
+ with open(self.__write_to, 'w') as self.__write_obj:
+ for line in read_obj:
+ self.__token_info = line[:16]
+ action = self.__state_dict.get(self.__state)
+ if action is None:
+ sys.stderr.write('no matching state in module fonts.py\n' \
+ + self.__state + '\n')
+ action(line)
default_font_name = self.__font_table.get(self.__default_font_num)
if not default_font_name:
default_font_name = 'Not Defined'
diff --git a/src/calibre/ebooks/rtf2xml/get_char_map.py b/src/calibre/ebooks/rtf2xml/get_char_map.py
index cb118b0df8..bd487bb6f5 100755
--- a/src/calibre/ebooks/rtf2xml/get_char_map.py
+++ b/src/calibre/ebooks/rtf2xml/get_char_map.py
@@ -41,7 +41,7 @@ class GetCharMap:
def get_char_map(self, map):
if map == 'ansicpg0':
map = 'ansicpg1250'
- if map in ('ansicpg10000', '10000'):
+ if map == 'ansicpg10000':
map = 'mac_roman'
found_map = False
map_dict = {}
diff --git a/src/calibre/ebooks/rtf2xml/tokenize.py b/src/calibre/ebooks/rtf2xml/tokenize.py
index 9ebd718833..84acd26a57 100755
--- a/src/calibre/ebooks/rtf2xml/tokenize.py
+++ b/src/calibre/ebooks/rtf2xml/tokenize.py
@@ -115,6 +115,7 @@ class Tokenize:
def __sub_reg_split(self,input_file):
input_file = self.__replace_spchar.mreplace(input_file)
+ # this is for older RTF
input_file = self.__par_exp.sub('\n\\par \n', input_file)
input_file = self.__cs_ast.sub("\g<1>", input_file)
input_file = self.__ms_hex_exp.sub("\\mshex0\g<1> ", input_file)
@@ -126,12 +127,6 @@ class Tokenize:
tokens = re.split(self.__splitexp, input_file)
#remove empty tokens and \n
return filter(lambda x: len(x) > 0 and x != '\n', tokens)
- #input_file = re.sub(self.__utf_exp, self.__from_ms_to_utf8, input_file)
- # line = re.sub( self.__neg_utf_exp, self.__neg_unicode_func, line)
- # this is for older RTF
- #line = re.sub(self.__par_exp, '\\par ', line)
- #return filter(lambda x: len(x) > 0, \
- #(self.__remove_line.sub('', x) for x in tokens))
def __compile_expressions(self):
SIMPLE_RPL = {
@@ -160,7 +155,7 @@ class Tokenize:
}
self.__replace_spchar = MReplace(SIMPLE_RPL)
#add ;? in case of char following \u
- self.__ms_hex_exp = re.compile(r"\\\'([0-9a-fA-F]{2})") #r"\\\'(..)"
+ self.__ms_hex_exp = re.compile(r"\\\'([0-9a-fA-F]{2})")
self.__utf_exp = re.compile(r"\\u(-?\d{3,6}) ?")
self.__bin_exp = re.compile(r"(?:\\bin(-?\d{0,10})[\n ]+)[01\n]+")
#manage upr/ud situations
@@ -174,14 +169,21 @@ class Tokenize:
self.__par_exp = re.compile(r'\\\n+')
#handle improper cs char-style with \* before without {
self.__cs_ast = re.compile(r'\\\*([\n ]*\\cs\d+[\n \\]+)')
- # self.__par_exp = re.compile(r'\\$')
+ #handle cw using a digit as argument and without space as delimiter
+ self.__cwdigit_exp = re.compile(r"(\\[a-zA-Z]+[\-0-9]+)([^0-9 \\]+)")
#self.__bin_exp = re.compile(r"\\bin(-?\d{1,8}) {0,1}")
#self.__utf_exp = re.compile(r"^\\u(-?\d{3,6})")
#self.__splitexp = re.compile(r"(\\[\\{}]|{|}|\n|\\[^\s\\{}&]+(?:\s)?)")
#self.__remove_line = re.compile(r'\n+')
- #self.__mixed_exp = re.compile(r"(\\[a-zA-Z]+\d+)(\D+)")
##self.num_exp = re.compile(r"(\*|:|[a-zA-Z]+)(.*)")
+ def __correct_spliting(self, token):
+ match_obj = re.search(self.__cwdigit_exp, token)
+ if match_obj is None:
+ return token
+ else:
+ return '%s\n%s' % (match_obj.group(1), match_obj.group(2))
+
def tokenize(self):
"""Main class for handling other methods. Reads the file \
, uses method self.sub_reg to make basic substitutions,\
@@ -197,6 +199,8 @@ class Tokenize:
tokens = map(self.__unicode_process, tokens)
#remove empty items created by removing \uc
tokens = filter(lambda x: len(x) > 0, tokens)
+ #handles bothersome cases
+ tokens = map(self.__correct_spliting, tokens)
#write
with open(self.__write_to, 'wb') as write_obj:
@@ -205,8 +209,6 @@ class Tokenize:
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy:
copy_obj.copy_file(self.__write_to, "tokenize.data")
- # if self.__out_file:
- # self.__file = self.__out_file
copy_obj.rename(self.__write_to, self.__file)
os.remove(self.__write_to)
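A minimal sketch of the encoding idea behind this patch, for orientation only; the helper names and file handling below are illustrative assumptions, not calibre's API. RTF announces Apple's Mac OS Roman as codepage 10000, which Python and lxml only know as 'mac_roman', and once the real codepage is known the intermediate file can be rewritten as UTF-8 so lxml never sees an unsupported encoding.

import codecs

def normalise_codepage(code_page):
    # \ansicpg10000 is Mac OS Roman; Python has no 'cp10000' codec,
    # so map it to 'mac_roman' instead of blindly prefixing 'cp'.
    return 'mac_roman' if code_page == '10000' else 'cp' + code_page

def reencode_to_utf8(path, code_page):
    # Decode with the detected codepage, then write the file back as UTF-8.
    enc = normalise_codepage(code_page)
    with codecs.open(path, 'r', encoding=enc) as src:
        text = src.read()
    with codecs.open(path, 'w', encoding='utf-8') as dst:
        dst.write(text)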
From 056f97c7008037f0eb9d20d9ae508171dd879993 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Sat, 5 Feb 2011 12:19:46 +0100
Subject: [PATCH 122/163] Correct splitting problem
---
src/calibre/ebooks/rtf/input.py | 6 +++---
src/calibre/ebooks/rtf2xml/colors.py | 2 +-
src/calibre/ebooks/rtf2xml/tokenize.py | 10 +---------
3 files changed, 5 insertions(+), 13 deletions(-)
diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py
index 2ef5932784..6e17e33556 100644
--- a/src/calibre/ebooks/rtf/input.py
+++ b/src/calibre/ebooks/rtf/input.py
@@ -321,6 +321,6 @@ class RTFInput(InputFormatPlugin):
opf.render(open('metadata.opf', 'wb'))
return os.path.abspath('metadata.opf')
-#ebook-convert "bad.rtf" test.epub -v -d "D:\Mes eBooks\Developpement\debug"
-# os.makedirs('D:\\Mes eBooks\\Developpement\\rtfdebug')
-# debug_dir = 'D:\\Mes eBooks\\Developpement\\rtfdebug'
+#ebook-convert "bad.rtf" test.epub -v -d "E:\Mes eBooks\Developpement\debug"
+# os.makedirs('E:\\Mes eBooks\\Developpement\\rtfdebug')
+# debug_dir = 'E:\\Mes eBooks\\Developpement\\rtfdebug'
diff --git a/src/calibre/ebooks/rtf2xml/colors.py b/src/calibre/ebooks/rtf2xml/colors.py
index eba03547c8..e85b59571c 100755
--- a/src/calibre/ebooks/rtf2xml/colors.py
+++ b/src/calibre/ebooks/rtf2xml/colors.py
@@ -210,7 +210,7 @@ class Colors:
hex_num = self.__color_dict.get(num)
if hex_num is None:
hex_num = '0'
- if self.__run_level > 5:
+ if self.__run_level > 3:
             msg = 'no value in self.__color_dict ' \
                 'for key %s at line %d\n' % (num, self.__line)
raise self.__bug_handler, msg
diff --git a/src/calibre/ebooks/rtf2xml/tokenize.py b/src/calibre/ebooks/rtf2xml/tokenize.py
index 2c0fa8fcb6..5e01515730 100755
--- a/src/calibre/ebooks/rtf2xml/tokenize.py
+++ b/src/calibre/ebooks/rtf2xml/tokenize.py
@@ -117,6 +117,7 @@ class Tokenize:
input_file = self.__replace_spchar.mreplace(input_file)
# this is for older RTF
input_file = self.__par_exp.sub('\n\\par \n', input_file)
+ input_file = self.__cwdigit_exp.sub("\g<1>\n\g<2>", input_file)
input_file = self.__cs_ast.sub("\g<1>", input_file)
input_file = self.__ms_hex_exp.sub("\\mshex0\g<1> ", input_file)
input_file = self.__utf_ud.sub("\\{\\uc0 \g<1>\\}", input_file)
@@ -177,13 +178,6 @@ class Tokenize:
#self.__remove_line = re.compile(r'\n+')
##self.num_exp = re.compile(r"(\*|:|[a-zA-Z]+)(.*)")
- def __correct_spliting(self, token):
- match_obj = re.search(self.__cwdigit_exp, token)
- if match_obj is None:
- return token
- else:
- return '%s\n%s' % (match_obj.group(1), match_obj.group(2))
-
def tokenize(self):
"""Main class for handling other methods. Reads the file \
, uses method self.sub_reg to make basic substitutions,\
@@ -199,8 +193,6 @@ class Tokenize:
tokens = map(self.__unicode_process, tokens)
#remove empty items created by removing \uc
tokens = filter(lambda x: len(x) > 0, tokens)
- #handles bothersome cases
- tokens = map(self.__correct_spliting, tokens)
#write
with open(self.__write_to, 'wb') as write_obj:
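For reference, a toy demonstration (not the tokenizer itself) of what the __cwdigit_exp substitution introduced here does: it inserts a newline between a control word carrying a numeric argument and any text glued directly onto it, so the later split sees two tokens.

import re

cwdigit_exp = re.compile(r"(\\[a-zA-Z]+[\-0-9]+)([^0-9 \\]+)")

sample = r"\fs24Some text"
print(cwdigit_exp.sub(r"\g<1>\n\g<2>", sample))
# prints:
# \fs24
# Some text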
From ccf856539aee3de4a462a409aee514dde22b312a Mon Sep 17 00:00:00 2001
From: Sengian
Date: Sat, 5 Feb 2011 17:34:57 +0100
Subject: [PATCH 123/163] Still old paragraph format
---
src/calibre/ebooks/rtf/input.py | 5 ++---
src/calibre/ebooks/rtf2xml/ParseRtf.py | 12 +++++-----
src/calibre/ebooks/rtf2xml/process_tokens.py | 14 +++++++-----
src/calibre/ebooks/rtf2xml/tokenize.py | 23 ++++++++------------
4 files changed, 25 insertions(+), 29 deletions(-)
diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py
index 6e17e33556..06a5fa61c9 100644
--- a/src/calibre/ebooks/rtf/input.py
+++ b/src/calibre/ebooks/rtf/input.py
@@ -78,7 +78,6 @@ class RTFInput(InputFormatPlugin):
from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf
ofile = 'dataxml.xml'
run_lev, debug_dir, indent_out = 1, None, 0
- #just to check if the debug process is lauched, no need of this directory in fact
if getattr(self.opts, 'debug_pipeline', None) is not None:
try:
os.mkdir('rtfdebug')
@@ -322,5 +321,5 @@ class RTFInput(InputFormatPlugin):
return os.path.abspath('metadata.opf')
#ebook-convert "bad.rtf" test.epub -v -d "E:\Mes eBooks\Developpement\debug"
-# os.makedirs('E:\\Mes eBooks\\Developpement\\rtfdebug')
-# debug_dir = 'E:\\Mes eBooks\\Developpement\\rtfdebug'
+# os.makedirs("E:\\Mes eBooks\\Developpement\\rtfdebug")
+# debug_dir = "E:\\Mes eBooks\\Developpement\\rtfdebug"
diff --git a/src/calibre/ebooks/rtf2xml/ParseRtf.py b/src/calibre/ebooks/rtf2xml/ParseRtf.py
index 56e18fe74d..9f554467b0 100755
--- a/src/calibre/ebooks/rtf2xml/ParseRtf.py
+++ b/src/calibre/ebooks/rtf2xml/ParseRtf.py
@@ -226,7 +226,7 @@ class ParseRtf:
try:
return_value = process_tokens_obj.process_tokens()
except InvalidRtfException, msg:
- #Check to see if the file is correctly encoded
+ # Check to see if the file is correctly encoded
encode_obj = default_encoding.DefaultEncoding(
in_file = self.__temp_file,
run_level = self.__run_level,
@@ -237,14 +237,14 @@ class ParseRtf:
check_encoding_obj = check_encoding.CheckEncoding(
bug_handler = RtfInvalidCodeException,
)
- enc = 'cp' + encode_obj.get_codepage()
- if enc == 'cp10000':
- enc = 'mac_roman'
- msg = 'Exception in token processing'
+ enc = encode_obj.get_codepage()
+ if enc != 'mac_roman':
+ enc = 'cp' + enc
+ msg = '%s\nException in token processing' % str(msg)
if check_encoding_obj.check_encoding(self.__file, enc):
file_name = self.__file if isinstance(self.__file, str) \
else self.__file.encode('utf-8')
- msg = 'File %s does not appear to be correctly encoded.\n' % file_name
+ msg +='\nFile %s does not appear to be correctly encoded.\n' % file_name
try:
os.remove(self.__temp_file)
except OSError:
diff --git a/src/calibre/ebooks/rtf2xml/process_tokens.py b/src/calibre/ebooks/rtf2xml/process_tokens.py
index 1edf69b32d..010d374cbc 100755
--- a/src/calibre/ebooks/rtf2xml/process_tokens.py
+++ b/src/calibre/ebooks/rtf2xml/process_tokens.py
@@ -786,21 +786,23 @@ class ProcessTokens:
token = line.replace("\n","")
line_count += 1
if line_count == 1 and token != '\\{':
- msg = 'Invalid RTF: document doesn\'t start with {\n'
+ msg = '\nInvalid RTF: document doesn\'t start with {\n'
raise self.__exception_handler, msg
elif line_count == 2 and token[0:4] != '\\rtf':
- msg = 'Invalid RTF: document doesn\'t start with \\rtf \n'
+ msg = '\nInvalid RTF: document doesn\'t start with \\rtf \n'
raise self.__exception_handler, msg
the_index = token.find('\\ ')
if token is not None and the_index > -1:
- msg = 'Invalid RTF: token "\\ " not valid.\n'
+ msg = '\nInvalid RTF: token "\\ " not valid.\nError at line %d'\
+ % line_count
raise self.__exception_handler, msg
elif token[:1] == "\\":
try:
token.decode('us-ascii')
except UnicodeError, msg:
- msg = 'Invalid RTF: Tokens not ascii encoded.\n%s' % str(msg)
+ msg = '\nInvalid RTF: Tokens not ascii encoded.\n%s\nError at line %d'\
+ % (str(msg), line_count)
raise self.__exception_handler, msg
line = self.process_cw(token)
if line is not None:
@@ -816,7 +818,7 @@ class ProcessTokens:
write_obj.write('tx
Date: Sun, 6 Feb 2011 01:14:56 +0100
Subject: [PATCH 124/163] Remove librarything login
---
src/calibre/ebooks/metadata/covers.py | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/src/calibre/ebooks/metadata/covers.py b/src/calibre/ebooks/metadata/covers.py
index cbd8fc0e99..529b38c1d3 100644
--- a/src/calibre/ebooks/metadata/covers.py
+++ b/src/calibre/ebooks/metadata/covers.py
@@ -145,12 +145,13 @@ class LibraryThingCovers(CoverDownload): # {{{
return url
def has_cover(self, mi, ans, timeout=5.):
- if not mi.isbn or not self.site_customization:
+ # if not mi.isbn or not self.site_customization:
+ if not mi.isbn:
return False
from calibre.ebooks.metadata.library_thing import get_browser, login
br = get_browser()
- un, _, pw = self.site_customization.partition(':')
- login(br, un, pw)
+ # un, _, pw = self.site_customization.partition(':')
+ # login(br, un, pw)
try:
self.get_cover_url(mi.isbn, br, timeout=timeout)
self.debug('cover for', mi.isbn, 'found')
@@ -159,12 +160,13 @@ class LibraryThingCovers(CoverDownload): # {{{
self.debug(e)
def get_covers(self, mi, result_queue, abort, timeout=5.):
- if not mi.isbn or not self.site_customization:
+ # if not mi.isbn or not self.site_customization:
+ if not mi.isbn:
return
from calibre.ebooks.metadata.library_thing import get_browser, login
br = get_browser()
- un, _, pw = self.site_customization.partition(':')
- login(br, un, pw)
+ # un, _, pw = self.site_customization.partition(':')
+ # login(br, un, pw)
try:
url = self.get_cover_url(mi.isbn, br, timeout=timeout)
cover_data = br.open_novisit(url).read()
From bcc516afc211f873d8b8d9be712d2968d1960271 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Tue, 22 Feb 2011 22:48:36 +0100
Subject: [PATCH 125/163] Meta personalisation
---
src/calibre/customize/builtins.py | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index 8f83795ef5..5f1dfd9c35 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -506,12 +506,12 @@ from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
from calibre.devices.kobo.driver import KOBO
from calibre.devices.bambook.driver import BAMBOOK
-from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \
- KentDistrictLibrary
+from calibre.ebooks.metadata.fetch import KentDistrictLibrary
from calibre.ebooks.metadata.douban import DoubanBooks
-#from calibre.ebooks.metadata.google_books import GoogleBooks
+from calibre.ebooks.metadata.isbndb import ISBNDB
+from calibre.ebooks.metadata.google_books import GoogleBooks
from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers
-#from calibre.ebooks.metadata.amazon import Amazon, AmazonSocial
+from calibre.ebooks.metadata.amazon import Amazon, AmazonSocial
from calibre.ebooks.metadata.fictionwise import Fictionwise
from calibre.ebooks.metadata.covers import OpenLibraryCovers, \
LibraryThingCovers, DoubanCovers
From 888aaec88fea2d669d0ed4d2b245351c0013436f Mon Sep 17 00:00:00 2001
From: Sengian
Date: Wed, 9 Mar 2011 22:21:02 +0100
Subject: [PATCH 126/163] Metadata compatibility
---
src/calibre/customize/builtins.py | 6 +-
src/calibre/ebooks/metadata/amazon.py | 259 +++-----------------------
src/calibre/ebooks/metadata/fetch.py | 21 +++
3 files changed, 52 insertions(+), 234 deletions(-)
diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index ba90d20dcc..74f1f9eafe 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -580,12 +580,12 @@ from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
from calibre.devices.kobo.driver import KOBO
from calibre.devices.bambook.driver import BAMBOOK
-from calibre.ebooks.metadata.fetch import KentDistrictLibrary
+from calibre.ebooks.metadata.fetch import KentDistrictLibrary, Amazon
from calibre.ebooks.metadata.douban import DoubanBooks
from calibre.ebooks.metadata.isbndb import ISBNDB
from calibre.ebooks.metadata.google_books import GoogleBooks
from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers
-from calibre.ebooks.metadata.amazon import Amazon, AmazonSocial
+# from calibre.ebooks.metadata.amazon import Amazon , AmazonSocial
from calibre.ebooks.metadata.fictionwise import Fictionwise
from calibre.ebooks.metadata.covers import OpenLibraryCovers, \
AmazonCovers, DoubanCovers, LibrarythingCovers
@@ -593,7 +593,7 @@ from calibre.library.catalog import CSV_XML, EPUB_MOBI, BIBTEX
from calibre.ebooks.epub.fix.unmanifested import Unmanifested
from calibre.ebooks.epub.fix.epubcheck import Epubcheck
-plugins = [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon, AmazonSocial,
+plugins = [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon, #AmazonSocial,
KentDistrictLibrary, DoubanBooks, NiceBooks, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
Epubcheck, OpenLibraryCovers, AmazonCovers, DoubanCovers, LibrarythingCovers,
NiceBooksCovers]
diff --git a/src/calibre/ebooks/metadata/amazon.py b/src/calibre/ebooks/metadata/amazon.py
index a2ddc22770..c87249ed39 100644
--- a/src/calibre/ebooks/metadata/amazon.py
+++ b/src/calibre/ebooks/metadata/amazon.py
@@ -1,7 +1,11 @@
-from __future__ import with_statement
-__license__ = 'GPL 3'
-__copyright__ = '2010, sengian '
+#!/usr/bin/env python
+__license__ = 'GPL v3'
+__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
+__docformat__ = 'restructuredtext en'
+'''
+Fetch metadata using Amazon AWS
+'''
import sys, re
from threading import RLock
@@ -12,10 +16,6 @@ from calibre import browser
from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.chardet import xml_to_unicode
-from calibre.ebooks.metadata import MetaInformation, check_isbn, \
- authors_to_sort_string
-from calibre.ebooks.metadata.fetch import MetadataSource
-from calibre.utils.config import OptionParser
from calibre.library.comments import sanitize_comments_html
asin_cache = {}
@@ -160,229 +160,31 @@ def get_metadata(br, asin, mi):
m = pat.match(t)
if m is not None:
try:
- default = utcnow().replace(day=15)
- if self.lang != 'all':
- d = replace_months(d, self.lang)
- d = parse_date(d, assume_utc=True, default=default)
- mi.pubdate = d
+ mi.rating = float(m.group(1))/float(m.group(2)) * 5
+ break
except:
- report(verbose)
- #ISBN
- elt = filter(lambda x: self.reisbn.search(x.find('b').text), elts)
- if elt:
- isbn = elt[0].find('b').tail.replace('-', '').strip()
- if check_isbn(isbn):
- mi.isbn = unicode(isbn)
- elif len(elt) > 1:
- isbnone = elt[1].find('b').tail.replace('-', '').strip()
- if check_isbn(isbnone):
- mi.isbn = unicode(isbnone)
- else:
- #assume ASIN-> find a check for asin
- mi.isbn = unicode(isbn)
- #Langue
- elt = filter(lambda x: self.relang.search(x.find('b').text), elts)
- if elt:
- langue = elt[0].find('b').tail.strip()
- if langue:
- mi.language = unicode(langue)
- #ratings
- elt = filter(lambda x: self.reratelt.search(x.find('b').text), elts)
- if elt:
- ratings = elt[0].find_class('swSprite')
- if ratings:
- ratings = self.rerat.findall(ratings[0].get('title'))
- if len(ratings) == 2:
- mi.rating = float(ratings[0])/float(ratings[1]) * 5
- return mi
+ pass
- def fill_MI(self, entry, verbose):
- try:
- title = self.get_title(entry)
- authors = self.get_authors(entry)
- except Exception, e:
- if verbose:
- print _('Failed to get all details for an entry')
- print e
- print _('URL who failed: %s') % x
- report(verbose)
- return None
- mi = MetaInformation(title, authors)
- mi.author_sort = authors_to_sort_string(authors)
- try:
- mi.comments = self.get_description(entry, verbose)
- mi = self.get_book_info(entry, mi, verbose)
- except:
- pass
- return mi
+ desc = root.xpath('//div[@id="productDescription"]/*[@class="content"]')
+ if desc:
+ desc = desc[0]
+ for c in desc.xpath('descendant::*[@class="seeAll" or'
+ ' @class="emptyClear" or @href]'):
+ c.getparent().remove(c)
+ desc = html.tostring(desc, method='html', encoding=unicode).strip()
+ # remove all attributes from tags
+ desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc)
+ # Collapse whitespace
+ #desc = re.sub('\n+', '\n', desc)
+ #desc = re.sub(' +', ' ', desc)
+ # Remove the notice about text referring to out of print editions
+ desc = re.sub(r'(?s)--This text ref.*?', '', desc)
+ # Remove comments
+            desc = re.sub(r'(?s)<!--.*?-->', '', desc)
+ mi.comments = sanitize_comments_html(desc)
- def get_individual_metadata(self, url, br, verbose):
- try:
- raw = br.open_novisit(url).read()
- except Exception, e:
- import socket
- report(verbose)
- if callable(getattr(e, 'getcode', None)) and \
- e.getcode() == 404:
- return None
- attr = getattr(e, 'args', [None])
- attr = attr if attr else [None]
- if isinstance(attr[0], socket.timeout):
- raise AmazonError(_('Amazon timed out. Try again later.'))
- raise AmazonError(_('Amazon encountered an error.'))
- if '404 - ' in raw:
- report(verbose)
- return None
- raw = xml_to_unicode(raw, strip_encoding_pats=True,
- resolve_entities=True)[0]
- try:
- return soupparser.fromstring(raw)
- except:
- try:
- #remove ASCII invalid chars
- return soupparser.fromstring(clean_ascii_chars(raw))
- except:
- report(verbose)
- return None
+ return True
- def fetchdatathread(self, qbr, qsync, nb, url, verbose):
- try:
- browser = qbr.get(True)
- entry = self.get_individual_metadata(url, browser, verbose)
- except:
- report(verbose)
- entry = None
- finally:
- qbr.put(browser, True)
- qsync.put((nb, entry), True)
-
- def producer(self, sync, urls, br, verbose=False):
- for i in xrange(len(urls)):
- thread = Thread(target=self.fetchdatathread,
- args=(br, sync, i, urls[i], verbose))
- thread.start()
-
- def consumer(self, sync, syncbis, br, total_entries, verbose=False):
- i=0
- self.extend([None]*total_entries)
- while i < total_entries:
- rq = sync.get(True)
- nb = int(rq[0])
- entry = rq[1]
- i+=1
- if entry is not None:
- mi = self.fill_MI(entry, verbose)
- if mi is not None:
- mi.tags, atag = self.get_tags(entry, verbose)
- self[nb] = mi
- if atag:
- thread = Thread(target=self.fetchdatathread,
- args=(br, syncbis, nb, mi.tags, verbose))
- thread.start()
- else:
- syncbis.put((nb, None), True)
-
- def final(self, sync, total_entries, verbose):
- i=0
- while i < total_entries:
- rq = sync.get(True)
- nb = int(rq[0])
- tags = rq[1]
- i+=1
- if tags is not None:
- self[nb].tags = self.get_tags(tags, verbose)[0]
-
- def populate(self, entries, ibr, verbose=False, brcall=3):
- br = Queue(brcall)
- cbr = Queue(brcall-1)
-
- syncp = Queue(1)
- syncc = Queue(1)
-
- for i in xrange(brcall-1):
- br.put(browser(), True)
- cbr.put(browser(), True)
- br.put(ibr, True)
-
- prod_thread = Thread(target=self.producer, args=(syncp, entries, br, verbose))
- cons_thread = Thread(target=self.consumer, args=(syncp, syncc, cbr, len(entries), verbose))
- fin_thread = Thread(target=self.final, args=(syncc, len(entries), verbose))
- prod_thread.start()
- cons_thread.start()
- fin_thread.start()
- prod_thread.join()
- cons_thread.join()
- fin_thread.join()
-
-
-def search(title=None, author=None, publisher=None, isbn=None,
- max_results=5, verbose=False, keywords=None, lang='all'):
- br = browser()
- entries, baseurl = Query(title=title, author=author, isbn=isbn, publisher=publisher,
- keywords=keywords, max_results=max_results,rlang=lang)(br, verbose)
-
- if entries is None or len(entries) == 0:
- return None
-
- #List of entry
- ans = ResultList(baseurl, lang)
- ans.populate(entries, br, verbose)
- return [x for x in ans if x is not None]
-
-def get_social_metadata(title, authors, publisher, isbn, verbose=False,
- max_results=1, lang='all'):
- mi = MetaInformation(title, authors)
- if not isbn or not check_isbn(isbn):
- return [mi]
-
- amazresults = search(isbn=isbn, verbose=verbose,
- max_results=max_results, lang=lang)
- if amazresults is None or amazresults[0] is None:
- from calibre.ebooks.metadata.xisbn import xisbn
- for i in xisbn.get_associated_isbns(isbn):
- amazresults = search(isbn=i, verbose=verbose,
- max_results=max_results, lang=lang)
- if amazresults is not None and amazresults[0] is not None:
- break
- if amazresults is None or amazresults[0] is None:
- return [mi]
-
- miaz = amazresults[0]
- if miaz.rating is not None:
- mi.rating = miaz.rating
- if miaz.comments is not None:
- mi.comments = miaz.comments
- if miaz.tags is not None:
- mi.tags = miaz.tags
- return [mi]
-
-def option_parser():
- import textwrap
- parser = OptionParser(textwrap.dedent(\
- _('''\
- %prog [options]
-
- Fetch book metadata from Amazon. You must specify one of title, author,
- ISBN, publisher or keywords. Will fetch a maximum of 20 matches,
- so you should make your query as specific as possible.
- You can chose the language for metadata retrieval:
- english & french & german
- '''
- )))
- parser.add_option('-t', '--title', help=_('Book title'))
- parser.add_option('-a', '--author', help=_('Book author(s)'))
- parser.add_option('-p', '--publisher', help=_('Book publisher'))
- parser.add_option('-i', '--isbn', help=_('Book ISBN'))
- parser.add_option('-k', '--keywords', help=_('Keywords'))
- parser.add_option('-s', '--social', default=0, action='count',
- help=_('Get social data only'))
- parser.add_option('-m', '--max-results', default=10,
- help=_('Maximum number of results to fetch'))
- parser.add_option('-l', '--lang', default='all',
- help=_('Chosen language for metadata search (en, fr, de)'))
- parser.add_option('-v', '--verbose', default=0, action='count',
- help=_('Be more verbose about errors'))
- return parser
def main(args=sys.argv):
import tempfile, os
@@ -412,8 +214,3 @@ def main(args=sys.argv):
if __name__ == '__main__':
sys.exit(main())
- # import cProfile
- # sys.exit(cProfile.run("import calibre.ebooks.metadata.amazonbis; calibre.ebooks.metadata.amazonbis.main()"))
- # sys.exit(cProfile.run("import calibre.ebooks.metadata.amazonbis; calibre.ebooks.metadata.amazonbis.main()", "profile"))
-
-# calibre-debug -e "D:\Mes eBooks\Developpement\calibre\src\calibre\ebooks\metadata\amazon.py" -m 5 -a gore -v>data.html
\ No newline at end of file
diff --git a/src/calibre/ebooks/metadata/fetch.py b/src/calibre/ebooks/metadata/fetch.py
index 5936222e24..978e460190 100644
--- a/src/calibre/ebooks/metadata/fetch.py
+++ b/src/calibre/ebooks/metadata/fetch.py
@@ -212,6 +212,27 @@ class MetadataSource(Plugin): # {{{
# }}}
+class Amazon(MetadataSource): # {{{
+
+ name = 'Amazon'
+ metadata_type = 'social'
+ description = _('Downloads social metadata from amazon.com')
+
+ has_html_comments = True
+
+ def fetch(self):
+ if not self.isbn:
+ return
+ from calibre.ebooks.metadata.amazon import get_social_metadata
+ try:
+ self.results = get_social_metadata(self.title, self.book_author,
+ self.publisher, self.isbn)
+ except Exception, e:
+ self.exception = e
+ self.tb = traceback.format_exc()
+
+ # }}}
+
class KentDistrictLibrary(MetadataSource): # {{{
name = 'Kent District Library'
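As an aside, the attribute-stripping substitution used above when cleaning Amazon product descriptions can be tried on a toy snippet; this example is illustrative only and not part of the patch.

import re

desc = '<div class="content">A <a href="#">great</a> book</div>'
print(re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc))
# prints: <div>A <a>great</a> book</div>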
From 5dc5b93a1fa5a69143d59c7b423b64c1be4cf92f Mon Sep 17 00:00:00 2001
From: Sengian
Date: Wed, 9 Mar 2011 22:22:42 +0100
Subject: [PATCH 127/163] Correction of space eating after unicode chars
---
src/calibre/ebooks/rtf2xml/tokenize.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/src/calibre/ebooks/rtf2xml/tokenize.py b/src/calibre/ebooks/rtf2xml/tokenize.py
index 062a720d91..45a6e75ed6 100755
--- a/src/calibre/ebooks/rtf2xml/tokenize.py
+++ b/src/calibre/ebooks/rtf2xml/tokenize.py
@@ -46,7 +46,8 @@ class Tokenize:
def __remove_uc_chars(self, startchar, token):
for i in xrange(startchar, len(token)):
- if token[i] == " ":
+ #handle the case of an uc char with a terminating blank before ansi char
+ if token[i] == " " and self.__uc_char:
continue
elif self.__uc_char:
self.__uc_char -= 1
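A rough, self-contained sketch of the \ucN rule this one-line change is about (simplified, not the tokenizer's code): \ucN tells an RTF reader how many fallback characters follow each \uNNNN escape and must be discarded, while a space immediately after the escape merely terminates the control word and must not count as one of them.

import re

def decode_unicode_escapes(rtf_text, uc=1):
    # Replace each \uNNNN escape with the real character and drop the
    # 'uc' fallback characters that follow it; the optional trailing
    # space is part of the control word, not a fallback character.
    out = []
    pos = 0
    for m in re.finditer(r"\\u(-?\d+) ?", rtf_text):
        out.append(rtf_text[pos:m.start()])
        out.append(chr(int(m.group(1)) & 0xFFFF))
        pos = m.end() + uc
    out.append(rtf_text[pos:])
    return ''.join(out)

print(decode_unicode_escapes(r"caf\u233 e!"))  # -> café!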
From 79a28bad795545a42196ceacddbd58566c12d52b Mon Sep 17 00:00:00 2001
From: Sengian
Date: Fri, 1 Apr 2011 23:10:09 +0200
Subject: [PATCH 128/163] Meta activation
---
src/calibre/customize/builtins.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index ffac87c02e..8d91913b84 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -588,14 +588,14 @@ from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers
# from calibre.ebooks.metadata.amazon import Amazon , AmazonSocial
from calibre.ebooks.metadata.fictionwise import Fictionwise
from calibre.ebooks.metadata.covers import OpenLibraryCovers, \
- AmazonCovers, DoubanCovers, LibrarythingCovers
+ AmazonCovers, DoubanCovers #, LibrarythingCovers
from calibre.library.catalog import CSV_XML, EPUB_MOBI, BIBTEX
from calibre.ebooks.epub.fix.unmanifested import Unmanifested
from calibre.ebooks.epub.fix.epubcheck import Epubcheck
plugins = [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon, #AmazonSocial,
KentDistrictLibrary, DoubanBooks, NiceBooks, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
- Epubcheck, OpenLibraryCovers, AmazonCovers, DoubanCovers, LibrarythingCovers,
+ Epubcheck, OpenLibraryCovers, AmazonCovers, DoubanCovers, #LibrarythingCovers,
NiceBooksCovers]
plugins += [
ComicInput,
From b68b82fc647b7ac9cc95a222f9cf65c690608fb6 Mon Sep 17 00:00:00 2001
From: Sengian
Date: Tue, 3 May 2011 00:41:16 +0200
Subject: [PATCH 129/163] Correct a bug with multiple authors and convert
 html in comments to markdown text
---
src/calibre/library/catalog.py | 17 ++++++++++-------
src/calibre/utils/bibtex.py | 2 +-
2 files changed, 11 insertions(+), 8 deletions(-)
diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py
index 717e8e2c6b..67f1c16d2d 100644
--- a/src/calibre/library/catalog.py
+++ b/src/calibre/library/catalog.py
@@ -8,6 +8,7 @@ from collections import namedtuple
from copy import deepcopy
from xml.sax.saxutils import escape
from lxml import etree
+from types import StringType, UnicodeType
from calibre import prints, prepare_string_for_xml, strftime
from calibre.constants import preferred_encoding, DEBUG
@@ -15,13 +16,16 @@ from calibre.customize import CatalogPlugin
from calibre.customize.conversion import OptionRecommendation, DummyReporter
from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, Tag, NavigableString
from calibre.ebooks.chardet import substitute_entites
+from calibre.library.save_to_disk import preprocess_template
from calibre.ptempfile import PersistentTemporaryDirectory
+from calibre.utils.bibtex import BibTeX
from calibre.utils.config import config_dir
from calibre.utils.date import format_date, isoformat, is_date_undefined, now as nowf
+from calibre.utils.html2text import html2text
from calibre.utils.icu import capitalize
from calibre.utils.logging import default_log as log
-from calibre.utils.zipfile import ZipFile, ZipInfo
from calibre.utils.magick.draw import thumbnail
+from calibre.utils.zipfile import ZipFile, ZipInfo
FIELDS = ['all', 'title', 'author_sort', 'authors', 'comments',
'cover', 'formats','id', 'isbn', 'ondevice', 'pubdate', 'publisher',
@@ -303,12 +307,6 @@ class BIBTEX(CatalogPlugin): # {{{
def run(self, path_to_output, opts, db, notification=DummyReporter()):
- from types import StringType, UnicodeType
-
- from calibre.library.save_to_disk import preprocess_template
- #Bibtex functions
- from calibre.utils.bibtex import BibTeX
-
def create_bibtex_entry(entry, fields, mode, template_citation,
bibtexdict, citation_bibtex=True, calibre_files=True):
@@ -365,6 +363,11 @@ class BIBTEX(CatalogPlugin): # {{{
#\n removal
item = item.replace(u'\r\n',u' ')
item = item.replace(u'\n',u' ')
+ #html to text
+ try:
+ item = html2text(item)
+ except:
+ log(" WARNING: error in converting comments to text")
bibtex_entry.append(u'note = "%s"' % bibtexdict.utf8ToBibtex(item))
elif field == 'isbn' :
diff --git a/src/calibre/utils/bibtex.py b/src/calibre/utils/bibtex.py
index d19a6b05fe..518ec96611 100644
--- a/src/calibre/utils/bibtex.py
+++ b/src/calibre/utils/bibtex.py
@@ -2905,4 +2905,4 @@ class BibTeX:
def bibtex_author_format(self, item):
#Format authors for Bibtex compliance (get a list as input)
- return self.utf8ToBibtex(u' and'.join([author for author in item]))
+ return self.utf8ToBibtex(u' and '.join([author for author in item]))
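Two small illustrations of what this patch fixes, assuming nothing beyond the diff above: the BibTeX author separator was missing its trailing space (shown below), and comments are now run through html2text before being written to the note field (not shown here).

authors = [u'Doe, John', u'Smith, Jane']
print(u' and'.join(authors))   # before: Doe, John andSmith, Jane
print(u' and '.join(authors))  # after:  Doe, John and Smith, Jane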
From 50df54efa63f27ff41559d7d47ff0dce0790564f Mon Sep 17 00:00:00 2001
From: Sengian
Date: Wed, 11 May 2011 23:44:22 +0200
Subject: [PATCH 130/163] Color None mistakenly translated to true instead of 0
---
src/calibre/ebooks/rtf/input.py | 2 +-
src/calibre/ebooks/rtf2xml/process_tokens.py | 9 +++++++--
2 files changed, 8 insertions(+), 3 deletions(-)
diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py
index 2f8c11fd50..be032f0598 100644
--- a/src/calibre/ebooks/rtf/input.py
+++ b/src/calibre/ebooks/rtf/input.py
@@ -87,7 +87,7 @@ class RTFInput(InputFormatPlugin):
indent_out = 1
self.log('Running RTFParser in debug mode')
except:
- pass
+        self.log.warn('Unable to run RTFParser in debug mode')
parser = ParseRtf(
in_file = stream,
out_file = ofile,
diff --git a/src/calibre/ebooks/rtf2xml/process_tokens.py b/src/calibre/ebooks/rtf2xml/process_tokens.py
index 010d374cbc..7dc88e7f2b 100755
--- a/src/calibre/ebooks/rtf2xml/process_tokens.py
+++ b/src/calibre/ebooks/rtf2xml/process_tokens.py
@@ -197,8 +197,8 @@ class ProcessTokens:
# character info => ci
'b' : ('ci', 'bold______', self.bool_st_func),
'blue' : ('ci', 'blue______', self.color_func),
- 'caps' : ('ci', 'caps______', self.bool_st_func),
- 'cf' : ('ci', 'font-color', self.default_func),
+ 'caps' : ('ci', 'caps______', self.bool_st_func),
+ 'cf' : ('ci', 'font-color', self.colorz_func),
'chftn' : ('ci', 'footnot-mk', self.bool_st_func),
'dn' : ('ci', 'font-down_', self.divide_by_2),
'embo' : ('ci', 'emboss____', self.bool_st_func),
@@ -624,6 +624,11 @@ class ProcessTokens:
num = 'true'
return 'cw<%s<%s
Date: Sat, 3 Sep 2011 05:35:49 +0000
Subject: [PATCH 131/163] Launchpad automatic translations update.
---
setup/iso_639/af.po | 6 +-
setup/iso_639/am.po | 6 +-
setup/iso_639/ar.po | 6 +-
setup/iso_639/az.po | 6 +-
setup/iso_639/bg.po | 6 +-
setup/iso_639/bn_IN.po | 6 +-
setup/iso_639/br.po | 6 +-
setup/iso_639/bs.po | 6 +-
setup/iso_639/byn.po | 6 +-
setup/iso_639/ca.po | 6 +-
setup/iso_639/crh.po | 6 +-
setup/iso_639/cs.po | 6 +-
setup/iso_639/cy.po | 6 +-
setup/iso_639/da.po | 6 +-
setup/iso_639/de.po | 6 +-
setup/iso_639/el.po | 6 +-
setup/iso_639/eo.po | 6 +-
setup/iso_639/es.po | 1334 ++++++------
setup/iso_639/et.po | 6 +-
setup/iso_639/eu.po | 6 +-
setup/iso_639/fa.po | 6 +-
setup/iso_639/fi.po | 6 +-
setup/iso_639/fr.po | 6 +-
setup/iso_639/ga.po | 6 +-
setup/iso_639/gez.po | 6 +-
setup/iso_639/gl.po | 6 +-
setup/iso_639/gu.po | 6 +-
setup/iso_639/he.po | 6 +-
setup/iso_639/hi.po | 6 +-
setup/iso_639/hr.po | 6 +-
setup/iso_639/hu.po | 6 +-
setup/iso_639/id.po | 6 +-
setup/iso_639/is.po | 6 +-
setup/iso_639/it.po | 6 +-
setup/iso_639/ja.po | 6 +-
setup/iso_639/kn.po | 6 +-
setup/iso_639/ko.po | 6 +-
setup/iso_639/kok.po | 6 +-
setup/iso_639/lt.po | 6 +-
setup/iso_639/lv.po | 6 +-
setup/iso_639/mi.po | 6 +-
setup/iso_639/mk.po | 6 +-
setup/iso_639/mn.po | 6 +-
setup/iso_639/mr.po | 6 +-
setup/iso_639/ms.po | 6 +-
setup/iso_639/mt.po | 6 +-
setup/iso_639/nb.po | 6 +-
setup/iso_639/nl.po | 6 +-
setup/iso_639/nn.po | 6 +-
setup/iso_639/nso.po | 6 +-
setup/iso_639/oc.po | 6 +-
setup/iso_639/or.po | 6 +-
setup/iso_639/pa.po | 6 +-
setup/iso_639/pl.po | 6 +-
setup/iso_639/ps.po | 6 +-
setup/iso_639/pt.po | 6 +-
setup/iso_639/pt_BR.po | 6 +-
setup/iso_639/ro.po | 6 +-
setup/iso_639/ru.po | 6 +-
setup/iso_639/rw.po | 6 +-
setup/iso_639/sk.po | 6 +-
setup/iso_639/sl.po | 6 +-
setup/iso_639/sr.po | 6 +-
setup/iso_639/sr@latin.po | 6 +-
setup/iso_639/sv.po | 6 +-
setup/iso_639/ta.po | 6 +-
setup/iso_639/th.po | 6 +-
setup/iso_639/ti.po | 6 +-
setup/iso_639/tig.po | 6 +-
setup/iso_639/tr.po | 4 +-
setup/iso_639/tt.po | 6 +-
setup/iso_639/uk.po | 6 +-
setup/iso_639/ve.po | 6 +-
setup/iso_639/vi.po | 6 +-
setup/iso_639/wa.po | 6 +-
setup/iso_639/xh.po | 6 +-
setup/iso_639/zh_CN.po | 6 +-
setup/iso_639/zh_TW.po | 6 +-
setup/iso_639/zu.po | 6 +-
src/calibre/translations/af.po | 566 +++---
src/calibre/translations/ar.po | 572 +++---
src/calibre/translations/ast.po | 566 +++---
src/calibre/translations/az.po | 566 +++---
src/calibre/translations/bg.po | 566 +++---
src/calibre/translations/bn.po | 566 +++---
src/calibre/translations/br.po | 566 +++---
src/calibre/translations/bs.po | 566 +++---
src/calibre/translations/ca.po | 560 +++---
src/calibre/translations/cs.po | 560 +++---
src/calibre/translations/da.po | 560 +++---
src/calibre/translations/de.po | 639 +++---
src/calibre/translations/el.po | 566 +++---
src/calibre/translations/en_AU.po | 566 +++---
src/calibre/translations/en_CA.po | 566 +++---
src/calibre/translations/en_GB.po | 3129 ++++++++++++++++++++---------
src/calibre/translations/eo.po | 566 +++---
src/calibre/translations/es.po | 560 +++---
src/calibre/translations/et.po | 566 +++---
src/calibre/translations/eu.po | 560 +++---
src/calibre/translations/fa.po | 566 +++---
src/calibre/translations/fi.po | 566 +++---
src/calibre/translations/fo.po | 566 +++---
src/calibre/translations/fr.po | 587 +++---
src/calibre/translations/gl.po | 560 +++---
src/calibre/translations/gu.po | 566 +++---
src/calibre/translations/he.po | 566 +++---
src/calibre/translations/hi.po | 566 +++---
src/calibre/translations/hr.po | 560 +++---
src/calibre/translations/hu.po | 558 ++---
src/calibre/translations/id.po | 566 +++---
src/calibre/translations/it.po | 585 +++---
src/calibre/translations/ja.po | 558 ++---
src/calibre/translations/kn.po | 566 +++---
src/calibre/translations/ko.po | 560 +++---
src/calibre/translations/lt.po | 566 +++---
src/calibre/translations/ltg.po | 566 +++---
src/calibre/translations/lv.po | 566 +++---
src/calibre/translations/ml.po | 566 +++---
src/calibre/translations/mr.po | 566 +++---
src/calibre/translations/ms.po | 566 +++---
src/calibre/translations/nb.po | 560 +++---
src/calibre/translations/nds.po | 560 +++---
src/calibre/translations/nl.po | 601 +++---
src/calibre/translations/oc.po | 566 +++---
src/calibre/translations/pa.po | 566 +++---
src/calibre/translations/pl.po | 560 +++---
src/calibre/translations/pt.po | 560 +++---
src/calibre/translations/pt_BR.po | 560 +++---
src/calibre/translations/ro.po | 796 +++++---
src/calibre/translations/ru.po | 587 +++---
src/calibre/translations/sc.po | 566 +++---
src/calibre/translations/si.po | 566 +++---
src/calibre/translations/sk.po | 560 +++---
src/calibre/translations/sl.po | 566 +++---
src/calibre/translations/sq.po | 566 +++---
src/calibre/translations/sr.po | 560 +++---
src/calibre/translations/sv.po | 560 +++---
src/calibre/translations/ta.po | 566 +++---
src/calibre/translations/te.po | 566 +++---
src/calibre/translations/th.po | 566 +++---
src/calibre/translations/tr.po | 564 +++---
src/calibre/translations/uk.po | 566 +++---
src/calibre/translations/ur.po | 566 +++---
src/calibre/translations/vi.po | 566 +++---
src/calibre/translations/wa.po | 566 +++---
src/calibre/translations/yi.po | 566 +++---
src/calibre/translations/zh_CN.po | 651 +++---
src/calibre/translations/zh_HK.po | 566 +++---
src/calibre/translations/zh_TW.po | 560 +++---
149 files changed, 24119 insertions(+), 20234 deletions(-)
diff --git a/setup/iso_639/af.po b/setup/iso_639/af.po
index 7ad92a44cb..1b181c3da7 100644
--- a/setup/iso_639/af.po
+++ b/setup/iso_639/af.po
@@ -9,15 +9,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 05:54+0000\n"
"Last-Translator: Ysbeer \n"
"Language-Team: Afrikaans \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 04:34+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 04:52+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: af\n"
#. name for aaa
diff --git a/setup/iso_639/am.po b/setup/iso_639/am.po
index a60eab9f4e..9f22522f15 100644
--- a/setup/iso_639/am.po
+++ b/setup/iso_639/am.po
@@ -9,15 +9,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 03:15+0000\n"
"Last-Translator: Alastair McKinstry \n"
"Language-Team: Amharic\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 04:34+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 04:53+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: \n"
#. name for aaa
diff --git a/setup/iso_639/ar.po b/setup/iso_639/ar.po
index fb2932ca11..33542ef4b8 100644
--- a/setup/iso_639/ar.po
+++ b/setup/iso_639/ar.po
@@ -9,15 +9,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 05:57+0000\n"
"Last-Translator: Mohammad Gamal \n"
"Language-Team: Arabic \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 04:35+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 04:54+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: ar\n"
#. name for aaa
diff --git a/setup/iso_639/az.po b/setup/iso_639/az.po
index 4dcd801571..11eea22e21 100644
--- a/setup/iso_639/az.po
+++ b/setup/iso_639/az.po
@@ -8,15 +8,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 05:38+0000\n"
"Last-Translator: Vasif İsmayıloğlu MD \n"
"Language-Team: Azerbaijani Turkish \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 04:35+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 04:54+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: \n"
#. name for aaa
diff --git a/setup/iso_639/bg.po b/setup/iso_639/bg.po
index 78d850654d..2a6c31b84b 100644
--- a/setup/iso_639/bg.po
+++ b/setup/iso_639/bg.po
@@ -10,15 +10,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 04:12+0000\n"
"Last-Translator: Roumen Petrov \n"
"Language-Team: Bulgarian \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 04:37+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 04:56+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: bg\n"
#. name for aaa
diff --git a/setup/iso_639/bn_IN.po b/setup/iso_639/bn_IN.po
index 690e97765e..a12cd3374b 100644
--- a/setup/iso_639/bn_IN.po
+++ b/setup/iso_639/bn_IN.po
@@ -12,15 +12,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 08:33+0000\n"
"Last-Translator: runa \n"
"Language-Team: Bengali (India) \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 05:15+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:33+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: \n"
#. name for aaa
diff --git a/setup/iso_639/br.po b/setup/iso_639/br.po
index 6d7aba44f1..252c0af5d9 100644
--- a/setup/iso_639/br.po
+++ b/setup/iso_639/br.po
@@ -11,15 +11,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 07:57+0000\n"
"Last-Translator: Kovid Goyal \n"
"Language-Team: Breton \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 04:37+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 04:56+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: br\n"
#. name for aaa
diff --git a/setup/iso_639/bs.po b/setup/iso_639/bs.po
index 604d56e1ff..d7f885e6ce 100644
--- a/setup/iso_639/bs.po
+++ b/setup/iso_639/bs.po
@@ -10,15 +10,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 07:15+0000\n"
"Last-Translator: Nesiren Armin \n"
"Language-Team: Bosanski \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 04:36+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 04:55+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: \n"
#. name for aaa
diff --git a/setup/iso_639/byn.po b/setup/iso_639/byn.po
index 4c9d4226b0..ad82262111 100644
--- a/setup/iso_639/byn.po
+++ b/setup/iso_639/byn.po
@@ -9,15 +9,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 05:42+0000\n"
"Last-Translator: Alastair McKinstry \n"
"Language-Team: Blin\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 04:38+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 04:57+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: \n"
#. name for aaa
diff --git a/setup/iso_639/ca.po b/setup/iso_639/ca.po
index 5905784a05..6e7067fc7d 100644
--- a/setup/iso_639/ca.po
+++ b/setup/iso_639/ca.po
@@ -11,15 +11,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 16:59+0000\n"
"Last-Translator: FerranRius \n"
"Language-Team: Catalan \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 04:38+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 04:57+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: ca\n"
#. name for aaa
diff --git a/setup/iso_639/crh.po b/setup/iso_639/crh.po
index 42f7ecefab..9702410505 100644
--- a/setup/iso_639/crh.po
+++ b/setup/iso_639/crh.po
@@ -8,15 +8,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 03:17+0000\n"
"Last-Translator: Reşat SABIQ \n"
"Language-Team: Crimean Tatar \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 04:39+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 04:58+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: crh\n"
#. name for aaa
diff --git a/setup/iso_639/cs.po b/setup/iso_639/cs.po
index 53b2883f5f..b8aa1cd1c4 100644
--- a/setup/iso_639/cs.po
+++ b/setup/iso_639/cs.po
@@ -9,15 +9,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 05:48+0000\n"
"Last-Translator: Miroslav Kure \n"
"Language-Team: Czech \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 04:40+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 04:58+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: cs\n"
#. name for aaa
diff --git a/setup/iso_639/cy.po b/setup/iso_639/cy.po
index b0f5f738ff..168d1d28ee 100644
--- a/setup/iso_639/cy.po
+++ b/setup/iso_639/cy.po
@@ -8,15 +8,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 07:08+0000\n"
"Last-Translator: Dafydd Tomos \n"
"Language-Team: Welsh \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 05:13+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:31+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: cy\n"
#. name for aaa
diff --git a/setup/iso_639/da.po b/setup/iso_639/da.po
index dfa629e7d3..4c831e5f84 100644
--- a/setup/iso_639/da.po
+++ b/setup/iso_639/da.po
@@ -16,15 +16,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 09:06+0000\n"
"Last-Translator: Claus Hindsgaul \n"
"Language-Team: Danish \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 04:40+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 04:59+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: da\n"
#. name for aaa
diff --git a/setup/iso_639/de.po b/setup/iso_639/de.po
index 8b31d2c89a..b47007f745 100644
--- a/setup/iso_639/de.po
+++ b/setup/iso_639/de.po
@@ -17,15 +17,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 06:01+0000\n"
"Last-Translator: Wolfgang Rohdewald \n"
"Language-Team: German \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 04:44+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:03+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: de\n"
#. name for aaa
diff --git a/setup/iso_639/el.po b/setup/iso_639/el.po
index e6e519d4e6..6e3f886169 100644
--- a/setup/iso_639/el.po
+++ b/setup/iso_639/el.po
@@ -11,15 +11,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 03:47+0000\n"
"Last-Translator: Thanos Lefteris \n"
"Language-Team: Greek \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 04:46+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:05+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: el\n"
#. name for aaa
diff --git a/setup/iso_639/eo.po b/setup/iso_639/eo.po
index ff6af66da8..b38aea1434 100644
--- a/setup/iso_639/eo.po
+++ b/setup/iso_639/eo.po
@@ -8,15 +8,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 08:01+0000\n"
"Last-Translator: Edmund GRIMLEY EVANS \n"
"Language-Team: Esperanto \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 04:41+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:00+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: eo\n"
#. name for aaa
diff --git a/setup/iso_639/es.po b/setup/iso_639/es.po
index 783b8afd04..a97856fc6a 100644
--- a/setup/iso_639/es.po
+++ b/setup/iso_639/es.po
@@ -7,14 +7,14 @@ msgid ""
msgstr ""
"Project-Id-Version: calibre\n"
"Report-Msgid-Bugs-To: FULL NAME \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
-"PO-Revision-Date: 2011-09-01 22:28+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
+"PO-Revision-Date: 2011-09-02 22:41+0000\n"
"Last-Translator: Alejandro Pérez \n"
"Language-Team: Spanish \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-09-02 04:40+0000\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:25+0000\n"
"X-Generator: Launchpad (build 13830)\n"
#. name for aaa
@@ -23,7 +23,7 @@ msgstr "Ghotuo"
#. name for aab
msgid "Alumu-Tesu"
-msgstr "Alumu-Tesu"
+msgstr "Alumu-tesu"
#. name for aac
msgid "Ari"
@@ -47,11 +47,11 @@ msgstr "Ambrak"
#. name for aah
msgid "Arapesh, Abu'"
-msgstr "Arapesh, Abu'"
+msgstr "Arapesh abu'"
#. name for aai
msgid "Arifama-Miniafia"
-msgstr "Arifama-Miniafia"
+msgstr "Arifama-miniafia"
#. name for aak
msgid "Ankave"
@@ -103,7 +103,7 @@ msgstr "Solong"
#. name for aax
msgid "Mandobo Atas"
-msgstr "Mandobo Atas"
+msgstr "Mandobo atas"
#. name for aaz
msgid "Amarasi"
@@ -131,7 +131,7 @@ msgstr "Abenaki occidental"
#. name for abf
msgid "Abai Sungai"
-msgstr "Abai Sungai"
+msgstr "Abai sungai"
#. name for abg
msgid "Abaga"
@@ -147,7 +147,7 @@ msgstr "Abidji"
#. name for abj
msgid "Aka-Bea"
-msgstr "Aka-Bea"
+msgstr "Aka-bea"
#. name for abk
msgid "Abkhazian"
@@ -155,7 +155,7 @@ msgstr "Abjasio"
#. name for abl
msgid "Lampung Nyo"
-msgstr "Lampung Nyo"
+msgstr "Lampung nyo"
#. name for abm
msgid "Abanyom"
@@ -207,7 +207,7 @@ msgstr "Inabaknon"
#. name for aby
msgid "Aneme Wake"
-msgstr "Aneme Wake"
+msgstr "Aneme wake"
#. name for abz
msgid "Abui"
@@ -239,15 +239,15 @@ msgstr "Acholi"
#. name for aci
msgid "Aka-Cari"
-msgstr "Aka-Cari"
+msgstr "Aka-cari"
#. name for ack
msgid "Aka-Kora"
-msgstr "Aka-Kora"
+msgstr "Aka-kora"
#. name for acl
msgid "Akar-Bale"
-msgstr "Akar-Bale"
+msgstr "Akar-bale"
#. name for acm
msgid "Arabic, Mesopotamian"
@@ -455,7 +455,7 @@ msgstr "Aeka"
#. name for afb
msgid "Arabic, Gulf"
-msgstr "Árabe del Golfo Pérsico"
+msgstr "Árabe del golfo Pérsico"
#. name for afd
msgid "Andai"
@@ -631,7 +631,7 @@ msgstr "Aghu"
#. name for ahi
msgid "Aizi, Tiagbamrin"
-msgstr "Aizi, Tiagbamrin"
+msgstr "Aizi tiagbamrin"
#. name for ahk
msgid "Akha"
@@ -643,7 +643,7 @@ msgstr "Igo"
#. name for ahm
msgid "Aizi, Mobumrin"
-msgstr "Aizi, Mobumrin"
+msgstr "Aizi mobumrin"
#. name for ahn
msgid "Àhàn"
@@ -655,7 +655,7 @@ msgstr "Ahom"
#. name for ahp
msgid "Aizi, Aproumu"
-msgstr "Aizi, Aproumu"
+msgstr "Aizi aproumu"
#. name for ahr
msgid "Ahirani"
@@ -699,7 +699,7 @@ msgstr "Inglés criollo antiguano"
#. name for aih
msgid "Ai-Cham"
-msgstr "Ai-Cham"
+msgstr "Ai-cham"
#. name for aii
msgid "Neo-Aramaic, Assyrian"
@@ -707,7 +707,7 @@ msgstr "Arameo moderno asirio"
#. name for aij
msgid "Lishanid Noshan"
-msgstr "Lishanid Noshan"
+msgstr "Lishanid noshan"
#. name for aik
msgid "Ake"
@@ -743,7 +743,7 @@ msgstr "Airoran"
#. name for ais
msgid "Amis, Nataoran"
-msgstr "Amis, Nataoran"
+msgstr "Amis nataoran"
#. name for ait
msgid "Arikem"
@@ -767,7 +767,7 @@ msgstr "Aja (Sudán)"
#. name for ajg
msgid "Aja (Benin)"
-msgstr "Aja (Benin)"
+msgstr "Aja (Benín)"
#. name for aji
msgid "Ajië"
@@ -791,7 +791,7 @@ msgstr "Ajawa"
#. name for ajz
msgid "Karbi, Amri"
-msgstr "Karbi, Amri"
+msgstr "Amri karbi"
#. name for aka
msgid "Akan"
@@ -799,7 +799,7 @@ msgstr "Acano"
#. name for akb
msgid "Batak Angkola"
-msgstr "Batak Angkola"
+msgstr "Batak angkola"
#. name for akc
msgid "Mpur"
@@ -807,7 +807,7 @@ msgstr "Mpur"
#. name for akd
msgid "Ukpet-Ehom"
-msgstr "Ukpet-Ehom"
+msgstr "Ukpet-ehom"
#. name for ake
msgid "Akawaio"
@@ -823,7 +823,7 @@ msgstr "Anakalangu"
#. name for akh
msgid "Angal Heneng"
-msgstr "Angal Heneng"
+msgstr "Angal heneng"
#. name for aki
msgid "Aiome"
@@ -831,7 +831,7 @@ msgstr "Aiome"
#. name for akj
msgid "Aka-Jeru"
-msgstr "Aka-Jeru"
+msgstr "Aka-jeru"
#. name for akk
msgid "Akkadian"
@@ -843,7 +843,7 @@ msgstr "Aklanon"
#. name for akm
msgid "Aka-Bo"
-msgstr "Aka-Bo"
+msgstr "Aka-bo"
#. name for ako
msgid "Akurio"
@@ -883,11 +883,11 @@ msgstr "Akwa"
#. name for akx
msgid "Aka-Kede"
-msgstr "Aka-Kede"
+msgstr "Aka-kede"
#. name for aky
msgid "Aka-Kol"
-msgstr "Aka-Kol"
+msgstr "Aka-kol"
#. name for akz
msgid "Alabama"
@@ -943,7 +943,7 @@ msgstr "Albanés guego"
#. name for alo
msgid "Larike-Wakasihu"
-msgstr "Larike-Wakasihu"
+msgstr "Larike-wakasihu"
#. name for alp
msgid "Alune"
@@ -971,7 +971,7 @@ msgstr "'Are'are"
#. name for alw
msgid "Alaba-K’abeena"
-msgstr "Alaba-K’abeena"
+msgstr "Alaba-k'abeena"
#. name for alx
msgid "Amol"
@@ -1003,7 +1003,7 @@ msgstr "Yanesha'"
#. name for amf
msgid "Hamer-Banna"
-msgstr "Hamer-Banna"
+msgstr "Hamer-banna"
#. name for amg
msgid "Amarag"
@@ -1027,11 +1027,11 @@ msgstr "Ambai"
#. name for aml
msgid "War-Jaintia"
-msgstr "War-Jaintia"
+msgstr "War-jaintia"
#. name for amm
msgid "Ama (Papua New Guinea)"
-msgstr "Ama (Papua Nueva Guinea)"
+msgstr "Ama (Papúa Nueva Guinea)"
#. name for amn
msgid "Amanab"
@@ -1179,7 +1179,7 @@ msgstr "Anaang"
#. name for anx
msgid "Andra-Hus"
-msgstr "Andra-Hus"
+msgstr "Andra-hus"
#. name for any
msgid "Anyin"
@@ -1207,7 +1207,7 @@ msgstr "Andarum"
#. name for aoe
msgid "Angal Enen"
-msgstr "Angal Enen"
+msgstr "Angal enen"
#. name for aof
msgid "Bragat"
@@ -1243,7 +1243,7 @@ msgstr "Ömie"
#. name for aon
msgid "Arapesh, Bumbita"
-msgstr "Arapesh, Bumbita"
+msgstr "Arapesh bumbita"
#. name for aor
msgid "Aore"
@@ -1263,7 +1263,7 @@ msgstr "Atorada"
#. name for aoz
msgid "Uab Meto"
-msgstr "Uab Meto"
+msgstr "Uab meto"
#. name for apb
msgid "Sa'a"
@@ -1283,7 +1283,7 @@ msgstr "Bukiyip"
#. name for apf
msgid "Agta, Pahanan"
-msgstr "Agta, Pahanan"
+msgstr "Agta pahanan"
#. name for apg
msgid "Ampanang"
@@ -1327,15 +1327,15 @@ msgstr "Apma"
#. name for apq
msgid "A-Pucikwar"
-msgstr "A-Pucikwar"
+msgstr "A-pucikwar"
#. name for apr
msgid "Arop-Lokep"
-msgstr "Arop-Lokep"
+msgstr "Arop-lokep"
#. name for aps
msgid "Arop-Sissano"
-msgstr "Arop-Sissano"
+msgstr "Arop-sissano"
#. name for apt
msgid "Apatani"
@@ -1371,7 +1371,7 @@ msgstr "Archi"
#. name for aqd
msgid "Dogon, Ampari"
-msgstr "Dogon, Ampari"
+msgstr "Dogon ampari"
#. name for aqg
msgid "Arigidi"
@@ -1467,7 +1467,7 @@ msgstr "Árabe naydí"
#. name for aru
msgid "Aruá (Amazonas State)"
-msgstr "Aruá (Estado de Amazonas)"
+msgstr "Aruá (estado de Amazonas)"
#. name for arv
msgid "Arbore"
@@ -1479,7 +1479,7 @@ msgstr "Arahuaco"
#. name for arx
msgid "Aruá (Rodonia State)"
-msgstr "Aruá (Estado de Rodonia)"
+msgstr "Aruá (estado de Rondonia)"
#. name for ary
msgid "Arabic, Moroccan"
@@ -1499,7 +1499,7 @@ msgstr "Assiniboine"
#. name for asc
msgid "Asmat, Casuarina Coast"
-msgstr "Asmat, costa Casuarina"
+msgstr "Asmat de la costa Casuarina"
#. name for asd
msgid "Asas"
@@ -1543,7 +1543,7 @@ msgstr "Asamés"
#. name for asn
msgid "Asuriní, Xingú"
-msgstr "Asuriní, Xingú"
+msgstr "Asuriní de Xingú"
#. name for aso
msgid "Dano"
@@ -1571,7 +1571,7 @@ msgstr "Asturiano"
#. name for asu
msgid "Asurini, Tocantins"
-msgstr "Asurini, Tocantins"
+msgstr "Asuriní de Tocantins"
#. name for asv
msgid "Asoa"
@@ -1587,7 +1587,7 @@ msgstr "Muratayak"
#. name for asy
msgid "Asmat, Yaosakor"
-msgstr "Asmat, Yaosakor"
+msgstr "Asmat yaosakor"
#. name for asz
msgid "As"
@@ -1595,7 +1595,7 @@ msgstr "As"
#. name for ata
msgid "Pele-Ata"
-msgstr "Pele-Ata"
+msgstr "Pele-ata"
#. name for atb
msgid "Zaiwa"
@@ -1607,7 +1607,7 @@ msgstr "Atsahuaca"
#. name for atd
msgid "Manobo, Ata"
-msgstr "Manobo, Ata"
+msgstr "Ata manobo"
#. name for ate
msgid "Atemble"
@@ -1647,15 +1647,15 @@ msgstr "Atong"
#. name for atp
msgid "Atta, Pudtol"
-msgstr "Atta, Pudtol"
+msgstr "Atta de Pudtol"
#. name for atq
msgid "Aralle-Tabulahan"
-msgstr "Aralle-Tabulahan"
+msgstr "Aralle-tabulahan"
#. name for atr
msgid "Waimiri-Atroari"
-msgstr "Waimiri-Atroari"
+msgstr "Waimiri-atroari"
#. name for ats
msgid "Gros Ventre"
@@ -1663,7 +1663,7 @@ msgstr "Atsina"
#. name for att
msgid "Atta, Pamplona"
-msgstr "Atta, Pamplona"
+msgstr "Atta de Pamplona"
#. name for atu
msgid "Reel"
@@ -1739,7 +1739,7 @@ msgstr "Asu (Nigeria)"
#. name for aun
msgid "One, Molmo"
-msgstr "One, Molmo"
+msgstr "One molmo"
#. name for auo
msgid "Auyokawa"
@@ -1791,7 +1791,7 @@ msgstr "Avau"
#. name for avd
msgid "Alviri-Vidari"
-msgstr "Alviri-Vidari"
+msgstr "Alviri-vidari"
#. name for ave
msgid "Avestan"
@@ -1823,15 +1823,15 @@ msgstr "Aushiri"
#. name for avt
msgid "Au"
-msgstr ""
+msgstr "Au"
#. name for avu
msgid "Avokaya"
-msgstr ""
+msgstr "Avokaya"
#. name for avv
msgid "Avá-Canoeiro"
-msgstr ""
+msgstr "Avá-canoeiro"
#. name for awa
msgid "Awadhi"
@@ -1839,7 +1839,7 @@ msgstr "Awadhi"
#. name for awb
msgid "Awa (Papua New Guinea)"
-msgstr ""
+msgstr "Awa (Papúa Nueva Guinea)"
#. name for awc
msgid "Cicipu"
@@ -1847,11 +1847,11 @@ msgstr "Cicipu"
#. name for awe
msgid "Awetí"
-msgstr ""
+msgstr "Awetí"
#. name for awh
msgid "Awbono"
-msgstr ""
+msgstr "Awbono"
#. name for awi
msgid "Aekyom"
@@ -1915,7 +1915,7 @@ msgstr "Arara de Mato Grosso"
#. name for axk
msgid "Yaka (Central African Republic)"
-msgstr ""
+msgstr "Yaka (República Centroafricana)"
#. name for axm
msgid "Armenian, Middle"
@@ -1931,7 +1931,7 @@ msgstr "Awar"
#. name for ayb
msgid "Gbe, Ayizo"
-msgstr "Gbe, Ayizo"
+msgstr "Ayizo-gbe"
#. name for ayc
msgid "Aymara, Southern"
@@ -1983,7 +1983,7 @@ msgstr "Árabe normesopotámico"
#. name for ayq
msgid "Ayi (Papua New Guinea)"
-msgstr "Ayi (Papua Nueva Guinea)"
+msgstr "Ayi (Papúa Nueva Guinea)"
#. name for ayr
msgid "Aymara, Central"
@@ -2007,7 +2007,7 @@ msgstr "Aeta tayabas"
#. name for ayz
msgid "Mai Brat"
-msgstr "Mai Brat"
+msgstr "Mai brat"
#. name for aza
msgid "Azha"
@@ -2039,7 +2039,7 @@ msgstr "Awing"
#. name for azt
msgid "Atta, Faire"
-msgstr "Atta, Faire"
+msgstr "Atta de Faire"
#. name for azz
msgid "Nahuatl, Highland Puebla"
@@ -2051,7 +2051,7 @@ msgstr "Babatana"
#. name for bab
msgid "Bainouk-Gunyuño"
-msgstr "Bainouk-Gunyuño"
+msgstr "Bainouk-gunyuño"
#. name for bac
msgid "Badui"
@@ -2119,7 +2119,7 @@ msgstr "Vengo"
#. name for baw
msgid "Bambili-Bambui"
-msgstr "Bambili-Bambui"
+msgstr "Bambili-bambui"
#. name for bax
msgid "Bamun"
@@ -2143,7 +2143,7 @@ msgstr "Barai"
#. name for bbc
msgid "Batak Toba"
-msgstr "Batak Toba"
+msgstr "Batak toba"
#. name for bbd
msgid "Bau"
@@ -2243,7 +2243,7 @@ msgstr "Bai central"
#. name for bcb
msgid "Bainouk-Samik"
-msgstr "Bainouk-Samik"
+msgstr "Bainouk-samik"
#. name for bcc
msgid "Balochi, Southern"
@@ -2263,7 +2263,7 @@ msgstr "Bamu"
#. name for bcg
msgid "Baga Binari"
-msgstr "Baga Binari"
+msgstr "Baga binari"
#. name for bch
msgid "Bariai"
@@ -2299,11 +2299,11 @@ msgstr "Kaluli"
#. name for bcp
msgid "Bali (Democratic Republic of Congo)"
-msgstr ""
+msgstr "Bali (República Democrática del Congo)"
#. name for bcq
msgid "Bench"
-msgstr ""
+msgstr "Bench"
#. name for bcr
msgid "Babine"
@@ -2319,11 +2319,11 @@ msgstr "Bendi"
#. name for bcu
msgid "Awad Bing"
-msgstr "Awad Bing"
+msgstr "Awad bing"
#. name for bcv
msgid "Shoo-Minda-Nye"
-msgstr "Shoo-Minda-Nye"
+msgstr "Shoo-minda-nye"
#. name for bcw
msgid "Bana"
@@ -2335,7 +2335,7 @@ msgstr "Bacama"
#. name for bcz
msgid "Bainouk-Gunyaamolo"
-msgstr "Bainouk-Gunyaamolo"
+msgstr "Bainouk-gunyaamolo"
#. name for bda
msgid "Bayot"
@@ -2347,7 +2347,7 @@ msgstr "Basap"
#. name for bdc
msgid "Emberá-Baudó"
-msgstr "Emberá-Baudó"
+msgstr "Emberá-baudó"
#. name for bdd
msgid "Bunama"
@@ -2403,7 +2403,7 @@ msgstr "Bende"
#. name for bdq
msgid "Bahnar"
-msgstr ""
+msgstr "Bahnar"
#. name for bdr
msgid "Bajau, West Coast"
@@ -2411,71 +2411,71 @@ msgstr "Bajau costero occidental"
#. name for bds
msgid "Burunge"
-msgstr ""
+msgstr "Burunge"
#. name for bdt
msgid "Bokoto"
-msgstr ""
+msgstr "Bokoto"
#. name for bdu
msgid "Oroko"
-msgstr ""
+msgstr "Oroko"
#. name for bdv
msgid "Bodo Parja"
-msgstr ""
+msgstr "Bodo parja"
#. name for bdw
msgid "Baham"
-msgstr ""
+msgstr "Baham"
#. name for bdx
msgid "Budong-Budong"
-msgstr ""
+msgstr "Budong-budong"
#. name for bdy
msgid "Bandjalang"
-msgstr ""
+msgstr "Bandjalang"
#. name for bdz
msgid "Badeshi"
-msgstr ""
+msgstr "Badeshi"
#. name for bea
msgid "Beaver"
-msgstr ""
+msgstr "Beaver"
#. name for beb
msgid "Bebele"
-msgstr ""
+msgstr "Bebele"
#. name for bec
msgid "Iceve-Maci"
-msgstr ""
+msgstr "Iceve-maci"
#. name for bed
msgid "Bedoanas"
-msgstr ""
+msgstr "Bedoanas"
#. name for bee
msgid "Byangsi"
-msgstr ""
+msgstr "Byangsi"
#. name for bef
msgid "Benabena"
-msgstr ""
+msgstr "Benabena"
#. name for beg
msgid "Belait"
-msgstr ""
+msgstr "Belait"
#. name for beh
msgid "Biali"
-msgstr ""
+msgstr "Biali"
#. name for bei
msgid "Bekati'"
-msgstr ""
+msgstr "Bekati'"
#. name for bej
msgid "Beja"
@@ -2483,7 +2483,7 @@ msgstr "Beya"
#. name for bek
msgid "Bebeli"
-msgstr ""
+msgstr "Bebeli"
#. name for bel
msgid "Belarusian"
@@ -2491,7 +2491,7 @@ msgstr "Bielorruso"
#. name for bem
msgid "Bemba (Zambia)"
-msgstr ""
+msgstr "Bemba (Zambia)"
#. name for ben
msgid "Bengali"
@@ -2499,55 +2499,55 @@ msgstr "Bengalí"
#. name for beo
msgid "Beami"
-msgstr ""
+msgstr "Beami"
#. name for bep
msgid "Besoa"
-msgstr ""
+msgstr "Besoa"
#. name for beq
msgid "Beembe"
-msgstr ""
+msgstr "Beembe"
#. name for bes
msgid "Besme"
-msgstr ""
+msgstr "Besme"
#. name for bet
msgid "Béte, Guiberoua"
-msgstr ""
+msgstr "Beté de Guiberoua"
#. name for beu
msgid "Blagar"
-msgstr ""
+msgstr "Blagar"
#. name for bev
msgid "Bété, Daloa"
-msgstr ""
+msgstr "Beté de Daloa"
#. name for bew
msgid "Betawi"
-msgstr ""
+msgstr "Betawi"
#. name for bex
msgid "Jur Modo"
-msgstr ""
+msgstr "Jur modo"
#. name for bey
msgid "Beli (Papua New Guinea)"
-msgstr ""
+msgstr "Beli (Papúa Nueva Guinea)"
#. name for bez
msgid "Bena (Tanzania)"
-msgstr ""
+msgstr "Bena (Tanzania)"
#. name for bfa
msgid "Bari"
-msgstr ""
+msgstr "Bari"
#. name for bfb
msgid "Bareli, Pauri"
-msgstr ""
+msgstr "Bareli pauri"
#. name for bfc
msgid "Bai, Northern"
@@ -2555,23 +2555,23 @@ msgstr "Bai septentrional"
#. name for bfd
msgid "Bafut"
-msgstr ""
+msgstr "Bafut"
#. name for bfe
msgid "Betaf"
-msgstr ""
+msgstr "Betaf"
#. name for bff
msgid "Bofi"
-msgstr ""
+msgstr "Bofi"
#. name for bfg
msgid "Kayan, Busang"
-msgstr ""
+msgstr "Kayan busang"
#. name for bfh
msgid "Blafe"
-msgstr ""
+msgstr "Blafe"
#. name for bfi
msgid "British Sign Language"
@@ -2579,7 +2579,7 @@ msgstr "Lengua de signos británica"
#. name for bfj
msgid "Bafanji"
-msgstr ""
+msgstr "Bafanji"
#. name for bfk
msgid "Ban Khor Sign Language"
@@ -2587,31 +2587,31 @@ msgstr "Lengua de signos de Ban Khor"
#. name for bfl
msgid "Banda-Ndélé"
-msgstr ""
+msgstr "Banda-ndélé"
#. name for bfm
msgid "Mmen"
-msgstr ""
+msgstr "Mmen"
#. name for bfn
msgid "Bunak"
-msgstr ""
+msgstr "Bunak"
#. name for bfo
msgid "Birifor, Malba"
-msgstr ""
+msgstr "Birifor malba"
#. name for bfp
msgid "Beba"
-msgstr ""
+msgstr "Beba"
#. name for bfq
msgid "Badaga"
-msgstr ""
+msgstr "Badaga"
#. name for bfr
msgid "Bazigar"
-msgstr ""
+msgstr "Bazigar"
#. name for bfs
msgid "Bai, Southern"
@@ -2619,75 +2619,75 @@ msgstr "Bai meridional"
#. name for bft
msgid "Balti"
-msgstr ""
+msgstr "Balti"
#. name for bfu
msgid "Gahri"
-msgstr ""
+msgstr "Gahri"
#. name for bfw
msgid "Bondo"
-msgstr ""
+msgstr "Bondo"
#. name for bfx
msgid "Bantayanon"
-msgstr ""
+msgstr "Bantayanon"
#. name for bfy
msgid "Bagheli"
-msgstr ""
+msgstr "Bagheli"
#. name for bfz
msgid "Pahari, Mahasu"
-msgstr ""
+msgstr "Pahari mahasu"
#. name for bga
msgid "Gwamhi-Wuri"
-msgstr ""
+msgstr "Gwamhi-wuri"
#. name for bgb
msgid "Bobongko"
-msgstr ""
+msgstr "Bobongko"
#. name for bgc
msgid "Haryanvi"
-msgstr ""
+msgstr "Haryanvi"
#. name for bgd
msgid "Bareli, Rathwi"
-msgstr ""
+msgstr "Bareli rathwi"
#. name for bge
msgid "Bauria"
-msgstr ""
+msgstr "Bauria"
#. name for bgf
msgid "Bangandu"
-msgstr ""
+msgstr "Bangandu"
#. name for bgg
msgid "Bugun"
-msgstr ""
+msgstr "Bugun"
#. name for bgi
msgid "Giangan"
-msgstr ""
+msgstr "Giangan"
#. name for bgj
msgid "Bangolan"
-msgstr ""
+msgstr "Bangolan"
#. name for bgk
msgid "Bit"
-msgstr ""
+msgstr "Bit"
#. name for bgl
msgid "Bo (Laos)"
-msgstr ""
+msgstr "Bo (Laos)"
#. name for bgm
msgid "Baga Mboteni"
-msgstr ""
+msgstr "Baga mboteni"
#. name for bgn
msgid "Balochi, Western"
@@ -2695,7 +2695,7 @@ msgstr "Baluchi occidental"
#. name for bgo
msgid "Baga Koga"
-msgstr ""
+msgstr "Baga koga"
#. name for bgp
msgid "Balochi, Eastern"
@@ -2703,31 +2703,31 @@ msgstr "Baluchi oriental"
#. name for bgq
msgid "Bagri"
-msgstr ""
+msgstr "Bagri"
#. name for bgr
msgid "Chin, Bawm"
-msgstr ""
+msgstr "Chin bawm"
#. name for bgs
msgid "Tagabawa"
-msgstr ""
+msgstr "Tagabawa"
#. name for bgt
msgid "Bughotu"
-msgstr ""
+msgstr "Bughotu"
#. name for bgu
msgid "Mbongno"
-msgstr ""
+msgstr "Mbongno"
#. name for bgv
msgid "Warkay-Bipim"
-msgstr ""
+msgstr "Warkay-bipim"
#. name for bgw
msgid "Bhatri"
-msgstr ""
+msgstr "Bhatri"
#. name for bgx
msgid "Turkish, Balkan Gagauz"
@@ -2735,59 +2735,59 @@ msgstr "Turco balcánico"
#. name for bgy
msgid "Benggoi"
-msgstr ""
+msgstr "Benggoi"
#. name for bgz
msgid "Banggai"
-msgstr ""
+msgstr "Banggai"
#. name for bha
msgid "Bharia"
-msgstr ""
+msgstr "Bharia"
#. name for bhb
msgid "Bhili"
-msgstr ""
+msgstr "Bhili"
#. name for bhc
msgid "Biga"
-msgstr ""
+msgstr "Biga"
#. name for bhd
msgid "Bhadrawahi"
-msgstr ""
+msgstr "Bhadrawahi"
#. name for bhe
msgid "Bhaya"
-msgstr ""
+msgstr "Bhaya"
#. name for bhf
msgid "Odiai"
-msgstr ""
+msgstr "Odiai"
#. name for bhg
msgid "Binandere"
-msgstr ""
+msgstr "Binandere"
#. name for bhh
msgid "Bukharic"
-msgstr ""
+msgstr "Bukharic"
#. name for bhi
msgid "Bhilali"
-msgstr ""
+msgstr "Bhilali"
#. name for bhj
msgid "Bahing"
-msgstr ""
+msgstr "Bahing"
#. name for bhl
msgid "Bimin"
-msgstr ""
+msgstr "Bimin"
#. name for bhm
msgid "Bathari"
-msgstr ""
+msgstr "Bathari"
#. name for bhn
msgid "Neo-Aramaic, Bohtan"
@@ -2799,7 +2799,7 @@ msgstr "Bopurí"
#. name for bhp
msgid "Bima"
-msgstr ""
+msgstr "Bima"
#. name for bhq
msgid "Tukang Besi South"
@@ -2811,67 +2811,67 @@ msgstr "Malgache bara"
#. name for bhs
msgid "Buwal"
-msgstr ""
+msgstr "Buwal"
#. name for bht
msgid "Bhattiyali"
-msgstr ""
+msgstr "Bhattiyali"
#. name for bhu
msgid "Bhunjia"
-msgstr ""
+msgstr "Bhunjia"
#. name for bhv
msgid "Bahau"
-msgstr ""
+msgstr "Bahau"
#. name for bhw
msgid "Biak"
-msgstr ""
+msgstr "Biak"
#. name for bhx
msgid "Bhalay"
-msgstr ""
+msgstr "Bhalay"
#. name for bhy
msgid "Bhele"
-msgstr ""
+msgstr "Bhele"
#. name for bhz
msgid "Bada (Indonesia)"
-msgstr ""
+msgstr "Bada (Indonesia)"
#. name for bia
msgid "Badimaya"
-msgstr ""
+msgstr "Badimaya"
#. name for bib
msgid "Bissa"
-msgstr ""
+msgstr "Bissa"
#. name for bic
msgid "Bikaru"
-msgstr ""
+msgstr "Bikaru"
#. name for bid
msgid "Bidiyo"
-msgstr ""
+msgstr "Bidiyo"
#. name for bie
msgid "Bepour"
-msgstr ""
+msgstr "Bepour"
#. name for bif
msgid "Biafada"
-msgstr ""
+msgstr "Biafada"
#. name for big
msgid "Biangai"
-msgstr ""
+msgstr "Biangai"
#. name for bij
msgid "Vaghat-Ya-Bijim-Legeri"
-msgstr ""
+msgstr "Vaghat-ya-bijim-legeri"
#. name for bik
msgid "Bikol"
@@ -2879,11 +2879,11 @@ msgstr "Bicolano"
#. name for bil
msgid "Bile"
-msgstr ""
+msgstr "Bile"
#. name for bim
msgid "Bimoba"
-msgstr ""
+msgstr "Bimoba"
#. name for bin
msgid "Bini"
@@ -2891,19 +2891,19 @@ msgstr "Bini"
#. name for bio
msgid "Nai"
-msgstr ""
+msgstr "Nai"
#. name for bip
msgid "Bila"
-msgstr ""
+msgstr "Bila"
#. name for biq
msgid "Bipi"
-msgstr ""
+msgstr "Bipi"
#. name for bir
msgid "Bisorio"
-msgstr ""
+msgstr "Bisorio"
#. name for bis
msgid "Bislama"
@@ -2911,11 +2911,11 @@ msgstr "Bislama"
#. name for bit
msgid "Berinomo"
-msgstr ""
+msgstr "Berinomo"
#. name for biu
msgid "Biete"
-msgstr ""
+msgstr "Biete"
#. name for biv
msgid "Birifor, Southern"
@@ -2923,39 +2923,39 @@ msgstr "Birifor meridional"
#. name for biw
msgid "Kol (Cameroon)"
-msgstr ""
+msgstr "Kol (Camerún)"
#. name for bix
msgid "Bijori"
-msgstr ""
+msgstr "Bijori"
#. name for biy
msgid "Birhor"
-msgstr ""
+msgstr "Birhor"
#. name for biz
msgid "Baloi"
-msgstr ""
+msgstr "Baloi"
#. name for bja
msgid "Budza"
-msgstr ""
+msgstr "Budza"
#. name for bjb
msgid "Banggarla"
-msgstr ""
+msgstr "Banggarla"
#. name for bjc
msgid "Bariji"
-msgstr ""
+msgstr "Bariji"
#. name for bjd
msgid "Bandjigali"
-msgstr ""
+msgstr "Bandjigali"
#. name for bje
msgid "Mien, Biao-Jiao"
-msgstr ""
+msgstr "Mien biao-jiao"
#. name for bjf
msgid "Neo-Aramaic, Barzani Jewish"
@@ -2963,35 +2963,35 @@ msgstr "Arameo moderno judeo-barzaní"
#. name for bjg
msgid "Bidyogo"
-msgstr ""
+msgstr "Bidyogo"
#. name for bjh
msgid "Bahinemo"
-msgstr ""
+msgstr "Bahinemo"
#. name for bji
msgid "Burji"
-msgstr ""
+msgstr "Burji"
#. name for bjj
msgid "Kanauji"
-msgstr ""
+msgstr "Kanauji"
#. name for bjk
msgid "Barok"
-msgstr ""
+msgstr "Barok"
#. name for bjl
msgid "Bulu (Papua New Guinea)"
-msgstr ""
+msgstr "Bulu (Papúa Nueva Guinea)"
#. name for bjm
msgid "Bajelani"
-msgstr ""
+msgstr "Bajelani"
#. name for bjn
msgid "Banjar"
-msgstr ""
+msgstr "Banjar"
#. name for bjo
msgid "Banda, Mid-Southern"
@@ -2999,103 +2999,103 @@ msgstr "Banda, centromeridional"
#. name for bjr
msgid "Binumarien"
-msgstr ""
+msgstr "Binumarien"
#. name for bjs
msgid "Bajan"
-msgstr ""
+msgstr "Bajan"
#. name for bjt
msgid "Balanta-Ganja"
-msgstr ""
+msgstr "Balanta-ganja"
#. name for bju
msgid "Busuu"
-msgstr ""
+msgstr "Busuu"
#. name for bjv
msgid "Bedjond"
-msgstr ""
+msgstr "Bedjond"
#. name for bjw
msgid "Bakwé"
-msgstr ""
+msgstr "Bakwé"
#. name for bjx
msgid "Itneg, Banao"
-msgstr ""
+msgstr "Itneg banao"
#. name for bjy
msgid "Bayali"
-msgstr ""
+msgstr "Bayali"
#. name for bjz
msgid "Baruga"
-msgstr ""
+msgstr "Baruga"
#. name for bka
msgid "Kyak"
-msgstr ""
+msgstr "Kyak"
#. name for bkc
msgid "Baka (Cameroon)"
-msgstr ""
+msgstr "Baka (Camerún)"
#. name for bkd
msgid "Binukid"
-msgstr ""
+msgstr "Binukid"
#. name for bkf
msgid "Beeke"
-msgstr ""
+msgstr "Beeke"
#. name for bkg
msgid "Buraka"
-msgstr ""
+msgstr "Buraka"
#. name for bkh
msgid "Bakoko"
-msgstr ""
+msgstr "Bakoko"
#. name for bki
msgid "Baki"
-msgstr ""
+msgstr "Baki"
#. name for bkj
msgid "Pande"
-msgstr ""
+msgstr "Pande"
#. name for bkk
msgid "Brokskat"
-msgstr ""
+msgstr "Brokskat"
#. name for bkl
msgid "Berik"
-msgstr ""
+msgstr "Berik"
#. name for bkm
msgid "Kom (Cameroon)"
-msgstr ""
+msgstr "Kom (Camerún)"
#. name for bkn
msgid "Bukitan"
-msgstr ""
+msgstr "Bukitan"
#. name for bko
msgid "Kwa'"
-msgstr ""
+msgstr "Kwa'"
#. name for bkp
msgid "Boko (Democratic Republic of Congo)"
-msgstr ""
+msgstr "Boko (República Democrática del Congo)"
#. name for bkq
msgid "Bakairí"
-msgstr ""
+msgstr "Bakairí"
#. name for bkr
msgid "Bakumpai"
-msgstr ""
+msgstr "Bakumpai"
#. name for bks
msgid "Sorsoganon, Northern"
@@ -3103,31 +3103,31 @@ msgstr ""
#. name for bkt
msgid "Boloki"
-msgstr ""
+msgstr "Boloki"
#. name for bku
msgid "Buhid"
-msgstr ""
+msgstr "Buhid"
#. name for bkv
msgid "Bekwarra"
-msgstr ""
+msgstr "Bekwarra"
#. name for bkw
msgid "Bekwel"
-msgstr ""
+msgstr "Bekwel"
#. name for bkx
msgid "Baikeno"
-msgstr ""
+msgstr "Baikeno"
#. name for bky
msgid "Bokyi"
-msgstr ""
+msgstr "Bokyi"
#. name for bkz
msgid "Bungku"
-msgstr ""
+msgstr "Bungku"
#. name for bla
msgid "Siksika"
@@ -3135,51 +3135,51 @@ msgstr "Siksiká"
#. name for blb
msgid "Bilua"
-msgstr ""
+msgstr "Bilua"
#. name for blc
msgid "Bella Coola"
-msgstr ""
+msgstr "Bella coola"
#. name for bld
msgid "Bolango"
-msgstr ""
+msgstr "Bolango"
#. name for ble
msgid "Balanta-Kentohe"
-msgstr ""
+msgstr "Balanta-kentohe"
#. name for blf
msgid "Buol"
-msgstr ""
+msgstr "Buol"
#. name for blg
msgid "Balau"
-msgstr ""
+msgstr "Balau"
#. name for blh
msgid "Kuwaa"
-msgstr ""
+msgstr "Kuwaa"
#. name for bli
msgid "Bolia"
-msgstr ""
+msgstr "Bolia"
#. name for blj
msgid "Bolongan"
-msgstr ""
+msgstr "Bolongan"
#. name for blk
msgid "Karen, Pa'o"
-msgstr ""
+msgstr "Karen pa'o"
#. name for bll
msgid "Biloxi"
-msgstr ""
+msgstr "Biloxi"
#. name for blm
msgid "Beli (Sudan)"
-msgstr ""
+msgstr "Beli (Sudán)"
#. name for bln
msgid "Bicolano, Southern Catanduanes"
@@ -3187,35 +3187,35 @@ msgstr "Virac"
#. name for blo
msgid "Anii"
-msgstr ""
+msgstr "Anii"
#. name for blp
msgid "Blablanga"
-msgstr ""
+msgstr "Blablanga"
#. name for blq
msgid "Baluan-Pam"
-msgstr ""
+msgstr "Baluan-pam"
#. name for blr
msgid "Blang"
-msgstr ""
+msgstr "Blang"
#. name for bls
msgid "Balaesang"
-msgstr ""
+msgstr "Balaesang"
#. name for blt
msgid "Tai Dam"
-msgstr ""
+msgstr "Tai dam"
#. name for blv
msgid "Bolo"
-msgstr ""
+msgstr "Bolo"
#. name for blw
msgid "Balangao"
-msgstr ""
+msgstr "Balangao"
#. name for blx
msgid "Ayta, Mag-Indi"
@@ -3223,59 +3223,59 @@ msgstr "Aeta mag-indi"
#. name for bly
msgid "Notre"
-msgstr ""
+msgstr "Notre"
#. name for blz
msgid "Balantak"
-msgstr ""
+msgstr "Balantak"
#. name for bma
msgid "Lame"
-msgstr ""
+msgstr "Lame"
#. name for bmb
msgid "Bembe"
-msgstr ""
+msgstr "Bemba"
#. name for bmc
msgid "Biem"
-msgstr ""
+msgstr "Biem"
#. name for bmd
msgid "Manduri, Baga"
-msgstr ""
+msgstr "Baga manduri"
#. name for bme
msgid "Limassa"
-msgstr ""
+msgstr "Limassa"
#. name for bmf
msgid "Bom"
-msgstr ""
+msgstr "Bom"
#. name for bmg
msgid "Bamwe"
-msgstr ""
+msgstr "Bamwe"
#. name for bmh
msgid "Kein"
-msgstr ""
+msgstr "Kein"
#. name for bmi
msgid "Bagirmi"
-msgstr ""
+msgstr "Bagirmi"
#. name for bmj
msgid "Bote-Majhi"
-msgstr ""
+msgstr "Bote-majhi"
#. name for bmk
msgid "Ghayavi"
-msgstr ""
+msgstr "Ghayavi"
#. name for bml
msgid "Bomboli"
-msgstr ""
+msgstr "Bomboli"
#. name for bmm
msgid "Malagasy, Northern Betsimisaraka"
@@ -3283,87 +3283,87 @@ msgstr "Malgache betsimisaraka septentrional"
#. name for bmn
msgid "Bina (Papua New Guinea)"
-msgstr ""
+msgstr "Bina (Papúa Nueva Guinea)"
#. name for bmo
msgid "Bambalang"
-msgstr ""
+msgstr "Bambalang"
#. name for bmp
msgid "Bulgebi"
-msgstr ""
+msgstr "Bulgebi"
#. name for bmq
msgid "Bomu"
-msgstr ""
+msgstr "Bomu"
#. name for bmr
msgid "Muinane"
-msgstr ""
+msgstr "Muinane"
#. name for bms
msgid "Kanuri, Bilma"
-msgstr ""
+msgstr "Kanuri bilma"
#. name for bmt
msgid "Biao Mon"
-msgstr ""
+msgstr "Biao mon"
#. name for bmu
msgid "Somba-Siawari"
-msgstr ""
+msgstr "Somba-siawari"
#. name for bmv
msgid "Bum"
-msgstr ""
+msgstr "Bum"
#. name for bmw
msgid "Bomwali"
-msgstr ""
+msgstr "Bomwali"
#. name for bmx
msgid "Baimak"
-msgstr ""
+msgstr "Baimak"
#. name for bmy
msgid "Bemba (Democratic Republic of Congo)"
-msgstr ""
+msgstr "Bemba (República Democrática del Congo)"
#. name for bmz
msgid "Baramu"
-msgstr ""
+msgstr "Baramu"
#. name for bna
msgid "Bonerate"
-msgstr ""
+msgstr "Bonerate"
#. name for bnb
msgid "Bookan"
-msgstr ""
+msgstr "Bookan"
#. name for bnc
msgid "Bontok"
-msgstr ""
+msgstr "Bontok"
#. name for bnd
msgid "Banda (Indonesia)"
-msgstr ""
+msgstr "Banda (Indonesia)"
#. name for bne
msgid "Bintauna"
-msgstr ""
+msgstr "Bintauna"
#. name for bnf
msgid "Masiwang"
-msgstr ""
+msgstr "Masiwang"
#. name for bng
msgid "Benga"
-msgstr ""
+msgstr "Benga"
#. name for bni
msgid "Bangi"
-msgstr ""
+msgstr "Bangi"
#. name for bnj
msgid "Tawbuid, Eastern"
@@ -3371,71 +3371,71 @@ msgstr ""
#. name for bnk
msgid "Bierebo"
-msgstr ""
+msgstr "Bierebo"
#. name for bnl
msgid "Boon"
-msgstr ""
+msgstr "Boon"
#. name for bnm
msgid "Batanga"
-msgstr ""
+msgstr "Batanga"
#. name for bnn
msgid "Bunun"
-msgstr ""
+msgstr "Bunun"
#. name for bno
msgid "Bantoanon"
-msgstr ""
+msgstr "Bantoanon"
#. name for bnp
msgid "Bola"
-msgstr ""
+msgstr "Bola"
#. name for bnq
msgid "Bantik"
-msgstr ""
+msgstr "Bantik"
#. name for bnr
msgid "Butmas-Tur"
-msgstr ""
+msgstr "Butmas-tur"
#. name for bns
msgid "Bundeli"
-msgstr ""
+msgstr "Bundeli"
#. name for bnu
msgid "Bentong"
-msgstr ""
+msgstr "Bentong"
#. name for bnv
msgid "Bonerif"
-msgstr ""
+msgstr "Bonerif"
#. name for bnw
msgid "Bisis"
-msgstr ""
+msgstr "Bisis"
#. name for bnx
msgid "Bangubangu"
-msgstr ""
+msgstr "Bangubangu"
#. name for bny
msgid "Bintulu"
-msgstr ""
+msgstr "Bintulu"
#. name for bnz
msgid "Beezen"
-msgstr ""
+msgstr "Beezen"
#. name for boa
msgid "Bora"
-msgstr ""
+msgstr "Bora"
#. name for bob
msgid "Aweer"
-msgstr ""
+msgstr "Aweer"
#. name for bod
msgid "Tibetan"
@@ -3443,11 +3443,11 @@ msgstr "Tibetano"
#. name for boe
msgid "Mundabli"
-msgstr ""
+msgstr "Mundabli"
#. name for bof
msgid "Bolon"
-msgstr ""
+msgstr "Bolon"
#. name for bog
msgid "Bamako Sign Language"
@@ -3455,7 +3455,7 @@ msgstr "Lengua de signos de Bamako"
#. name for boh
msgid "Boma"
-msgstr ""
+msgstr "Boma"
#. name for boi
msgid "Barbareño"
@@ -3463,39 +3463,39 @@ msgstr "Barbareño"
#. name for boj
msgid "Anjam"
-msgstr ""
+msgstr "Anjam"
#. name for bok
msgid "Bonjo"
-msgstr ""
+msgstr "Bonjo"
#. name for bol
msgid "Bole"
-msgstr ""
+msgstr "Bole"
#. name for bom
msgid "Berom"
-msgstr ""
+msgstr "Berom"
#. name for bon
msgid "Bine"
-msgstr ""
+msgstr "Bine"
#. name for boo
msgid "Bozo, Tiemacèwè"
-msgstr ""
+msgstr "Bozo tiemacèwè"
#. name for bop
msgid "Bonkiman"
-msgstr ""
+msgstr "Bonkiman"
#. name for boq
msgid "Bogaya"
-msgstr ""
+msgstr "Bogaya"
#. name for bor
msgid "Borôro"
-msgstr ""
+msgstr "Borôro"
#. name for bos
msgid "Bosnian"
@@ -3503,171 +3503,171 @@ msgstr "Bosnio"
#. name for bot
msgid "Bongo"
-msgstr ""
+msgstr "Bongo"
#. name for bou
msgid "Bondei"
-msgstr ""
+msgstr "Bondei"
#. name for bov
msgid "Tuwuli"
-msgstr ""
+msgstr "Tuwuli"
#. name for bow
msgid "Rema"
-msgstr ""
+msgstr "Rema"
#. name for box
msgid "Buamu"
-msgstr ""
+msgstr "Buamu"
#. name for boy
msgid "Bodo (Central African Republic)"
-msgstr ""
+msgstr "Bodo (República Centroafricana)"
#. name for boz
msgid "Bozo, Tiéyaxo"
-msgstr ""
+msgstr "Bozo tiéyaxo"
#. name for bpa
msgid "Dakaka"
-msgstr ""
+msgstr "Dakaka"
#. name for bpb
msgid "Barbacoas"
-msgstr ""
+msgstr "Barbacoas"
#. name for bpd
msgid "Banda-Banda"
-msgstr ""
+msgstr "Banda-banda"
#. name for bpg
msgid "Bonggo"
-msgstr ""
+msgstr "Bonggo"
#. name for bph
msgid "Botlikh"
-msgstr ""
+msgstr "Botlikh"
#. name for bpi
msgid "Bagupi"
-msgstr ""
+msgstr "Bagupi"
#. name for bpj
msgid "Binji"
-msgstr ""
+msgstr "Binji"
#. name for bpk
msgid "Orowe"
-msgstr ""
+msgstr "Orowe"
#. name for bpl
msgid "Broome Pearling Lugger Pidgin"
-msgstr ""
+msgstr "Pidyin de lugre perlero de Broome"
#. name for bpm
msgid "Biyom"
-msgstr ""
+msgstr "Biyom"
#. name for bpn
msgid "Dzao Min"
-msgstr ""
+msgstr "Dzao min"
#. name for bpo
msgid "Anasi"
-msgstr ""
+msgstr "Anasi"
#. name for bpp
msgid "Kaure"
-msgstr ""
+msgstr "Kaure"
#. name for bpq
msgid "Malay, Banda"
-msgstr ""
+msgstr "Malayo de Banda"
#. name for bpr
msgid "Blaan, Koronadal"
-msgstr ""
+msgstr "Bilano de Koronadal"
#. name for bps
msgid "Blaan, Sarangani"
-msgstr ""
+msgstr "Bilano de Sarangani"
#. name for bpt
msgid "Barrow Point"
-msgstr ""
+msgstr "Punta Barrow"
#. name for bpu
msgid "Bongu"
-msgstr ""
+msgstr "Bongu"
#. name for bpv
msgid "Marind, Bian"
-msgstr ""
+msgstr "Marind bian"
#. name for bpw
msgid "Bo (Papua New Guinea)"
-msgstr ""
+msgstr "Bo (Papúa Nueva Guinea)"
#. name for bpx
msgid "Bareli, Palya"
-msgstr ""
+msgstr "Bareli palya"
#. name for bpy
msgid "Bishnupriya"
-msgstr ""
+msgstr "Bishnupriya"
#. name for bpz
msgid "Bilba"
-msgstr ""
+msgstr "Bilba"
#. name for bqa
msgid "Tchumbuli"
-msgstr ""
+msgstr "Tchumbuli"
#. name for bqb
msgid "Bagusa"
-msgstr ""
+msgstr "Bagusa"
#. name for bqc
msgid "Boko (Benin)"
-msgstr ""
+msgstr "Boko (Benín)"
#. name for bqd
msgid "Bung"
-msgstr ""
+msgstr "Bung"
#. name for bqf
msgid "Baga Kaloum"
-msgstr ""
+msgstr "Baga kaloum"
#. name for bqg
msgid "Bago-Kusuntu"
-msgstr ""
+msgstr "Bago-kusuntu"
#. name for bqh
msgid "Baima"
-msgstr ""
+msgstr "Baima"
#. name for bqi
msgid "Bakhtiari"
-msgstr ""
+msgstr "Bakhtiari"
#. name for bqj
msgid "Bandial"
-msgstr ""
+msgstr "Bandial"
#. name for bqk
msgid "Banda-Mbrès"
-msgstr ""
+msgstr "Banda-mbrès"
#. name for bql
msgid "Bilakura"
-msgstr ""
+msgstr "Bilakura"
#. name for bqm
msgid "Wumboko"
-msgstr ""
+msgstr "Wumboko"
#. name for bqn
msgid "Bulgarian Sign Language"
@@ -3675,43 +3675,43 @@ msgstr "Lengua de signos búlgara"
#. name for bqo
msgid "Balo"
-msgstr ""
+msgstr "Balo"
#. name for bqp
msgid "Busa"
-msgstr ""
+msgstr "Busa"
#. name for bqq
msgid "Biritai"
-msgstr ""
+msgstr "Biritai"
#. name for bqr
msgid "Burusu"
-msgstr ""
+msgstr "Burusu"
#. name for bqs
msgid "Bosngun"
-msgstr ""
+msgstr "Bosngun"
#. name for bqt
msgid "Bamukumbit"
-msgstr ""
+msgstr "Bamukumbit"
#. name for bqu
msgid "Boguru"
-msgstr ""
+msgstr "Boguru"
#. name for bqv
msgid "Begbere-Ejar"
-msgstr ""
+msgstr "Begbere-ejar"
#. name for bqw
msgid "Buru (Nigeria)"
-msgstr ""
+msgstr "Buru (Nigeria)"
#. name for bqx
msgid "Baangi"
-msgstr ""
+msgstr "Baangi"
#. name for bqy
msgid "Bengkala Sign Language"
@@ -3719,7 +3719,7 @@ msgstr "Lengua de signos de Benkala"
#. name for bqz
msgid "Bakaka"
-msgstr ""
+msgstr "Bakaka"
#. name for bra
msgid "Braj"
@@ -3727,7 +3727,7 @@ msgstr "Braj"
#. name for brb
msgid "Lave"
-msgstr ""
+msgstr "Lave"
#. name for brc
msgid "Creole Dutch, Berbice"
@@ -3735,7 +3735,7 @@ msgstr ""
#. name for brd
msgid "Baraamu"
-msgstr ""
+msgstr "Baraamu"
#. name for bre
msgid "Breton"
@@ -3743,215 +3743,215 @@ msgstr "Bretón"
#. name for brf
msgid "Bera"
-msgstr ""
+msgstr "Bera"
#. name for brg
msgid "Baure"
-msgstr ""
+msgstr "Baure"
#. name for brh
msgid "Brahui"
-msgstr ""
+msgstr "Brahui"
#. name for bri
msgid "Mokpwe"
-msgstr ""
+msgstr "Mokpwe"
#. name for brj
msgid "Bieria"
-msgstr ""
+msgstr "Bieria"
#. name for brk
msgid "Birked"
-msgstr ""
+msgstr "Birked"
#. name for brl
msgid "Birwa"
-msgstr ""
+msgstr "Birwa"
#. name for brm
msgid "Barambu"
-msgstr ""
+msgstr "Barambu"
#. name for brn
msgid "Boruca"
-msgstr ""
+msgstr "Boruca"
#. name for bro
msgid "Brokkat"
-msgstr ""
+msgstr "Brokkat"
#. name for brp
msgid "Barapasi"
-msgstr ""
+msgstr "Barapasi"
#. name for brq
msgid "Breri"
-msgstr ""
+msgstr "Breri"
#. name for brr
msgid "Birao"
-msgstr ""
+msgstr "Birao"
#. name for brs
msgid "Baras"
-msgstr ""
+msgstr "Baras"
#. name for brt
msgid "Bitare"
-msgstr ""
+msgstr "Bitare"
#. name for bru
msgid "Bru, Eastern"
-msgstr ""
+msgstr "Bru oriental"
#. name for brv
msgid "Bru, Western"
-msgstr ""
+msgstr "Bru occidental"
#. name for brw
msgid "Bellari"
-msgstr ""
+msgstr "Bellari"
#. name for brx
msgid "Bodo (India)"
-msgstr ""
+msgstr "Bodo (India)"
#. name for bry
msgid "Burui"
-msgstr ""
+msgstr "Burui"
#. name for brz
msgid "Bilbil"
-msgstr ""
+msgstr "Bilbil"
#. name for bsa
msgid "Abinomn"
-msgstr ""
+msgstr "Abinomn"
#. name for bsb
msgid "Bisaya, Brunei"
-msgstr ""
+msgstr "Bisaya bruneano"
#. name for bsc
msgid "Bassari"
-msgstr ""
+msgstr "Bassari"
#. name for bse
msgid "Wushi"
-msgstr ""
+msgstr "Wushi"
#. name for bsf
msgid "Bauchi"
-msgstr ""
+msgstr "Bauchi"
#. name for bsg
msgid "Bashkardi"
-msgstr ""
+msgstr "Bashkardi"
#. name for bsh
msgid "Kati"
-msgstr ""
+msgstr "Kati"
#. name for bsi
msgid "Bassossi"
-msgstr ""
+msgstr "Bassossi"
#. name for bsj
msgid "Bangwinji"
-msgstr ""
+msgstr "Bangwinji"
#. name for bsk
msgid "Burushaski"
-msgstr ""
+msgstr "Burushaski"
#. name for bsl
msgid "Basa-Gumna"
-msgstr ""
+msgstr "Basa-gumna"
#. name for bsm
msgid "Busami"
-msgstr ""
+msgstr "Busami"
#. name for bsn
msgid "Barasana-Eduria"
-msgstr ""
+msgstr "Barasana-eduria"
#. name for bso
msgid "Buso"
-msgstr ""
+msgstr "Buso"
#. name for bsp
msgid "Baga Sitemu"
-msgstr ""
+msgstr "Baga sitemu"
#. name for bsq
msgid "Bassa"
-msgstr ""
+msgstr "Bassa"
#. name for bsr
msgid "Bassa-Kontagora"
-msgstr ""
+msgstr "Bassa-kontagora"
#. name for bss
msgid "Akoose"
-msgstr ""
+msgstr "Akoose"
#. name for bst
msgid "Basketo"
-msgstr ""
+msgstr "Basketo"
#. name for bsu
msgid "Bahonsuai"
-msgstr ""
+msgstr "Bahonsuai"
#. name for bsv
msgid "Baga Sobané"
-msgstr ""
+msgstr "Baga sobané"
#. name for bsw
msgid "Baiso"
-msgstr ""
+msgstr "Baiso"
#. name for bsx
msgid "Yangkam"
-msgstr ""
+msgstr "Yangkam"
#. name for bsy
msgid "Bisaya, Sabah"
-msgstr ""
+msgstr "Bisaya de Sabah"
#. name for bta
msgid "Bata"
-msgstr ""
+msgstr "Bata"
#. name for btc
msgid "Bati (Cameroon)"
-msgstr ""
+msgstr "Bati (Camerún)"
#. name for btd
msgid "Batak Dairi"
-msgstr ""
+msgstr "Batak dairi"
#. name for bte
msgid "Gamo-Ningi"
-msgstr ""
+msgstr "Gamo-ningi"
#. name for btf
msgid "Birgit"
-msgstr ""
+msgstr "Birgit"
#. name for btg
msgid "Bété, Gagnoa"
-msgstr ""
+msgstr "Bété de Gagnoa"
#. name for bth
msgid "Bidayuh, Biatah"
-msgstr ""
+msgstr "Bidayuh biatah"
#. name for bti
msgid "Burate"
-msgstr ""
+msgstr "Burate"
#. name for btj
msgid "Malay, Bacanese"
@@ -3959,63 +3959,63 @@ msgstr "Malayo bacanés"
#. name for btl
msgid "Bhatola"
-msgstr ""
+msgstr "Bhatola"
#. name for btm
msgid "Batak Mandailing"
-msgstr ""
+msgstr "Batak mandailing"
#. name for btn
msgid "Ratagnon"
-msgstr ""
+msgstr "Ratagnon"
#. name for bto
msgid "Bikol, Rinconada"
-msgstr ""
+msgstr "Bikolano de Rinconada"
#. name for btp
msgid "Budibud"
-msgstr ""
+msgstr "Budibud"
#. name for btq
msgid "Batek"
-msgstr ""
+msgstr "Batek"
#. name for btr
msgid "Baetora"
-msgstr ""
+msgstr "Baetora"
#. name for bts
msgid "Batak Simalungun"
-msgstr ""
+msgstr "Batak simalungun"
#. name for btt
msgid "Bete-Bendi"
-msgstr ""
+msgstr "Bete-bendi"
#. name for btu
msgid "Batu"
-msgstr ""
+msgstr "Batu"
#. name for btv
msgid "Bateri"
-msgstr ""
+msgstr "Bateri"
#. name for btw
msgid "Butuanon"
-msgstr ""
+msgstr "Butuanon"
#. name for btx
msgid "Batak Karo"
-msgstr ""
+msgstr "Batak karo"
#. name for bty
msgid "Bobot"
-msgstr ""
+msgstr "Bobot"
#. name for btz
msgid "Batak Alas-Kluet"
-msgstr ""
+msgstr "Batak alas-kluet"
#. name for bua
msgid "Buriat"
@@ -4023,23 +4023,23 @@ msgstr "Buriato"
#. name for bub
msgid "Bua"
-msgstr ""
+msgstr "Bua"
#. name for buc
msgid "Bushi"
-msgstr ""
+msgstr "Bushi"
#. name for bud
msgid "Ntcham"
-msgstr ""
+msgstr "Ntcham"
#. name for bue
msgid "Beothuk"
-msgstr ""
+msgstr "Beothuk"
#. name for buf
msgid "Bushoong"
-msgstr ""
+msgstr "Bushoong"
#. name for bug
msgid "Buginese"
@@ -4047,19 +4047,19 @@ msgstr "Buginés"
#. name for buh
msgid "Bunu, Younuo"
-msgstr ""
+msgstr "Bunu younuo"
#. name for bui
msgid "Bongili"
-msgstr ""
+msgstr "Bongili"
#. name for buj
msgid "Basa-Gurmana"
-msgstr ""
+msgstr "Basa-gurmana"
#. name for buk
msgid "Bugawac"
-msgstr ""
+msgstr "Bugawac"
#. name for bul
msgid "Bulgarian"
@@ -4067,99 +4067,99 @@ msgstr "Búlgaro"
#. name for bum
msgid "Bulu (Cameroon)"
-msgstr ""
+msgstr "Bulu (Camerún)"
#. name for bun
msgid "Sherbro"
-msgstr ""
+msgstr "Sherbro"
#. name for buo
msgid "Terei"
-msgstr ""
+msgstr "Terei"
#. name for bup
msgid "Busoa"
-msgstr ""
+msgstr "Busoa"
#. name for buq
msgid "Brem"
-msgstr ""
+msgstr "Brem"
#. name for bus
msgid "Bokobaru"
-msgstr ""
+msgstr "Bokobaru"
#. name for but
msgid "Bungain"
-msgstr ""
+msgstr "Bungain"
#. name for buu
msgid "Budu"
-msgstr ""
+msgstr "Budu"
#. name for buv
msgid "Bun"
-msgstr ""
+msgstr "Bun"
#. name for buw
msgid "Bubi"
-msgstr ""
+msgstr "Bubi"
#. name for bux
msgid "Boghom"
-msgstr ""
+msgstr "Boghom"
#. name for buy
msgid "Bullom So"
-msgstr ""
+msgstr "Bullom so"
#. name for buz
msgid "Bukwen"
-msgstr ""
+msgstr "Bukwen"
#. name for bva
msgid "Barein"
-msgstr ""
+msgstr "Barein"
#. name for bvb
msgid "Bube"
-msgstr ""
+msgstr "Bube"
#. name for bvc
msgid "Baelelea"
-msgstr ""
+msgstr "Baelelea"
#. name for bvd
msgid "Baeggu"
-msgstr ""
+msgstr "Baeggu"
#. name for bve
msgid "Malay, Berau"
-msgstr ""
+msgstr "Malayo berau"
#. name for bvf
msgid "Boor"
-msgstr ""
+msgstr "Boor"
#. name for bvg
msgid "Bonkeng"
-msgstr ""
+msgstr "Bonkeng"
#. name for bvh
msgid "Bure"
-msgstr ""
+msgstr "Bure"
#. name for bvi
msgid "Belanda Viri"
-msgstr ""
+msgstr "Belanda viri"
#. name for bvj
msgid "Baan"
-msgstr ""
+msgstr "Baan"
#. name for bvk
msgid "Bukat"
-msgstr ""
+msgstr "Bukat"
#. name for bvl
msgid "Bolivian Sign Language"
@@ -4167,115 +4167,115 @@ msgstr "Lengua de signos boliviana"
#. name for bvm
msgid "Bamunka"
-msgstr ""
+msgstr "Bamunka"
#. name for bvn
msgid "Buna"
-msgstr ""
+msgstr "Buna"
#. name for bvo
msgid "Bolgo"
-msgstr ""
+msgstr "Bolgo"
#. name for bvq
msgid "Birri"
-msgstr ""
+msgstr "Birri"
#. name for bvr
msgid "Burarra"
-msgstr ""
+msgstr "Burarra"
#. name for bvt
msgid "Bati (Indonesia)"
-msgstr ""
+msgstr "Bati (Indonesia)"
#. name for bvu
msgid "Malay, Bukit"
-msgstr ""
+msgstr "Malayo, Bukit"
#. name for bvv
msgid "Baniva"
-msgstr ""
+msgstr "Baniva"
#. name for bvw
msgid "Boga"
-msgstr ""
+msgstr "Boga"
#. name for bvx
msgid "Dibole"
-msgstr ""
+msgstr "Dibole"
#. name for bvy
msgid "Baybayanon"
-msgstr ""
+msgstr "Baybayanon"
#. name for bvz
msgid "Bauzi"
-msgstr ""
+msgstr "Bauzi"
#. name for bwa
msgid "Bwatoo"
-msgstr ""
+msgstr "Bwatoo"
#. name for bwb
msgid "Namosi-Naitasiri-Serua"
-msgstr ""
+msgstr "Namosi-Naitasiri-Serua"
#. name for bwc
msgid "Bwile"
-msgstr ""
+msgstr "Bwile"
#. name for bwd
msgid "Bwaidoka"
-msgstr ""
+msgstr "Bwaidoka"
#. name for bwe
msgid "Karen, Bwe"
-msgstr ""
+msgstr "Karen, Bwe"
#. name for bwf
msgid "Boselewa"
-msgstr ""
+msgstr "Boselewa"
#. name for bwg
msgid "Barwe"
-msgstr ""
+msgstr "Barwe"
#. name for bwh
msgid "Bishuo"
-msgstr ""
+msgstr "Bishuo"
#. name for bwi
msgid "Baniwa"
-msgstr ""
+msgstr "Baniwa"
#. name for bwj
msgid "Bwamu, Láá Láá"
-msgstr ""
+msgstr "Bwamu, Láá Láá"
#. name for bwk
msgid "Bauwaki"
-msgstr ""
+msgstr "Bauwaki"
#. name for bwl
msgid "Bwela"
-msgstr ""
+msgstr "Bwela"
#. name for bwm
msgid "Biwat"
-msgstr ""
+msgstr "Biwat"
#. name for bwn
msgid "Bunu, Wunai"
-msgstr ""
+msgstr "Bunu wunai"
#. name for bwo
msgid "Boro (Ethiopia)"
-msgstr ""
+msgstr "Boro (Etiopía)"
#. name for bwp
msgid "Mandobo Bawah"
-msgstr ""
+msgstr "Mandobo bawah"
#. name for bwq
msgid "Bobo Madaré, Southern"
@@ -4283,95 +4283,95 @@ msgstr ""
#. name for bwr
msgid "Bura-Pabir"
-msgstr ""
+msgstr "Bura-Pabir"
#. name for bws
msgid "Bomboma"
-msgstr ""
+msgstr "Bomboma"
#. name for bwt
msgid "Bafaw-Balong"
-msgstr ""
+msgstr "Bafaw-Balong"
#. name for bwu
msgid "Buli (Ghana)"
-msgstr ""
+msgstr "Buli (Ghana)"
#. name for bww
msgid "Bwa"
-msgstr ""
+msgstr "Bwa"
#. name for bwx
msgid "Bunu, Bu-Nao"
-msgstr ""
+msgstr "Bunu, Bu-Nao"
#. name for bwy
msgid "Bwamu, Cwi"
-msgstr ""
+msgstr "Bwamu, Cwi"
#. name for bwz
msgid "Bwisi"
-msgstr ""
+msgstr "Bwisi"
#. name for bxa
msgid "Bauro"
-msgstr ""
+msgstr "Bauro"
#. name for bxb
msgid "Bor, Belanda"
-msgstr ""
+msgstr "Bor, Belanda"
#. name for bxc
msgid "Molengue"
-msgstr ""
+msgstr "Molengue"
#. name for bxd
msgid "Pela"
-msgstr ""
+msgstr "Pela"
#. name for bxe
msgid "Birale"
-msgstr ""
+msgstr "Birale"
#. name for bxf
msgid "Bilur"
-msgstr ""
+msgstr "Bilur"
#. name for bxg
msgid "Bangala"
-msgstr ""
+msgstr "Bangala"
#. name for bxh
msgid "Buhutu"
-msgstr ""
+msgstr "Buhutu"
#. name for bxi
msgid "Pirlatapa"
-msgstr ""
+msgstr "Pirlatapa"
#. name for bxj
msgid "Bayungu"
-msgstr ""
+msgstr "Bayungu"
#. name for bxk
msgid "Bukusu"
-msgstr ""
+msgstr "Bukusu"
#. name for bxl
msgid "Jalkunan"
-msgstr ""
+msgstr "Jalkunan"
#. name for bxm
msgid "Buriat, Mongolia"
-msgstr ""
+msgstr "Buriat, Mongolia"
#. name for bxn
msgid "Burduna"
-msgstr ""
+msgstr "Burduna"
#. name for bxo
msgid "Barikanchi"
-msgstr ""
+msgstr "Barikanchi"
#. name for bxp
msgid "Bebil"
@@ -4391,75 +4391,75 @@ msgstr ""
#. name for bxu
msgid "Buriat, China"
-msgstr ""
+msgstr "Buriat, China"
#. name for bxv
msgid "Berakou"
-msgstr ""
+msgstr "Berakou"
#. name for bxw
msgid "Bankagooma"
-msgstr ""
+msgstr "Bankagooma"
#. name for bxx
msgid "Borna (Democratic Republic of Congo)"
-msgstr ""
+msgstr "Borna (República Democrática del Congo)"
#. name for bxz
msgid "Binahari"
-msgstr ""
+msgstr "Binahari"
#. name for bya
msgid "Batak"
-msgstr ""
+msgstr "Batak"
#. name for byb
msgid "Bikya"
-msgstr ""
+msgstr "Bikya"
#. name for byc
msgid "Ubaghara"
-msgstr ""
+msgstr "Ubaghara"
#. name for byd
msgid "Benyadu'"
-msgstr ""
+msgstr "Benyadu'"
#. name for bye
msgid "Pouye"
-msgstr ""
+msgstr "Pouye"
#. name for byf
msgid "Bete"
-msgstr ""
+msgstr "Bete"
#. name for byg
msgid "Baygo"
-msgstr ""
+msgstr "Baygo"
#. name for byh
msgid "Bhujel"
-msgstr ""
+msgstr "Bhujel"
#. name for byi
msgid "Buyu"
-msgstr ""
+msgstr "Buyu"
#. name for byj
msgid "Bina (Nigeria)"
-msgstr ""
+msgstr "Bina (Nigeria)"
#. name for byk
msgid "Biao"
-msgstr ""
+msgstr "Biao"
#. name for byl
msgid "Bayono"
-msgstr ""
+msgstr "Bayono"
#. name for bym
msgid "Bidyara"
-msgstr ""
+msgstr "Bidyara"
#. name for byn
msgid "Bilin"
@@ -4471,51 +4471,51 @@ msgstr ""
#. name for byp
msgid "Bumaji"
-msgstr ""
+msgstr "Bumaji"
#. name for byq
msgid "Basay"
-msgstr ""
+msgstr "Basay"
#. name for byr
msgid "Baruya"
-msgstr ""
+msgstr "Baruya"
#. name for bys
msgid "Burak"
-msgstr ""
+msgstr "Burak"
#. name for byt
msgid "Berti"
-msgstr ""
+msgstr "Berti"
#. name for byv
msgid "Medumba"
-msgstr ""
+msgstr "Medumba"
#. name for byw
msgid "Belhariya"
-msgstr ""
+msgstr "Belhariya"
#. name for byx
msgid "Qaqet"
-msgstr ""
+msgstr "Qaqet"
#. name for byy
msgid "Buya"
-msgstr ""
+msgstr "Buya"
#. name for byz
msgid "Banaro"
-msgstr ""
+msgstr "Banaro"
#. name for bza
msgid "Bandi"
-msgstr ""
+msgstr "Bandi"
#. name for bzb
msgid "Andio"
-msgstr ""
+msgstr "Andio"
#. name for bzc
msgid "Malagasy, Southern Betsimisaraka"
@@ -4523,27 +4523,27 @@ msgstr "Malgache betsimisaraka meridional"
#. name for bzd
msgid "Bribri"
-msgstr ""
+msgstr "Bribri"
#. name for bze
msgid "Bozo, Jenaama"
-msgstr ""
+msgstr "Bozo, Jenaama"
#. name for bzf
msgid "Boikin"
-msgstr ""
+msgstr "Boikin"
#. name for bzg
msgid "Babuza"
-msgstr ""
+msgstr "Babuza"
#. name for bzh
msgid "Buang, Mapos"
-msgstr ""
+msgstr "Buang, Mapos"
#. name for bzi
msgid "Bisu"
-msgstr ""
+msgstr "Bisu"
#. name for bzj
msgid "Kriol English, Belize"
@@ -4555,31 +4555,31 @@ msgstr "Inglés criollo nicaragüense"
#. name for bzl
msgid "Boano (Sulawesi)"
-msgstr ""
+msgstr "Boano (Sulawesi)"
#. name for bzm
msgid "Bolondo"
-msgstr ""
+msgstr "Bolondo"
#. name for bzn
msgid "Boano (Maluku)"
-msgstr ""
+msgstr "Boano (Maluku)"
#. name for bzo
msgid "Bozaba"
-msgstr ""
+msgstr "Bozaba"
#. name for bzp
msgid "Kemberano"
-msgstr ""
+msgstr "Kemberano"
#. name for bzq
msgid "Buli (Indonesia)"
-msgstr ""
+msgstr "Buli (Indonesia)"
#. name for bzr
msgid "Biri"
-msgstr ""
+msgstr "Biri"
#. name for bzs
msgid "Brazilian Sign Language"
@@ -4587,43 +4587,43 @@ msgstr "Lengua de signos brasileña"
#. name for bzt
msgid "Brithenig"
-msgstr ""
+msgstr "Brithenig"
#. name for bzu
msgid "Burmeso"
-msgstr ""
+msgstr "Burmeso"
#. name for bzv
msgid "Bebe"
-msgstr ""
+msgstr "Bebe"
#. name for bzw
msgid "Basa (Nigeria)"
-msgstr ""
+msgstr "Basa (Nigeria)"
#. name for bzx
msgid "Bozo, Kɛlɛngaxo"
-msgstr ""
+msgstr "Bozo, Kɛlɛngaxo"
#. name for bzy
msgid "Obanliku"
-msgstr ""
+msgstr "Obanliku"
#. name for bzz
msgid "Evant"
-msgstr ""
+msgstr "Evant"
#. name for caa
msgid "Chortí"
-msgstr ""
+msgstr "Chortí"
#. name for cab
msgid "Garifuna"
-msgstr ""
+msgstr "Garifuna"
#. name for cac
msgid "Chuj"
-msgstr ""
+msgstr "Chuj"
#. name for cad
msgid "Caddo"
@@ -4631,7 +4631,7 @@ msgstr "Caddo"
#. name for cae
msgid "Lehar"
-msgstr ""
+msgstr "Lehar"
#. name for caf
msgid "Carrier, Southern"
@@ -4639,19 +4639,19 @@ msgstr ""
#. name for cag
msgid "Nivaclé"
-msgstr ""
+msgstr "Nivaclé"
#. name for cah
msgid "Cahuarano"
-msgstr ""
+msgstr "Cahuarano"
#. name for caj
msgid "Chané"
-msgstr ""
+msgstr "Chané"
#. name for cak
msgid "Kaqchikel"
-msgstr ""
+msgstr "Kaqchikel"
#. name for cal
msgid "Carolinian"
@@ -7543,7 +7543,7 @@ msgstr ""
#. name for fbl
msgid "Bikol, West Albay"
-msgstr ""
+msgstr "Bikolano de Albay occidental"
#. name for fcs
msgid "Quebec Sign Language"
@@ -10363,7 +10363,7 @@ msgstr "Lengua de signos italiana"
#. name for isg
msgid "Irish Sign Language"
-msgstr "Lengua de signos Irlandesa"
+msgstr "Lengua de signos irlandesa"
#. name for ish
msgid "Esan"
@@ -13815,7 +13815,7 @@ msgstr ""
#. name for lbl
msgid "Bikol, Libon"
-msgstr ""
+msgstr "Bicolano de Libon"
#. name for lbm
msgid "Lodhi"
@@ -21887,7 +21887,7 @@ msgstr ""
#. name for rbl
msgid "Bikol, Miraya"
-msgstr ""
+msgstr "Bicolano de Miraya"
#. name for rcf
msgid "Creole French, Réunion"
@@ -26523,7 +26523,7 @@ msgstr ""
#. name for ubl
msgid "Bikol, Buhi'non"
-msgstr ""
+msgstr "Bikolano de Buhi'non"
#. name for ubr
msgid "Ubir"
@@ -30319,7 +30319,7 @@ msgstr ""
#. name for zhn
msgid "Zhuang, Nong"
-msgstr ""
+msgstr "Zhuang, Nong"
#. name for zho
msgid "Chinese"
@@ -30367,15 +30367,15 @@ msgstr ""
#. name for zka
msgid "Kaimbulawa"
-msgstr ""
+msgstr "Kaimbulawa"
#. name for zkb
msgid "Koibal"
-msgstr ""
+msgstr "Koibal"
#. name for zkg
msgid "Koguryo"
-msgstr ""
+msgstr "Koguryo"
#. name for zkh
msgid "Khorezmian"
@@ -30383,27 +30383,27 @@ msgstr ""
#. name for zkk
msgid "Karankawa"
-msgstr ""
+msgstr "Karankawa"
#. name for zko
msgid "Kott"
-msgstr ""
+msgstr "Kott"
#. name for zkp
msgid "Kaingáng, São Paulo"
-msgstr ""
+msgstr "Kaingáng, São Paulo"
#. name for zkr
msgid "Zakhring"
-msgstr ""
+msgstr "Zakhring"
#. name for zkt
msgid "Kitan"
-msgstr ""
+msgstr "Kitan"
#. name for zku
msgid "Kaurna"
-msgstr ""
+msgstr "Kaurna"
#. name for zkv
msgid "Krevinian"
@@ -30411,11 +30411,11 @@ msgstr ""
#. name for zkz
msgid "Khazar"
-msgstr ""
+msgstr "Khazar"
#. name for zlj
msgid "Zhuang, Liujiang"
-msgstr ""
+msgstr "Zhuang, Liujiang"
#. name for zlm
msgid "Malay (individual language)"
@@ -30423,147 +30423,147 @@ msgstr "Malayo"
#. name for zln
msgid "Zhuang, Lianshan"
-msgstr ""
+msgstr "Zhuang, Lianshan"
#. name for zlq
msgid "Zhuang, Liuqian"
-msgstr ""
+msgstr "Zhuang, Liuqian"
#. name for zma
msgid "Manda (Australia)"
-msgstr ""
+msgstr "Manda (Australia)"
#. name for zmb
msgid "Zimba"
-msgstr ""
+msgstr "Zimba"
#. name for zmc
msgid "Margany"
-msgstr ""
+msgstr "Margany"
#. name for zmd
msgid "Maridan"
-msgstr ""
+msgstr "Maridan"
#. name for zme
msgid "Mangerr"
-msgstr ""
+msgstr "Mangerr"
#. name for zmf
msgid "Mfinu"
-msgstr ""
+msgstr "Mfinu"
#. name for zmg
msgid "Marti Ke"
-msgstr ""
+msgstr "Marti Ke"
#. name for zmh
msgid "Makolkol"
-msgstr ""
+msgstr "Makolkol"
#. name for zmi
msgid "Negeri Sembilan Malay"
-msgstr ""
+msgstr "Negeri Sembilan Malay"
#. name for zmj
msgid "Maridjabin"
-msgstr ""
+msgstr "Maridjabin"
#. name for zmk
msgid "Mandandanyi"
-msgstr ""
+msgstr "Mandandanyi"
#. name for zml
msgid "Madngele"
-msgstr ""
+msgstr "Madngele"
#. name for zmm
msgid "Marimanindji"
-msgstr ""
+msgstr "Marimanindji"
#. name for zmn
msgid "Mbangwe"
-msgstr ""
+msgstr "Mbangwe"
#. name for zmo
msgid "Molo"
-msgstr ""
+msgstr "Molo"
#. name for zmp
msgid "Mpuono"
-msgstr ""
+msgstr "Mpuono"
#. name for zmq
msgid "Mituku"
-msgstr ""
+msgstr "Mituku"
#. name for zmr
msgid "Maranunggu"
-msgstr ""
+msgstr "Maranunggu"
#. name for zms
msgid "Mbesa"
-msgstr ""
+msgstr "Mbesa"
#. name for zmt
msgid "Maringarr"
-msgstr ""
+msgstr "Maringarr"
#. name for zmu
msgid "Muruwari"
-msgstr ""
+msgstr "Muruwari"
#. name for zmv
msgid "Mbariman-Gudhinma"
-msgstr ""
+msgstr "Mbariman-Gudhinma"
#. name for zmw
msgid "Mbo (Democratic Republic of Congo)"
-msgstr ""
+msgstr "Mbo (República democrática del Congo)"
#. name for zmx
msgid "Bomitaba"
-msgstr ""
+msgstr "Bomitaba"
#. name for zmy
msgid "Mariyedi"
-msgstr ""
+msgstr "Mariyedi"
#. name for zmz
msgid "Mbandja"
-msgstr ""
+msgstr "Mbandja"
#. name for zna
msgid "Zan Gula"
-msgstr ""
+msgstr "Zan Gula"
#. name for zne
msgid "Zande (individual language)"
-msgstr ""
+msgstr "Zande (idioma individual)"
#. name for zng
msgid "Mang"
-msgstr ""
+msgstr "Mang"
#. name for znk
msgid "Manangkari"
-msgstr ""
+msgstr "Manangkari"
#. name for zns
msgid "Mangas"
-msgstr ""
+msgstr "Mangas"
#. name for zoc
msgid "Zoque, Copainalá"
-msgstr ""
+msgstr "Zoque, Copainalá"
#. name for zoh
msgid "Zoque, Chimalapa"
-msgstr ""
+msgstr "Zoque, Chimalapa"
#. name for zom
msgid "Zou"
-msgstr ""
+msgstr "Zou"
#. name for zoo
msgid "Zapotec, Asunción Mixtepec"
@@ -30571,15 +30571,15 @@ msgstr ""
#. name for zoq
msgid "Zoque, Tabasco"
-msgstr ""
+msgstr "Zoque, Tabasco"
#. name for zor
msgid "Zoque, Rayón"
-msgstr ""
+msgstr "Zoque, Rayón"
#. name for zos
msgid "Zoque, Francisco León"
-msgstr ""
+msgstr "Zoque, Francisco León"
#. name for zpa
msgid "Zapotec, Lachiguiri"
@@ -30691,35 +30691,35 @@ msgstr ""
#. name for zra
msgid "Kara (Korea)"
-msgstr ""
+msgstr "Kara (Korea)"
#. name for zrg
msgid "Mirgan"
-msgstr ""
+msgstr "Mirgan"
#. name for zrn
msgid "Zerenkel"
-msgstr ""
+msgstr "Zerenkel"
#. name for zro
msgid "Záparo"
-msgstr ""
+msgstr "Záparo"
#. name for zrp
msgid "Zarphatic"
-msgstr ""
+msgstr "Zarphatic"
#. name for zrs
msgid "Mairasi"
-msgstr ""
+msgstr "Mairasi"
#. name for zsa
msgid "Sarasira"
-msgstr ""
+msgstr "Sarasira"
#. name for zsk
msgid "Kaskean"
-msgstr ""
+msgstr "Kaskean"
#. name for zsl
msgid "Zambian Sign Language"
@@ -30727,7 +30727,7 @@ msgstr "Lengua de signos zambiana"
#. name for zsm
msgid "Malay, Standard"
-msgstr ""
+msgstr "Malay, Standard"
#. name for zsr
msgid "Zapotec, Southern Rincon"
@@ -30739,23 +30739,23 @@ msgstr "Sukurum"
#. name for zte
msgid "Zapotec, Elotepec"
-msgstr "Zapoteco, Elotepec"
+msgstr "Zapoteco de Elotepec"
#. name for ztg
msgid "Zapotec, Xanaguía"
-msgstr "Zapoteco, Xanaguía"
+msgstr "Zapoteco de Xanaguía"
#. name for ztl
msgid "Zapotec, Lapaguía-Guivini"
-msgstr "Zapoteco, Lapaguía-Guivini"
+msgstr "Zapoteco de Lapaguía-Guivini"
#. name for ztm
msgid "Zapotec, San Agustín Mixtepec"
-msgstr ""
+msgstr "Zapoteco de San Agustín Mixtepec"
#. name for ztn
msgid "Zapotec, Santa Catarina Albarradas"
-msgstr ""
+msgstr "Zapoteco de Santa Catarina Albarradas"
#. name for ztp
msgid "Zapotec, Loxicha"
@@ -30775,15 +30775,15 @@ msgstr ""
#. name for ztu
msgid "Zapotec, Güilá"
-msgstr ""
+msgstr "Zapoteco, Güilá"
#. name for ztx
msgid "Zapotec, Zaachila"
-msgstr ""
+msgstr "Zapoteco, Zaachila"
#. name for zty
msgid "Zapotec, Yatee"
-msgstr ""
+msgstr "Zapoteca, Yatee"
#. name for zua
msgid "Zeem"
@@ -30819,19 +30819,19 @@ msgstr "Sin contenido lingüístico"
#. name for zyb
msgid "Zhuang, Yongbei"
-msgstr "Zhuang, Yongbei"
+msgstr "Chuang yongbei"
#. name for zyg
msgid "Zhuang, Yang"
-msgstr "Zhuang, Yang"
+msgstr "Chuang yang"
#. name for zyj
msgid "Zhuang, Youjiang"
-msgstr "Zhuang, Youjiang"
+msgstr "Chuang youjiang"
#. name for zyn
msgid "Zhuang, Yongnan"
-msgstr "Zhuang, Yongnan"
+msgstr "Chuang yongnan"
#. name for zyp
msgid "Zyphe"
@@ -30843,4 +30843,4 @@ msgstr "Zaza"
#. name for zzj
msgid "Zhuang, Zuojiang"
-msgstr "Zhuang, Zuojiang"
+msgstr "Chuang zuojiang"
diff --git a/setup/iso_639/et.po b/setup/iso_639/et.po
index f2ae4131bb..b0550432b0 100644
--- a/setup/iso_639/et.po
+++ b/setup/iso_639/et.po
@@ -9,15 +9,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 07:11+0000\n"
"Last-Translator: Tõivo Leedjärv \n"
"Language-Team: Estonian \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 04:42+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:00+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: et\n"
#. name for aaa
diff --git a/setup/iso_639/eu.po b/setup/iso_639/eu.po
index 08f9cd2277..e076ced5fc 100644
--- a/setup/iso_639/eu.po
+++ b/setup/iso_639/eu.po
@@ -8,15 +8,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 08:03+0000\n"
"Last-Translator: Piarres Beobide \n"
"Language-Team: Euskara \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 04:36+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 04:55+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: eu\n"
#. name for aaa
diff --git a/setup/iso_639/fa.po b/setup/iso_639/fa.po
index c6220934b7..bce4eefede 100644
--- a/setup/iso_639/fa.po
+++ b/setup/iso_639/fa.po
@@ -11,15 +11,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 04:07+0000\n"
"Last-Translator: Roozbeh Pournader \n"
"Language-Team: Persian \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 05:02+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:19+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: fa\n"
#. name for aaa
diff --git a/setup/iso_639/fi.po b/setup/iso_639/fi.po
index 792c687344..27e92275d2 100644
--- a/setup/iso_639/fi.po
+++ b/setup/iso_639/fi.po
@@ -8,15 +8,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 07:53+0000\n"
"Last-Translator: Tommi Vainikainen \n"
"Language-Team: Finnish \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 04:43+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:01+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: fi\n"
#. name for aaa
diff --git a/setup/iso_639/fr.po b/setup/iso_639/fr.po
index 83c7a68ca8..36bbda7c8f 100644
--- a/setup/iso_639/fr.po
+++ b/setup/iso_639/fr.po
@@ -12,15 +12,15 @@ msgstr ""
"Project-Id-Version: fr\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 04:31+0000\n"
"Last-Translator: Christian Perrier \n"
"Language-Team: French \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 04:43+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:02+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: fr\n"
#. name for aaa
diff --git a/setup/iso_639/ga.po b/setup/iso_639/ga.po
index 54afc33545..f7da11c495 100644
--- a/setup/iso_639/ga.po
+++ b/setup/iso_639/ga.po
@@ -14,15 +14,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 08:59+0000\n"
"Last-Translator: Kevin Patrick Scannell \n"
"Language-Team: Irish \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 04:45+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:04+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: ga\n"
#. name for aaa
diff --git a/setup/iso_639/gez.po b/setup/iso_639/gez.po
index 96c910c933..9cce35ebb3 100644
--- a/setup/iso_639/gez.po
+++ b/setup/iso_639/gez.po
@@ -9,15 +9,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 06:19+0000\n"
"Last-Translator: Alastair McKinstry \n"
"Language-Team: Geez\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 04:44+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:03+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: \n"
#. name for aaa
diff --git a/setup/iso_639/gl.po b/setup/iso_639/gl.po
index 892b9109c5..59a2ad9ddf 100644
--- a/setup/iso_639/gl.po
+++ b/setup/iso_639/gl.po
@@ -11,15 +11,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 07:22+0000\n"
"Last-Translator: Fran Diéguez \n"
"Language-Team: Galician \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 04:45+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:04+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: gl\n"
#. name for aaa
diff --git a/setup/iso_639/gu.po b/setup/iso_639/gu.po
index ac7f501d5f..4461fc76b1 100644
--- a/setup/iso_639/gu.po
+++ b/setup/iso_639/gu.po
@@ -11,15 +11,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 06:48+0000\n"
"Last-Translator: Ankit Patel \n"
"Language-Team: Gujarati \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 04:47+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:05+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: gu\n"
#. name for aaa
diff --git a/setup/iso_639/he.po b/setup/iso_639/he.po
index 996e45bd75..ee7078ea92 100644
--- a/setup/iso_639/he.po
+++ b/setup/iso_639/he.po
@@ -10,15 +10,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 06:12+0000\n"
"Last-Translator: Alastair McKinstry \n"
"Language-Team: Hebrew \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 04:47+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:06+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: he\n"
#. name for aaa
diff --git a/setup/iso_639/hi.po b/setup/iso_639/hi.po
index d9f5d79ee1..30afc8abb3 100644
--- a/setup/iso_639/hi.po
+++ b/setup/iso_639/hi.po
@@ -9,15 +9,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 03:37+0000\n"
"Last-Translator: Alastair McKinstry \n"
"Language-Team: Hindi\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 04:48+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:06+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: \n"
#. name for aaa
diff --git a/setup/iso_639/hr.po b/setup/iso_639/hr.po
index 8052290b9d..de534b5c09 100644
--- a/setup/iso_639/hr.po
+++ b/setup/iso_639/hr.po
@@ -8,15 +8,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 06:17+0000\n"
"Last-Translator: Alastair McKinstry \n"
"Language-Team: Croatian \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 05:05+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:23+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: hr\n"
#. name for aaa
diff --git a/setup/iso_639/hu.po b/setup/iso_639/hu.po
index b0c790250a..eb07b0b800 100644
--- a/setup/iso_639/hu.po
+++ b/setup/iso_639/hu.po
@@ -8,15 +8,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 08:42+0000\n"
"Last-Translator: SZERVÑC Attila \n"
"Language-Team: Hungarian \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 04:48+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:07+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"X-Poedit-Country: HUNGARY\n"
"Language: hu\n"
"X-Poedit-Language: Hungarian\n"
diff --git a/setup/iso_639/id.po b/setup/iso_639/id.po
index 92f9364b60..0229335398 100644
--- a/setup/iso_639/id.po
+++ b/setup/iso_639/id.po
@@ -10,15 +10,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 05:34+0000\n"
"Last-Translator: Alastair McKinstry \n"
"Language-Team: Indonesia \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 04:49+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:08+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: \n"
#. name for aaa
diff --git a/setup/iso_639/is.po b/setup/iso_639/is.po
index 1841dc5bff..0e1086a051 100644
--- a/setup/iso_639/is.po
+++ b/setup/iso_639/is.po
@@ -10,15 +10,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 09:09+0000\n"
"Last-Translator: Alastair McKinstry \n"
"Language-Team: Icelandic \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 04:49+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:07+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: is\n"
#. name for aaa
diff --git a/setup/iso_639/it.po b/setup/iso_639/it.po
index f2a3800fe3..b1eb0997d6 100644
--- a/setup/iso_639/it.po
+++ b/setup/iso_639/it.po
@@ -11,15 +11,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 04:52+0000\n"
"Last-Translator: Milo Casagrande \n"
"Language-Team: Italian \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 04:50+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:08+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: it\n"
#. name for aaa
diff --git a/setup/iso_639/ja.po b/setup/iso_639/ja.po
index d6597c7436..a4a8fe945c 100644
--- a/setup/iso_639/ja.po
+++ b/setup/iso_639/ja.po
@@ -11,15 +11,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 06:06+0000\n"
"Last-Translator: IIDA Yosiaki \n"
"Language-Team: Japanese \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 04:50+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:09+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: ja\n"
#. name for aaa
diff --git a/setup/iso_639/kn.po b/setup/iso_639/kn.po
index a605893b2e..1367824d02 100644
--- a/setup/iso_639/kn.po
+++ b/setup/iso_639/kn.po
@@ -9,15 +9,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 06:27+0000\n"
"Last-Translator: shankar Prasad \n"
"Language-Team: Kannada \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 04:51+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:10+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: kn\n"
#. name for aaa
diff --git a/setup/iso_639/ko.po b/setup/iso_639/ko.po
index ef607216b6..fb816cf31a 100644
--- a/setup/iso_639/ko.po
+++ b/setup/iso_639/ko.po
@@ -9,15 +9,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 04:09+0000\n"
"Last-Translator: Eungkyu Song \n"
"Language-Team: Korean \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 04:53+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:11+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: ko\n"
#. name for aaa
diff --git a/setup/iso_639/kok.po b/setup/iso_639/kok.po
index 9aa9786bb8..112d948563 100644
--- a/setup/iso_639/kok.po
+++ b/setup/iso_639/kok.po
@@ -9,15 +9,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 08:49+0000\n"
"Last-Translator: Alastair McKinstry \n"
"Language-Team: Konkani\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 04:52+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:11+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: \n"
#. name for aaa
diff --git a/setup/iso_639/lt.po b/setup/iso_639/lt.po
index de61d84699..c6be7b2d1a 100644
--- a/setup/iso_639/lt.po
+++ b/setup/iso_639/lt.po
@@ -9,15 +9,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 05:30+0000\n"
"Last-Translator: Kęstutis Biliūnas \n"
"Language-Team: Lithuanian \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 04:54+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:12+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: lt\n"
#. name for aaa
diff --git a/setup/iso_639/lv.po b/setup/iso_639/lv.po
index 0816047527..bf8e18d8fd 100644
--- a/setup/iso_639/lv.po
+++ b/setup/iso_639/lv.po
@@ -9,15 +9,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 09:02+0000\n"
"Last-Translator: Alastair McKinstry \n"
"Language-Team: Latvian\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 04:53+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:12+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: \n"
#. name for aaa
diff --git a/setup/iso_639/mi.po b/setup/iso_639/mi.po
index bd3cf67b3e..6ed7eeb2d4 100644
--- a/setup/iso_639/mi.po
+++ b/setup/iso_639/mi.po
@@ -9,15 +9,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 03:56+0000\n"
"Last-Translator: James Gasson \n"
"Language-Team: Maori \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 04:55+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:13+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: mi\n"
#. name for aaa
diff --git a/setup/iso_639/mk.po b/setup/iso_639/mk.po
index ffad969cb5..95973c18f9 100644
--- a/setup/iso_639/mk.po
+++ b/setup/iso_639/mk.po
@@ -9,15 +9,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 03:58+0000\n"
"Last-Translator: Alastair McKinstry \n"
"Language-Team: Macedonian\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 04:54+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:13+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: \n"
#. name for aaa
diff --git a/setup/iso_639/mn.po b/setup/iso_639/mn.po
index eb0a3261a9..3a382c1251 100644
--- a/setup/iso_639/mn.po
+++ b/setup/iso_639/mn.po
@@ -11,15 +11,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 04:28+0000\n"
"Last-Translator: Alastair McKinstry \n"
"Language-Team: Mongolian \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 04:57+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:15+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: mn\n"
#. name for aaa
diff --git a/setup/iso_639/mr.po b/setup/iso_639/mr.po
index 6a108ae791..e4879f4632 100644
--- a/setup/iso_639/mr.po
+++ b/setup/iso_639/mr.po
@@ -11,15 +11,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 06:51+0000\n"
"Last-Translator: Sandeep Shedmake \n"
"Language-Team: Marathi \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 04:55+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:14+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: mr\n"
#. name for aaa
diff --git a/setup/iso_639/ms.po b/setup/iso_639/ms.po
index 0b3340802f..b0c0bd3f86 100644
--- a/setup/iso_639/ms.po
+++ b/setup/iso_639/ms.po
@@ -9,15 +9,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 06:24+0000\n"
"Last-Translator: Hasbullah Bin Pit \n"
"Language-Team: Projek Gabai \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 04:56+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:14+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: \n"
#. name for aaa
diff --git a/setup/iso_639/mt.po b/setup/iso_639/mt.po
index b626564ae3..f54cca87e5 100644
--- a/setup/iso_639/mt.po
+++ b/setup/iso_639/mt.po
@@ -10,15 +10,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 04:00+0000\n"
"Last-Translator: Alastair McKinstry \n"
"Language-Team: Maltese \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 04:56+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:15+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: mt\n"
#. name for aaa
diff --git a/setup/iso_639/nb.po b/setup/iso_639/nb.po
index 72cbb1d349..8e0406974c 100644
--- a/setup/iso_639/nb.po
+++ b/setup/iso_639/nb.po
@@ -9,15 +9,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 03:54+0000\n"
"Last-Translator: Alastair McKinstry \n"
"Language-Team: Norsk bokmål\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 04:58+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:17+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: \n"
#. name for aaa
diff --git a/setup/iso_639/nl.po b/setup/iso_639/nl.po
index 3b4794c49f..398233d8f9 100644
--- a/setup/iso_639/nl.po
+++ b/setup/iso_639/nl.po
@@ -11,15 +11,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-31 14:12+0000\n"
"Last-Translator: drMerry \n"
"Language-Team: Dutch \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-09-01 04:38+0000\n"
-"X-Generator: Launchpad (build 13827)\n"
+"X-Launchpad-Export-Date: 2011-09-03 04:59+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: nl\n"
#. name for aaa
diff --git a/setup/iso_639/nn.po b/setup/iso_639/nn.po
index c2ac85ccee..f60c8a1cf9 100644
--- a/setup/iso_639/nn.po
+++ b/setup/iso_639/nn.po
@@ -11,15 +11,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 04:51+0000\n"
"Last-Translator: Karl Ove Hufthammer \n"
"Language-Team: Norwegian Nynorsk \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 04:58+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:16+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: nn\n"
#. name for aaa
diff --git a/setup/iso_639/nso.po b/setup/iso_639/nso.po
index 602e797e77..4da27aedb6 100644
--- a/setup/iso_639/nso.po
+++ b/setup/iso_639/nso.po
@@ -7,15 +7,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 05:46+0000\n"
"Last-Translator: Jerry Thobejane \n"
"Language-Team: Northern Sotho \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 04:59+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:17+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: nso\n"
#. name for aaa
diff --git a/setup/iso_639/oc.po b/setup/iso_639/oc.po
index 2e2624828b..eb4cc3ef0a 100644
--- a/setup/iso_639/oc.po
+++ b/setup/iso_639/oc.po
@@ -9,15 +9,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 04:03+0000\n"
"Last-Translator: Joan Luc Labòrda \n"
"Language-Team: OCCITAN \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 04:59+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:18+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: \n"
#. name for aaa
diff --git a/setup/iso_639/or.po b/setup/iso_639/or.po
index 4ff6cc10ce..e8c223a250 100644
--- a/setup/iso_639/or.po
+++ b/setup/iso_639/or.po
@@ -8,15 +8,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 03:32+0000\n"
"Last-Translator: Manoj Kumar Giri \n"
"Language-Team: Oriya \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 05:00+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:18+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: or\n"
#. name for aaa
diff --git a/setup/iso_639/pa.po b/setup/iso_639/pa.po
index 6650ea70d5..ed4631c6ab 100644
--- a/setup/iso_639/pa.po
+++ b/setup/iso_639/pa.po
@@ -13,15 +13,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 04:14+0000\n"
"Last-Translator: A S Alam \n"
"Language-Team: Punjabi \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 05:00+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:19+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: pa\n"
#. name for aaa
diff --git a/setup/iso_639/pl.po b/setup/iso_639/pl.po
index b366916a69..f005db2253 100644
--- a/setup/iso_639/pl.po
+++ b/setup/iso_639/pl.po
@@ -12,15 +12,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 03:49+0000\n"
"Last-Translator: Jakub Bogusz \n"
"Language-Team: Polish \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 05:02+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:20+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: pl\n"
#. name for aaa
diff --git a/setup/iso_639/ps.po b/setup/iso_639/ps.po
index e66ba1fd66..1b27292d47 100644
--- a/setup/iso_639/ps.po
+++ b/setup/iso_639/ps.po
@@ -9,15 +9,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 05:58+0000\n"
"Last-Translator: Alastair McKinstry \n"
"Language-Team: Pushto\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 05:03+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:21+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: \n"
#. name for aaa
diff --git a/setup/iso_639/pt.po b/setup/iso_639/pt.po
index c63dca63ab..d597b30505 100644
--- a/setup/iso_639/pt.po
+++ b/setup/iso_639/pt.po
@@ -9,15 +9,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 07:19+0000\n"
"Last-Translator: Filipe Maia \n"
"Language-Team: Portuguese \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 05:03+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:20+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: pt\n"
#. name for aaa
diff --git a/setup/iso_639/pt_BR.po b/setup/iso_639/pt_BR.po
index cc40c20570..b8a84fd856 100644
--- a/setup/iso_639/pt_BR.po
+++ b/setup/iso_639/pt_BR.po
@@ -8,15 +8,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 06:13+0000\n"
"Last-Translator: Juan Carlos Castro y Castro \n"
"Language-Team: Brazilian Portuguese\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 05:16+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:34+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: \n"
#. name for aaa
diff --git a/setup/iso_639/ro.po b/setup/iso_639/ro.po
index 23c2a49318..1e39008b85 100644
--- a/setup/iso_639/ro.po
+++ b/setup/iso_639/ro.po
@@ -8,15 +8,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 05:33+0000\n"
"Last-Translator: Alastair McKinstry \n"
"Language-Team: Romanian \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 05:04+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:21+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: ro\n"
"PO-Creation-Date: 2000-09-24 15:45+0300\n"
diff --git a/setup/iso_639/ru.po b/setup/iso_639/ru.po
index d82b28ad53..b645a09794 100644
--- a/setup/iso_639/ru.po
+++ b/setup/iso_639/ru.po
@@ -12,15 +12,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 06:08+0000\n"
"Last-Translator: Yuri Kozlov \n"
"Language-Team: Russian \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 05:04+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:22+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: ru\n"
#. name for aaa
diff --git a/setup/iso_639/rw.po b/setup/iso_639/rw.po
index 58639163ba..e47704953c 100644
--- a/setup/iso_639/rw.po
+++ b/setup/iso_639/rw.po
@@ -9,15 +9,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 05:43+0000\n"
"Last-Translator: Steve Murphy \n"
"Language-Team: Kinyarwanda \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 04:52+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:10+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: rw\n"
#. name for aaa
diff --git a/setup/iso_639/sk.po b/setup/iso_639/sk.po
index 03979d6f6a..f78e7a6961 100644
--- a/setup/iso_639/sk.po
+++ b/setup/iso_639/sk.po
@@ -10,15 +10,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 05:52+0000\n"
"Last-Translator: Alastair McKinstry \n"
"Language-Team: Slovak \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 05:06+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:24+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: sk\n"
#. name for aaa
diff --git a/setup/iso_639/sl.po b/setup/iso_639/sl.po
index ef09635b8c..a3f0dbd0f6 100644
--- a/setup/iso_639/sl.po
+++ b/setup/iso_639/sl.po
@@ -9,15 +9,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 04:04+0000\n"
"Last-Translator: Primoz Peterlin \n"
"Language-Team: Slovenian \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 05:06+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:24+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: sl\n"
#. name for aaa
diff --git a/setup/iso_639/sr.po b/setup/iso_639/sr.po
index 4942910d39..b1b6c27e3f 100644
--- a/setup/iso_639/sr.po
+++ b/setup/iso_639/sr.po
@@ -7,15 +7,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 05:37+0000\n"
"Last-Translator: Данило Шеган \n"
"Language-Team: Serbian \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 05:05+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:23+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: sr\n"
#. name for aaa
diff --git a/setup/iso_639/sr@latin.po b/setup/iso_639/sr@latin.po
index ffd0656f3d..60552c387a 100644
--- a/setup/iso_639/sr@latin.po
+++ b/setup/iso_639/sr@latin.po
@@ -7,15 +7,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 07:12+0000\n"
"Last-Translator: Данило Шеган \n"
"Language-Team: Serbian \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 05:17+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:35+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: sr\n"
#. name for aaa
diff --git a/setup/iso_639/sv.po b/setup/iso_639/sv.po
index 38b6d038f0..f61bc62c7d 100644
--- a/setup/iso_639/sv.po
+++ b/setup/iso_639/sv.po
@@ -29,15 +29,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 08:54+0000\n"
"Last-Translator: Christian Rose \n"
"Language-Team: Swedish \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 05:07+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:25+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: sv\n"
#. name for aaa
diff --git a/setup/iso_639/ta.po b/setup/iso_639/ta.po
index 4464d639ab..952c218a78 100644
--- a/setup/iso_639/ta.po
+++ b/setup/iso_639/ta.po
@@ -12,15 +12,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 08:05+0000\n"
"Last-Translator: Felix \n"
"Language-Team: Tamil \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 05:08+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:26+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"X-Poedit-Country: INDIA\n"
"Language: ta\n"
"X-Poedit-Language: Tamil\n"
diff --git a/setup/iso_639/th.po b/setup/iso_639/th.po
index 75337f8bfa..feb435e4db 100644
--- a/setup/iso_639/th.po
+++ b/setup/iso_639/th.po
@@ -11,15 +11,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 06:45+0000\n"
"Last-Translator: Theppitak Karoonboonyanan \n"
"Language-Team: Thai \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 05:09+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:27+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: th\n"
#. name for aaa
diff --git a/setup/iso_639/ti.po b/setup/iso_639/ti.po
index 5f90c2e24a..c355651630 100644
--- a/setup/iso_639/ti.po
+++ b/setup/iso_639/ti.po
@@ -9,15 +9,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 04:27+0000\n"
"Last-Translator: Alastair McKinstry \n"
"Language-Team: Tigrinya\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 05:10+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:28+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: \n"
#. name for aaa
diff --git a/setup/iso_639/tig.po b/setup/iso_639/tig.po
index 7886bb21da..90f8ce8fca 100644
--- a/setup/iso_639/tig.po
+++ b/setup/iso_639/tig.po
@@ -9,15 +9,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 03:46+0000\n"
"Last-Translator: Alastair McKinstry \n"
"Language-Team: Tigre\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 05:10+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:27+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: \n"
#. name for aaa
diff --git a/setup/iso_639/tr.po b/setup/iso_639/tr.po
index efabf60b14..90d1796502 100644
--- a/setup/iso_639/tr.po
+++ b/setup/iso_639/tr.po
@@ -9,14 +9,14 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-09-01 09:30+0000\n"
"Last-Translator: zeugma \n"
"Language-Team: Turkish \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-09-02 04:41+0000\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:28+0000\n"
"X-Generator: Launchpad (build 13830)\n"
"Language: tr\n"
diff --git a/setup/iso_639/tt.po b/setup/iso_639/tt.po
index 7a2671f173..9c81fd7925 100644
--- a/setup/iso_639/tt.po
+++ b/setup/iso_639/tt.po
@@ -9,15 +9,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 07:06+0000\n"
"Last-Translator: al Beri \n"
"Language-Team: Tatarish\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 05:09+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:26+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: \n"
#. name for aaa
diff --git a/setup/iso_639/uk.po b/setup/iso_639/uk.po
index d8982da71d..51513cbbd4 100644
--- a/setup/iso_639/uk.po
+++ b/setup/iso_639/uk.po
@@ -11,15 +11,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 05:12+0000\n"
"Last-Translator: yurchor \n"
"Language-Team: Ukrainian \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 05:11+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:29+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: uk\n"
#. name for aaa
diff --git a/setup/iso_639/ve.po b/setup/iso_639/ve.po
index 1aef9e5237..c859f08433 100644
--- a/setup/iso_639/ve.po
+++ b/setup/iso_639/ve.po
@@ -9,15 +9,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 07:51+0000\n"
"Last-Translator: Fhatuwani Rambau \n"
"Language-Team: Venda \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 05:12+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:30+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: ve\n"
#. name for aaa
diff --git a/setup/iso_639/vi.po b/setup/iso_639/vi.po
index e00b0fd1ba..ce0f44605f 100644
--- a/setup/iso_639/vi.po
+++ b/setup/iso_639/vi.po
@@ -8,15 +8,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 06:03+0000\n"
"Last-Translator: Clytie Siddall \n"
"Language-Team: Vietnamese \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 05:12+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:30+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: vi\n"
#. name for aaa
diff --git a/setup/iso_639/wa.po b/setup/iso_639/wa.po
index 11944c9eae..adec6af394 100644
--- a/setup/iso_639/wa.po
+++ b/setup/iso_639/wa.po
@@ -8,15 +8,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
-"POT-Creation-Date: 2011-08-27 02:50+0000\n"
+"POT-Creation-Date: 2011-09-02 16:21+0000\n"
"PO-Revision-Date: 2011-08-27 03:42+0000\n"
"Last-Translator: Pablo Saratxaga \n"
"Language-Team: Walloon \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-08-28 05:13+0000\n"
-"X-Generator: Launchpad (build 13794)\n"
+"X-Launchpad-Export-Date: 2011-09-03 05:31+0000\n"
+"X-Generator: Launchpad (build 13830)\n"
"Language: \n"
#. name for aaa
diff --git a/setup/iso_639/xh.po b/setup/iso_639/xh.po
index 97c228af91..800d3ee9ac 100644
--- a/setup/iso_639/xh.po
+++ b/setup/iso_639/xh.po
@@ -9,15 +9,15 @@ msgstr ""
"Project-Id-Version: iso_639_3\n"
"Report-Msgid-Bugs-To: Debian iso-codes team