Merge from trunk

Author: Charles Haley
Date:   2011-01-16 10:40:42 +00:00
Commit: 04869f7f5d
15 changed files with 270 additions and 193 deletions

View File

@@ -1,5 +1,5 @@
 from calibre.web.feeds.recipes import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import BeautifulSoup
+#from calibre.ebooks.BeautifulSoup import BeautifulSoup
 from urllib import quote
 class SportsIllustratedRecipe(BasicNewsRecipe) :
@@ -91,7 +91,7 @@ class SportsIllustratedRecipe(BasicNewsRecipe) :
 # expire : no idea what value to use
 # All this comes from the Javascript function that redirects to the print version. It's called PT() and is defined in the file 48.js
-def preprocess_html(self, soup):
+'''def preprocess_html(self, soup):
 header = soup.find('div', attrs = {'class' : 'siv_artheader'})
 homeMadeSoup = BeautifulSoup('<html><head></head><body></body></html>')
 body = homeMadeSoup.body
@@ -115,4 +115,5 @@ class SportsIllustratedRecipe(BasicNewsRecipe) :
 body.append(para)
 return homeMadeSoup
+'''

View File

@@ -33,6 +33,6 @@ class SNE(USBMS):
 STORAGE_CARD_VOLUME_LABEL = 'SNE Storage Card'
 EBOOK_DIR_MAIN = EBOOK_DIR_CARD_A = 'Books'
-SUPPORTS_SUB_DIRS = True
+SUPPORTS_SUB_DIRS = False

View File

@@ -286,7 +286,6 @@ class RTFInput(InputFormatPlugin):
 try:
 xml = self.generate_xml(stream.name)
 except RtfInvalidCodeException, e:
-raise
 raise ValueError(_('This RTF file has a feature calibre does not '
 'support. Convert it to HTML first and then try it.\n%s')%e)

View File

@@ -226,10 +226,6 @@ class ParseRtf:
 try:
 return_value = process_tokens_obj.process_tokens()
 except InvalidRtfException, msg:
-try:
-os.remove(self.__temp_file)
-except OSError:
-pass
 #Check to see if the file is correctly encoded
 encode_obj = default_encoding.DefaultEncoding(
 in_file = self.__temp_file,
@@ -241,14 +237,17 @@ class ParseRtf:
 check_encoding_obj = check_encoding.CheckEncoding(
 bug_handler = RtfInvalidCodeException,
 )
-enc = encode_obj.get_codepage()
-if enc != 'mac_roman':
-enc = 'cp' + enc
+enc = 'cp' + encode_obj.get_codepage()
+msg = 'Exception in token processing'
 if check_encoding_obj.check_encoding(self.__file, enc):
 file_name = self.__file if isinstance(self.__file, str) \
 else self.__file.encode('utf-8')
 msg = 'File %s does not appear to be correctly encoded.\n' % file_name
-raise InvalidRtfException, msg
+try:
+os.remove(self.__temp_file)
+except OSError:
+pass
+raise InvalidRtfException, msg
 delete_info_obj = delete_info.DeleteInfo(
 in_file = self.__temp_file,
 copy = self.__copy,
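
Note: the hunk above reworks the failure path so the code page is always prefixed with 'cp', the source file is checked for an encoding mismatch to pick a better error message, and the temporary file is removed before the exception is re-raised. A minimal sketch of that flow, using an illustrative decodes_with() helper rather than calibre's CheckEncoding class:

import codecs, os

def decodes_with(path, enc):
    # True if the file decodes cleanly with the given codec
    try:
        with codecs.open(path, 'r', enc) as f:
            f.read()
        return True
    except UnicodeDecodeError:
        return False

def fail_invalid_rtf(src_path, temp_file, codepage='1252'):
    msg = 'Exception in token processing'
    if not decodes_with(src_path, 'cp' + codepage):
        msg = 'File %s does not appear to be correctly encoded.\n' % src_path
    try:
        os.remove(temp_file)   # never leave the intermediate file behind
    except OSError:
        pass
    raise ValueError(msg)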

View File

@@ -74,9 +74,6 @@ class DefaultEncoding:
 if not self.__datafetched:
 self._encoding()
 self.__datafetched = True
-if self.__platform == 'Macintosh':
-code_page = self.__code_page
-else:
 code_page = 'ansicpg' + self.__code_page
 return self.__platform, code_page, self.__default_num
@@ -94,49 +91,60 @@ class DefaultEncoding:
 def _encoding(self):
 with open(self.__file, 'r') as read_obj:
+cpfound = False
 if not self.__fetchraw:
 for line in read_obj:
 self.__token_info = line[:16]
 if self.__token_info == 'mi<mk<rtfhed-end':
 break
-if self.__token_info == 'cw<ri<ansi-codpg':
-#cw<ri<ansi-codpg<nu<10000
-self.__code_page = line[20:-1] if int(line[20:-1]) \
-else '1252'
 if self.__token_info == 'cw<ri<macintosh_':
 self.__platform = 'Macintosh'
-self.__code_page = 'mac_roman'
 elif self.__token_info == 'cw<ri<pc________':
 self.__platform = 'IBMPC'
-self.__code_page = '437'
 elif self.__token_info == 'cw<ri<pca_______':
 self.__platform = 'OS/2'
-self.__code_page = '850'
+if self.__token_info == 'cw<ri<ansi-codpg' \
+and int(line[20:-1]):
+self.__code_page = line[20:-1]
 if self.__token_info == 'cw<ri<deflt-font':
 self.__default_num = line[20:-1]
+cpfound = True
 #cw<ri<deflt-font<nu<0
+if self.__platform != 'Windows' and \
+not cpfound:
+if self.__platform == 'Macintosh':
+self.__code_page = '10000'
+elif self.__platform == 'IBMPC':
+self.__code_page = '437'
+elif self.__platform == 'OS/2':
+self.__code_page = '850'
 else:
 fenc = re.compile(r'\\(mac|pc|ansi|pca)[\\ \{\}\t\n]+')
 fenccp = re.compile(r'\\ansicpg(\d+)[\\ \{\}\t\n]+')
 for line in read_obj:
+if fenc.search(line):
+enc = fenc.search(line).group(1)
 if fenccp.search(line):
 cp = fenccp.search(line).group(1)
 if not int(cp):
 self.__code_page = cp
+cpfound = True
 break
-if fenc.search(line):
-enc = fenc.search(line).group(1)
+if self.__platform != 'Windows' and \
+not cpfound:
 if enc == 'mac':
-self.__code_page = 'mac_roman'
+self.__code_page = '10000'
 elif enc == 'pc':
 self.__code_page = '437'
 elif enc == 'pca':
 self.__code_page = '850'
-# if __name__ == '__main__':
-# encode_obj = DefaultEncoding(
-# in_file = sys.argv[1],
-# bug_handler = Exception,
-# check_raw = True,
-# )
-# print encode_obj.get_codepage()
+if __name__ == '__main__':
+import sys
+encode_obj = DefaultEncoding(
+in_file = sys.argv[1],
+bug_handler = Exception,
+check_raw = True,
+)
+print encode_obj.get_codepage()
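
Note: the hunk above replaces hard-coded per-platform code pages with a fallback that is used only when no usable \ansicpg value was found. A standalone sketch of that fallback (function name and defaults are illustrative, not calibre's API):

def guess_codepage(platform, ansicpg=None):
    if ansicpg:                        # an explicit \ansicpgNNNN value wins
        return ansicpg
    fallback = {
        'Macintosh': '10000',          # \mac  -> Mac Roman
        'IBMPC': '437',                # \pc   -> OEM US
        'OS/2': '850',                 # \pca  -> OEM Latin-1
    }
    return fallback.get(platform, '1252')   # plain \ansi default

print(guess_codepage('Macintosh'))        # 10000
print(guess_codepage('Windows', '1251'))  # 1251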

View File

@@ -20,7 +20,7 @@ import sys, os, tempfile
 from calibre.ebooks.rtf2xml import copy
 class DeleteInfo:
-"""Delelet unecessary destination groups"""
+"""Delete unecessary destination groups"""
 def __init__(self,
 in_file ,
 bug_handler,
@@ -31,17 +31,14 @@ class DeleteInfo:
 self.__bug_handler = bug_handler
 self.__copy = copy
 self.__write_to = tempfile.mktemp()
+self.__run_level = run_level
+self.__initiate_allow()
 self.__bracket_count= 0
 self.__ob_count = 0
 self.__cb_count = 0
-# self.__after_asterisk = False
-# self.__delete = 0
-self.__initiate_allow()
 self.__ob = 0
 self.__write_cb = False
-self.__run_level = run_level
 self.__found_delete = False
-# self.__list = False
 def __initiate_allow(self):
 """
@@ -57,6 +54,8 @@ class DeleteInfo:
 'cw<an<annotation',
 'cw<cm<comment___',
 'cw<it<lovr-table',
+# info table
+'cw<di<company___',
 # 'cw<ls<list______',
 )
 self.__not_allowable = (
@@ -116,7 +115,6 @@
 """
 # Test for {\*}, in which case don't enter
 # delete state
-# self.__after_asterisk = False # only enter this function once
 self.__found_delete = True
 if self.__token_info == 'cb<nu<clos-brack':
 if self.__delete_count == self.__cb_count:
@@ -128,7 +126,7 @@
 # not sure what happens here!
 # believe I have a '{\*}
 if self.__run_level > 3:
-msg = 'flag problem\n'
+msg = 'Flag problem\n'
 raise self.__bug_handler, msg
 return True
 elif self.__token_info in self.__allowable :
@@ -173,8 +171,8 @@
 Return True for all control words.
 Return False otherwise.
 """
-if self.__delete_count == self.__cb_count and self.__token_info ==\
-'cb<nu<clos-brack':
+if self.__delete_count == self.__cb_count and \
+self.__token_info == 'cb<nu<clos-brack':
 self.__state = 'default'
 if self.__write_cb:
 self.__write_cb = False
@@ -186,32 +184,24 @@
 return False
 def delete_info(self):
-"""Main method for handling other methods. Read one line in at
+"""Main method for handling other methods. Read one line at
 a time, and determine whether to print the line based on the state."""
 with open(self.__file, 'r') as read_obj:
 with open(self.__write_to, 'w') as self.__write_obj:
 for line in read_obj:
 #ob<nu<open-brack<0001
-to_print = True
 self.__token_info = line[:16]
 if self.__token_info == 'ob<nu<open-brack':
 self.__ob_count = line[-5:-1]
 if self.__token_info == 'cb<nu<clos-brack':
 self.__cb_count = line[-5:-1]
+# Get action to perform
 action = self.__state_dict.get(self.__state)
 if not action:
-sys.stderr.write(_('No action in dictionary state is "%s" \n')
+sys.stderr.write('No action in dictionary state is "%s" \n'
 % self.__state)
-to_print = action(line)
-# if self.__after_asterisk:
-# to_print = self.__asterisk_func(line)
-# elif self.__list:
-# self.__in_list_func(line)
-# elif self.__delete:
-# to_print = self.__delete_func(line)
-# else:
-# to_print = self.__default_func(line)
-if to_print:
+# Print if allowed by action
+if action(line):
 self.__write_obj.write(line)
 copy_obj = copy.Copy(bug_handler = self.__bug_handler)
 if self.__copy:
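
Note: the delete_info() rewrite above drops the to_print flag, so every state handler now returns True when the current line should be written. A toy version of that dispatch loop, with made-up states and handlers:

import sys

class TinyStateMachine(object):
    def __init__(self):
        self.state = 'default'
        self.state_dict = {'default': self.default_func, 'delete': self.delete_func}

    def default_func(self, line):
        if line.startswith('ob<'):      # pretend an open bracket starts a group to delete
            self.state = 'delete'
            return False
        return True

    def delete_func(self, line):
        if line.startswith('cb<'):      # pretend a close bracket ends the deleted group
            self.state = 'default'
        return False

    def run(self, lines):
        for line in lines:
            action = self.state_dict.get(self.state)
            if not action:
                sys.stderr.write('No action in dictionary state is "%s" \n' % self.state)
                continue
            if action(line):            # write the line only if the handler allows it
                sys.stdout.write(line)

TinyStateMachine().run(['tx<kept\n', 'ob<group\n', 'tx<dropped\n', 'cb<group\n', 'tx<kept too\n'])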

View File

@@ -15,8 +15,10 @@
 # #
 # #
 #########################################################################
-import sys, os, tempfile
+import sys, os, tempfile, re
 from calibre.ebooks.rtf2xml import copy
 class Info:
 """
 Make tags for document-information
@@ -42,12 +44,14 @@ class Info:
 self.__copy = copy
 self.__run_level = run_level
 self.__write_to = tempfile.mktemp()
 def __initiate_values(self):
 """
 Initiate all values.
 """
 self.__text_string = ''
 self.__state = 'before_info_table'
+self.rmspace = re.compile(r'\s+')
 self.__state_dict = {
 'before_info_table': self.__before_info_table_func,
 'after_info_table': self.__after_info_table_func,
@@ -58,27 +62,49 @@ class Info:
 self.__info_table_dict = {
 'cw<di<title_____' : (self.__found_tag_with_text_func, 'title'),
 'cw<di<author____' : (self.__found_tag_with_text_func, 'author'),
+'cw<di<operator__' : (self.__found_tag_with_text_func, 'operator'),
+'cw<di<manager___' : (self.__found_tag_with_text_func, 'manager'),
+'cw<di<company___' : (self.__found_tag_with_text_func, 'company'),
 'cw<di<keywords__' : (self.__found_tag_with_text_func, 'keywords'),
+'cw<di<category__' : (self.__found_tag_with_text_func, 'category'),
 'cw<di<doc-notes_' : (self.__found_tag_with_text_func, 'doc-notes'),
 'cw<di<subject___' : (self.__found_tag_with_text_func, 'subject'),
-'cw<di<operator__' : (self.__found_tag_with_text_func, 'operator'),
+'cw<di<linkbase__' : (self.__found_tag_with_text_func, 'hyperlink-base'),
 'cw<di<create-tim' : (self.__found_tag_with_tokens_func, 'creation-time'),
 'cw<di<revis-time' : (self.__found_tag_with_tokens_func, 'revision-time'),
-'cw<di<edit-time_' : (self.__single_field_func, 'editing-time'),
+'cw<di<edit-time_' : (self.__found_tag_with_tokens_func, 'editing-time'),
+'cw<di<print-time' : (self.__found_tag_with_tokens_func, 'printing-time'),
+'cw<di<backuptime' : (self.__found_tag_with_tokens_func, 'backup-time'),
 'cw<di<num-of-wor' : (self.__single_field_func, 'number-of-words'),
 'cw<di<num-of-chr' : (self.__single_field_func, 'number-of-characters'),
+'cw<di<numofchrws' : (self.__single_field_func, 'number-of-characters-without-space'),
 'cw<di<num-of-pag' : (self.__single_field_func, 'number-of-pages'),
+'cw<di<version___' : (self.__single_field_func, 'version'),
+'cw<di<intern-ver' : (self.__single_field_func, 'internal-version-number'),
+'cw<di<internalID' : (self.__single_field_func, 'internal-id-number'),
 }
 self.__token_dict = {
 'year______' : 'year',
 'month_____' : 'month',
 'day_______' : 'day',
 'minute____' : 'minute',
+'second____' : 'second',
 'revis-time' : 'revision-time',
+'create-tim' : 'creation-time',
+'edit-time_' : 'editing-time',
+'print-time' : 'printing-time',
+'backuptime' : 'backup-time',
 'num-of-wor' : 'number-of-words',
 'num-of-chr' : 'number-of-characters',
+'numofchrws' : 'number-of-characters-without-space',
 'num-of-pag' : 'number-of-pages',
+'version___' : 'version',
+'intern-ver' : 'internal-version-number',
+'internalID' : 'internal-id-number',
 }
 def __before_info_table_func(self, line):
 """
 Required:
@@ -92,6 +118,7 @@ class Info:
 if self.__token_info == 'mi<mk<doc-in-beg':
 self.__state = 'in_info_table'
 self.__write_obj.write(line)
 def __in_info_table_func(self, line):
 """
 Requires:
@@ -112,6 +139,7 @@ class Info:
 action(line, tag)
 else:
 self.__write_obj.write(line)
 def __found_tag_with_text_func(self, line, tag):
 """
 Requires:
@@ -126,6 +154,7 @@ class Info:
 """
 self.__tag = tag
 self.__state = 'collect_text'
 def __collect_text_func(self, line):
 """
 Requires:
@@ -139,14 +168,17 @@ class Info:
 """
 if self.__token_info == 'mi<mk<docinf-end':
 self.__state = 'in_info_table'
-self.__write_obj.write(
-'mi<tg<open______<%s\n'
-'tx<nu<__________<%s\n'
-'mi<tg<close_____<%s\n' % (self.__tag, self.__text_string, self.__tag)
-)
+#Don't print empty tags
+if len(self.rmspace.sub('',self.__text_string)):
+self.__write_obj.write(
+'mi<tg<open______<%s\n'
+'tx<nu<__________<%s\n'
+'mi<tg<close_____<%s\n' % (self.__tag, self.__text_string, self.__tag)
+)
 self.__text_string = ''
 elif line[0:2] == 'tx':
 self.__text_string += line[17:-1]
 def __found_tag_with_tokens_func(self, line, tag):
 """
 Requires:
@@ -163,6 +195,7 @@ class Info:
 self.__state = 'collect_tokens'
 self.__text_string = 'mi<tg<empty-att_<%s' % tag
 #mi<tg<empty-att_<page-definition<margin>33\n
 def __collect_tokens_func(self, line):
 """
 Requires:
@@ -194,18 +227,19 @@ class Info:
 att = line[6:16]
 value = line[20:-1]
 att_changed = self.__token_dict.get(att)
-if att_changed == None:
+if att_changed is None:
 if self.__run_level > 3:
-msg = 'no dictionary match for %s\n' % att
+msg = 'No dictionary match for %s\n' % att
 raise self.__bug_handler, msg
 else:
 self.__text_string += '<%s>%s' % (att_changed, value)
 def __single_field_func(self, line, tag):
 value = line[20:-1]
 self.__write_obj.write(
-'mi<tg<empty-att_<%s'
-'<%s>%s\n' % (tag, tag, value)
+'mi<tg<empty-att_<%s<%s>%s\n' % (tag, tag, value)
 )
 def __after_info_table_func(self, line):
 """
 Requires:
@@ -217,6 +251,7 @@ class Info:
 the file.
 """
 self.__write_obj.write(line)
 def fix_info(self):
 """
 Requires:
@@ -234,20 +269,15 @@ class Info:
 information table, simply write the line to the output file.
 """
 self.__initiate_values()
-read_obj = open(self.__file, 'r')
-self.__write_obj = open(self.__write_to, 'w')
-line_to_read = 1
-while line_to_read:
-line_to_read = read_obj.readline()
-line = line_to_read
-self.__token_info = line[:16]
-action = self.__state_dict.get(self.__state)
-if action == None:
-sys.stderr.write('no no matching state in module styles.py\n')
-sys.stderr.write(self.__state + '\n')
-action(line)
-read_obj.close()
-self.__write_obj.close()
+with open(self.__file, 'r') as read_obj:
+with open(self.__write_to, 'wb') as self.__write_obj:
+for line in read_obj:
+self.__token_info = line[:16]
+action = self.__state_dict.get(self.__state)
+if action is None:
+sys.stderr.write('No matching state in module styles.py\n')
+sys.stderr.write(self.__state + '\n')
+action(line)
 copy_obj = copy.Copy(bug_handler = self.__bug_handler)
 if self.__copy:
 copy_obj.copy_file(self.__write_to, "info.data")
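
Note: the __collect_text_func() change above only writes a document-information tag when the collected text contains something other than whitespace, using the new rmspace regex. The same guard in isolation (names are illustrative):

import re

rmspace = re.compile(r'\s+')

def emit_info_tag(tag, text):
    # Don't print empty tags: skip when nothing is left after stripping whitespace
    if len(rmspace.sub('', text)):
        return ('mi<tg<open______<%s\n'
                'tx<nu<__________<%s\n'
                'mi<tg<close_____<%s\n' % (tag, text, tag))
    return ''

print(repr(emit_info_tag('title', '  \n ')))    # '' -> tag suppressed
print(repr(emit_info_tag('title', 'A Title')))  # open/text/close triplet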

View File

@@ -70,7 +70,7 @@ class ProcessTokens:
 ';' : ('mc', ';', self.ms_sub_func),
 # this must be wrong
 '-' : ('mc', '-', self.ms_sub_func),
-'line' : ('mi', 'hardline-break', self.hardline_func), #calibre
+'line' : ('mi', 'hardline-break', self.direct_conv_func), #calibre
 # misc => ml
 '*' : ('ml', 'asterisk__', self.default_func),
 ':' : ('ml', 'colon_____', self.default_func),
@@ -78,7 +78,6 @@
 'backslash' : ('nu', '\\', self.text_func),
 'ob' : ('nu', '{', self.text_func),
 'cb' : ('nu', '}', self.text_func),
-#'line' : ('nu', ' ', self.text_func), calibre
 # paragraph formatting => pf
 'page' : ('pf', 'page-break', self.default_func),
 'par' : ('pf', 'par-end___', self.default_func),
@@ -231,11 +230,15 @@
 'trhdr' : ('tb', 'row-header', self.default_func),
 # preamble => pr
 # document information => di
+# TODO integrate \userprops
 'info' : ('di', 'doc-info__', self.default_func),
+'title' : ('di', 'title_____', self.default_func),
 'author' : ('di', 'author____', self.default_func),
 'operator' : ('di', 'operator__', self.default_func),
-'title' : ('di', 'title_____', self.default_func),
+'manager' : ('di', 'manager___', self.default_func),
+'company' : ('di', 'company___', self.default_func),
 'keywords' : ('di', 'keywords__', self.default_func),
+'category' : ('di', 'category__', self.default_func),
 'doccomm' : ('di', 'doc-notes_', self.default_func),
 'comment' : ('di', 'doc-notes_', self.default_func),
 'subject' : ('di', 'subject___', self.default_func),
@@ -244,11 +247,19 @@
 'mo' : ('di', 'month_____', self.default_func),
 'dy' : ('di', 'day_______', self.default_func),
 'min' : ('di', 'minute____', self.default_func),
+'sec' : ('di', 'second____', self.default_func),
 'revtim' : ('di', 'revis-time', self.default_func),
+'edmins' : ('di', 'edit-time_', self.default_func),
+'printim' : ('di', 'print-time', self.default_func),
+'buptim' : ('di', 'backuptime', self.default_func),
 'nofwords' : ('di', 'num-of-wor', self.default_func),
 'nofchars' : ('di', 'num-of-chr', self.default_func),
+'nofcharsws' : ('di', 'numofchrws', self.default_func),
 'nofpages' : ('di', 'num-of-pag', self.default_func),
-'edmins' : ('di', 'edit-time_', self.default_func),
+'version' : ('di', 'version___', self.default_func),
+'vern' : ('di', 'intern-ver', self.default_func),
+'hlinkbase' : ('di', 'linkbase__', self.default_func),
+'id' : ('di', 'internalID', self.default_func),
 # headers and footers => hf
 'headerf' : ('hf', 'head-first', self.default_func),
 'headerl' : ('hf', 'head-left_', self.default_func),
@@ -605,7 +616,7 @@
 def ms_sub_func(self, pre, token, num):
 return 'tx<mc<__________<%s\n' % token
-def hardline_func(self, pre, token, num):
+def direct_conv_func(self, pre, token, num):
 return 'mi<tg<empty_____<%s\n' % token
 def default_func(self, pre, token, num):

View File

@@ -27,11 +27,13 @@ class Tokenize:
 bug_handler,
 copy = None,
 run_level = 1,
-):
+# out_file = None,
+):
 self.__file = in_file
 self.__bug_handler = bug_handler
 self.__copy = copy
 self.__write_to = tempfile.mktemp()
+# self.__out_file = out_file
 self.__compile_expressions()
 #variables
 self.__uc_char = 0
@@ -113,6 +115,8 @@
 def __sub_reg_split(self,input_file):
 input_file = self.__replace_spchar.mreplace(input_file)
+# this is for older RTF
+input_file = self.__par_exp.sub('\n\\par \n', input_file)
 input_file = self.__ms_hex_exp.sub("\\mshex0\g<1> ", input_file)
 input_file = self.__utf_ud.sub("\\{\\uc0 \g<1>\\}", input_file)
 #remove \n in bin data
@@ -127,7 +131,7 @@
 # this is for older RTF
 #line = re.sub(self.__par_exp, '\\par ', line)
 #return filter(lambda x: len(x) > 0, \
 #(self.__remove_line.sub('', x) for x in tokens))
 def __compile_expressions(self):
 SIMPLE_RPL = {
@@ -153,8 +157,6 @@
 # put a backslash in front of to eliminate special cases and
 # make processing easier
 "}": "\\}",
-# this is for older RTF
-r'\\$': '\\par ',
 }
 self.__replace_spchar = MReplace(SIMPLE_RPL)
 #add ;? in case of char following \u
@@ -168,10 +170,12 @@
 #why keep backslash whereas \is replaced before?
 #remove \n from endline char
 self.__splitexp = re.compile(r"(\\[{}]|\n|\\[^\s\\{}&]+(?:[ \t\r\f\v])?)")
+#this is for old RTF
+self.__par_exp = re.compile(r'\\\n+')
+# self.__par_exp = re.compile(r'\\$')
 #self.__bin_exp = re.compile(r"\\bin(-?\d{1,8}) {0,1}")
 #self.__utf_exp = re.compile(r"^\\u(-?\d{3,6})")
 #self.__splitexp = re.compile(r"(\\[\\{}]|{|}|\n|\\[^\s\\{}&]+(?:\s)?)")
-#self.__par_exp = re.compile(r'\\$')
 #self.__remove_line = re.compile(r'\n+')
 #self.__mixed_exp = re.compile(r"(\\[a-zA-Z]+\d+)(\D+)")
 ##self.num_exp = re.compile(r"(\*|:|[a-zA-Z]+)(.*)")
@@ -199,7 +203,24 @@
 copy_obj = copy.Copy(bug_handler = self.__bug_handler)
 if self.__copy:
 copy_obj.copy_file(self.__write_to, "tokenize.data")
+# if self.__out_file:
+# self.__file = self.__out_file
 copy_obj.rename(self.__write_to, self.__file)
 os.remove(self.__write_to)
 #self.__special_tokens = [ '_', '~', "'", '{', '}' ]
+# import sys
+# def main(args=sys.argv):
+# if len(args) < 1:
+# print 'No file'
+# return
+# file = 'data_tokens.txt'
+# if len(args) == 3:
+# file = args[2]
+# to = Tokenize(args[1], Exception, out_file = file)
+# to.tokenize()
+# if __name__ == '__main__':
+# sys.exit(main())
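
Note: the tokenizer change above moves the old-RTF handling out of the literal-replacement table and into a compiled regex, so a bare backslash at the end of a line (a paragraph mark in older RTF) is rewritten to an explicit \par control word. A small demonstration of the same substitution (sample text is made up):

import re

par_exp = re.compile(r'\\\n+')   # a backslash followed by one or more newlines
sample = 'first paragraph\\\nsecond paragraph'
print(par_exp.sub('\n\\\\par \n', sample))
# first paragraph
# \par
# second paragraph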

View File

@@ -505,7 +505,7 @@ class FileDialog(QObject):
 self.selected_files = []
 if mode == QFileDialog.AnyFile:
 f = unicode(QFileDialog.getSaveFileName(parent, title, initial_dir, ftext, ""))
-if f and os.path.exists(f):
+if f:
 self.selected_files.append(f)
 elif mode == QFileDialog.ExistingFile:
 f = unicode(QFileDialog.getOpenFileName(parent, title, initial_dir, ftext, ""))

View File

@@ -28,7 +28,7 @@ class GenerateCatalogAction(InterfaceAction):
 if not ids:
 return error_dialog(self.gui, _('No books selected'),
-_('No books selected to generate catalog for'),
+_('No books selected for catalog generation'),
 show=True)
 db = self.gui.library_view.model().db
@@ -55,9 +55,9 @@
 def catalog_generated(self, job):
 if job.result:
-# Search terms nulled catalog results
-return error_dialog(self.gui, _('No books found'),
-_("No books to catalog\nCheck job details"),
+# Error during catalog generation
+return error_dialog(self.gui, _('Catalog generation terminated'),
+job.result,
 show=True)
 if job.failed:
 return self.gui.job_exception(job)

View File

@@ -1144,7 +1144,9 @@ class EPUB_MOBI(CatalogPlugin):
 def error(self):
 def fget(self):
 return self.__error
-return property(fget=fget)
+def fset(self, val):
+self.__error = val
+return property(fget=fget,fset=fset)
 @dynamic_property
 def generateForKindle(self):
 def fget(self):
@@ -1411,6 +1413,88 @@
 except:
 pass
+def fetchBooksByAuthor(self):
+'''
+Generate a list of titles sorted by author from the database
+return = Success
+'''
+self.updateProgressFullStep("Sorting database")
+'''
+# Sort titles case-insensitive, by author
+self.booksByAuthor = sorted(self.booksByTitle,
+key=lambda x:(x['author_sort'].upper(), x['author_sort'].upper()))
+'''
+self.booksByAuthor = list(self.booksByTitle)
+self.booksByAuthor.sort(self.author_compare)
+if False and self.verbose:
+self.opts.log.info("fetchBooksByAuthor(): %d books" % len(self.booksByAuthor))
+self.opts.log.info(" %-30s %-20s %s" % ('title', 'series', 'series_index'))
+for title in self.booksByAuthor:
+self.opts.log.info((u" %-30s %-20s%5s " % \
+(title['title'][:30],
+title['series'][:20] if title['series'] else '',
+title['series_index'],
+)).encode('utf-8'))
+raise SystemExit
+# Build the unique_authors set from existing data
+authors = [(record['author'], record['author_sort'].capitalize()) for record in self.booksByAuthor]
+# authors[] contains a list of all book authors, with multiple entries for multiple books by author
+# authors[]: (([0]:friendly [1]:sort))
+# unique_authors[]: (([0]:friendly [1]:sort [2]:book_count))
+books_by_current_author = 0
+current_author = authors[0]
+multiple_authors = False
+unique_authors = []
+for (i,author) in enumerate(authors):
+if author != current_author:
+# Note that current_author and author are tuples: (friendly, sort)
+multiple_authors = True
+if author != current_author and i:
+# Warn, exit if friendly matches previous, but sort doesn't
+if author[0] == current_author[0]:
+error_msg = _('''
+\n*** Metadata error ***
+Inconsistent Author Sort values for Author '{0}', unable to continue building catalog.
+Select all books by '{0}', apply correct Author Sort value in Edit Metadata dialog,
+then rebuild the catalog.\n''').format(author[0])
+self.opts.log.warn(error_msg)
+self.error = error_msg
+return False
+# New author, save the previous author/sort/count
+unique_authors.append((current_author[0], icu_title(current_author[1]),
+books_by_current_author))
+current_author = author
+books_by_current_author = 1
+elif i==0 and len(authors) == 1:
+# Allow for single-book lists
+unique_authors.append((current_author[0], icu_title(current_author[1]),
+books_by_current_author))
+else:
+books_by_current_author += 1
+else:
+# Add final author to list or single-author dataset
+if (current_author == author and len(authors) > 1) or not multiple_authors:
+unique_authors.append((current_author[0], icu_title(current_author[1]),
+books_by_current_author))
+if False and self.verbose:
+self.opts.log.info("\nfetchBooksByauthor(): %d unique authors" % len(unique_authors))
+for author in unique_authors:
+self.opts.log.info((u" %-50s %-25s %2d" % (author[0][0:45], author[1][0:20],
+author[2])).encode('utf-8'))
+self.authors = unique_authors
+return True
 def fetchBooksByTitle(self):
 self.updateProgressFullStep("Fetching database")
@@ -1562,90 +1646,9 @@
 title['title_sort'][0:40])).decode('mac-roman'))
 return True
 else:
+self.error = _("No books found to catalog.\nCheck 'Excluded books' criteria in E-book options.")
 return False
-def fetchBooksByAuthor(self):
-'''
-Generate a list of titles sorted by author from the database
-return = Success
-'''
-self.updateProgressFullStep("Sorting database")
-'''
-# Sort titles case-insensitive, by author
-self.booksByAuthor = sorted(self.booksByTitle,
-key=lambda x:(x['author_sort'].upper(), x['author_sort'].upper()))
-'''
-self.booksByAuthor = list(self.booksByTitle)
-self.booksByAuthor.sort(self.author_compare)
-if False and self.verbose:
-self.opts.log.info("fetchBooksByAuthor(): %d books" % len(self.booksByAuthor))
-self.opts.log.info(" %-30s %-20s %s" % ('title', 'series', 'series_index'))
-for title in self.booksByAuthor:
-self.opts.log.info((u" %-30s %-20s%5s " % \
-(title['title'][:30],
-title['series'][:20] if title['series'] else '',
-title['series_index'],
-)).encode('utf-8'))
-raise SystemExit
-# Build the unique_authors set from existing data
-authors = [(record['author'], record['author_sort'].capitalize()) for record in self.booksByAuthor]
-# authors[] contains a list of all book authors, with multiple entries for multiple books by author
-# authors[]: (([0]:friendly [1]:sort))
-# unique_authors[]: (([0]:friendly [1]:sort [2]:book_count))
-books_by_current_author = 0
-current_author = authors[0]
-multiple_authors = False
-unique_authors = []
-for (i,author) in enumerate(authors):
-if author != current_author:
-# Note that current_author and author are tuples: (friendly, sort)
-multiple_authors = True
-if author != current_author and i:
-# Warn, exit if friendly matches previous, but sort doesn't
-if author[0] == current_author[0]:
-error_msg = _('''
-\n*** Metadata error ***
-Inconsistent Author Sort values for Author '{0}', unable to continue building catalog.
-Select all books by '{0}', apply correct Author Sort value in Edit Metadata dialog,
-then rebuild the catalog.
-*** Terminating catalog generation ***\n''').format(author[0])
-self.opts.log.warn(error_msg)
-return False
-# New author, save the previous author/sort/count
-unique_authors.append((current_author[0], icu_title(current_author[1]),
-books_by_current_author))
-current_author = author
-books_by_current_author = 1
-elif i==0 and len(authors) == 1:
-# Allow for single-book lists
-unique_authors.append((current_author[0], icu_title(current_author[1]),
-books_by_current_author))
-else:
-books_by_current_author += 1
-else:
-# Add final author to list or single-author dataset
-if (current_author == author and len(authors) > 1) or not multiple_authors:
-unique_authors.append((current_author[0], icu_title(current_author[1]),
-books_by_current_author))
-if False and self.verbose:
-self.opts.log.info("\nfetchBooksByauthor(): %d unique authors" % len(unique_authors))
-for author in unique_authors:
-self.opts.log.info((u" %-50s %-25s %2d" % (author[0][0:45], author[1][0:20],
-author[2])).encode('utf-8'))
-self.authors = unique_authors
-return True
 def fetchBookmarks(self):
 '''
 Collect bookmarks for catalog entries
@@ -5069,6 +5072,8 @@ then rebuild the catalog.
 abort_after_input_dump=False)
 plumber.merge_ui_recommendations(recommendations)
 plumber.run()
-return 0
+# returns to gui2.actions.catalog:catalog_generated()
+return None
 else:
-return 1
+# returns to gui2.actions.catalog:catalog_generated()
+return catalog.error
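
Note: fetchBooksByAuthor() above walks the author-sorted book list, counts books per author, and now records an error through the newly writable error property instead of silently failing when one author name carries two different sort values. A compact sketch of that grouping logic, with made-up data and a plain exception in place of the catalog's error reporting:

def unique_authors_with_counts(authors):
    # authors: one (friendly, sort) tuple per book, pre-sorted by sort value
    unique, seen_sorts = [], {}
    for friendly, sort in authors:
        if seen_sorts.get(friendly, sort) != sort:
            raise ValueError("Inconsistent Author Sort values for Author '%s'" % friendly)
        seen_sorts[friendly] = sort
        if unique and unique[-1][0] == friendly:
            unique[-1] = (friendly, sort, unique[-1][2] + 1)   # one more book by this author
        else:
            unique.append((friendly, sort, 1))                 # first book by a new author
    return unique

books = [('Jane Doe', 'Doe, Jane'), ('Jane Doe', 'Doe, Jane'), ('John Roe', 'Roe, John')]
print(unique_authors_with_counts(books))
# [('Jane Doe', 'Doe, Jane', 2), ('John Roe', 'Roe, John', 1)]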

View File

@@ -693,8 +693,12 @@ def command_catalog(args, dbpath):
 }
 with plugin:
-plugin.run(args[1], opts, get_db(dbpath, opts))
-return 0
+ret = plugin.run(args[1], opts, get_db(dbpath, opts))
+if ret is None:
+ret = 0
+else:
+ret = 1
+return ret
 # end of GR additions

View File

@@ -690,10 +690,10 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
 mi = Metadata(None)
 aut_list = row[fm['au_map']]
-if not aut_list:
-aut_list = []
+if aut_list:
+aut_list = [p.split(':::') for p in aut_list.split(':#:') if p]
 else:
-aut_list = [p.split(':::') for p in aut_list.split(':#:')]
+aut_list = []
 aum = []
 aus = {}
 for (author, author_sort) in aut_list:
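
Note: the change above rewrites the branch and, more importantly, adds an 'if p' guard when parsing the cached author map, which is a ':#:'-separated list of 'name:::sort' pairs; the guard drops the empty fragments that stray separators would otherwise produce. The parsing in isolation (sample data is made up):

def parse_au_map(aut_list):
    # assumed format: 'name:::sort' pairs joined by ':#:'
    if aut_list:
        return [p.split(':::') for p in aut_list.split(':#:') if p]
    return []

print(parse_au_map('Jane Doe:::Doe, Jane:#:John Roe:::Roe, John:#:'))
# [['Jane Doe', 'Doe, Jane'], ['John Roe', 'Roe, John']]  -- trailing separator ignored
print(parse_au_map(None))  # []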

View File

@@ -437,6 +437,15 @@ My antivirus program claims |app| is a virus/trojan?
 Your antivirus program is wrong. |app| is a completely open source product. You can actually browse the source code yourself (or hire someone to do it for you) to verify that it is not a virus. Please report the false identification to whatever company you buy your antivirus software from. If the antivirus program is preventing you from downloading/installing |app|, disable it temporarily, install |app| and then re-enable it.
+How do I backup |app|?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The most important thing to backup is the |app| library folder, that contains all your books and metadata. This is the folder you chose for your |app| library when you ran |app| for the first time. You can get the path to the library folder by clicking the |app| icon on the main toolbar. You must backup this complete folder with all its files and sub-folders.
+You can switch |app| to using a backed up library folder by simply clicking the |app| icon on the toolbar and choosing your backup library folder.
+If you want to backup the |app| configuration/plugins, you have to backup the config directory. You can find this config directory via :guilabel:`Preferences->Miscellaneous`. Note that restoring configuration directories is not officially supported, but should work in most cases. Just copy the contents of the backup directory into the current configuration directory to restore.
 How do I use purchased EPUB books with |app|?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Most purchased EPUB books have `DRM <http://wiki.mobileread.com/wiki/DRM>`_. This prevents |app| from opening them. You can still use |app| to store and transfer them to your e-book reader. First, you must authorize your reader on a windows machine with Adobe Digital Editions. Once this is done, EPUB books transferred with |app| will work fine on your reader. When you purchase an epub book from a website, you will get an ".acsm" file. This file should be opened with Adobe Digital Editions, which will then download the actual ".epub" e-book. The e-book file will be stored in the folder "My Digital Editions", from where you can add it to |app|.
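
Note: a minimal sketch of the backup step described in the new "How do I backup |app|?" entry above: copy the whole library folder somewhere safe. The paths are placeholders, not real defaults; point them at your own library and backup locations.

import shutil

LIBRARY_DIR = '/home/user/Calibre Library'    # the folder chosen on first run
BACKUP_DIR = '/mnt/backup/Calibre Library'    # destination must not already exist

shutil.copytree(LIBRARY_DIR, BACKUP_DIR)      # copies all files and sub-folders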