mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #816616 (Too many open files)
This commit is contained in:
commit
a7254ffc1d
@ -198,7 +198,8 @@ class PDFWriter(QObject): # {{{
|
|||||||
try:
|
try:
|
||||||
outPDF = PdfFileWriter(title=self.metadata.title, author=self.metadata.author)
|
outPDF = PdfFileWriter(title=self.metadata.title, author=self.metadata.author)
|
||||||
for item in self.combine_queue:
|
for item in self.combine_queue:
|
||||||
inputPDF = PdfFileReader(open(item, 'rb'))
|
with open(item, 'rb') as item_stream:
|
||||||
|
inputPDF = PdfFileReader(item_stream)
|
||||||
for page in inputPDF.pages:
|
for page in inputPDF.pages:
|
||||||
outPDF.addPage(page)
|
outPDF.addPage(page)
|
||||||
outPDF.write(self.out_stream)
|
outPDF.write(self.out_stream)
|
||||||
|
@ -22,6 +22,7 @@ class MarkdownMLizer(OEB2HTML):
|
|||||||
def extract_content(self, oeb_book, opts):
|
def extract_content(self, oeb_book, opts):
|
||||||
self.log.info('Converting XHTML to Markdown formatted TXT...')
|
self.log.info('Converting XHTML to Markdown formatted TXT...')
|
||||||
self.opts = opts
|
self.opts = opts
|
||||||
|
self.in_code = False
|
||||||
self.in_pre = False
|
self.in_pre = False
|
||||||
self.list = []
|
self.list = []
|
||||||
self.blockquotes = 0
|
self.blockquotes = 0
|
||||||
@ -55,10 +56,27 @@ class MarkdownMLizer(OEB2HTML):
|
|||||||
def tidy_up(self, text):
|
def tidy_up(self, text):
|
||||||
# Remove blank space form beginning of paragraph.
|
# Remove blank space form beginning of paragraph.
|
||||||
text = re.sub('(?msu)^[ ]{1,3}', '', text)
|
text = re.sub('(?msu)^[ ]{1,3}', '', text)
|
||||||
|
# pre has 4 spaces. We trimmed 3 so anything with a space left is a pre.
|
||||||
|
text = re.sub('(?msu)^[ ]', ' ', text)
|
||||||
|
|
||||||
|
# Remove tabs that aren't at the beinning of a line
|
||||||
|
new_text = []
|
||||||
|
for l in text.splitlines():
|
||||||
|
start = re.match('\t+', l)
|
||||||
|
if start:
|
||||||
|
start = start.group()
|
||||||
|
else:
|
||||||
|
start = ''
|
||||||
|
l = re.sub('\t', '', l)
|
||||||
|
new_text.append(start + l)
|
||||||
|
text = '\n'.join(new_text)
|
||||||
|
|
||||||
# Remove spaces from blank lines.
|
# Remove spaces from blank lines.
|
||||||
text = re.sub('(?msu)^[ ]+$', '', text)
|
text = re.sub('(?msu)^[ ]+$', '', text)
|
||||||
|
|
||||||
# Reduce blank lines
|
# Reduce blank lines
|
||||||
text = re.sub('(?msu)\n{7,}', '\n' * 6, text)
|
text = re.sub('(?msu)\n{7,}', '\n' * 6, text)
|
||||||
|
|
||||||
# Remove blank lines at beginning and end of document.
|
# Remove blank lines at beginning and end of document.
|
||||||
text = re.sub('^\s*', '', text)
|
text = re.sub('^\s*', '', text)
|
||||||
text = re.sub('\s*$', '\n\n', text)
|
text = re.sub('\s*$', '\n\n', text)
|
||||||
@ -81,6 +99,12 @@ class MarkdownMLizer(OEB2HTML):
|
|||||||
txt = re.sub(r'([\\`*_{}\[\]()#+!])', r'\\\1', txt)
|
txt = re.sub(r'([\\`*_{}\[\]()#+!])', r'\\\1', txt)
|
||||||
return txt
|
return txt
|
||||||
|
|
||||||
|
def prepare_string_for_pre(self, txt):
|
||||||
|
new_text = []
|
||||||
|
for l in txt.splitlines():
|
||||||
|
new_text.append(' ' + l)
|
||||||
|
return '\n'.join(new_text)
|
||||||
|
|
||||||
def dump_text(self, elem, stylizer):
|
def dump_text(self, elem, stylizer):
|
||||||
'''
|
'''
|
||||||
@elem: The element in the etree that we are working on.
|
@elem: The element in the etree that we are working on.
|
||||||
@ -97,7 +121,7 @@ class MarkdownMLizer(OEB2HTML):
|
|||||||
return ['']
|
return ['']
|
||||||
|
|
||||||
# Setup our variables.
|
# Setup our variables.
|
||||||
text = ['']
|
text = []
|
||||||
style = stylizer.style(elem)
|
style = stylizer.style(elem)
|
||||||
tags = []
|
tags = []
|
||||||
tag = barename(elem.tag)
|
tag = barename(elem.tag)
|
||||||
@ -143,46 +167,74 @@ class MarkdownMLizer(OEB2HTML):
|
|||||||
self.blockquotes += 1
|
self.blockquotes += 1
|
||||||
tags.append('>')
|
tags.append('>')
|
||||||
text.append('> ' * self.blockquotes)
|
text.append('> ' * self.blockquotes)
|
||||||
elif tag in ('code', 'pre'):
|
elif tag == 'code':
|
||||||
|
if not self.in_pre and not self.in_code:
|
||||||
|
text.append('`')
|
||||||
|
tags.append('`')
|
||||||
|
self.in_code = True
|
||||||
|
elif tag == 'pre':
|
||||||
|
if not self.in_pre:
|
||||||
|
text.append('\n')
|
||||||
|
tags.append('pre')
|
||||||
self.in_pre = True
|
self.in_pre = True
|
||||||
text.append(' ')
|
|
||||||
elif tag == 'hr':
|
elif tag == 'hr':
|
||||||
text.append('\n* * *')
|
text.append('\n* * *')
|
||||||
tags.append('\n')
|
tags.append('\n')
|
||||||
elif tag == 'a':
|
elif tag == 'a':
|
||||||
# Only write links with absolute (external) urls.
|
# Only write links with absolute (external) urls.
|
||||||
if attribs.has_key('href') and '://' in attribs['href']:
|
if self.opts.keep_links and attribs.has_key('href') and '://' in attribs['href']:
|
||||||
title = ''
|
title = ''
|
||||||
if attribs.has_key('title'):
|
if attribs.has_key('title'):
|
||||||
title = ' "' + attribs['title'] + '"'
|
title = ' "' + attribs['title'] + '"'
|
||||||
|
remove_space = self.remove_space_after_newline
|
||||||
|
title = self.remove_newlines(title)
|
||||||
|
self.remove_space_after_newline = remove_space
|
||||||
text.append('[')
|
text.append('[')
|
||||||
tags.append('](' + attribs['href'] + title + ')')
|
tags.append('](' + attribs['href'] + title + ')')
|
||||||
elif tag == 'img':
|
elif tag == 'img':
|
||||||
if self.opts.keep_image_references:
|
if self.opts.keep_image_references:
|
||||||
txt = '!'
|
txt = '!'
|
||||||
if attribs.has_key('alt'):
|
if attribs.has_key('alt'):
|
||||||
txt += '[' + attribs['alt'] + ']'
|
remove_space = self.remove_space_after_newline
|
||||||
|
txt += '[' + self.remove_newlines(attribs['alt']) + ']'
|
||||||
|
self.remove_space_after_newline = remove_space
|
||||||
txt += '(' + attribs['src'] + ')'
|
txt += '(' + attribs['src'] + ')'
|
||||||
text.append(txt)
|
text.append(txt)
|
||||||
elif tag in ('ol', 'ul'):
|
elif tag in ('ol', 'ul'):
|
||||||
|
tags.append(tag)
|
||||||
|
# Add the list to our lists of lists so we can track
|
||||||
|
# nested lists.
|
||||||
self.list.append({'name': tag, 'num': 0})
|
self.list.append({'name': tag, 'num': 0})
|
||||||
elif tag == 'li':
|
elif tag == 'li':
|
||||||
|
# Get the last list from our list of lists
|
||||||
if self.list:
|
if self.list:
|
||||||
li = self.list[-1]
|
li = self.list[-1]
|
||||||
else:
|
else:
|
||||||
li = {'name': 'ul', 'num': 0}
|
li = {'name': 'ul', 'num': 0}
|
||||||
|
# Add a new line to start the item
|
||||||
text.append('\n')
|
text.append('\n')
|
||||||
|
# Add indent if we have nested lists.
|
||||||
|
list_count = len(self.list)
|
||||||
|
# We only care about indenting nested lists.
|
||||||
|
if (list_count - 1) > 0:
|
||||||
|
text.append('\t' * (list_count - 1))
|
||||||
|
# Add blockquote if we have a blockquote in a list item.
|
||||||
text.append(bq)
|
text.append(bq)
|
||||||
|
# Write the proper sign for ordered and unorded lists.
|
||||||
if li['name'] == 'ul':
|
if li['name'] == 'ul':
|
||||||
text.append('+ ')
|
text.append('+ ')
|
||||||
elif li['name'] == 'ol':
|
elif li['name'] == 'ol':
|
||||||
text.append(unicode(len(self.list)) + '. ')
|
li['num'] += 1
|
||||||
tags.append('')
|
text.append(unicode(li['num']) + '. ')
|
||||||
|
|
||||||
# Process tags that contain text.
|
# Process tags that contain text.
|
||||||
if hasattr(elem, 'text') and elem.text:
|
if hasattr(elem, 'text') and elem.text:
|
||||||
txt = elem.text
|
txt = elem.text
|
||||||
if not self.in_pre:
|
if self.in_pre:
|
||||||
|
txt = self.prepare_string_for_pre(txt)
|
||||||
|
elif self.in_code:
|
||||||
|
txt = self.remove_newlines(txt)
|
||||||
|
else:
|
||||||
txt = self.prepare_string_for_markdown(self.remove_newlines(txt))
|
txt = self.prepare_string_for_markdown(self.remove_newlines(txt))
|
||||||
text.append(txt)
|
text.append(txt)
|
||||||
|
|
||||||
@ -193,26 +245,23 @@ class MarkdownMLizer(OEB2HTML):
|
|||||||
# Close all open tags.
|
# Close all open tags.
|
||||||
tags.reverse()
|
tags.reverse()
|
||||||
for t in tags:
|
for t in tags:
|
||||||
if t in ('pre', 'ul', 'ol', 'li', '>', 'block'):
|
if t in ('pre', 'ul', 'ol', '>'):
|
||||||
if t == 'pre':
|
if t == 'pre':
|
||||||
self.in_pre = False
|
self.in_pre = False
|
||||||
|
text.append('\n')
|
||||||
elif t == '>':
|
elif t == '>':
|
||||||
self.blockquotes -= 1
|
self.blockquotes -= 1
|
||||||
elif t == 'block':
|
|
||||||
if self.style_bold:
|
|
||||||
text.append('**')
|
|
||||||
if self.style_italic:
|
|
||||||
text.append('*')
|
|
||||||
elif t in ('ul', 'ol'):
|
elif t in ('ul', 'ol'):
|
||||||
if self.list:
|
if self.list:
|
||||||
self.list.pop()
|
self.list.pop()
|
||||||
if not self.list:
|
|
||||||
text.append('\n')
|
text.append('\n')
|
||||||
else:
|
else:
|
||||||
if t == '**':
|
if t == '**':
|
||||||
self.style_bold = False
|
self.style_bold = False
|
||||||
elif t == '*':
|
elif t == '*':
|
||||||
self.style_italic = False
|
self.style_italic = False
|
||||||
|
elif t == '`':
|
||||||
|
self.in_code = False
|
||||||
text.append('%s' % t)
|
text.append('%s' % t)
|
||||||
|
|
||||||
# Soft scene breaks.
|
# Soft scene breaks.
|
||||||
@ -224,7 +273,11 @@ class MarkdownMLizer(OEB2HTML):
|
|||||||
# Add the text that is outside of the tag.
|
# Add the text that is outside of the tag.
|
||||||
if hasattr(elem, 'tail') and elem.tail:
|
if hasattr(elem, 'tail') and elem.tail:
|
||||||
tail = elem.tail
|
tail = elem.tail
|
||||||
if not self.in_pre:
|
if self.in_pre:
|
||||||
|
tail = self.prepare_string_for_pre(tail)
|
||||||
|
elif self.in_code:
|
||||||
|
tail = self.remove_newlines(tail)
|
||||||
|
else:
|
||||||
tail = self.prepare_string_for_markdown(self.remove_newlines(tail))
|
tail = self.prepare_string_for_markdown(self.remove_newlines(tail))
|
||||||
text.append(tail)
|
text.append(tail)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user