mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Pull from trunk
This commit is contained in:
commit
1af58e4f03
@ -132,6 +132,9 @@ class CHMReader(CHMFile):
|
|||||||
lpath = os.path.join(output_dir, path)
|
lpath = os.path.join(output_dir, path)
|
||||||
self._ensure_dir(lpath)
|
self._ensure_dir(lpath)
|
||||||
data = self.GetFile(path)
|
data = self.GetFile(path)
|
||||||
|
if lpath.find(';') != -1:
|
||||||
|
# fix file names with ";<junk>" at the end, see _reformat()
|
||||||
|
lpath = lpath.split(';')[0]
|
||||||
with open(lpath, 'wb') as f:
|
with open(lpath, 'wb') as f:
|
||||||
if guess_mimetype(path)[0] == ('text/html'):
|
if guess_mimetype(path)[0] == ('text/html'):
|
||||||
data = self._reformat(data)
|
data = self._reformat(data)
|
||||||
@ -158,14 +161,26 @@ class CHMReader(CHMFile):
|
|||||||
# cos they really fuck with the flow of things and generally waste space
|
# cos they really fuck with the flow of things and generally waste space
|
||||||
# since we can't use [a,b] syntax to select arbitrary items from a list
|
# since we can't use [a,b] syntax to select arbitrary items from a list
|
||||||
# we'll have to do this manually...
|
# we'll have to do this manually...
|
||||||
|
# only remove the tables, if they have an image with an alt attribute
|
||||||
|
# containing prev, next or team
|
||||||
t = soup('table')
|
t = soup('table')
|
||||||
if t:
|
if t:
|
||||||
if (t[0].previousSibling is None
|
if (t[0].previousSibling is None
|
||||||
or t[0].previousSibling.previousSibling is None):
|
or t[0].previousSibling.previousSibling is None):
|
||||||
t[0].extract()
|
try:
|
||||||
|
alt = t[0].img['alt'].lower()
|
||||||
|
if alt.find('prev') != -1 or alt.find('next') != -1 or alt.find('team') != -1:
|
||||||
|
t[0].extract()
|
||||||
|
except:
|
||||||
|
pass
|
||||||
if (t[-1].nextSibling is None
|
if (t[-1].nextSibling is None
|
||||||
or t[-1].nextSibling.nextSibling is None):
|
or t[-1].nextSibling.nextSibling is None):
|
||||||
t[-1].extract()
|
try:
|
||||||
|
alt = t[-1].img['alt'].lower()
|
||||||
|
if alt.find('prev') != -1 or alt.find('next') != -1 or alt.find('team') != -1:
|
||||||
|
t[-1].extract()
|
||||||
|
except:
|
||||||
|
pass
|
||||||
# for some very odd reason each page's content appears to be in a table
|
# for some very odd reason each page's content appears to be in a table
|
||||||
# too. and this table has sub-tables for random asides... grr.
|
# too. and this table has sub-tables for random asides... grr.
|
||||||
|
|
||||||
@ -185,8 +200,24 @@ class CHMReader(CHMFile):
|
|||||||
except KeyError:
|
except KeyError:
|
||||||
# and some don't even have a src= ?!
|
# and some don't even have a src= ?!
|
||||||
pass
|
pass
|
||||||
# now give back some pretty html.
|
try:
|
||||||
return soup.prettify('utf-8')
|
# if there is only a single table with a single element
|
||||||
|
# in the body, replace it by the contents of this single element
|
||||||
|
tables = soup.body.findAll('table', recursive=False)
|
||||||
|
if tables and len(tables) == 1:
|
||||||
|
trs = tables[0].findAll('tr', recursive=False)
|
||||||
|
if trs and len(trs) == 1:
|
||||||
|
tds = trs[0].findAll('td', recursive=False)
|
||||||
|
if tds and len(tds) == 1:
|
||||||
|
tdContents = tds[0].contents
|
||||||
|
tableIdx = soup.body.contents.index(tables[0])
|
||||||
|
tables[0].extract()
|
||||||
|
while tdContents:
|
||||||
|
soup.body.insert(tableIdx, tdContents.pop())
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
# do not prettify, it would reformat the <pre> tags!
|
||||||
|
return str(soup)
|
||||||
|
|
||||||
def Contents(self):
|
def Contents(self):
|
||||||
if self._contents is not None:
|
if self._contents is not None:
|
||||||
|
@ -8,7 +8,8 @@ import os
|
|||||||
|
|
||||||
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
||||||
from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \
|
from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \
|
||||||
separate_paragraphs_single_line, separate_paragraphs_print_formatted
|
separate_paragraphs_single_line, separate_paragraphs_print_formatted, \
|
||||||
|
preserve_spaces
|
||||||
|
|
||||||
class TXTInput(InputFormatPlugin):
|
class TXTInput(InputFormatPlugin):
|
||||||
|
|
||||||
@ -28,6 +29,9 @@ class TXTInput(InputFormatPlugin):
|
|||||||
'an indent (either a tab or 2+ spaces) represents a paragraph. '
|
'an indent (either a tab or 2+ spaces) represents a paragraph. '
|
||||||
'Paragraphs end when the next line that starts with an indent '
|
'Paragraphs end when the next line that starts with an indent '
|
||||||
'is reached.')),
|
'is reached.')),
|
||||||
|
OptionRecommendation(name='preserve_spaces', recommended_value=False,
|
||||||
|
help=_('Normally extra spaces are condensed into a single space. '
|
||||||
|
'With this option all spaces will be displayed.')),
|
||||||
OptionRecommendation(name='markdown', recommended_value=False,
|
OptionRecommendation(name='markdown', recommended_value=False,
|
||||||
help=_('Run the text input through the markdown pre-processor. To '
|
help=_('Run the text input through the markdown pre-processor. To '
|
||||||
'learn more about markdown see')+' http://daringfireball.net/projects/markdown/'),
|
'learn more about markdown see')+' http://daringfireball.net/projects/markdown/'),
|
||||||
@ -48,6 +52,8 @@ class TXTInput(InputFormatPlugin):
|
|||||||
txt = separate_paragraphs_single_line(txt)
|
txt = separate_paragraphs_single_line(txt)
|
||||||
if options.print_formatted_paras:
|
if options.print_formatted_paras:
|
||||||
txt = separate_paragraphs_print_formatted(txt)
|
txt = separate_paragraphs_print_formatted(txt)
|
||||||
|
if options.preserve_spaces:
|
||||||
|
txt = preserve_spaces(txt)
|
||||||
|
|
||||||
if options.markdown:
|
if options.markdown:
|
||||||
log.debug('Running text though markdown conversion...')
|
log.debug('Running text though markdown conversion...')
|
||||||
|
@ -24,6 +24,9 @@ def convert_basic(txt, title=''):
|
|||||||
for line in txt.splitlines():
|
for line in txt.splitlines():
|
||||||
lines.append(line.strip())
|
lines.append(line.strip())
|
||||||
txt = '\n'.join(lines)
|
txt = '\n'.join(lines)
|
||||||
|
|
||||||
|
# Condense redundant spaces
|
||||||
|
txt = re.sub('[ ]{2,}', ' ', txt)
|
||||||
|
|
||||||
# Remove blank lines from the beginning and end of the document.
|
# Remove blank lines from the beginning and end of the document.
|
||||||
txt = re.sub('^\s+(?=.)', '', txt)
|
txt = re.sub('^\s+(?=.)', '', txt)
|
||||||
@ -56,6 +59,11 @@ def separate_paragraphs_print_formatted(txt):
|
|||||||
txt = re.sub('(?miu)^(\t+|[ ]{2,})(?=.)', '\n\t', txt)
|
txt = re.sub('(?miu)^(\t+|[ ]{2,})(?=.)', '\n\t', txt)
|
||||||
return txt
|
return txt
|
||||||
|
|
||||||
|
def preserve_spaces(txt):
    """Replace whitespace in *txt* with non-breaking-space entities.

    Every space becomes one ``&nbsp;`` and every tab becomes four, so runs of
    whitespace are no longer condensed into a single space when displayed
    (presumably the text is later emitted as HTML -- confirm against caller).

    :param txt: The text to process.
    :return: The text with spaces and tabs replaced by ``&nbsp;`` entities.
    """
    # NOTE: the replacement values must be the literal entity text. Raw
    # whitespace here would leave the input effectively unchanged.
    txt = txt.replace(' ', '&nbsp;')
    txt = txt.replace('\t', '&nbsp;&nbsp;&nbsp;&nbsp;')
    return txt
|
||||||
|
|
||||||
def opf_writer(path, opf_name, manifest, spine, mi):
|
def opf_writer(path, opf_name, manifest, spine, mi):
|
||||||
opf = OPFCreator(path, mi)
|
opf = OPFCreator(path, mi)
|
||||||
opf.create_manifest(manifest)
|
opf.create_manifest(manifest)
|
||||||
|
@ -14,6 +14,7 @@ class PluginWidget(Widget, Ui_Form):
|
|||||||
|
|
||||||
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
|
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
|
||||||
Widget.__init__(self, parent, 'txt_input',
|
Widget.__init__(self, parent, 'txt_input',
|
||||||
['single_line_paras', 'print_formatted_paras', 'markdown', 'markdown_disable_toc'])
|
['single_line_paras', 'print_formatted_paras', 'markdown',
|
||||||
|
'markdown_disable_toc', 'preserve_spaces'])
|
||||||
self.db, self.book_id = db, book_id
|
self.db, self.book_id = db, book_id
|
||||||
self.initialize_options(get_option, get_help, db, book_id)
|
self.initialize_options(get_option, get_help, db, book_id)
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
<rect>
|
<rect>
|
||||||
<x>0</x>
|
<x>0</x>
|
||||||
<y>0</y>
|
<y>0</y>
|
||||||
<width>400</width>
|
<width>470</width>
|
||||||
<height>300</height>
|
<height>300</height>
|
||||||
</rect>
|
</rect>
|
||||||
</property>
|
</property>
|
||||||
@ -52,7 +52,7 @@
|
|||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="5" column="0">
|
<item row="6" column="0">
|
||||||
<spacer name="verticalSpacer">
|
<spacer name="verticalSpacer">
|
||||||
<property name="orientation">
|
<property name="orientation">
|
||||||
<enum>Qt::Vertical</enum>
|
<enum>Qt::Vertical</enum>
|
||||||
@ -65,10 +65,17 @@
|
|||||||
</property>
|
</property>
|
||||||
</spacer>
|
</spacer>
|
||||||
</item>
|
</item>
|
||||||
|
<item row="5" column="0">
|
||||||
|
<widget class="QCheckBox" name="opt_preserve_spaces">
|
||||||
|
<property name="text">
|
||||||
|
<string>Preserve &amp;spaces</string>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
</layout>
|
</layout>
|
||||||
</widget>
|
</widget>
|
||||||
<resources/>
|
<resources/>
|
||||||
<connections>
|
<connections>
|
||||||
<connection>
|
<connection>
|
||||||
<sender>opt_markdown</sender>
|
<sender>opt_markdown</sender>
|
||||||
<signal>toggled(bool)</signal>
|
<signal>toggled(bool)</signal>
|
||||||
|
Loading…
x
Reference in New Issue
Block a user