mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
merge from trunk
This commit is contained in:
commit
5bc482a82a
BIN
resources/images/news/exiled.png
Normal file
BIN
resources/images/news/exiled.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 1.3 KiB |
@ -22,7 +22,7 @@ class Deia(BasicNewsRecipe):
|
||||
cover_url ='http://2.bp.blogspot.com/_RjrWzC6tI14/TM6jrPLaBZI/AAAAAAAAFaI/ayffwxidFEY/s1600/2009-10-13-logo-deia.jpg'
|
||||
timefmt ='[%a, %d %b, %Y]'
|
||||
encoding ='utf8'
|
||||
language ='es_ES'
|
||||
language ='es'
|
||||
remove_javascript =True
|
||||
remove_tags_after =dict(id='Texto')
|
||||
remove_tags_before =dict(id='Texto')
|
||||
|
43
resources/recipes/el_publico.recipe
Normal file
43
resources/recipes/el_publico.recipe
Normal file
@ -0,0 +1,43 @@
|
||||
#!/usr/bin/env python
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'Gerardo Diez'
|
||||
__copyright__ = 'Gerardo Diez<gerardo.diez.garcia@gmail.com>'
|
||||
description = 'Main daily newspaper from Spain - v1.00 (05, Enero 2011)'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
'''
|
||||
publico.es
|
||||
'''
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
class Publico(BasicNewsRecipe):
    '''
    News recipe for publico.es.

    The class is purely declarative: it only sets the attributes that
    BasicNewsRecipe reads (metadata, download limits, clean-up rules and the
    list of section feeds).
    '''

    # --- Metadata ---------------------------------------------------------
    title = u'Publico.es'
    __author__ = 'Gerardo Diez'
    publisher = u'Mediapubli Sociedad de Publicaciones y Ediciones S.L.'
    category = 'news, politics, finances, world, spain, science, catalunya'
    language = 'es'
    cover_url = u'http://imagenes.publico.es/css/img/logo_publico.gif'
    timefmt = '[%a, %d %b, %Y]'

    # --- Download settings ------------------------------------------------
    oldest_article = 1
    max_articles_per_feed = 100
    simultaneous_downloads = 10
    encoding = 'utf8'

    # --- Page clean-up ----------------------------------------------------
    remove_javascript = True
    no_stylesheets = True
    keep_only_tags = {'id': 'main'}
    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['Noticias_642x50', 'contInfo ancho']}},
        {'name': 'ul', 'attrs': {'class': ['navComentarios', 'comentarios']}},
        {'name': 'div', 'attrs': {'id': ['commentsContext', 'toolbar', 'comentarios']}},
        {'name': 'h5', 'attrs': {'id': 'comentarios'}},
    ]

    # --- Section feeds: (title, RSS url) ------------------------------------
    feeds = [
        (u'Internacional', u'http://www.publico.es/estaticos/rss/internacional'),
        (u'Espa\xf1a', u'http://www.publico.es/estaticos/rss/espana'),
        (u'Dinero', u'http://www.publico.es/estaticos/rss/dinero'),
        (u'Ciencias', u'http://www.publico.es/estaticos/rss/ciencias'),
        (u'Culturas', u'http://www.publico.es/estaticos/rss/culturas'),
        (u'Deportes', u'http://www.publico.es/estaticos/rss/deportes'),
        (u'Televisi\xf3n y Gente', u'http://www.publico.es/estaticos/rss/televisionygente'),
        (u'Catalu\xf1a', u'http://www.publico.es/estaticos/rss/catalunya'),
        (u'Viajes', u'http://www.publico.es/estaticos/rss/viajes'),
    ]
|
||||
|
||||
|
@ -17,7 +17,7 @@ class ElPais_RSS(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
encoding = 'cp1252'
|
||||
use_embedded_content = False
|
||||
language = 'es_ES'
|
||||
language = 'es'
|
||||
remove_empty_feeds = True
|
||||
publication_type = 'newspaper'
|
||||
masthead_url = 'http://www.elpais.com/im/tit_logo.gif'
|
||||
@ -57,14 +57,14 @@ class ElPais_RSS(BasicNewsRecipe):
|
||||
,(u'Madrid' , u'http://www.elpais.com/rss/feed.html?feedId=1016' )
|
||||
,(u'Pais Vasco' , u'http://www.elpais.com/rss/feed.html?feedId=17062')
|
||||
,(u'Galicia' , u'http://www.elpais.com/rss/feed.html?feedId=17063')
|
||||
,(u'Opinion' , u'http://www.elpais.com/rss/feed.html?feedId=1003' )
|
||||
,(u'Sociedad' , u'http://www.elpais.com/rss/feed.html?feedId=1004' )
|
||||
,(u'Opinion' , u'http://www.elpais.com/rss/feed.html?feedId=1003' )
|
||||
,(u'Sociedad' , u'http://www.elpais.com/rss/feed.html?feedId=1004' )
|
||||
,(u'Deportes' , u'http://www.elpais.com/rss/feed.html?feedId=1007' )
|
||||
,(u'Cultura' , u'http://www.elpais.com/rss/feed.html?feedId=1008' )
|
||||
,(u'Cine' , u'http://www.elpais.com/rss/feed.html?feedId=17052')
|
||||
,(u'Literatura' , u'http://www.elpais.com/rss/feed.html?feedId=17053')
|
||||
,(u'Musica' , u'http://www.elpais.com/rss/feed.html?feedId=17051')
|
||||
,(u'Arte' , u'http://www.elpais.com/rss/feed.html?feedId=17060')
|
||||
,(u'Arte' , u'http://www.elpais.com/rss/feed.html?feedId=17060')
|
||||
,(u'Tecnologia' , u'http://www.elpais.com/rss/feed.html?feedId=1005' )
|
||||
,(u'Economia' , u'http://www.elpais.com/rss/feed.html?feedId=1006' )
|
||||
,(u'Ciencia' , u'http://www.elpais.com/rss/feed.html?feedId=17068')
|
||||
|
@ -1,7 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
exiledonline.com
|
||||
'''
|
||||
@ -20,18 +18,20 @@ class Exiled(BasicNewsRecipe):
|
||||
use_embedded_content = False
|
||||
encoding = 'utf8'
|
||||
remove_javascript = True
|
||||
language = 'en'
|
||||
|
||||
cover_url = 'http://exiledonline.com/wp-content/themes/exiledonline_theme/images/header-sm.gif'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment' , description
|
||||
, '--base-font-size', '10'
|
||||
, '--category' , category
|
||||
, '--publisher' , publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
language = 'en'
|
||||
publication_type = 'newsblog'
|
||||
masthead_url = 'http://exiledonline.com/wp-content/themes/exiledonline_theme/images/header-sm.gif'
|
||||
extra_css = """
|
||||
body{font-family: Arial,Helvetica,sans-serif}
|
||||
#topslug{font-size: xx-large; font-weight: bold; color: red}
|
||||
"""
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
}
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'main'})]
|
||||
|
||||
@ -47,12 +47,13 @@ class Exiled(BasicNewsRecipe):
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
mtag = '\n<meta http-equiv="Content-Language" content="en"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n'
|
||||
soup.head.insert(0,mtag)
|
||||
for alink in soup.findAll('a'):
|
||||
if alink.string is not None:
|
||||
tstr = alink.string
|
||||
alink.replaceWith(tstr)
|
||||
return soup
|
||||
|
||||
def get_article_url(self, article):
    '''
    Return the URL to download for a feed entry.

    The entry's ``link`` is extended with the ``all/1/`` path suffix
    (presumably the site's single-page view of an article -- confirm
    against the site).

    :param article: feed entry; only its ``get('link', None)`` is used.
    :return: the link with ``all/1/`` appended, or ``None`` when the entry
             carries no link.
    '''
    raw = article.get('link', None)
    if not raw:
        # No link in the feed entry: report that explicitly instead of
        # failing with a TypeError on ``None + str``.
        return None
    if not raw.endswith('/'):
        # Keep the suffix a separate path segment even when the feed omits
        # the trailing slash.
        raw += '/'
    return raw + 'all/1/'
|
||||
|
||||
|
@ -563,8 +563,8 @@ class HTMLPreProcessor(object):
|
||||
html = html.replace(start, '<!--')
|
||||
html = html.replace(stop, '-->')
|
||||
# convert ellipsis to entities to prevent wrapping
|
||||
html = re.sub('(?u)(?<=\w)\s?(\.\s?){2}\.', '…', html)
|
||||
html = re.sub(r'(?u)(?<=\w)\s?(\.\s?){2}\.', '…', html)
|
||||
# convert double dashes to em-dash
|
||||
html = re.sub('\s--\s', u'\u2014', html)
|
||||
html = re.sub(r'\s--\s', u'\u2014', html)
|
||||
return substitute_entites(html)
|
||||
|
||||
|
58
src/calibre/ebooks/txt/heuristicprocessor.py
Normal file
58
src/calibre/ebooks/txt/heuristicprocessor.py
Normal file
@ -0,0 +1,58 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL 3'
|
||||
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import re
|
||||
|
||||
from calibre import prepare_string_for_xml
|
||||
|
||||
class TXTHeuristicProcessor(object):
    '''
    Convert plain text to HTML, adding italic markup based on heuristics.

    Two heuristics are applied to every paragraph:

    * well known abbreviations / Latin phrases (``ITALICIZE_WORDS``) are
      wrapped in ``<i>`` tags;
    * text surrounded by common plain-text emphasis markers
      (``ITALICIZE_STYLE_PATS``) is wrapped in ``<i>`` tags and the markers
      are dropped.
    '''

    def __init__(self):
        # Abbreviations and phrases that get italicized when they appear as
        # whole words.
        self.ITALICIZE_WORDS = [
            'Etc.', 'etc.', 'viz.', 'ie.', 'i.e.', 'Ie.', 'I.e.', 'eg.',
            'e.g.', 'Eg.', 'E.g.', 'et al.', 'et cetra', 'n.b.', 'N.b.',
            'nota bene', 'Nota bene', 'Ste.', 'Mme.', 'Mdme.',
            'Mlle.', 'Mons.', 'PS.', 'PPS.',
        ]
        # Emphasis markers, ordered so that the most specific (composite)
        # markers come first. If a simple marker such as ``/.../`` ran before
        # ``_/.../_`` or ``/:...:/`` it would consume part of the composite
        # marker and leave stray characters or nested <i> tags behind.
        self.ITALICIZE_STYLE_PATS = [
            r'(?msu)_\*/(?P<words>[^<>]+?)/\*_',
            r'(?msu)_/(?P<words>[^<>]+?)/_',
            r'(?msu)_\*(?P<words>.+?)\*_',
            r'(?msu)\*/(?P<words>[^<>]+?)/\*',
            r'(?msu)/:(?P<words>[^<>]+?):/',
            r'(?msu)\|:(?P<words>.+?):\|',
            r'(?msu)~~(?P<words>.+?)~~',
            r'(?msu)_(?P<words>.+?)_',
            r'(?msu)/(?P<words>[^<>]+?)/',
            r'(?msu)\*(?P<words>.+?)\*',
            r'(?msu)~(?P<words>.+?)~',
        ]

    def _italicize_words_re(self):
        '''
        Build one regular expression matching any entry of ITALICIZE_WORDS
        as a whole word, or return None when the list is empty.
        '''
        if not self.ITALICIZE_WORDS:
            return None
        # Longest first so that the alternation prefers the longest entry.
        words = sorted(self.ITALICIZE_WORDS, key=len, reverse=True)
        alternatives = '|'.join(re.escape(word) for word in words)
        # The look-arounds stop 'ie.' matching inside 'die.' and 'PS.'
        # matching inside 'PPS.'.
        return re.compile(r'(?<!\w)(?:%s)(?!\w)' % alternatives, re.UNICODE)

    def process_paragraph(self, paragraph):
        '''
        Return `paragraph` with italic markup applied.

        :param paragraph: text of one paragraph. `convert` passes it in
                          already XML-escaped.
        :return: the paragraph with ``<i>...</i>`` inserted.
        '''
        words_re = self._italicize_words_re()
        if words_re is not None:
            paragraph = words_re.sub(
                lambda mo: '<i>%s</i>' % mo.group(0), paragraph)
        for pat in self.ITALICIZE_STYLE_PATS:
            paragraph = re.sub(
                pat, lambda mo: '<i>%s</i>' % mo.group('words'), paragraph)
        return paragraph

    def convert(self, txt, title='', epub_split_size_kb=0):
        '''
        Convert `txt` to an HTML document.

        :param txt: the text to convert.
        :param title: substituted into the title slot of HTML_TEMPLATE.
        :param epub_split_size_kb: passed through to `split_txt`.
        :return: the HTML produced from HTML_TEMPLATE, after chapter markup
                 has been applied by `PreProcessor.markup_chapters`.
        '''
        # Imported here rather than at module level because
        # calibre.ebooks.txt.processor itself imports this module.
        from calibre.ebooks.txt.processor import clean_txt, split_txt, HTML_TEMPLATE
        txt = clean_txt(txt)
        txt = split_txt(txt, epub_split_size_kb)

        processed = []
        # A blank line separates paragraphs; line breaks inside a paragraph
        # are folded into spaces.
        for line in txt.split('\n\n'):
            escaped = prepare_string_for_xml(line.replace('\n', ' '))
            processed.append(u'<p>%s</p>' % self.process_paragraph(escaped))

        txt = u'\n'.join(processed)
        # Collapse runs of spaces.
        txt = re.sub('[ ]{2,}', ' ', txt)
        html = HTML_TEMPLATE % (title, txt)

        from calibre.ebooks.conversion.utils import PreProcessor
        pp = PreProcessor()
        html = pp.markup_chapters(html, pp.get_word_count(html), False)

        return html
|
@ -10,7 +10,8 @@ from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
||||
from calibre.ebooks.chardet import detect
|
||||
from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \
|
||||
separate_paragraphs_single_line, separate_paragraphs_print_formatted, \
|
||||
preserve_spaces, detect_paragraph_type, detect_formatting_type
|
||||
preserve_spaces, detect_paragraph_type, detect_formatting_type, \
|
||||
convert_heuristic
|
||||
from calibre import _ent_pat, xml_entity_to_unicode
|
||||
|
||||
class TXTInput(InputFormatPlugin):
|
||||
@ -24,18 +25,22 @@ class TXTInput(InputFormatPlugin):
|
||||
OptionRecommendation(name='paragraph_type', recommended_value='auto',
|
||||
choices=['auto', 'block', 'single', 'print'],
|
||||
help=_('Paragraph structure.\n'
|
||||
'choices are [\'auto\', \'block\', \'single\', \'print\', \'markdown\']\n'
|
||||
'choices are [\'auto\', \'block\', \'single\', \'print\', \'unformatted\']\n'
|
||||
'* auto: Try to auto detect paragraph type.\n'
|
||||
'* block: Treat a blank line as a paragraph break.\n'
|
||||
'* single: Assume every line is a paragraph.\n'
|
||||
'* print: Assume every line starting with 2+ spaces or a tab '
|
||||
'starts a paragraph.')),
|
||||
'starts a paragraph.'
|
||||
'* unformatted: Most lines have hard line breaks, few/no spaces or indents.')),
|
||||
OptionRecommendation(name='formatting_type', recommended_value='auto',
|
||||
choices=['auto', 'none', 'markdown'],
|
||||
choices=['auto', 'none', 'heuristic', 'markdown'],
|
||||
help=_('Formatting used within the document.'
|
||||
'* auto: Try to auto detect the document formatting.\n'
|
||||
'* none: Do not modify the paragraph formatting. Everything is a paragraph.\n'
|
||||
'* markdown: Run the input though the markdown pre-processor. '
|
||||
'* auto: Automatically decide which formatting processor to use.\n'
|
||||
'* none: Do not process the document formatting. Everything is a '
|
||||
'paragraph and no styling is applied.\n'
|
||||
'* heuristic: Process using heuristics to determine formatting such '
|
||||
'as chapter headings and italic text.\n'
|
||||
'* markdown: Processing using markdown formatting. '
|
||||
'To learn more about markdown see')+' http://daringfireball.net/projects/markdown/'),
|
||||
OptionRecommendation(name='preserve_spaces', recommended_value=False,
|
||||
help=_('Normally extra spaces are condensed into a single space. '
|
||||
@ -90,7 +95,8 @@ class TXTInput(InputFormatPlugin):
|
||||
|
||||
# We don't check for block because the processor assumes block.
|
||||
# single and print are transformed to block for processing.
|
||||
if options.paragraph_type in ('single', 'unformatted'):
|
||||
|
||||
if options.paragraph_type == 'single' or options.paragraph_type == 'unformatted':
|
||||
txt = separate_paragraphs_single_line(txt)
|
||||
elif options.paragraph_type == 'print':
|
||||
txt = separate_paragraphs_print_formatted(txt)
|
||||
@ -106,7 +112,12 @@ class TXTInput(InputFormatPlugin):
|
||||
txt = preprocessor.punctuation_unwrap(length, txt, 'txt')
|
||||
|
||||
flow_size = getattr(options, 'flow_size', 0)
|
||||
html = convert_basic(txt, epub_split_size_kb=flow_size)
|
||||
|
||||
if options.formatting_type == 'heuristic':
|
||||
html = convert_heuristic(txt, epub_split_size_kb=flow_size)
|
||||
else:
|
||||
html = convert_basic(txt, epub_split_size_kb=flow_size)
|
||||
|
||||
|
||||
from calibre.customize.ui import plugin_for_input_format
|
||||
html_input = plugin_for_input_format('html')
|
||||
|
@ -9,6 +9,7 @@ import os, re
|
||||
from calibre import prepare_string_for_xml, isbytestring
|
||||
from calibre.ebooks.markdown import markdown
|
||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||
from calibre.ebooks.txt.heuristicprocessor import TXTHeuristicProcessor
|
||||
from calibre.ebooks.conversion.preprocess import DocAnalysis
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
@ -17,7 +18,7 @@ __docformat__ = 'restructuredtext en'
|
||||
|
||||
HTML_TEMPLATE = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s</title></head><body>\n%s\n</body></html>'
|
||||
|
||||
def convert_basic(txt, title='', epub_split_size_kb=0):
|
||||
def clean_txt(txt):
|
||||
if isbytestring(txt):
|
||||
txt = txt.decode('utf-8', 'replace')
|
||||
# Strip whitespace from the beginning and end of the line. Also replace
|
||||
@ -36,6 +37,10 @@ def convert_basic(txt, title='', epub_split_size_kb=0):
|
||||
chars = list(range(8)) + [0x0B, 0x0E, 0x0F] + list(range(0x10, 0x19))
|
||||
illegal_chars = re.compile(u'|'.join(map(unichr, chars)))
|
||||
txt = illegal_chars.sub('', txt)
|
||||
|
||||
return txt
|
||||
|
||||
def split_txt(txt, epub_split_size_kb=0):
|
||||
#Takes care if there is no point to split
|
||||
if epub_split_size_kb > 0:
|
||||
if isinstance(txt, unicode):
|
||||
@ -50,6 +55,12 @@ def convert_basic(txt, title='', epub_split_size_kb=0):
|
||||
if isbytestring(txt):
|
||||
txt = txt.decode('utf-8')
|
||||
|
||||
return txt
|
||||
|
||||
def convert_basic(txt, title='', epub_split_size_kb=0):
|
||||
txt = clean_txt(txt)
|
||||
txt = split_txt(txt, epub_split_size_kb)
|
||||
|
||||
lines = []
|
||||
# Split into paragraphs based on having a blank line between text.
|
||||
for line in txt.split('\n\n'):
|
||||
@ -58,6 +69,10 @@ def convert_basic(txt, title='', epub_split_size_kb=0):
|
||||
|
||||
return HTML_TEMPLATE % (title, u'\n'.join(lines))
|
||||
|
||||
def convert_heuristic(txt, title='', epub_split_size_kb=0):
    '''
    Convert `txt` to HTML with a fresh TXTHeuristicProcessor.

    All three arguments are forwarded unchanged to
    TXTHeuristicProcessor.convert, whose result is returned.
    '''
    return TXTHeuristicProcessor().convert(txt, title, epub_split_size_kb)
|
||||
|
||||
def convert_markdown(txt, title='', disable_toc=False):
|
||||
md = markdown.Markdown(
|
||||
extensions=['footnotes', 'tables', 'toc'],
|
||||
@ -117,12 +132,12 @@ def detect_paragraph_type(txt):
|
||||
if hardbreaks:
|
||||
# Check for print
|
||||
tab_line_count = len(re.findall('(?mu)^(\t|\s{2,}).+$', txt))
|
||||
if tab_line_count / float(txt_line_count) >= .25:
|
||||
if tab_line_count / float(txt_line_count) >= .15:
|
||||
return 'print'
|
||||
|
||||
# Check for block
|
||||
empty_line_count = len(re.findall('(?mu)^\s*$', txt))
|
||||
if empty_line_count / float(txt_line_count) >= .25:
|
||||
if empty_line_count / float(txt_line_count) >= .15:
|
||||
return 'block'
|
||||
|
||||
# Assume unformatted text with hardbreaks if nothing else matches
|
||||
@ -153,4 +168,4 @@ def detect_formatting_type(txt):
|
||||
if txt.count('\\'+c) > 10:
|
||||
return 'markdown'
|
||||
|
||||
return 'none'
|
||||
return 'heuristic'
|
||||
|
@ -16,7 +16,7 @@ from PyQt4.Qt import QWidget, pyqtSignal, QDialog, Qt, QLabel, \
|
||||
from calibre.gui2.wizard.send_email_ui import Ui_Form
|
||||
from calibre.utils.smtp import config as smtp_prefs
|
||||
from calibre.gui2.dialogs.test_email_ui import Ui_Dialog as TE_Dialog
|
||||
from calibre.gui2 import error_dialog
|
||||
from calibre.gui2 import error_dialog, question_dialog
|
||||
|
||||
class TestEmail(QDialog, TE_Dialog):
|
||||
|
||||
@ -92,7 +92,10 @@ class SendEmail(QWidget, Ui_Form):
|
||||
pa = self.preferred_to_address()
|
||||
to_set = pa is not None
|
||||
if self.set_email_settings(to_set):
|
||||
TestEmail(pa, self).exec_()
|
||||
if question_dialog(self, _('OK to proceed?'),
|
||||
_('This will display your email password on the screen'
|
||||
'. Is it OK to proceed?'), show_copy_button=False):
|
||||
TestEmail(pa, self).exec_()
|
||||
|
||||
def test_email_settings(self, to):
|
||||
opts = smtp_prefs().parse()
|
||||
|
@ -2861,25 +2861,17 @@ class EPUB_MOBI(CatalogPlugin):
|
||||
self.updateProgressMicroStep("Thumbnail %d of %d" % \
|
||||
(i,len(self.booksByTitle)),
|
||||
i/float(len(self.booksByTitle)))
|
||||
# Check to see if source file exists
|
||||
if 'cover' in title and os.path.isfile(title['cover']):
|
||||
# Add the thumb spec to thumbs[]
|
||||
thumbs.append("thumbnail_%d.jpg" % int(title['id']))
|
||||
|
||||
# Check to see if thumbnail exists
|
||||
thumb_fp = "%s/thumbnail_%d.jpg" % (image_dir,int(title['id']))
|
||||
thumb_file = 'thumbnail_%d.jpg' % int(title['id'])
|
||||
if os.path.isfile(thumb_fp):
|
||||
# Check to see if cover is newer than thumbnail
|
||||
# os.path.getmtime() = modified time
|
||||
# os.path.ctime() = creation time
|
||||
cover_timestamp = os.path.getmtime(title['cover'])
|
||||
thumb_timestamp = os.path.getmtime(thumb_fp)
|
||||
if thumb_timestamp < cover_timestamp:
|
||||
self.generateThumbnail(title, image_dir, thumb_file)
|
||||
else:
|
||||
self.generateThumbnail(title, image_dir, thumb_file)
|
||||
else:
|
||||
thumb_file = 'thumbnail_%d.jpg' % int(title['id'])
|
||||
thumb_generated = True
|
||||
try:
|
||||
self.generateThumbnail(title, image_dir, thumb_file)
|
||||
thumbs.append("thumbnail_%d.jpg" % int(title['id']))
|
||||
except:
|
||||
thumb_generated = False
|
||||
|
||||
|
||||
if not thumb_generated:
|
||||
# Use default cover
|
||||
if False and self.verbose:
|
||||
self.opts.log.warn(" using default cover for '%s'" % \
|
||||
|
@ -533,17 +533,23 @@ PDF documents are one of the worst formats to convert from. They are a fixed pag
|
||||
Meaning, it is very difficult to determine where one paragraph ends and another begins. |app| will try to unwrap
|
||||
paragraphs using a configurable, :guilabel:`Line Un-Wrapping Factor`. This is a scale used to determine the length
|
||||
at which a line should be unwrapped. Valid values are a decimal
|
||||
between 0 and 1. The default is 0.5, this is the median line length. Lower this value to include more
|
||||
text in the unwrapping. Increase to include less. You can adjust this value in the conversion settings under PDF Input.
|
||||
between 0 and 1. The default is 0.45, just under the median line length. Lower this value to include more
|
||||
text in the unwrapping. Increase to include less. You can adjust this value in the conversion settings under :guilabel:`PDF Input`.
|
||||
|
||||
Also, they often have headers and footers as part of the document that will become included with the text.
|
||||
Use the options to remove headers and footers to mitigate this issue. If the headers and footers are not
|
||||
removed from the text it can throw off the paragraph unwrapping.
|
||||
removed from the text it can throw off the paragraph unwrapping. To learn how to use the header and footer removal options, read
|
||||
:ref:`regexptutorial`.
|
||||
|
||||
One limitation of PDF input is that complex, multi-column, and image based documents are not supported.
|
||||
Extraction of vector images and tables from within the document is also not supported. Some PDFs use special glyphs to
|
||||
represent double ll or double ff or fi, etc. Conversion of these may or may not work depending on just how they are
|
||||
represented internally in the PDF.
|
||||
Some limitations of PDF input are:
|
||||
|
||||
* Complex, multi-column, and image based documents are not supported.
|
||||
* Extraction of vector images and tables from within the document is also not supported.
|
||||
* Some PDFs use special glyphs to represent ll or ff or fi, etc. Conversion of these may or may not work depending on just how they are represented internally in the PDF.
|
||||
* Some PDFs store their images upside down with a rotation instruction; |app| currently doesn't support that instruction, so the images will be rotated in the output as well.
|
||||
|
||||
To reiterate, **PDF is a really, really bad** format to use as input. If you absolutely must use PDF, then be prepared for an
|
||||
output ranging anywhere from decent to unusable, depending on the input PDF.
|
||||
|
||||
Comic Book Collections
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
@ -5,8 +5,8 @@
|
||||
msgid ""
|
||||
msgstr ""
|
||||
"Project-Id-Version: calibre 0.7.38\n"
|
||||
"POT-Creation-Date: 2011-01-07 13:12+MST\n"
|
||||
"PO-Revision-Date: 2011-01-07 13:12+MST\n"
|
||||
"POT-Creation-Date: 2011-01-08 18:40+MST\n"
|
||||
"PO-Revision-Date: 2011-01-08 18:40+MST\n"
|
||||
"Last-Translator: Automatically generated\n"
|
||||
"Language-Team: LANGUAGE\n"
|
||||
"MIME-Version: 1.0\n"
|
||||
@ -2905,28 +2905,29 @@ msgstr ""
|
||||
msgid " (Preface)"
|
||||
msgstr ""
|
||||
|
||||
#: /home/kovid/work/calibre/src/calibre/ebooks/txt/input.py:26
|
||||
#: /home/kovid/work/calibre/src/calibre/ebooks/txt/input.py:27
|
||||
msgid ""
|
||||
"Paragraph structure.\n"
|
||||
"choices are ['auto', 'block', 'single', 'print', 'markdown']\n"
|
||||
"choices are ['auto', 'block', 'single', 'print', 'unformatted']\n"
|
||||
"* auto: Try to auto detect paragraph type.\n"
|
||||
"* block: Treat a blank line as a paragraph break.\n"
|
||||
"* single: Assume every line is a paragraph.\n"
|
||||
"* print: Assume every line starting with 2+ spaces or a tab starts a paragraph."
|
||||
"* print: Assume every line starting with 2+ spaces or a tab starts a paragraph.* unformatted: Most lines have hard line breaks, few/no spaces or indents."
|
||||
msgstr ""
|
||||
|
||||
#: /home/kovid/work/calibre/src/calibre/ebooks/txt/input.py:35
|
||||
#: /home/kovid/work/calibre/src/calibre/ebooks/txt/input.py:37
|
||||
msgid ""
|
||||
"Formatting used within the document.* auto: Try to auto detect the document formatting.\n"
|
||||
"* none: Do not modify the paragraph formatting. Everything is a paragraph.\n"
|
||||
"* markdown: Run the input though the markdown pre-processor. To learn more about markdown see"
|
||||
"Formatting used within the document.* auto: Automatically decide which formatting processor to use.\n"
|
||||
"* none: Do not process the document formatting. Everything is a paragraph and no styling is applied.\n"
|
||||
"* heuristic: Process using heuristics to determine formatting such as chapter headings and italic text.\n"
|
||||
"* markdown: Processing using markdown formatting. To learn more about markdown see"
|
||||
msgstr ""
|
||||
|
||||
#: /home/kovid/work/calibre/src/calibre/ebooks/txt/input.py:41
|
||||
#: /home/kovid/work/calibre/src/calibre/ebooks/txt/input.py:46
|
||||
msgid "Normally extra spaces are condensed into a single space. With this option all spaces will be displayed."
|
||||
msgstr ""
|
||||
|
||||
#: /home/kovid/work/calibre/src/calibre/ebooks/txt/input.py:44
|
||||
#: /home/kovid/work/calibre/src/calibre/ebooks/txt/input.py:49
|
||||
msgid "Do not insert a Table of Contents into the output text."
|
||||
msgstr ""
|
||||
|
||||
@ -7225,7 +7226,7 @@ msgstr ""
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/password_ui.py:65
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/scheduler_ui.py:219
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/preferences/server_ui.py:130
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:169
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:172
|
||||
msgid "&Show password"
|
||||
msgstr ""
|
||||
|
||||
@ -10621,48 +10622,56 @@ msgstr ""
|
||||
msgid "Mail successfully sent"
|
||||
msgstr ""
|
||||
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:136
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:95
|
||||
msgid "OK to proceed?"
|
||||
msgstr ""
|
||||
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:96
|
||||
msgid "This will display your email password on the screen. Is it OK to proceed?"
|
||||
msgstr ""
|
||||
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:139
|
||||
msgid "If you are setting up a new hotmail account, you must log in to it once before you will be able to send mails."
|
||||
msgstr ""
|
||||
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:147
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:150
|
||||
msgid "Setup sending email using"
|
||||
msgstr ""
|
||||
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:149
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:152
|
||||
msgid "If you don't have an account, you can sign up for a free {name} email account at <a href=\"http://{url}\">http://{url}</a>. {extra}"
|
||||
msgstr ""
|
||||
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:156
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:159
|
||||
msgid "Your %s &email address:"
|
||||
msgstr ""
|
||||
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:157
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:160
|
||||
msgid "Your %s &username:"
|
||||
msgstr ""
|
||||
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:158
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:161
|
||||
msgid "Your %s &password:"
|
||||
msgstr ""
|
||||
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:176
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:179
|
||||
msgid "If you plan to use email to send books to your Kindle, remember to add the your %s email address to the allowed email addresses in your Amazon.com Kindle management page."
|
||||
msgstr ""
|
||||
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:183
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:186
|
||||
msgid "Setup"
|
||||
msgstr ""
|
||||
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:198
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:205
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:201
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:208
|
||||
msgid "Bad configuration"
|
||||
msgstr ""
|
||||
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:199
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:202
|
||||
msgid "You must set the From email address"
|
||||
msgstr ""
|
||||
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:206
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:209
|
||||
msgid "You must set the username and password for the mail server."
|
||||
msgstr ""
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user