mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Sync to trunk.
This commit is contained in:
commit
c0dafb653a
@ -11,7 +11,7 @@ class CNetJapan(BasicNewsRecipe):
|
|||||||
(u'CNet Blog', u'http://feed.japan.cnet.com/rss/blog/index.rdf')
|
(u'CNet Blog', u'http://feed.japan.cnet.com/rss/blog/index.rdf')
|
||||||
]
|
]
|
||||||
language = 'ja'
|
language = 'ja'
|
||||||
encoding = 'Shift_JIS'
|
encoding = 'utf-8'
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
|
|
||||||
preprocess_regexps = [
|
preprocess_regexps = [
|
||||||
|
80
resources/recipes/tyzden.recipe
Normal file
80
resources/recipes/tyzden.recipe
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2011, Miroslav Vasko zemiak@gmail.com'
|
||||||
|
|
||||||
|
'''
|
||||||
|
.tyzden, a weekly news magazine (a week old issue)
|
||||||
|
'''
|
||||||
|
from calibre import strftime
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from datetime import date
|
||||||
|
import re
|
||||||
|
|
||||||
|
class TyzdenRecipe(BasicNewsRecipe):
    '''
    Download the week-old issue of the Slovak weekly magazine .tyzden.

    The issue is addressed by ISO year and ISO week number, both computed
    once when this class body is executed. The index page of that issue is
    scraped for section links (hrefs matching ``rubrika/``); the article
    links that follow each section link become that section's feed.
    '''

    __license__ = 'GPL v3'
    __author__ = 'zemiak'
    language = 'sk'
    version = 1

    publisher = u'www.tyzden.sk'
    category = u'Magazine'
    description = u'A conservative weekly magazine. The latest free issue'

    # Target the issue from seven days ago. Stepping back by whole days
    # (instead of decrementing the week number) keeps the year/week pair
    # valid during ISO week 1, where the previous issue belongs to week 52
    # or 53 of the previous ISO year.
    # NOTE(review): assumes issues are numbered by ISO week -- confirm.
    today = date.today()
    iso = date.fromordinal(today.toordinal() - 7).isocalendar()
    year = iso[0]
    weeknum = iso[1]

    title = u'.tyzden ' + str(weeknum) + '/' + str(year)

    base_url_path = ('http://www.tyzden.sk/casopis/' + str(year) + '/' +
                     str(weeknum))
    base_url = base_url_path + '.html'

    oldest_article = 20
    max_articles_per_feed = 100
    remove_javascript = True

    use_embedded_content = False
    no_stylesheets = True

    keep_only_tags = [
        dict(name='h1'),
        dict(name='div', attrs={'class': 'text_area top_nofoto'}),
        dict(name='div', attrs={'class': 'text_block'}),
    ]

    remove_tags_after = [dict(name='div', attrs={'class': 'text_block'})]

    def _set_cover_from_index(self, soup):
        '''
        Set ``self.cover_url`` from the first <img> inside the index
        page's ``div.foto``. Leaves it untouched when the div, the image
        or its ``src`` attribute is missing.
        '''
        imgdiv = soup.find('div', attrs={'class': 'foto'})
        img = imgdiv.find('img') if imgdiv is not None else None
        src = img.get('src') if img is not None else None
        if src:
            self.cover_url = 'http://www.tyzden.sk/' + src

    def find_sections(self):
        '''
        Yield ``(section title, section link tag)`` for each section.

        Fetches the issue index page, records the cover image URL as a
        side effect, then yields one pair per <a> whose href matches
        ``rubrika/``. The tag itself is yielded so that find_articles()
        can walk the links that follow it in the document.
        '''
        soup = self.index_to_soup(self.base_url)
        self._set_cover_from_index(soup)

        for s in soup.findAll('a', attrs={'href': re.compile(r'rubrika/.*')}):
            yield (self.tag_to_string(s), s)

    def find_articles(self, soup):
        '''
        Yield one article dict per article link following a section link.

        :param soup: the section's <a> tag, as yielded by find_sections().

        Walks the <a> tags after ``soup`` in document order and stops at
        the first one whose href does not start with ``casopis/``.
        Anchors that carry no href at all are skipped rather than raising
        KeyError.
        '''
        for art in soup.findAllNext('a'):
            url = art.get('href')
            if not url:
                continue
            if not url.startswith('casopis/'):
                break

            title = self.tag_to_string(art)
            yield {
                'title': title,
                # NOTE(review): the href already starts with 'casopis/', so
                # this URL repeats that segment after base_url_path --
                # confirm against the live site.
                'url': self.base_url_path + '/' + url,
                'description': title,
                'date': strftime('%a, %d %b'),
            }

    def parse_index(self):
        '''
        Return the feeds for the issue as a list of
        ``(section title, [article dicts])`` tuples.
        '''
        return [(title, list(self.find_articles(tag)))
                for title, tag in self.find_sections()]
|
@ -53,6 +53,9 @@ class ANDROID(USBMS):
|
|||||||
# LG
|
# LG
|
||||||
0x1004 : { 0x61cc : [0x100] },
|
0x1004 : { 0x61cc : [0x100] },
|
||||||
|
|
||||||
|
# Archos
|
||||||
|
0x0e79 : { 0x1420 : [0x0216]},
|
||||||
|
|
||||||
}
|
}
|
||||||
EBOOK_DIR_MAIN = ['eBooks/import', 'wordplayer/calibretransfer', 'Books']
|
EBOOK_DIR_MAIN = ['eBooks/import', 'wordplayer/calibretransfer', 'Books']
|
||||||
EXTRA_CUSTOMIZATION_MESSAGE = _('Comma separated list of directories to '
|
EXTRA_CUSTOMIZATION_MESSAGE = _('Comma separated list of directories to '
|
||||||
@ -61,18 +64,19 @@ class ANDROID(USBMS):
|
|||||||
EXTRA_CUSTOMIZATION_DEFAULT = ', '.join(EBOOK_DIR_MAIN)
|
EXTRA_CUSTOMIZATION_DEFAULT = ', '.join(EBOOK_DIR_MAIN)
|
||||||
|
|
||||||
VENDOR_NAME = ['HTC', 'MOTOROLA', 'GOOGLE_', 'ANDROID', 'ACER',
|
VENDOR_NAME = ['HTC', 'MOTOROLA', 'GOOGLE_', 'ANDROID', 'ACER',
|
||||||
'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE']
|
'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS']
|
||||||
WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
|
WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
|
||||||
'__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
|
'__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
|
||||||
'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
|
'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
|
||||||
'SCH-I500_CARD', 'SPH-D700_CARD', 'MB810', 'GT-P1000', 'DESIRE',
|
'SCH-I500_CARD', 'SPH-D700_CARD', 'MB810', 'GT-P1000', 'DESIRE',
|
||||||
'SGH-T849', '_MB300']
|
'SGH-T849', '_MB300', 'A70S']
|
||||||
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
|
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
|
||||||
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD']
|
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
|
||||||
|
'A70S']
|
||||||
|
|
||||||
OSX_MAIN_MEM = 'HTC Android Phone Media'
|
OSX_MAIN_MEM = 'Android Device Main Memory'
|
||||||
|
|
||||||
MAIN_MEMORY_VOLUME_LABEL = 'Android Phone Internal Memory'
|
MAIN_MEMORY_VOLUME_LABEL = 'Android Device Main Memory'
|
||||||
|
|
||||||
SUPPORTS_SUB_DIRS = True
|
SUPPORTS_SUB_DIRS = True
|
||||||
|
|
||||||
|
@ -76,12 +76,23 @@ class PRS505(USBMS):
|
|||||||
'sending DRMed books in which you cannot change the cover.'
|
'sending DRMed books in which you cannot change the cover.'
|
||||||
' WARNING: This option should only be used with newer '
|
' WARNING: This option should only be used with newer '
|
||||||
'SONY readers: 350, 650, 950 and newer.'),
|
'SONY readers: 350, 650, 950 and newer.'),
|
||||||
|
_('Refresh separate covers when using automatic management (newer readers)') +
|
||||||
|
':::' +
|
||||||
|
_('Set this option to have separate book covers uploaded '
|
||||||
|
'every time you connect your device. Unset this option if '
|
||||||
|
'you have so many books on the reader that performance is '
|
||||||
|
'unacceptable.')
|
||||||
]
|
]
|
||||||
EXTRA_CUSTOMIZATION_DEFAULT = [
|
EXTRA_CUSTOMIZATION_DEFAULT = [
|
||||||
', '.join(['series', 'tags']),
|
', '.join(['series', 'tags']),
|
||||||
|
False,
|
||||||
False
|
False
|
||||||
]
|
]
|
||||||
|
|
||||||
|
OPT_COLLECTIONS = 0
|
||||||
|
OPT_UPLOAD_COVERS = 1
|
||||||
|
OPT_REFRESH_COVERS = 2
|
||||||
|
|
||||||
plugboard = None
|
plugboard = None
|
||||||
plugboard_func = None
|
plugboard_func = None
|
||||||
|
|
||||||
@ -171,7 +182,7 @@ class PRS505(USBMS):
|
|||||||
opts = self.settings()
|
opts = self.settings()
|
||||||
if opts.extra_customization:
|
if opts.extra_customization:
|
||||||
collections = [x.strip() for x in
|
collections = [x.strip() for x in
|
||||||
opts.extra_customization[0].split(',')]
|
opts.extra_customization[self.OPT_COLLECTIONS].split(',')]
|
||||||
else:
|
else:
|
||||||
collections = []
|
collections = []
|
||||||
debug_print('PRS505: collection fields:', collections)
|
debug_print('PRS505: collection fields:', collections)
|
||||||
@ -183,6 +194,20 @@ class PRS505(USBMS):
|
|||||||
c.update(blists, collections, pb)
|
c.update(blists, collections, pb)
|
||||||
c.write()
|
c.write()
|
||||||
|
|
||||||
|
if opts.extra_customization[self.OPT_REFRESH_COVERS]:
|
||||||
|
debug_print('PRS505: uploading covers in sync_booklists')
|
||||||
|
for idx,bl in blists.items():
|
||||||
|
prefix = self._card_a_prefix if idx == 1 else \
|
||||||
|
self._card_b_prefix if idx == 2 \
|
||||||
|
else self._main_prefix
|
||||||
|
for book in bl:
|
||||||
|
p = os.path.join(prefix, book.lpath)
|
||||||
|
self._upload_cover(os.path.dirname(p),
|
||||||
|
os.path.splitext(os.path.basename(p))[0],
|
||||||
|
book, p)
|
||||||
|
else:
|
||||||
|
debug_print('PRS505: NOT uploading covers in sync_booklists')
|
||||||
|
|
||||||
USBMS.sync_booklists(self, booklists, end_session=end_session)
|
USBMS.sync_booklists(self, booklists, end_session=end_session)
|
||||||
debug_print('PRS505: finished sync_booklists')
|
debug_print('PRS505: finished sync_booklists')
|
||||||
|
|
||||||
@ -199,11 +224,14 @@ class PRS505(USBMS):
|
|||||||
|
|
||||||
def upload_cover(self, path, filename, metadata, filepath):
|
def upload_cover(self, path, filename, metadata, filepath):
|
||||||
opts = self.settings()
|
opts = self.settings()
|
||||||
if not opts.extra_customization[1]:
|
if not opts.extra_customization[self.OPT_UPLOAD_COVERS]:
|
||||||
# Building thumbnails disabled
|
# Building thumbnails disabled
|
||||||
debug_print('PRS505: not uploading covers')
|
debug_print('PRS505: not uploading cover')
|
||||||
return
|
return
|
||||||
debug_print('PRS505: uploading covers')
|
debug_print('PRS505: uploading cover')
|
||||||
|
self._upload_cover(path, filename, metadata, filepath)
|
||||||
|
|
||||||
|
def _upload_cover(self, path, filename, metadata, filepath):
|
||||||
if metadata.thumbnail and metadata.thumbnail[-1]:
|
if metadata.thumbnail and metadata.thumbnail[-1]:
|
||||||
path = path.replace('/', os.sep)
|
path = path.replace('/', os.sep)
|
||||||
is_main = path.startswith(self._main_prefix)
|
is_main = path.startswith(self._main_prefix)
|
||||||
|
@ -191,15 +191,15 @@ class PreProcessor(object):
|
|||||||
blanklines = "\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*"
|
blanklines = "\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*"
|
||||||
line_opening = "<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*"
|
line_opening = "<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*"
|
||||||
txt_line_wrap = u"((\u0020|\u0009)*\n){1,4}"
|
txt_line_wrap = u"((\u0020|\u0009)*\n){1,4}"
|
||||||
|
|
||||||
unwrap_regex = lookahead+line_ending+blanklines+line_opening
|
unwrap_regex = lookahead+line_ending+blanklines+line_opening
|
||||||
if format == 'txt':
|
if format == 'txt':
|
||||||
unwrap_regex = lookahead+txt_line_wrap
|
unwrap_regex = lookahead+txt_line_wrap
|
||||||
|
|
||||||
unwrap = re.compile(u"%s" % unwrap_regex, re.UNICODE)
|
unwrap = re.compile(u"%s" % unwrap_regex, re.UNICODE)
|
||||||
content = unwrap.sub(' ', content)
|
content = unwrap.sub(' ', content)
|
||||||
return content
|
return content
|
||||||
|
|
||||||
|
|
||||||
def __call__(self, html):
|
def __call__(self, html):
|
||||||
self.log("********* Preprocessing HTML *********")
|
self.log("********* Preprocessing HTML *********")
|
||||||
|
@ -296,7 +296,7 @@ class RTFInput(InputFormatPlugin):
|
|||||||
u'<p>\u00a0</p>\n'.encode('utf-8'), res)
|
u'<p>\u00a0</p>\n'.encode('utf-8'), res)
|
||||||
if self.opts.preprocess_html:
|
if self.opts.preprocess_html:
|
||||||
preprocessor = PreProcessor(self.opts, log=getattr(self, 'log', None))
|
preprocessor = PreProcessor(self.opts, log=getattr(self, 'log', None))
|
||||||
res = preprocessor(res)
|
res = preprocessor(res.decode('utf-8')).encode('utf-8')
|
||||||
f.write(res)
|
f.write(res)
|
||||||
self.write_inline_css(inline_class, border_styles)
|
self.write_inline_css(inline_class, border_styles)
|
||||||
stream.seek(0)
|
stream.seek(0)
|
||||||
|
@ -53,7 +53,7 @@ class TXTInput(InputFormatPlugin):
|
|||||||
def convert(self, stream, options, file_ext, log,
|
def convert(self, stream, options, file_ext, log,
|
||||||
accelerators):
|
accelerators):
|
||||||
log.debug('Reading text from file...')
|
log.debug('Reading text from file...')
|
||||||
|
|
||||||
txt = stream.read()
|
txt = stream.read()
|
||||||
# Get the encoding of the document.
|
# Get the encoding of the document.
|
||||||
if options.input_encoding:
|
if options.input_encoding:
|
||||||
@ -80,7 +80,7 @@ class TXTInput(InputFormatPlugin):
|
|||||||
# Get length for hyphen removal and punctuation unwrap
|
# Get length for hyphen removal and punctuation unwrap
|
||||||
docanalysis = DocAnalysis('txt', txt)
|
docanalysis = DocAnalysis('txt', txt)
|
||||||
length = docanalysis.line_length(.5)
|
length = docanalysis.line_length(.5)
|
||||||
|
|
||||||
if options.formatting_type == 'auto':
|
if options.formatting_type == 'auto':
|
||||||
options.formatting_type = detect_formatting_type(txt)
|
options.formatting_type = detect_formatting_type(txt)
|
||||||
|
|
||||||
@ -122,7 +122,7 @@ class TXTInput(InputFormatPlugin):
|
|||||||
txt = preprocessor.punctuation_unwrap(length, txt, 'txt')
|
txt = preprocessor.punctuation_unwrap(length, txt, 'txt')
|
||||||
|
|
||||||
flow_size = getattr(options, 'flow_size', 0)
|
flow_size = getattr(options, 'flow_size', 0)
|
||||||
|
|
||||||
if options.formatting_type == 'heuristic':
|
if options.formatting_type == 'heuristic':
|
||||||
html = convert_heuristic(txt, epub_split_size_kb=flow_size)
|
html = convert_heuristic(txt, epub_split_size_kb=flow_size)
|
||||||
else:
|
else:
|
||||||
|
@ -98,9 +98,10 @@ class AumSortedConcatenate(object):
|
|||||||
|
|
||||||
def finalize(self):
|
def finalize(self):
|
||||||
keys = self.ans.keys()
|
keys = self.ans.keys()
|
||||||
if len(keys) == 0:
|
l = len(keys)
|
||||||
return None
|
if l == 0:
|
||||||
if len(keys) == 1:
|
return 'Unknown:::Unknown'
|
||||||
|
if l == 1:
|
||||||
return self.ans[keys[0]]
|
return self.ans[keys[0]]
|
||||||
return ':#:'.join([self.ans[v] for v in sorted(keys)])
|
return ':#:'.join([self.ans[v] for v in sorted(keys)])
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user