diff --git a/resources/recipes/cnetjapan.recipe b/resources/recipes/cnetjapan.recipe index 1058b90401..b57bce5b97 100644 --- a/resources/recipes/cnetjapan.recipe +++ b/resources/recipes/cnetjapan.recipe @@ -11,7 +11,7 @@ class CNetJapan(BasicNewsRecipe): (u'CNet Blog', u'http://feed.japan.cnet.com/rss/blog/index.rdf') ] language = 'ja' - encoding = 'Shift_JIS' + encoding = 'utf-8' remove_javascript = True preprocess_regexps = [ diff --git a/resources/recipes/tyzden.recipe b/resources/recipes/tyzden.recipe new file mode 100644 index 0000000000..c206244ff6 --- /dev/null +++ b/resources/recipes/tyzden.recipe @@ -0,0 +1,80 @@ +#!/usr/bin/env python +__license__ = 'GPL v3' +__copyright__ = '2011, Miroslav Vasko zemiak@gmail.com' + +''' +.tyzden, a weekly news magazine (a week old issue) +''' +from calibre import strftime +from calibre.web.feeds.news import BasicNewsRecipe +from datetime import date +import re + +class TyzdenRecipe(BasicNewsRecipe): + __license__ = 'GPL v3' + __author__ = 'zemiak' + language = 'sk' + version = 1 + + publisher = u'www.tyzden.sk' + category = u'Magazine' + description = u'A conservative weekly magazine. The latest free issue' + + today = date.today() + iso = today.isocalendar() + year = iso[0] + weeknum = iso[1] + + if (weeknum > 1): + weeknum -= 1 + + title = u'.tyzden ' + str(weeknum) + '/' + str(year) + + base_url_path = 'http://www.tyzden.sk/casopis/' + str(year) + '/' + str(weeknum) + base_url = base_url_path + '.html' + + oldest_article = 20 + max_articles_per_feed = 100 + remove_javascript = True + + use_embedded_content = False + no_stylesheets = True + + keep_only_tags = [] + keep_only_tags.append(dict(name = 'h1')) + keep_only_tags.append(dict(name = 'div', attrs = {'class': 'text_area top_nofoto'})) + keep_only_tags.append(dict(name = 'div', attrs = {'class': 'text_block'})) + + remove_tags_after = [dict(name = 'div', attrs = {'class': 'text_block'})] + + def find_sections(self): + soup = self.index_to_soup(self.base_url) + # find cover pic + imgdiv = soup.find('div', attrs = {'class': 'foto'}) + if imgdiv is not None: + img = imgdiv.find('img') + if img is not None: + self.cover_url = 'http://www.tyzden.sk/' + img['src'] + # end find cover pic + + for s in soup.findAll('a', attrs={'href': re.compile(r'rubrika/.*')}): + yield (self.tag_to_string(s), s) + + def find_articles(self, soup): + for art in soup.findAllNext('a'): + if (not art['href'].startswith('casopis/')): + break; + + url = art['href'] + title = self.tag_to_string(art) + yield { + 'title': title, 'url':self.base_url_path + '/' + url, 'description':title, + 'date' : strftime('%a, %d %b'), + } + + def parse_index(self): + feeds = [] + for title, soup in self.find_sections(): + feeds.append((title, list(self.find_articles(soup)))) + + return feeds diff --git a/src/calibre/devices/android/driver.py b/src/calibre/devices/android/driver.py index 73c930778e..5a82882dfa 100644 --- a/src/calibre/devices/android/driver.py +++ b/src/calibre/devices/android/driver.py @@ -53,6 +53,9 @@ class ANDROID(USBMS): # LG 0x1004 : { 0x61cc : [0x100] }, + # Archos + 0x0e79 : { 0x1420 : [0x0216]}, + } EBOOK_DIR_MAIN = ['eBooks/import', 'wordplayer/calibretransfer', 'Books'] EXTRA_CUSTOMIZATION_MESSAGE = _('Comma separated list of directories to ' @@ -61,18 +64,19 @@ class ANDROID(USBMS): EXTRA_CUSTOMIZATION_DEFAULT = ', '.join(EBOOK_DIR_MAIN) VENDOR_NAME = ['HTC', 'MOTOROLA', 'GOOGLE_', 'ANDROID', 'ACER', - 'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE'] + 'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS'] WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE', '__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897', 'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'SCH-I500_CARD', 'SPH-D700_CARD', 'MB810', 'GT-P1000', 'DESIRE', - 'SGH-T849', '_MB300'] + 'SGH-T849', '_MB300', 'A70S'] WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897', - 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD'] + 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD', + 'A70S'] - OSX_MAIN_MEM = 'HTC Android Phone Media' + OSX_MAIN_MEM = 'Android Device Main Memory' - MAIN_MEMORY_VOLUME_LABEL = 'Android Phone Internal Memory' + MAIN_MEMORY_VOLUME_LABEL = 'Android Device Main Memory' SUPPORTS_SUB_DIRS = True diff --git a/src/calibre/devices/prs505/driver.py b/src/calibre/devices/prs505/driver.py index 98a7241a36..874fbe4b10 100644 --- a/src/calibre/devices/prs505/driver.py +++ b/src/calibre/devices/prs505/driver.py @@ -76,12 +76,23 @@ class PRS505(USBMS): 'sending DRMed books in which you cannot change the cover.' ' WARNING: This option should only be used with newer ' 'SONY readers: 350, 650, 950 and newer.'), + _('Refresh separate covers when using automatic management (newer readers)') + + ':::' + + _('Set this option to have separate book covers uploaded ' + 'every time you connect your device. Unset this option if ' + 'you have so many books on the reader that performance is ' + 'unacceptable.') ] EXTRA_CUSTOMIZATION_DEFAULT = [ ', '.join(['series', 'tags']), + False, False ] + OPT_COLLECTIONS = 0 + OPT_UPLOAD_COVERS = 1 + OPT_REFRESH_COVERS = 2 + plugboard = None plugboard_func = None @@ -171,7 +182,7 @@ class PRS505(USBMS): opts = self.settings() if opts.extra_customization: collections = [x.strip() for x in - opts.extra_customization[0].split(',')] + opts.extra_customization[self.OPT_COLLECTIONS].split(',')] else: collections = [] debug_print('PRS505: collection fields:', collections) @@ -183,6 +194,20 @@ class PRS505(USBMS): c.update(blists, collections, pb) c.write() + if opts.extra_customization[self.OPT_REFRESH_COVERS]: + debug_print('PRS505: uploading covers in sync_booklists') + for idx,bl in blists.items(): + prefix = self._card_a_prefix if idx == 1 else \ + self._card_b_prefix if idx == 2 \ + else self._main_prefix + for book in bl: + p = os.path.join(prefix, book.lpath) + self._upload_cover(os.path.dirname(p), + os.path.splitext(os.path.basename(p))[0], + book, p) + else: + debug_print('PRS505: NOT uploading covers in sync_booklists') + USBMS.sync_booklists(self, booklists, end_session=end_session) debug_print('PRS505: finished sync_booklists') @@ -199,11 +224,14 @@ class PRS505(USBMS): def upload_cover(self, path, filename, metadata, filepath): opts = self.settings() - if not opts.extra_customization[1]: + if not opts.extra_customization[self.OPT_UPLOAD_COVERS]: # Building thumbnails disabled - debug_print('PRS505: not uploading covers') + debug_print('PRS505: not uploading cover') return - debug_print('PRS505: uploading covers') + debug_print('PRS505: uploading cover') + self._upload_cover(path, filename, metadata, filepath) + + def _upload_cover(self, path, filename, metadata, filepath): if metadata.thumbnail and metadata.thumbnail[-1]: path = path.replace('/', os.sep) is_main = path.startswith(self._main_prefix) diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py index 52d1bcc619..dac93fa2e2 100644 --- a/src/calibre/ebooks/conversion/utils.py +++ b/src/calibre/ebooks/conversion/utils.py @@ -191,15 +191,15 @@ class PreProcessor(object): blanklines = "\s*(?P<(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*\s*)\s*){0,3}\s*" line_opening = "<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*" txt_line_wrap = u"((\u0020|\u0009)*\n){1,4}" - + unwrap_regex = lookahead+line_ending+blanklines+line_opening if format == 'txt': unwrap_regex = lookahead+txt_line_wrap - + unwrap = re.compile(u"%s" % unwrap_regex, re.UNICODE) content = unwrap.sub(' ', content) return content - + def __call__(self, html): self.log("********* Preprocessing HTML *********") diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py index 8c7561f68c..5154373eda 100644 --- a/src/calibre/ebooks/rtf/input.py +++ b/src/calibre/ebooks/rtf/input.py @@ -296,7 +296,7 @@ class RTFInput(InputFormatPlugin): u'

\u00a0

\n'.encode('utf-8'), res) if self.opts.preprocess_html: preprocessor = PreProcessor(self.opts, log=getattr(self, 'log', None)) - res = preprocessor(res) + res = preprocessor(res.decode('utf-8')).encode('utf-8') f.write(res) self.write_inline_css(inline_class, border_styles) stream.seek(0) diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py index 3957391494..aaff8b55c0 100644 --- a/src/calibre/ebooks/txt/input.py +++ b/src/calibre/ebooks/txt/input.py @@ -53,7 +53,7 @@ class TXTInput(InputFormatPlugin): def convert(self, stream, options, file_ext, log, accelerators): log.debug('Reading text from file...') - + txt = stream.read() # Get the encoding of the document. if options.input_encoding: @@ -80,7 +80,7 @@ class TXTInput(InputFormatPlugin): # Get length for hyphen removal and punctuation unwrap docanalysis = DocAnalysis('txt', txt) length = docanalysis.line_length(.5) - + if options.formatting_type == 'auto': options.formatting_type = detect_formatting_type(txt) @@ -122,7 +122,7 @@ class TXTInput(InputFormatPlugin): txt = preprocessor.punctuation_unwrap(length, txt, 'txt') flow_size = getattr(options, 'flow_size', 0) - + if options.formatting_type == 'heuristic': html = convert_heuristic(txt, epub_split_size_kb=flow_size) else: diff --git a/src/calibre/library/sqlite.py b/src/calibre/library/sqlite.py index 75856dd0f6..83f19b8711 100644 --- a/src/calibre/library/sqlite.py +++ b/src/calibre/library/sqlite.py @@ -98,9 +98,10 @@ class AumSortedConcatenate(object): def finalize(self): keys = self.ans.keys() - if len(keys) == 0: - return None - if len(keys) == 1: + l = len(keys) + if l == 0: + return 'Unknown:::Unknown' + if l == 1: return self.ans[keys[0]] return ':#:'.join([self.ans[v] for v in sorted(keys)])