diff --git a/resources/recipes/lifehacker.recipe b/resources/recipes/lifehacker.recipe index 7fede310b1..42e32497be 100644 --- a/resources/recipes/lifehacker.recipe +++ b/resources/recipes/lifehacker.recipe @@ -8,7 +8,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class Lifehacker(BasicNewsRecipe): title = 'Lifehacker' - __author__ = 'NA' + __author__ = 'Kovid Goyal' description = "Computers make us more productive. Yeah, right. Lifehacker recommends the software downloads and web sites that actually save time. Don't live to geek; geek to live." publisher = 'lifehacker.com' category = 'news, IT, Internet, gadgets, tips and tricks, howto, diy' @@ -32,14 +32,20 @@ class Lifehacker(BasicNewsRecipe): , 'language' : language } - remove_attributes = ['width','height'] - keep_only_tags = [dict(attrs={'class':'content permalink'})] + remove_attributes = ['width', 'height', 'style'] remove_tags_before = dict(name='h1') - remove_tags = [dict(attrs={'class':'contactinfo'})] - remove_tags_after = dict(attrs={'class':'contactinfo'}) + keep_only_tags = [dict(id='container')] + remove_tags_after = dict(attrs={'class':'post-body'}) + remove_tags = [ + dict(id="sharemenu"), + {'class': 'related'}, + ] feeds = [(u'Articles', u'http://feeds.gawker.com/lifehacker/full')] def preprocess_html(self, soup): return self.adeify_images(soup) + def print_version(self, url): + return url.replace('#!', '?_escaped_fragment_=') + diff --git a/resources/recipes/nytimes_sub.recipe b/resources/recipes/nytimes_sub.recipe index 7f73664660..4077065d91 100644 --- a/resources/recipes/nytimes_sub.recipe +++ b/resources/recipes/nytimes_sub.recipe @@ -668,7 +668,7 @@ class NYTimes(BasicNewsRecipe): try: #remove "Related content" bar - runAroundsFound = soup.findAll('div',{'class':['articleInline runaroundLeft','articleInline doubleRule runaroundLeft','articleInline runaroundLeft firstArticleInline','articleInline runaroundLeft ']}) + runAroundsFound = soup.findAll('div',{'class':['articleInline 
runaroundLeft','articleInline doubleRule runaroundLeft','articleInline runaroundLeft firstArticleInline','articleInline runaroundLeft ','articleInline runaroundLeft lastArticleInline']}) if runAroundsFound: for runAround in runAroundsFound: #find all section headers diff --git a/resources/recipes/workers_world.recipe b/resources/recipes/workers_world.recipe new file mode 100644 index 0000000000..1967b8e76c --- /dev/null +++ b/resources/recipes/workers_world.recipe @@ -0,0 +1,26 @@ +from calibre.web.feeds.recipes import BasicNewsRecipe + +class WorkersWorld(BasicNewsRecipe): + + title = u'Workers World' + description = u'Socialist news and analysis' + __author__ = u'urslnx' + no_stylesheets = True + use_embedded_content = False + remove_javascript = True + oldest_article = 7 + max_articles_per_feed = 100 + encoding = 'utf8' + publisher = 'workers.org' + category = 'news, politics, USA, world' + language = 'en' + publication_type = 'newsportal' + extra_css = ' body{ font-family: Verdana,Arial,Helvetica,sans-serif; } h1{ font-size: x-large; text-align: left; margin-top:0.5em; margin-bottom:0.25em; } h2{ font-size: large; } p{ text-align: left; } .published{ font-size: small; } .byline{ font-size: small; } .copyright{ font-size: small; } ' + remove_tags_before = dict(name='div', attrs={'id':'evernote'}) + remove_tags_after = dict(name='div', attrs={'id':'footer'}) + + masthead_url='http://www.workers.org/graphics/wwlogo300.gif' + cover_url = 'http://www.workers.org/pdf/current.jpg' + feeds = [(u'Headlines', u'http://www.workers.org/rss/nonstandard_rss.xml'), +] + diff --git a/src/calibre/customize/__init__.py b/src/calibre/customize/__init__.py index 13e1f20a2d..1f44eb4ae2 100644 --- a/src/calibre/customize/__init__.py +++ b/src/calibre/customize/__init__.py @@ -90,6 +90,11 @@ class Plugin(object): # {{{ an optional method validate() that takes no arguments and is called immediately after the user clicks OK. Changes are applied if and only if the method returns True. 
+ + If for some reason you cannot perform the configuration at this time, + return a tuple of two strings (message, details), these will be + displayed as a warning dialog to the user and the process will be + aborted. ''' raise NotImplementedError() @@ -133,6 +138,12 @@ class Plugin(object): # {{{ except NotImplementedError: config_widget = None + if isinstance(config_widget, tuple): + from calibre.gui2 import warning_dialog + warning_dialog(parent, _('Cannot configure'), config_widget[0], + det_msg=config_widget[1], show=True) + return False + if config_widget is not None: v.addWidget(config_widget) v.addWidget(button_box) diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index 3ccc07040b..1dd575f45b 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -511,14 +511,14 @@ from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \ from calibre.ebooks.metadata.douban import DoubanBooks from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers from calibre.ebooks.metadata.covers import OpenLibraryCovers, \ - LibraryThingCovers, DoubanCovers + AmazonCovers, DoubanCovers from calibre.library.catalog import CSV_XML, EPUB_MOBI, BIBTEX from calibre.ebooks.epub.fix.unmanifested import Unmanifested from calibre.ebooks.epub.fix.epubcheck import Epubcheck plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon, KentDistrictLibrary, DoubanBooks, NiceBooks, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested, - Epubcheck, OpenLibraryCovers, LibraryThingCovers, DoubanCovers, + Epubcheck, OpenLibraryCovers, AmazonCovers, DoubanCovers, NiceBooksCovers] plugins += [ ComicInput, diff --git a/src/calibre/devices/android/driver.py b/src/calibre/devices/android/driver.py index baefdfc41d..53c73b01a0 100644 --- a/src/calibre/devices/android/driver.py +++ b/src/calibre/devices/android/driver.py @@ -19,7 +19,7 @@ class ANDROID(USBMS): VENDOR_ID = { # HTC - 0x0bb4 : { 0x0c02 : [0x100, 
0x0227, 0x0226], + 0x0bb4 : { 0x0c02 : [0x100, 0x0227, 0x0226, 0x222], 0x0c01 : [0x100, 0x0227, 0x0226], 0x0ff9 : [0x0100, 0x0227, 0x0226], 0x0c87 : [0x0100, 0x0227, 0x0226], diff --git a/src/calibre/devices/apple/driver.py b/src/calibre/devices/apple/driver.py index 369c470e2b..cc4d39d3c5 100644 --- a/src/calibre/devices/apple/driver.py +++ b/src/calibre/devices/apple/driver.py @@ -39,6 +39,7 @@ if iswindows: class DriverBase(DeviceConfig, DevicePlugin): # Needed for config_widget to work FORMATS = ['epub', 'pdf'] + USER_CAN_ADD_NEW_FORMATS = False SUPPORTS_SUB_DIRS = True # To enable second checkbox in customize widget @classmethod diff --git a/src/calibre/devices/bambook/driver.py b/src/calibre/devices/bambook/driver.py index e7fa66c939..3cc0245cf7 100644 --- a/src/calibre/devices/bambook/driver.py +++ b/src/calibre/devices/bambook/driver.py @@ -32,6 +32,7 @@ class BAMBOOK(DeviceConfig, DevicePlugin): ip = None FORMATS = [ "snb" ] + USER_CAN_ADD_NEW_FORMATS = False VENDOR_ID = 0x230b PRODUCT_ID = 0x0001 BCD = None @@ -421,7 +422,7 @@ class BAMBOOK(DeviceConfig, DevicePlugin): from calibre.gui2.device_drivers.configwidget import ConfigWidget cw = ConfigWidget(cls.settings(), cls.FORMATS, cls.SUPPORTS_SUB_DIRS, cls.MUST_READ_METADATA, cls.SUPPORTS_USE_AUTHOR_SORT, - cls.EXTRA_CUSTOMIZATION_MESSAGE) + cls.EXTRA_CUSTOMIZATION_MESSAGE, cls) # Turn off the Save template cw.opt_save_template.setVisible(False) cw.label.setVisible(False) diff --git a/src/calibre/devices/jetbook/driver.py b/src/calibre/devices/jetbook/driver.py index f108de3347..0d328ba637 100644 --- a/src/calibre/devices/jetbook/driver.py +++ b/src/calibre/devices/jetbook/driver.py @@ -93,11 +93,11 @@ class MIBUK(USBMS): VENDOR_ID = [0x0525] PRODUCT_ID = [0xa4a5] - BCD = [0x314] + BCD = [0x314, 0x319] SUPPORTS_SUB_DIRS = True - VENDOR_NAME = 'LINUX' - WINDOWS_MAIN_MEM = 'WOLDERMIBUK' + VENDOR_NAME = ['LINUX', 'FILE_BAC'] + WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['WOLDERMIBUK', 'KED_STORAGE_GADG'] class 
JETBOOK_MINI(USBMS): diff --git a/src/calibre/devices/kindle/apnx.py b/src/calibre/devices/kindle/apnx.py index d8dc9709d9..c98fe7a7fa 100644 --- a/src/calibre/devices/kindle/apnx.py +++ b/src/calibre/devices/kindle/apnx.py @@ -11,44 +11,42 @@ Generates and writes an APNX page mapping file. import struct import uuid +from calibre.ebooks.mobi.reader import MobiReader from calibre.ebooks.pdb.header import PdbHeaderReader +from calibre.utils.logging import default_log class APNXBuilder(object): ''' - 2300 characters of uncompressed text per page. This is - not meant to map 1 to 1 to a print book but to be a - close enough measure. - - A test book was chosen and the characters were counted - on one page. This number was round to 2240 then 60 - characters of markup were added to the total giving - 2300. - - Uncompressed text length is used because it's easily - accessible in MOBI files (part of the header). Also, - It's faster to work off of the length then to - decompress and parse the actual text. - - A better but much more resource intensive and slower - method to calculate the page length would be to parse - the uncompressed text. For each paragraph we would - want to find how many lines it would occupy in a paper - back book. 70 characters per line and 32 lines per page. - So divide the number of characters (minus markup) in - each paragraph by 70. If there are less than 70 - characters in the paragraph then it is 1 line. Then, - count every 32 lines and mark that location as a page. + Create an APNX file using a pseudo page mapping. ''' - def write_apnx(self, mobi_file_path, apnx_path): + def write_apnx(self, mobi_file_path, apnx_path, accurate=True): + # Check that this is really a MOBI file. with open(mobi_file_path, 'rb') as mf: - phead = PdbHeaderReader(mf) - r0 = phead.section_data(0) - text_length = struct.unpack('>I', r0[4:8])[0] + ident = PdbHeaderReader(mf).identity() + if ident != 'BOOKMOBI': + raise Exception(_('Not a valid MOBI file. 
Reports identity of %s' % ident)) - pages = self.get_pages(text_length) + # Get the pages depending on the chosen parser + pages = [] + if accurate: + try: + pages = self.get_pages_accurate(mobi_file_path) + except: + # Fall back to the fast parser if we can't + # use the accurate one. Typically this is + # due to the file having DRM. + pages = self.get_pages_fast(mobi_file_path) + else: + pages = self.get_pages_fast(mobi_file_path) + + if not pages: + raise Exception(_('Could not generate page mapping.')) + + # Generate the APNX file from the page mapping. apnx = self.generate_apnx(pages) + # Write the APNX. with open(apnx_path, 'wb') as apnxf: apnxf.write(apnx) @@ -73,18 +71,126 @@ class APNXBuilder(object): apnx += struct.pack('>H', 32) apnx += page_header - # write page values to apnx + # Write page values to APNX. for page in pages: - apnx += struct.pack('>L', page) + apnx += struct.pack('>I', page) return apnx - def get_pages(self, text_length): + def get_pages_fast(self, mobi_file_path): + ''' + 2300 characters of uncompressed text per page. This is + not meant to map 1 to 1 to a print book but to be a + close enough measure. + + A test book was chosen and the characters were counted + on one page. This number was rounded to 2240 then 60 + characters of markup were added to the total giving + 2300. + + Uncompressed text length is used because it's easily + accessible in MOBI files (part of the header). Also, + it's faster to work off of the length than to + decompress and parse the actual text. + ''' + text_length = 0 pages = [] count = 0 + with open(mobi_file_path, 'rb') as mf: + phead = PdbHeaderReader(mf) + r0 = phead.section_data(0) + text_length = struct.unpack('>I', r0[4:8])[0] + while count < text_length: pages.append(count) count += 2300 return pages + + def get_pages_accurate(self, mobi_file_path): + ''' + A more accurate but much more resource intensive and slower + method to calculate the page length. + + Parses the uncompressed text. 
In an average paperback book + there are 32 lines per page and a maximum of 70 characters + per line. + + Each paragraph starts a new line and every 70 characters + (minus markup) in a paragraph starts a new line. The + position after every 30 lines will be marked as a new + page. + + This can be made more accurate by accounting for +
as a new page marker. + And'+ans%('', '')
-
# }}}
def check_for_cover(mi, timeout=5.): # {{{
diff --git a/src/calibre/ebooks/mobi/mobiml.py b/src/calibre/ebooks/mobi/mobiml.py
index 17a14d9e12..189739986d 100644
--- a/src/calibre/ebooks/mobi/mobiml.py
+++ b/src/calibre/ebooks/mobi/mobiml.py
@@ -367,6 +367,9 @@ class MobiMLizer(object):
istate.attrib['src'] = elem.attrib['src']
istate.attrib['align'] = 'baseline'
cssdict = style.cssdict()
+ valign = cssdict.get('vertical-align', None)
+ if valign in ('top', 'bottom', 'middle'):
+ istate.attrib['align'] = valign
for prop in ('width', 'height'):
if cssdict[prop] != 'auto':
value = style[prop]
@@ -451,8 +454,11 @@ class MobiMLizer(object):
text = COLLAPSE.sub(' ', elem.text)
valign = style['vertical-align']
not_baseline = valign in ('super', 'sub', 'text-top',
- 'text-bottom')
- vtag = 'sup' if valign in ('super', 'text-top') else 'sub'
+ 'text-bottom') or (
+ isinstance(valign, (float, int)) and abs(valign) != 0)
+ issup = valign in ('super', 'text-top') or (
+ isinstance(valign, (float, int)) and valign > 0)
+ vtag = 'sup' if issup else 'sub'
if not_baseline and not ignore_valign and tag not in NOT_VTAGS and not isblock:
nroot = etree.Element(XHTML('html'), nsmap=MOBI_NSMAP)
vbstate = BlockState(etree.SubElement(nroot, XHTML('body')))
diff --git a/src/calibre/ebooks/oeb/transforms/flatcss.py b/src/calibre/ebooks/oeb/transforms/flatcss.py
index 653aa4533b..db6bdf0a7a 100644
--- a/src/calibre/ebooks/oeb/transforms/flatcss.py
+++ b/src/calibre/ebooks/oeb/transforms/flatcss.py
@@ -207,7 +207,14 @@ class CSSFlattener(object):
font_size = self.sbase if self.sbase is not None else \
self.context.source.fbase
if 'align' in node.attrib:
- cssdict['text-align'] = node.attrib['align']
+ if tag != 'img':
+ cssdict['text-align'] = node.attrib['align']
+ else:
+ val = node.attrib['align']
+ if val in ('middle', 'bottom', 'top'):
+ cssdict['vertical-align'] = val
+ elif val in ('left', 'right'):
+ cssdict['text-align'] = val
del node.attrib['align']
if node.tag == XHTML('font'):
node.tag = XHTML('span')
diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py
index 8ab1524b02..1c49eb9b35 100644
--- a/src/calibre/ebooks/txt/input.py
+++ b/src/calibre/ebooks/txt/input.py
@@ -4,10 +4,9 @@ __license__ = 'GPL 3'
__copyright__ = '2009, John Schember