Merge upstream changes.

Marshall T. Vandegrift 2009-02-11 10:01:52 -05:00
commit f24f4f7b1a
108 changed files with 22472 additions and 16079 deletions

View File

@ -81,7 +81,8 @@ def freeze():
'PyQt4.QtScript.so', 'PyQt4.QtSql.so', 'PyQt4.QtTest.so', 'qt',
'glib', 'gobject']
packages = ['calibre', 'encodings', 'cherrypy', 'cssutils', 'xdg']
packages = ['calibre', 'encodings', 'cherrypy', 'cssutils', 'xdg',
'dateutil']
includes += ['calibre.web.feeds.recipes.'+r for r in recipe_modules]

View File

@ -342,6 +342,7 @@ def main():
'calibre.ebooks.lrf.any.*', 'calibre.ebooks.lrf.feeds.*',
'keyword', 'codeop', 'pydoc', 'readline',
'BeautifulSoup', 'calibre.ebooks.lrf.fonts.prs500.*',
'dateutil',
],
'packages' : ['PIL', 'Authorization', 'lxml'],
'excludes' : ['IPython'],

View File

@ -299,7 +299,6 @@ File ::2BCD9281-2CBC-CF0D-0E12-2CE11F6ED758 -name comic2epub.exe.local -parent 8
File ::EDE6F457-C83F-C5FA-9AF4-38FDFF17D929 -name PIL._imagingtk.pyd -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
File ::09D0906E-3611-3DB7-32CF-A140585694A7 -name win32pdh.pyd -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
File ::4C84F0DC-7157-0C90-2062-180139B03E25 -name IM_MOD_RL_rgb_.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
File ::F402F507-87C5-BDB1-80AE-AD3FF4A4BCE7 -name bzrlib._patiencediff_c.pyd -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
File ::A732EDE7-4796-241F-BECA-68E59F88F8AF -name lrs2lrf.exe -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
File ::69072379-7D16-B9F7-9F39-3E6403C48267 -name IM_MOD_RL_xbm_.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
File ::FBD11D98-D1E7-5DD9-BF02-01CE92518859 -name IM_MOD_RL_otb_.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
@ -365,7 +364,6 @@ File ::26741B21-C241-E100-8BB1-8B679BC3E662 -name configure.xml -parent 8E5D85A4
File ::7D491E89-C6D3-1E6E-F4BD-8E55260FE33E -name libexpat.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
File ::A4910EB3-0F1C-F6F0-CD2D-16A64BBAA92B -name calibre-fontconfig.exe.local -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
File ::8711327A-716D-B162-6AC6-2FB4AD071266 -name fb22lrf.exe -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
File ::0FDD3A7A-31F3-8089-CE32-D80EAA6F62B2 -name bzrlib._btree_serializer_c.pyd -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
File ::476CB977-5155-D56F-26CA-EB243AEBBA99 -name unrar.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
File ::2DA1CC8D-AF5C-3B03-2060-301DFE0356CC -name mobi2oeb.exe.local -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
File ::2E2A9EDA-5386-444E-8479-557386794552 -name IM_MOD_RL_uil_.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
@ -487,7 +485,6 @@ File ::AA761ACD-B728-2324-AA75-B20A2A79F125 -name lrf2lrs.exe -parent 8E5D85A4-7
File ::95434C76-22F5-B9CE-6194-6E1B1EE3232D -name IM_MOD_RL_info_.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
File ::AAF45D03-322F-5553-63A7-312DB754A20B -name _ctypes.pyd -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
File ::C3D351CA-A8D8-AB35-55D9-5AACF8DB37D1 -name python26.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
File ::2F90B52F-A728-2CA4-5688-0283674695B7 -name _elementtree.pyd -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
File ::B50B66A1-FB65-FAD5-1DD7-E894ACC07464 -name QtSvg4.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
File ::906FF13D-D993-7192-7EA5-6D15A5A24BFB -name CORE_RL_png_.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
File ::5D368661-6BF0-D6AF-7C1A-87646864EB4B -name delegates.xml -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
@ -552,7 +549,7 @@ SetupType ::D9ADE41C-B744-690C-2CED-CF826BF03D2E -setup Install -active Yes -pla
InstallComponent 3EA07B17-04D8-6508-B535-96CC7173B49A -setup Install -type pane -conditions D7F585DB-0DEC-A94E-DAB0-94D558D82764 -title {Welcome Screen} -component Welcome -command insert -active Yes -parent StandardInstall
Condition D7F585DB-0DEC-A94E-DAB0-94D558D82764 -active Yes -parent 3EA07B17-04D8-6508-B535-96CC7173B49A -title {Execute Script Condition} -component ExecuteScriptCondition -TreeObject::id D7F585DB-0DEC-A94E-DAB0-94D558D82764
InstallComponent 7CCDA4BB-861C-C21E-3011-E93DB58F07D6 -setup Install -type action -conditions ADBCD53E-C9A6-A3CA-1AAC-0DB0CE84F71E -title {Check for Previous Install} -component CheckForPreviousInstall -command reorder -active Yes -parent 3EA07B17-04D8-6508-B535-96CC7173B49A
InstallComponent 7CCDA4BB-861C-C21E-3011-E93DB58F07D6 -setup Install -type action -conditions ADBCD53E-C9A6-A3CA-1AAC-0DB0CE84F71E -title {Check for Previous Install} -component CheckForPreviousInstall -command insert -active Yes -parent 3EA07B17-04D8-6508-B535-96CC7173B49A
Condition ADBCD53E-C9A6-A3CA-1AAC-0DB0CE84F71E -active Yes -parent 7CCDA4BB-861C-C21E-3011-E93DB58F07D6 -title {Execute Script Condition} -component ExecuteScriptCondition -TreeObject::id ADBCD53E-C9A6-A3CA-1AAC-0DB0CE84F71E
InstallComponent 580ACF2C-517F-5E48-9DEF-7DAEFBA59FDD -setup Install -type action -conditions 6DE3B369-9D6B-6BC1-4EA0-2C54ECE159EB -title {Set Virtual Text} -component SetVirtualText -command insert -active Yes -parent 3EA07B17-04D8-6508-B535-96CC7173B49A
Condition 6DE3B369-9D6B-6BC1-4EA0-2C54ECE159EB -active Yes -parent 580ACF2C-517F-5E48-9DEF-7DAEFBA59FDD -title {String Is Condition} -component StringIsCondition -TreeObject::id 6DE3B369-9D6B-6BC1-4EA0-2C54ECE159EB

View File

@ -12,7 +12,7 @@ LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll'
PDFTOHTML = 'C:\\pdftohtml\\pdftohtml.exe'
IMAGEMAGICK_DIR = 'C:\\ImageMagick'
FONTCONFIG_DIR = 'C:\\fontconfig'
VC90 = r'C:\Program Files\Microsoft Visual Studio 9.0\VC\redist\x86\Microsoft.VC90.CRT'
VC90 = r'C:\VC90.CRT'
import sys, os, py2exe, shutil, zipfile, glob, subprocess, re
from distutils.core import setup
@ -179,7 +179,8 @@ def main(args=sys.argv):
'calibre.ebooks.lrf.fonts.prs500.*',
'PyQt4.QtWebKit', 'PyQt4.QtNetwork',
],
'packages' : ['PIL', 'lxml', 'cherrypy'],
'packages' : ['PIL', 'lxml', 'cherrypy',
'dateutil'],
'excludes' : ["Tkconstants", "Tkinter", "tcl",
"_imagingtk", "ImageTk", "FixTk"
],

View File

@ -21,6 +21,8 @@ import mechanize
mimetypes.add_type('application/epub+zip', '.epub')
mimetypes.add_type('text/x-sony-bbeb+xml', '.lrs')
mimetypes.add_type('application/xhtml+xml', '.xhtml')
mimetypes.add_type('image/svg+xml', '.svg')
mimetypes.add_type('application/x-sony-bbeb', '.lrf')
mimetypes.add_type('application/x-dtbncx+xml', '.ncx')
mimetypes.add_type('application/adobe-page-template+xml', '.xpgt')

View File

@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = 'calibre'
__version__ = '0.4.133'
__version__ = '0.4.136'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
'''
Various run time constants.

View File

@ -153,6 +153,7 @@ def set_file_type_metadata(stream, mi, ftype):
plugin.set_metadata(stream, mi, ftype.lower().strip())
break
except:
print 'Failed to set metadata for', repr(getattr(mi, 'title', ''))
traceback.print_exc()

View File

@ -17,8 +17,8 @@ class CYBOOKG3(USBMS):
# Be sure these have an entry in calibre.devices.mime
FORMATS = ['mobi', 'prc', 'html', 'pdf', 'rtf', 'txt']
VENDOR_ID = 0x0bda
PRODUCT_ID = 0x0703
VENDOR_ID = [0x0bda, 0x3034]
PRODUCT_ID = [0x0703, 0x1795]
BCD = [0x110, 0x132]
VENDOR_NAME = 'BOOKEEN'

View File

@ -12,8 +12,8 @@ class KINDLE(USBMS):
# Ordered list of supported formats
FORMATS = ['azw', 'mobi', 'prc', 'txt']
VENDOR_ID = 0x1949
PRODUCT_ID = 0x0001
VENDOR_ID = [0x1949]
PRODUCT_ID = [0x0001]
BCD = [0x399]
VENDOR_NAME = 'KINDLE'

View File

@ -248,15 +248,20 @@ class PRS505(Device):
time.sleep(3)
self.open_osx()
if self._card_prefix is not None:
cachep = os.path.join(self._card_prefix, self.CACHE_XML)
if not os.path.exists(cachep):
os.makedirs(os.path.dirname(cachep), mode=0777)
f = open(cachep, 'wb')
f.write(u'''<?xml version="1.0" encoding="UTF-8"?>
try:
cachep = os.path.join(self._card_prefix, self.CACHE_XML)
if not os.path.exists(cachep):
os.makedirs(os.path.dirname(cachep), mode=0777)
f = open(cachep, 'wb')
f.write(u'''<?xml version="1.0" encoding="UTF-8"?>
<cache xmlns="http://www.kinoma.com/FskCache/1">
</cache>
'''.encode('utf8'))
f.close()
f.close()
except:
self._card_prefix = None
import traceback
traceback.print_exc()
def set_progress_reporter(self, pr):
self.report_progress = pr

View File

@ -58,17 +58,20 @@ class DeviceScanner(object):
return False
def is_device_connected(self, device):
vendor_ids = device.VENDOR_ID if hasattr(device.VENDOR_ID, '__len__') else [device.VENDOR_ID]
product_ids = device.PRODUCT_ID if hasattr(device.PRODUCT_ID, '__len__') else [device.PRODUCT_ID]
if iswindows:
vid, pid = 'vid_%4.4x'%device.VENDOR_ID, 'pid_%4.4x'%device.PRODUCT_ID
vidd, pidd = 'vid_%i'%device.VENDOR_ID, 'pid_%i'%device.PRODUCT_ID
for device_id in self.devices:
if (vid in device_id or vidd in device_id) and (pid in device_id or pidd in device_id):
if self.test_bcd_windows(device_id, getattr(device, 'BCD', None)):
if device.can_handle(device_id):
return True
for vendor_id, product_id in zip(vendor_ids, product_ids):
vid, pid = 'vid_%4.4x'%vendor_id, 'pid_%4.4x'%product_id
vidd, pidd = 'vid_%i'%vendor_id, 'pid_%i'%product_id
for device_id in self.devices:
if (vid in device_id or vidd in device_id) and (pid in device_id or pidd in device_id):
if self.test_bcd_windows(device_id, getattr(device, 'BCD', None)):
if device.can_handle(device_id):
return True
else:
for vendor, product, bcdDevice in self.devices:
if device.VENDOR_ID == vendor and device.PRODUCT_ID == product:
if vendor in vendor_ids and product in product_ids:
if self.test_bcd(bcdDevice, getattr(device, 'BCD', None)):
if device.can_handle((vendor, product, bcdDevice)):
return True
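
The scanner hunk above lets a driver declare VENDOR_ID and PRODUCT_ID either as a single value or as a list. A minimal sketch of that normalization plus the membership test used on the non-Windows branch; FakeDevice is a made-up stand-in, not a real calibre driver:

    def as_id_list(x):
        # A bare integer has no __len__; a list or tuple of ids does.
        return x if hasattr(x, '__len__') else [x]

    class FakeDevice(object):          # illustrative stand-in for a driver class
        VENDOR_ID = [0x0bda, 0x3034]   # new style: several ids
        PRODUCT_ID = 0x0703            # old style: a single id still works

    def id_matches(device, vendor, product):
        return vendor in as_id_list(device.VENDOR_ID) and \
               product in as_id_list(device.PRODUCT_ID)

    print(id_matches(FakeDevice, 0x3034, 0x0703))   # True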

View File

@ -74,24 +74,27 @@ class Device(_Device):
def get_fdi(cls):
fdi = ''
fdi_base_values = dict(
app=__appname__,
deviceclass=cls.__name__,
vendor_id=hex(cls.VENDOR_ID),
product_id=hex(cls.PRODUCT_ID),
main_memory=cls.MAIN_MEMORY_VOLUME_LABEL,
storage_card=cls.STORAGE_CARD_VOLUME_LABEL,
)
if cls.BCD is None:
fdi_base_values['BCD_start'] = ''
fdi_base_values['BCD_end'] = ''
fdi = cls.FDI_TEMPLATE % fdi_base_values
else:
for bcd in cls.BCD:
fdi_bcd_values = fdi_base_values
fdi_bcd_values['BCD_start'] = cls.FDI_BCD_TEMPLATE % dict(bcd=hex(bcd))
fdi_bcd_values['BCD_end'] = '</match>'
fdi += cls.FDI_TEMPLATE % fdi_bcd_values
for vid in cls.VENDOR_ID:
for pid in cls.PRODUCT_ID:
fdi_base_values = dict(
app=__appname__,
deviceclass=cls.__name__,
vendor_id=hex(vid),
product_id=hex(pid),
main_memory=cls.MAIN_MEMORY_VOLUME_LABEL,
storage_card=cls.STORAGE_CARD_VOLUME_LABEL,
)
if cls.BCD is None:
fdi_base_values['BCD_start'] = ''
fdi_base_values['BCD_end'] = ''
fdi += cls.FDI_TEMPLATE % fdi_base_values
else:
for bcd in cls.BCD:
fdi_bcd_values = fdi_base_values
fdi_bcd_values['BCD_start'] = cls.FDI_BCD_TEMPLATE % dict(bcd=hex(bcd))
fdi_bcd_values['BCD_end'] = '</match>'
fdi += cls.FDI_TEMPLATE % fdi_bcd_values
return fdi
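
With vendor and product ids now being lists, get_fdi above emits one FDI block per (vendor id, product id) combination. A minimal sketch of that accumulation pattern; the template string is made up, not calibre's real FDI_TEMPLATE:

    TEMPLATE = '<match vendor="%(vendor_id)s" product="%(product_id)s"/>\n'

    def get_fdi(vendor_ids, product_ids):
        fdi = ''
        for vid in vendor_ids:
            for pid in product_ids:
                fdi += TEMPLATE % dict(vendor_id=hex(vid), product_id=hex(pid))
        return fdi

    print(get_fdi([0x0bda, 0x3034], [0x0703]))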

View File

@ -74,7 +74,9 @@ def check_links(opf_path, pretty_print):
html_files = []
for item in opf.itermanifest():
if 'html' in item.get('media-type', '').lower():
f = item.get('href').split('/')[-1].decode('utf-8')
f = item.get('href').split('/')[-1]
if isinstance(f, str):
f = f.decode('utf-8')
html_files.append(os.path.abspath(content(f)))
for path in html_files:

View File

@ -330,7 +330,8 @@ class PreProcessor(object):
sanitize_head),
# Convert all entities, since lxml doesn't handle them well
(re.compile(r'&(\S+?);'), convert_entities),
# Remove the <![if/endif tags inserted by everybody's darling, MS Word
(re.compile(r'(?i)<{0,1}!\[(end){0,1}if[^>]*>'), lambda match: ''),
]
# Fix pdftohtml markup
@ -467,7 +468,7 @@ class Parser(PreProcessor, LoggingInterface):
if self.htmlfile.is_binary:
raise ValueError('Not a valid HTML file: '+self.htmlfile.path)
src = open(self.htmlfile.path, 'rb').read().decode(self.htmlfile.encoding, 'replace').strip()
src = src.replace('\x00', '')
src = src.replace('\x00', '').replace('\r', ' ')
src = self.preprocess(src)
# lxml chokes on unicode input when it contains encoding declarations
for pat in ENCODING_PATS:

View File

@ -917,7 +917,8 @@ class HTMLConverter(object, LoggingInterface):
blockStyle=self.current_block.blockStyle)
def process_image(self, path, tag_css, width=None, height=None, dropcaps=False):
def process_image(self, path, tag_css, width=None, height=None,
dropcaps=False, rescale=False):
def detect_encoding(im):
fmt = im.format
if fmt == 'JPG':
@ -936,10 +937,6 @@ class HTMLConverter(object, LoggingInterface):
return
encoding = detect_encoding(im)
if width == None or height == None:
width, height = im.size
factor = 720./self.profile.dpi
def scale_image(width, height):
if width <= 0:
@ -955,8 +952,15 @@ class HTMLConverter(object, LoggingInterface):
return pt.name
except (IOError, SystemError), err: # PIL chokes on interlaced PNG images as well a some GIF images
self.log_warning(_('Unable to process image %s. Error: %s')%(path, err))
return None
if width == None or height == None:
width, height = im.size
elif rescale and (width < im.size[0] or height < im.size[1]):
path = scale_image(width, height)
if not path:
return
factor = 720./self.profile.dpi
pheight = int(self.current_page.pageStyle.attrs['textheight'])
pwidth = int(self.current_page.pageStyle.attrs['textwidth'])
@ -1518,7 +1522,8 @@ class HTMLConverter(object, LoggingInterface):
except:
pass
dropcaps = tag.has_key('class') and tag['class'] == 'libprs500_dropcaps'
self.process_image(path, tag_css, width, height, dropcaps=dropcaps)
self.process_image(path, tag_css, width, height,
dropcaps=dropcaps, rescale=True)
elif not urlparse(tag['src'])[0]:
self.log_warn('Could not find image: '+tag['src'])
else:

View File

@ -188,7 +188,8 @@ class MetaInformation(object):
for attr in ('author_sort', 'title_sort', 'comments', 'category',
'publisher', 'series', 'series_index', 'rating',
'isbn', 'tags', 'cover_data', 'application_id', 'guide',
'manifest', 'spine', 'toc', 'cover', 'language', 'book_producer'):
'manifest', 'spine', 'toc', 'cover', 'language',
'book_producer', 'timestamp'):
if hasattr(mi, attr):
setattr(ans, attr, getattr(mi, attr))
@ -213,7 +214,7 @@ class MetaInformation(object):
for x in ('author_sort', 'title_sort', 'comments', 'category', 'publisher',
'series', 'series_index', 'rating', 'isbn', 'language',
'application_id', 'manifest', 'toc', 'spine', 'guide', 'cover',
'book_producer',
'book_producer', 'timestamp'
):
setattr(self, x, getattr(mi, x, None))
@ -231,7 +232,8 @@ class MetaInformation(object):
for attr in ('author_sort', 'title_sort', 'comments', 'category',
'publisher', 'series', 'series_index', 'rating',
'isbn', 'application_id', 'manifest', 'spine', 'toc',
'cover', 'language', 'guide', 'book_producer'):
'cover', 'language', 'guide', 'book_producer',
'timestamp'):
val = getattr(mi, attr, None)
if val is not None:
setattr(self, attr, val)
@ -254,7 +256,7 @@ class MetaInformation(object):
ans = []
def fmt(x, y):
ans.append(u'%-20s: %s'%(unicode(x), unicode(y)))
fmt('Title', self.title)
if self.title_sort:
fmt('Title sort', self.title_sort)
@ -279,6 +281,8 @@ class MetaInformation(object):
fmt('Language', self.language)
if self.rating is not None:
fmt('Rating', self.rating)
if self.timestamp is not None:
fmt('Timestamp', self.timestamp.isoformat(' '))
return u'\n'.join(ans)
def to_html(self):
@ -290,14 +294,14 @@ class MetaInformation(object):
ans += [('ISBN', unicode(self.isbn))]
ans += [(_('Tags'), u', '.join([unicode(t) for t in self.tags]))]
if self.series:
ans += [(_('Series'), unicode(self.series))+ ' #%s'%self.format_series_index()]
ans += [(_('Series'), unicode(self.series)+ ' #%s'%self.format_series_index())]
ans += [(_('Language'), unicode(self.language))]
if self.timestamp is not None:
ans += [(_('Timestamp'), unicode(self.timestamp.isoformat(' ')))]
for i, x in enumerate(ans):
ans[i] = u'<tr><td><b>%s</b></td><td>%s</td></tr>'%x
return u'<table>%s</table>'%u'\n'.join(ans)
def __str__(self):
return self.__unicode__().encode('utf-8')

View File

@ -31,21 +31,21 @@ from calibre import prints
def config():
c = StringConfig('')
c.add_opt('title', ['-t', '--title'],
c.add_opt('title', ['-t', '--title'],
help=_('Set the title.'))
c.add_opt('authors', ['-a', '--authors'],
help=_('Set the authors. Multiple authors should be separated '
'by the & character. Author names should be in the order '
'Firstname Lastname.'))
c.add_opt('title_sort', ['--title-sort'],
c.add_opt('title_sort', ['--title-sort'],
help=_('The version of the title to be used for sorting. '
'If unspecified, and the title is specified, it will '
'be auto-generated from the title.'))
c.add_opt('author_sort', ['--author-sort'],
c.add_opt('author_sort', ['--author-sort'],
help=_('String to be used when sorting by author. '
'If unspecified, and the author(s) are specified, it will '
'be auto-generated from the author(s).'))
c.add_opt('cover', ['--cover'],
c.add_opt('cover', ['--cover'],
help=_('Set the cover to the specified file.'))
c.add_opt('comments', ['-c', '--comments'],
help=_('Set the ebook description.'))
@ -195,4 +195,4 @@ def main(args=sys.argv):
return 0
if __name__ == '__main__':
sys.exit(main())
sys.exit(main())

View File

@ -37,9 +37,15 @@ def cover_from_isbn(isbn, timeout=5.):
if browser is None:
browser = _browser()
_timeout = socket.getdefaulttimeout()
socket.setdefaulttimeout(timeout)
socket.setdefaulttimeout(timeout)
src = None
try:
src = browser.open('http://www.librarything.com/isbn/'+isbn).read().decode('utf-8', 'replace')
except Exception, err:
if isinstance(getattr(err, 'args', [None])[0], socket.timeout):
err = LibraryThingError(_('LibraryThing.com timed out. Try again later.'))
raise err
else:
s = BeautifulSoup(src)
url = s.find('td', attrs={'class':'left'})
if url is None:

View File

@ -31,8 +31,14 @@ def metadata_from_formats(formats):
mi = MetaInformation(None, None)
formats.sort(cmp=lambda x,y: cmp(METADATA_PRIORITIES[path_to_ext(x)],
METADATA_PRIORITIES[path_to_ext(y)]))
for path in formats:
ext = path_to_ext(path)
extensions = list(map(path_to_ext, formats))
if 'opf' in extensions:
opf = formats[extensions.index('opf')]
mi2 = opf_metadata(opf)
if mi2 is not None and mi2.title:
return mi2
for path, ext in zip(formats, extensions):
stream = open(path, 'rb')
try:
mi.smart_update(get_metadata(stream, stream_type=ext, use_libprs_metadata=True))

View File

@ -22,8 +22,8 @@ class StreamSlicer(object):
def __init__(self, stream, start=0, stop=None):
self._stream = stream
self.start = start
if stop is None:
stream.seek(0, 2)
if stop is None:
stream.seek(0, 2)
stop = stream.tell()
self.stop = stop
self._len = stop - start
@ -73,7 +73,7 @@ class StreamSlicer(object):
raise TypeError("stream indices must be integers")
class MetadataUpdater(object):
class MetadataUpdater(object):
def __init__(self, stream):
self.stream = stream
data = self.data = StreamSlicer(stream)
@ -85,9 +85,9 @@ class MetadataUpdater(object):
image_base, = unpack('>I', record0[108:112])
flags, = unpack('>I', record0[128:132])
have_exth = self.have_exth = (flags & 0x40) != 0
self.cover_record = self.thumbnail_record = None
if not have_exth:
return
self.cover_record = self.thumbnail_record = None
exth_off = unpack('>I', record0[20:24])[0] + 16 + record0.start
exth = self.exth = StreamSlicer(stream, exth_off, record0.stop)
nitems, = unpack('>I', exth[8:12])
@ -142,6 +142,8 @@ class MetadataUpdater(object):
exth = ['EXTH', pack('>II', len(exth) + 12, len(recs)), exth, pad]
exth = ''.join(exth)
title = (mi.title or _('Unknown')).encode(self.codec, 'replace')
if getattr(self, 'exth', None) is None:
raise MobiError('No existing EXTH record. Cannot update metadata.')
title_off = (self.exth.start - self.record0.start) + len(exth)
title_len = len(title)
trail = len(self.exth) - len(exth) - len(title)
@ -150,18 +152,22 @@ class MetadataUpdater(object):
self.exth[:] = ''.join([exth, title, '\0' * trail])
self.record0[84:92] = pack('>II', title_off, title_len)
self.record0[92:96] = iana2mobi(mi.language)
if mi.cover_data[1]:
data = mi.cover_data[1]
if self.cover_record is not None:
size = len(self.cover_record)
cover = rescale_image(data, size)
cover += '\0' * (size - len(cover))
self.cover_record[:] = cover
if self.thumbnail_record is not None:
size = len(self.thumbnail_record)
thumbnail = rescale_image(data, size, dimen=MAX_THUMB_DIMEN)
thumbnail += '\0' * (size - len(thumbnail))
self.thumbnail_record[:] = thumbnail
if mi.cover_data[1] or mi.cover:
try:
data = mi.cover_data[1] if mi.cover_data[1] else open(mi.cover, 'rb').read()
except:
pass
else:
if self.cover_record is not None:
size = len(self.cover_record)
cover = rescale_image(data, size)
cover += '\0' * (size - len(cover))
self.cover_record[:] = cover
if self.thumbnail_record is not None:
size = len(self.thumbnail_record)
thumbnail = rescale_image(data, size, dimen=MAX_THUMB_DIMEN)
thumbnail += '\0' * (size - len(thumbnail))
self.thumbnail_record[:] = thumbnail
return
def set_metadata(stream, mi):
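
The StreamSlicer constructor above finds the end of the stream by seeking with whence=2 when no explicit stop is given. A tiny self-contained sketch of that idiom; stream_length is an illustrative helper, not part of calibre:

    import io

    def stream_length(stream):
        pos = stream.tell()
        stream.seek(0, 2)        # whence=2: seek relative to the end
        size = stream.tell()
        stream.seek(pos)         # restore the caller's position
        return size

    print(stream_length(io.BytesIO(b'0123456789')))   # 10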

View File

@ -10,7 +10,7 @@
<dc:creator opf:role="aut" py:for="i, author in enumerate(mi.authors)" py:attrs="{'opf:file-as':mi.author_sort} if mi.author_sort and i == 0 else {}">${author}</dc:creator>
<dc:contributor opf:role="bkp" py:with="attrs={'opf:file-as':__appname__}" py:attrs="attrs">${'%s (%s)'%(__appname__, __version__)} [http://${__appname__}.kovidgoyal.net]</dc:contributor>
<dc:identifier opf:scheme="${__appname__}" id="${__appname__}_id">${mi.application_id}</dc:identifier>
<dc:date py:if="getattr(mi, 'timestamp', None) is not None">${mi.timestamp.isoformat()}</dc:date>
<dc:language>${mi.language if mi.language else 'UND'}</dc:language>
<dc:type py:if="getattr(mi, 'category', False)">${mi.category}</dc:type>
<dc:description py:if="mi.comments">${mi.comments}</dc:description>

View File

@ -12,6 +12,7 @@ from urllib import unquote
from urlparse import urlparse
from lxml import etree
from dateutil import parser
from calibre.ebooks.chardet import xml_to_unicode
from calibre import relpath
@ -436,6 +437,7 @@ class OPF(object):
series = MetadataField('series', is_dc=False)
series_index = MetadataField('series_index', is_dc=False, formatter=int, none_is=1)
rating = MetadataField('rating', is_dc=False, formatter=int)
timestamp = MetadataField('date', formatter=parser.parse)
def __init__(self, stream, basedir=os.getcwdu(), unquote_urls=True):
@ -618,7 +620,7 @@ class OPF(object):
def fset(self, val):
remove = list(self.authors_path(self.metadata))
for elem in remove:
self.metadata.remove(elem)
elem.getparent().remove(elem)
for author in val:
attrib = {'{%s}role'%self.NAMESPACES['opf']: 'aut'}
elem = self.create_metadata_element('creator', attrib=attrib)
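
The new timestamp field above uses dateutil.parser (packaged into the frozen builds elsewhere in this commit) to cope with the loosely formatted dates found in OPF <dc:date> elements. A short sketch; the date strings are illustrative:

    from dateutil import parser

    for raw in ('2009-02-11', '2009-02-11T10:01:52-05:00', '11 Feb 2009'):
        print(parser.parse(raw).isoformat(' '))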

View File

@ -306,13 +306,15 @@ IANA_MOBI = \
'zu': {None: (53, 0)}}
def iana2mobi(icode):
subtags = list(icode.split('-'))
langdict = IANA_MOBI[None]
while len(subtags) > 0:
lang = subtags.pop(0).lower()
if lang in IANA_MOBI:
langdict = IANA_MOBI[lang]
break
langdict, subtags = IANA_MOBI[None], []
if icode:
subtags = list(icode.split('-'))
while len(subtags) > 0:
lang = subtags.pop(0).lower()
if lang in IANA_MOBI:
langdict = IANA_MOBI[lang]
break
mcode = langdict[None]
while len(subtags) > 0:
subtag = subtags.pop(0)

View File

@ -308,8 +308,11 @@ class MobiReader(object):
if 'filepos-id' in attrib:
attrib['id'] = attrib.pop('filepos-id')
if 'filepos' in attrib:
filepos = int(attrib.pop('filepos'))
attrib['href'] = "#filepos%d" % filepos
filepos = attrib.pop('filepos')
try:
attrib['href'] = "#filepos%d" % int(filepos)
except:
attrib['href'] = filepos
if tag.tag == 'img':
recindex = None
for attr in self.IMAGE_ATTRS:

View File

@ -44,34 +44,34 @@ OPF1_NSMAP = {'dc': DC11_NS, 'oebpackage': OPF1_NS}
OPF2_NSMAP = {'opf': OPF2_NS, 'dc': DC11_NS, 'dcterms': DCTERMS_NS,
'xsi': XSI_NS, 'calibre': CALIBRE_NS}
def XML(name):
def XML(name):
return '{%s}%s' % (XML_NS, name)
def XHTML(name):
def XHTML(name):
return '{%s}%s' % (XHTML_NS, name)
def OPF(name):
def OPF(name):
return '{%s}%s' % (OPF2_NS, name)
def DC(name):
def DC(name):
return '{%s}%s' % (DC11_NS, name)
def XSI(name):
def XSI(name):
return '{%s}%s' % (XSI_NS, name)
def DCTERMS(name):
def DCTERMS(name):
return '{%s}%s' % (DCTERMS_NS, name)
def NCX(name):
def NCX(name):
return '{%s}%s' % (NCX_NS, name)
def SVG(name):
def SVG(name):
return '{%s}%s' % (SVG_NS, name)
def XLINK(name):
def XLINK(name):
return '{%s}%s' % (XLINK_NS, name)
def CALIBRE(name):
def CALIBRE(name):
return '{%s}%s' % (CALIBRE_NS, name)
def LINK_SELECTORS():

View File

@ -17,6 +17,8 @@ import types
import re
import copy
from itertools import izip
from weakref import WeakKeyDictionary
from xml.dom import SyntaxErr as CSSSyntaxError
import cssutils
from cssutils.css import CSSStyleRule, CSSPageRule, CSSStyleDeclaration, \
CSSValueList, cssproperties
@ -106,7 +108,7 @@ class CSSSelector(etree.XPath):
class Stylizer(object):
STYLESHEETS = {}
STYLESHEETS = WeakKeyDictionary()
def __init__(self, tree, path, oeb, profile=PROFILES['PRS505']):
self.oeb = oeb
@ -131,18 +133,19 @@ class Stylizer(object):
and elem.get('type', CSS_MIME) in OEB_STYLES:
href = urlnormalize(elem.attrib['href'])
path = item.abshref(href)
if path not in oeb.manifest.hrefs:
sitem = oeb.manifest.hrefs.get(path, None)
if sitem is None:
self.logger.warn(
'Stylesheet %r referenced by file %r not in manifest' %
(path, item.href))
continue
if path in self.STYLESHEETS:
stylesheet = self.STYLESHEETS[path]
if sitem in self.STYLESHEETS:
stylesheet = self.STYLESHEETS[sitem]
else:
data = self._fetch_css_file(path)[1]
stylesheet = parser.parseString(data, href=path)
stylesheet.namespaces['h'] = XHTML_NS
self.STYLESHEETS[path] = stylesheet
self.STYLESHEETS[sitem] = stylesheet
stylesheets.append(stylesheet)
rules = []
index = 0
@ -288,15 +291,19 @@ class Style(object):
def _update_cssdict(self, cssdict):
self._style.update(cssdict)
def _apply_style_attr(self):
attrib = self._element.attrib
if 'style' in attrib:
css = attrib['style'].split(';')
css = filter(None, map(lambda x: x.strip(), css))
if 'style' not in attrib:
return
css = attrib['style'].split(';')
css = filter(None, (x.strip() for x in css))
try:
style = CSSStyleDeclaration('; '.join(css))
self._style.update(self._stylizer.flatten_style(style))
except CSSSyntaxError:
return
self._style.update(self._stylizer.flatten_style(style))
def _has_parent(self):
return (self._element.getparent() is not None)
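
Switching the stylesheet cache from a dict keyed by path to a WeakKeyDictionary keyed by the manifest item means cached sheets are dropped once their items are garbage-collected instead of accumulating across conversions. A self-contained sketch of that caching idiom; ManifestItem and parse_css are made-up stand-ins:

    import gc
    from weakref import WeakKeyDictionary

    class ManifestItem(object):              # stand-in for an OEB manifest item
        def __init__(self, href):
            self.href = href

    _SHEETS = WeakKeyDictionary()            # entries die with their keys

    def parse_css(href):                     # pretend this parses the CSS text
        return {'href': href}

    def stylesheet_for(item):
        if item in _SHEETS:
            return _SHEETS[item]
        sheet = parse_css(item.href)
        _SHEETS[item] = sheet
        return sheet

    item = ManifestItem('styles/main.css')
    stylesheet_for(item)
    print(len(_SHEETS))    # 1
    del item
    gc.collect()
    print(len(_SHEETS))    # 0, the cache entry went away with the item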

View File

@ -1,7 +1,7 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
""" The GUI """
import sys, os, re, StringIO, traceback
import sys, os, re, StringIO, traceback, time
from PyQt4.QtCore import QVariant, QFileInfo, QObject, SIGNAL, QBuffer, Qt, QSize, \
QByteArray, QLocale, QUrl, QTranslator, QCoreApplication, \
QModelIndex
@ -14,6 +14,9 @@ from calibre import __author__, islinux, iswindows, isosx
from calibre.startup import get_lang
from calibre.utils.config import Config, ConfigProxy, dynamic
import calibre.resources as resources
from calibre.ebooks.metadata.meta import get_metadata, metadata_from_formats
from calibre.ebooks.metadata import MetaInformation
NONE = QVariant() #: Null value to return from the data function of item models
@ -148,7 +151,41 @@ class Dispatcher(QObject):
def dispatch(self, args, kwargs):
self.func(*args, **kwargs)
class GetMetadata(QObject):
'''
Convenience class to ensure that metadata readers are used only in the
GUI thread. Must be instantiated in the GUI thread.
'''
def __init__(self):
QObject.__init__(self)
self.connect(self, SIGNAL('edispatch(PyQt_PyObject, PyQt_PyObject, PyQt_PyObject)'),
self._get_metadata, Qt.QueuedConnection)
self.connect(self, SIGNAL('idispatch(PyQt_PyObject, PyQt_PyObject, PyQt_PyObject)'),
self._from_formats, Qt.QueuedConnection)
def __call__(self, id, *args, **kwargs):
self.emit(SIGNAL('edispatch(PyQt_PyObject, PyQt_PyObject, PyQt_PyObject)'),
id, args, kwargs)
def from_formats(self, id, *args, **kwargs):
self.emit(SIGNAL('idispatch(PyQt_PyObject, PyQt_PyObject, PyQt_PyObject)'),
id, args, kwargs)
def _from_formats(self, id, args, kwargs):
try:
mi = metadata_from_formats(*args, **kwargs)
except:
mi = MetaInformation('', [_('Unknown')])
self.emit(SIGNAL('metadataf(PyQt_PyObject, PyQt_PyObject)'), id, mi)
def _get_metadata(self, id, args, kwargs):
try:
mi = get_metadata(*args, **kwargs)
except:
mi = MetaInformation('', [_('Unknown')])
self.emit(SIGNAL('metadata(PyQt_PyObject, PyQt_PyObject)'), id, mi)
class TableView(QTableView):
def __init__(self, parent):
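
GetMetadata above exists so that the metadata readers only ever run in the GUI thread: other threads emit a queued signal, the slot executes in the GUI thread, and the result is emitted back. A framework-neutral sketch of the same "hand the work to the owning thread" idea using a plain queue instead of Qt signals; unlike the calibre class, the caller here simply blocks on a reply queue for brevity:

    import threading
    try:
        import queue             # Python 3
    except ImportError:
        import Queue as queue    # Python 2

    requests = queue.Queue()

    def owner_thread():
        # The only thread allowed to touch the (not thread-safe) readers.
        while True:
            job_id, args, reply = requests.get()
            if job_id is None:               # shutdown sentinel
                break
            reply.put((job_id, 'metadata for %r' % (args,)))

    def get_metadata(job_id, *args):
        reply = queue.Queue()
        requests.put((job_id, args, reply))
        return reply.get()

    t = threading.Thread(target=owner_thread)
    t.start()
    print(get_metadata(1, 'book.epub'))
    requests.put((None, None, None))
    t.join()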

src/calibre/gui2/add.py (new file, 224 lines added)
View File

@ -0,0 +1,224 @@
'''
UI for adding books to the database
'''
import os
from PyQt4.Qt import QThread, SIGNAL, QMutex, QWaitCondition, Qt
from calibre.gui2.dialogs.progress import ProgressDialog
from calibre.constants import preferred_encoding
from calibre.gui2.widgets import WarningDialog
class Add(QThread):
def __init__(self):
QThread.__init__(self)
self._lock = QMutex()
self._waiting = QWaitCondition()
def is_canceled(self):
if self.pd.canceled:
self.canceled = True
return self.canceled
def wait_for_condition(self):
self._lock.lock()
self._waiting.wait(self._lock)
self._lock.unlock()
def wake_up(self):
self._waiting.wakeAll()
class AddFiles(Add):
def __init__(self, paths, default_thumbnail, get_metadata, db=None):
Add.__init__(self)
self.paths = paths
self.get_metadata = get_metadata
self.default_thumbnail = default_thumbnail
self.db = db
self.formats, self.metadata, self.names, self.infos = [], [], [], []
self.duplicates = []
self.number_of_books_added = 0
self.connect(self.get_metadata,
SIGNAL('metadata(PyQt_PyObject, PyQt_PyObject)'),
self.metadata_delivered)
def metadata_delivered(self, id, mi):
if self.is_canceled():
self.reading.wakeAll()
return
if not mi.title:
mi.title = os.path.splitext(self.names[id])[0]
mi.title = mi.title if isinstance(mi.title, unicode) else \
mi.title.decode(preferred_encoding, 'replace')
self.metadata.append(mi)
self.infos.append({'title':mi.title,
'authors':', '.join(mi.authors),
'cover':self.default_thumbnail, 'tags':[]})
if self.db is not None:
duplicates, num = self.db.add_books(self.paths[id:id+1],
self.formats[id:id+1], [mi],
add_duplicates=False)
self.number_of_books_added += num
if duplicates:
if not self.duplicates:
self.duplicates = [[], [], [], []]
for i in range(4):
self.duplicates[i] += duplicates[i]
self.emit(SIGNAL('processed(PyQt_PyObject,PyQt_PyObject)'),
mi.title, id)
self.wake_up()
def create_progress_dialog(self, title, msg, parent):
self._parent = parent
self.pd = ProgressDialog(title, msg, -1, len(self.paths)-1, parent)
self.connect(self, SIGNAL('processed(PyQt_PyObject,PyQt_PyObject)'),
self.update_progress_dialog)
self.pd.setModal(True)
self.pd.show()
self.connect(self, SIGNAL('finished()'), self.pd.hide)
return self.pd
def update_progress_dialog(self, title, count):
self.pd.set_value(count)
if self.db is not None:
self.pd.set_msg(_('Added %s to library')%title)
else:
self.pd.set_msg(_('Read metadata from ')+title)
def run(self):
self.canceled = False
for c, book in enumerate(self.paths):
if self.pd.canceled:
self.canceled = True
break
format = os.path.splitext(book)[1]
format = format[1:] if format else None
stream = open(book, 'rb')
self.formats.append(format)
self.names.append(os.path.basename(book))
self.get_metadata(c, stream, stream_type=format,
use_libprs_metadata=True)
self.wait_for_condition()
def process_duplicates(self):
if self.duplicates:
files = _('<p>Books with the same title as the following already '
'exist in the database. Add them anyway?<ul>')
for mi in self.duplicates[2]:
files += '<li>'+mi.title+'</li>\n'
d = WarningDialog (_('Duplicates found!'),
_('Duplicates found!'),
files+'</ul></p>', parent=self._parent)
if d.exec_() == d.Accepted:
num = self.db.add_books(*self.duplicates,
**dict(add_duplicates=True))[1]
self.number_of_books_added += num
class AddRecursive(Add):
def __init__(self, path, db, get_metadata, single_book_per_directory, parent):
self.path = path
self.db = db
self.get_metadata = get_metadata
self.single_book_per_directory = single_book_per_directory
self.duplicates, self.books, self.metadata = [], [], []
self.number_of_books_added = 0
self.canceled = False
Add.__init__(self)
self.connect(self.get_metadata,
SIGNAL('metadataf(PyQt_PyObject, PyQt_PyObject)'),
self.metadata_delivered, Qt.QueuedConnection)
self.connect(self, SIGNAL('searching_done()'), self.searching_done,
Qt.QueuedConnection)
self._parent = parent
self.pd = ProgressDialog(_('Adding books recursively...'),
_('Searching for books in all sub-directories...'),
0, 0, parent)
self.connect(self, SIGNAL('processed(PyQt_PyObject,PyQt_PyObject)'),
self.update_progress_dialog)
self.connect(self, SIGNAL('update(PyQt_PyObject)'), self.pd.set_msg,
Qt.QueuedConnection)
self.connect(self, SIGNAL('pupdate(PyQt_PyObject)'), self.pd.set_value,
Qt.QueuedConnection)
self.pd.setModal(True)
self.pd.show()
self.connect(self, SIGNAL('finished()'), self.pd.hide)
def update_progress_dialog(self, title, count):
self.pd.set_value(count)
if title:
self.pd.set_msg(_('Read metadata from ')+title)
def metadata_delivered(self, id, mi):
if self.is_canceled():
self.reading.wakeAll()
return
self.emit(SIGNAL('processed(PyQt_PyObject,PyQt_PyObject)'),
mi.title, id)
self.metadata.append((mi if mi.title else None, self.books[id]))
if len(self.metadata) >= len(self.books):
self.metadata = [x for x in self.metadata if x[0] is not None]
self.pd.set_min(-1)
self.pd.set_max(len(self.metadata)-1)
self.pd.set_value(-1)
self.pd.set_msg(_('Adding books to database...'))
self.wake_up()
def searching_done(self):
self.pd.set_min(-1)
self.pd.set_max(len(self.books)-1)
self.pd.set_value(-1)
self.pd.set_msg(_('Reading metadata...'))
def run(self):
root = os.path.abspath(self.path)
for dirpath in os.walk(root):
if self.is_canceled():
return
self.emit(SIGNAL('update(PyQt_PyObject)'),
_('Searching in')+' '+dirpath[0])
self.books += list(self.db.find_books_in_directory(dirpath[0],
self.single_book_per_directory))
self.books = [formats for formats in self.books if formats]
# Reset progress bar
self.emit(SIGNAL('searching_done()'))
for c, formats in enumerate(self.books):
self.get_metadata.from_formats(c, formats)
self.wait_for_condition()
# Add books to database
for c, x in enumerate(self.metadata):
mi, formats = x
if self.is_canceled():
break
if self.db.has_book(mi):
self.duplicates.append((mi, formats))
else:
self.db.import_book(mi, formats, notify=False)
self.number_of_books_added += 1
self.emit(SIGNAL('pupdate(PyQt_PyObject)'), c)
def process_duplicates(self):
if self.duplicates:
files = _('<p>Books with the same title as the following already '
'exist in the database. Add them anyway?<ul>')
for mi in self.duplicates:
files += '<li>'+mi[0].title+'</li>\n'
d = WarningDialog (_('Duplicates found!'),
_('Duplicates found!'),
files+'</ul></p>', parent=self._parent)
if d.exec_() == d.Accepted:
for mi, formats in self.duplicates:
self.db.import_book(mi, formats, notify=False)
self.number_of_books_added += 1
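
AddFiles.run above issues one metadata request at a time and then parks on a QWaitCondition until metadata_delivered calls wake_up. A minimal standard-library sketch of the same handshake; the pending counter ensures a wake-up posted before the wait starts is not lost, and all names are illustrative:

    import threading, time

    class Handshake(object):
        def __init__(self):
            self._cond = threading.Condition()
            self._pending = 0

        def wait_for_condition(self):
            with self._cond:
                while self._pending == 0:
                    self._cond.wait()
                self._pending -= 1

        def wake_up(self):
            with self._cond:
                self._pending += 1
                self._cond.notify_all()

    hs = Handshake()

    def worker():
        for book in ('a.epub', 'b.mobi'):
            print('requested metadata for %s' % book)
            hs.wait_for_condition()          # block until the reply arrives

    t = threading.Thread(target=worker)
    t.start()
    for _ in range(2):
        time.sleep(0.1)                      # pretend metadata was delivered
        hs.wake_up()
    t.join()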

View File

@ -5,7 +5,7 @@ __docformat__ = 'restructuredtext en'
from calibre.gui2 import dynamic
from calibre.gui2.dialogs.confirm_delete_ui import Ui_Dialog
from PyQt4.Qt import QDialog, SIGNAL
from PyQt4.Qt import QDialog, SIGNAL, Qt
def _config_name(name):
return name + '_again'
@ -19,6 +19,7 @@ class Dialog(QDialog, Ui_Dialog):
self.msg.setText(msg)
self.name = name
self.connect(self.again, SIGNAL('stateChanged(int)'), self.toggle)
self.buttonBox.setFocus(Qt.OtherFocusReason)
def toggle(self, x):

View File

@ -105,36 +105,6 @@
<string>Book Cover</string>
</property>
<layout class="QGridLayout" name="_2" >
<item row="0" column="0" >
<layout class="QHBoxLayout" name="_3" >
<item>
<widget class="ImageView" name="cover" >
<property name="text" >
<string/>
</property>
<property name="pixmap" >
<pixmap resource="../images.qrc" >:/images/book.svg</pixmap>
</property>
<property name="scaledContents" >
<bool>true</bool>
</property>
<property name="alignment" >
<set>Qt::AlignCenter</set>
</property>
</widget>
</item>
</layout>
</item>
<item row="2" column="0" >
<widget class="QCheckBox" name="opt_prefer_metadata_cover" >
<property name="text" >
<string>Use cover from &amp;source file</string>
</property>
<property name="checked" >
<bool>true</bool>
</property>
</widget>
</item>
<item row="1" column="0" >
<layout class="QVBoxLayout" name="_4" >
<property name="spacing" >
@ -186,6 +156,36 @@
</item>
</layout>
</item>
<item row="2" column="0" >
<widget class="QCheckBox" name="opt_prefer_metadata_cover" >
<property name="text" >
<string>Use cover from &amp;source file</string>
</property>
<property name="checked" >
<bool>true</bool>
</property>
</widget>
</item>
<item row="0" column="0" >
<layout class="QHBoxLayout" name="_3" >
<item>
<widget class="ImageView" name="cover" >
<property name="text" >
<string/>
</property>
<property name="pixmap" >
<pixmap resource="../images.qrc" >:/images/book.svg</pixmap>
</property>
<property name="scaledContents" >
<bool>true</bool>
</property>
<property name="alignment" >
<set>Qt::AlignCenter</set>
</property>
</widget>
</item>
</layout>
</item>
</layout>
<zorder>opt_prefer_metadata_cover</zorder>
<zorder></zorder>
@ -507,6 +507,13 @@
</property>
</widget>
</item>
<item row="9" column="0" >
<widget class="QCheckBox" name="opt_remove_first_image" >
<property name="text" >
<string>Remove &amp;first image from source file</string>
</property>
</widget>
</item>
</layout>
</item>
<item>

View File

@ -20,6 +20,7 @@ class ProgressDialog(QDialog, Ui_Dialog):
self.setWindowModality(Qt.ApplicationModal)
self.set_min(min)
self.set_max(max)
self.bar.setValue(min)
self.canceled = False
self.connect(self.button_box, SIGNAL('rejected()'), self._canceled)

Binary file not shown (new image, 285 B).

Binary file not shown (new image, 983 B).

View File

@ -1,3 +1,4 @@
from calibre.ebooks.metadata import authors_to_string
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import os, textwrap, traceback, time, re
@ -18,6 +19,7 @@ from calibre.library.database2 import FIELD_MAP
from calibre.gui2 import NONE, TableView, qstring_to_unicode, config, \
error_dialog
from calibre.utils.search_query_parser import SearchQueryParser
from calibre.ebooks.metadata.meta import set_metadata as _set_metadata
class LibraryDelegate(QItemDelegate):
COLOR = QColor("blue")
@ -370,35 +372,24 @@ class BooksModel(QAbstractTableModel):
if not rows_are_ids:
rows = [self.db.id(row.row()) for row in rows]
for id in rows:
au = self.db.authors(id, index_is_id=True)
tags = self.db.tags(id, index_is_id=True)
if not au:
au = _('Unknown')
au = au.split(',')
if len(au) > 1:
t = ', '.join(au[:-1])
t += ' & ' + au[-1]
au = t
else:
au = ' & '.join(au)
if not tags:
tags = []
else:
tags = tags.split(',')
series = self.db.series(id, index_is_id=True)
if series is not None:
tags.append(series)
mi = {
'title' : self.db.title(id, index_is_id=True),
mi = self.db.get_metadata(id, index_is_id=True)
au = authors_to_string(mi.authors if mi.authors else [_('Unknown')])
tags = mi.tags if mi.tags else []
if mi.series is not None:
tags.append(mi.series)
info = {
'title' : mi.title,
'authors' : au,
'cover' : self.db.cover(id, index_is_id=True),
'tags' : tags,
'comments': self.db.comments(id, index_is_id=True),
'comments': mi.comments,
}
if series is not None:
mi['tag order'] = {series:self.db.books_in_series_of(id, index_is_id=True)}
if mi.series is not None:
info['tag order'] = {
mi.series:self.db.books_in_series_of(id, index_is_id=True)
}
metadata.append(mi)
metadata.append(info)
return metadata
def get_preferred_formats_from_ids(self, ids, all_formats, mode='r+b'):
@ -423,7 +414,7 @@ class BooksModel(QAbstractTableModel):
def get_preferred_formats(self, rows, formats, paths=False):
def get_preferred_formats(self, rows, formats, paths=False, set_metadata=False):
ans = []
for row in (row.row() for row in rows):
format = None
@ -441,6 +432,9 @@ class BooksModel(QAbstractTableModel):
pt = PersistentTemporaryFile(suffix='.'+format)
pt.write(self.db.format(row, format))
pt.flush()
if set_metadata:
_set_metadata(pt, self.db.get_metadata(row, get_cover=True),
format)
pt.close() if paths else pt.seek(0)
ans.append(pt)
else:

View File

@ -13,7 +13,6 @@ from PyQt4.QtSvg import QSvgRenderer
from calibre import __version__, __appname__, islinux, sanitize_file_name, \
iswindows, isosx, preferred_encoding
from calibre.ptempfile import PersistentTemporaryFile
from calibre.ebooks.metadata.meta import get_metadata
from calibre.devices.errors import FreeSpaceError
from calibre.devices.interface import Device
from calibre.utils.config import prefs, dynamic
@ -23,7 +22,7 @@ from calibre.gui2 import APP_UID, warning_dialog, choose_files, error_dialog, \
set_sidebar_directories, Dispatcher, \
SingleApplication, Application, available_height, \
max_available_height, config, info_dialog, \
available_width
available_width, GetMetadata
from calibre.gui2.cover_flow import CoverFlow, DatabaseImages, pictureflowerror
from calibre.gui2.dialogs.scheduler import Scheduler
from calibre.gui2.update import CheckForUpdates
@ -49,7 +48,6 @@ from calibre.ebooks import BOOK_EXTENSIONS
from calibre.library.database2 import LibraryDatabase2, CoverCache
from calibre.parallel import JobKilled
from calibre.utils.filenames import ascii_filename
from calibre.gui2.widgets import WarningDialog
from calibre.gui2.dialogs.confirm_delete import confirm
class Main(MainWindow, Ui_MainWindow):
@ -78,6 +76,7 @@ class Main(MainWindow, Ui_MainWindow):
self.setupUi(self)
self.setWindowTitle(__appname__)
self.verbose = opts.verbose
self.get_metadata = GetMetadata()
self.read_settings()
self.job_manager = JobManager()
self.jobs_dialog = JobsDialog(self, self.job_manager)
@ -369,13 +368,14 @@ class Main(MainWindow, Ui_MainWindow):
if r == QSystemTrayIcon.Trigger:
if self.isVisible():
for window in QApplication.topLevelWidgets():
if isinstance(window, (MainWindow, QDialog)):
if isinstance(window, (MainWindow, QDialog)) and window.isVisible():
window.hide()
setattr(window, '__systray_minimized', True)
else:
for window in QApplication.topLevelWidgets():
if isinstance(window, (MainWindow, QDialog)):
if window not in (self.device_error_dialog, self.jobs_dialog):
window.show()
if getattr(window, '__systray_minimized', False):
window.show()
setattr(window, '__systray_minimized', False)
def do_default_sync(self, checked):
@ -390,7 +390,7 @@ class Main(MainWindow, Ui_MainWindow):
def change_output_format(self, x):
of = unicode(x).strip()
if of != prefs['output_format']:
if of not in ('LRF',):
if of not in ('LRF', 'EPUB'):
warning_dialog(self, 'Warning',
'<p>%s support is still in beta. If you find bugs, please report them by opening a <a href="http://calibre.kovidgoyal.net">ticket</a>.'%of).exec_()
prefs.set('output_format', of)
@ -607,36 +607,26 @@ class Main(MainWindow, Ui_MainWindow):
################################# Add books ################################
def add_recursive(self, single):
root = choose_dir(self, 'recursive book import root dir dialog', 'Select root folder')
root = choose_dir(self, 'recursive book import root dir dialog',
'Select root folder')
if not root:
return
progress = ProgressDialog(_('Adding books recursively...'),
min=0, max=0, parent=self)
progress.show()
def callback(msg):
if msg != '.':
progress.set_msg((_('Added ')+msg) if msg else _('Searching...'))
QApplication.processEvents()
QApplication.sendPostedEvents()
QApplication.flush()
return progress.canceled
try:
duplicates = self.library_view.model().db.recursive_import(root, single, callback=callback)
finally:
progress.hide()
if duplicates:
files = _('<p>Books with the same title as the following already exist in the database. Add them anyway?<ul>')
for mi, formats in duplicates:
files += '<li>'+mi.title+'</li>\n'
d = WarningDialog(_('Duplicates found!'), _('Duplicates found!'),
files+'</ul></p>', self)
if d.exec_() == QDialog.Accepted:
for mi, formats in duplicates:
self.library_view.model().db.import_book(mi, formats )
self.library_view.model().resort()
self.library_view.model().research()
from calibre.gui2.add import AddRecursive
self._add_recursive_thread = AddRecursive(root,
self.library_view.model().db, self.get_metadata,
single, self)
self.connect(self._add_recursive_thread, SIGNAL('finished()'),
self._recursive_files_added)
self._add_recursive_thread.start()
def _recursive_files_added(self):
self._add_recursive_thread.process_duplicates()
if self._add_recursive_thread.number_of_books_added > 0:
self.library_view.model().resort(reset=False)
self.library_view.model().research()
self.library_view.model().count_changed()
self._add_recursive_thread = None
def add_recursive_single(self, checked):
'''
Add books from the local filesystem to either the library or the device
@ -685,63 +675,41 @@ class Main(MainWindow, Ui_MainWindow):
return
to_device = self.stack.currentIndex() != 0
self._add_books(books, to_device)
if to_device:
self.status_bar.showMessage(_('Uploading books to device.'), 2000)
def _add_books(self, paths, to_device, on_card=None):
if on_card is None:
on_card = self.stack.currentIndex() == 2
if not paths:
return
# Get format and metadata information
formats, metadata, names, infos = [], [], [], []
progress = ProgressDialog(_('Adding books...'), _('Reading metadata...'),
min=0, max=len(paths), parent=self)
progress.show()
try:
for c, book in enumerate(paths):
progress.set_value(c)
if progress.canceled:
return
format = os.path.splitext(book)[1]
format = format[1:] if format else None
stream = open(book, 'rb')
try:
mi = get_metadata(stream, stream_type=format, use_libprs_metadata=True)
except:
mi = MetaInformation(None, None)
if not mi.title:
mi.title = os.path.splitext(os.path.basename(book))[0]
if not mi.authors:
mi.authors = [_('Unknown')]
formats.append(format)
metadata.append(mi)
names.append(os.path.basename(book))
infos.append({'title':mi.title, 'authors':', '.join(mi.authors),
'cover':self.default_thumbnail, 'tags':[]})
title = mi.title if isinstance(mi.title, unicode) else mi.title.decode(preferred_encoding, 'replace')
progress.set_msg(_('Read metadata from ')+title)
if not to_device:
progress.set_msg(_('Adding books to database...'))
model = self.library_view.model()
paths = list(paths)
duplicates, number_added = model.add_books(paths, formats, metadata)
if duplicates:
files = _('<p>Books with the same title as the following already exist in the database. Add them anyway?<ul>')
for mi in duplicates[2]:
files += '<li>'+mi.title+'</li>\n'
d = WarningDialog(_('Duplicates found!'), _('Duplicates found!'), files+'</ul></p>', parent=self)
if d.exec_() == QDialog.Accepted:
num = model.add_books(*duplicates, **dict(add_duplicates=True))[1]
number_added += num
model.books_added(number_added)
from calibre.gui2.add import AddFiles
self._add_files_thread = AddFiles(paths, self.default_thumbnail,
self.get_metadata,
None if to_device else \
self.library_view.model().db
)
self._add_files_thread.send_to_device = to_device
self._add_files_thread.on_card = on_card
self._add_files_thread.create_progress_dialog(_('Adding books...'),
_('Reading metadata...'), self)
self.connect(self._add_files_thread, SIGNAL('finished()'),
self._files_added)
self._add_files_thread.start()
def _files_added(self):
t = self._add_files_thread
self._add_files_thread = None
if not t.canceled:
if t.send_to_device:
self.upload_books(t.paths,
list(map(sanitize_file_name, t.names)),
t.infos, on_card=t.on_card)
self.status_bar.showMessage(_('Uploading books to device.'), 2000)
else:
self.upload_books(paths, list(map(sanitize_file_name, names)), infos, on_card=on_card)
finally:
progress.hide()
t.process_duplicates()
if t.number_of_books_added > 0:
self.library_view.model().books_added(t.number_of_books_added)
def upload_books(self, files, names, metadata, on_card=False, memory=None):
'''
Upload books to device.
@ -797,7 +765,10 @@ class Main(MainWindow, Ui_MainWindow):
if not rows or len(rows) == 0:
return
if self.stack.currentIndex() == 0:
if not confirm('<p>'+_('The selected books will be <b>permanently deleted</b> and the files removed from your computer. Are you sure?')+'</p>', 'library_delete_books', self):
if not confirm('<p>'+_('The selected books will be '
'<b>permanently deleted</b> and the files '
'removed from your computer. Are you sure?')
+'</p>', 'library_delete_books', self):
return
view.model().delete_books(rows)
else:
@ -928,7 +899,8 @@ class Main(MainWindow, Ui_MainWindow):
mi['cover'] = self.cover_to_thumbnail(cdata)
metadata = iter(metadata)
_files = self.library_view.model().get_preferred_formats(rows,
self.device_manager.device_class.FORMATS, paths=True)
self.device_manager.device_class.FORMATS,
paths=True, set_metadata=True)
files = [getattr(f, 'name', None) for f in _files]
bad, good, gf, names, remove_ids = [], [], [], [], []
for f in files:
@ -1222,6 +1194,8 @@ class Main(MainWindow, Ui_MainWindow):
format = 'LRF'
if 'EPUB' in formats:
format = 'EPUB'
if 'MOBI' in formats:
format = 'MOBI'
if not formats:
d = error_dialog(self, _('Cannot view'),
_('%s has no available formats.')%(title,))
@ -1403,8 +1377,15 @@ class Main(MainWindow, Ui_MainWindow):
def initialize_database(self):
self.library_path = prefs['library_path']
if self.library_path is None: # Need to migrate to new database layout
base = os.path.expanduser('~')
if iswindows:
from calibre import plugins
from PyQt4.Qt import QDir
base = plugins['winutil'][0].special_folder_path(plugins['winutil'][0].CSIDL_PERSONAL)
if not base or not os.path.exists(base):
base = unicode(QDir.homePath()).replace('/', os.sep)
dir = unicode(QFileDialog.getExistingDirectory(self,
_('Choose a location for your ebook library.'), os.getcwd()))
_('Choose a location for your ebook library.'), base))
if not dir:
dir = os.path.expanduser('~/Library')
self.library_path = os.path.abspath(dir)
@ -1590,6 +1571,11 @@ def main(args=sys.argv):
print 'Restarting with:', e, sys.argv
os.execvp(e, sys.argv)
else:
if iswindows:
try:
main.system_tray_icon.hide()
except:
pass
return ret
return 0

View File

@ -15,6 +15,7 @@ from calibre.utils.config import Config, StringConfig
from calibre.gui2.viewer.config_ui import Ui_Dialog
from calibre.gui2.viewer.js import bookmarks, referencing
from calibre.ptempfile import PersistentTemporaryFile
from calibre.constants import iswindows
def load_builtin_fonts():
from calibre.ebooks.lrf.fonts.liberation import LiberationMono_BoldItalic
@ -56,9 +57,12 @@ def config(defaults=None):
help=_('Set the user CSS stylesheet. This can be used to customize the look of all books.'))
fonts = c.add_group('FONTS', _('Font options'))
fonts('serif_family', default='Liberation Serif', help=_('The serif font family'))
fonts('sans_family', default='Liberation Sans', help=_('The sans-serif font family'))
fonts('mono_family', default='Liberation Mono', help=_('The monospaced font family'))
fonts('serif_family', default='Times New Roman' if iswindows else 'Liberation Serif',
help=_('The serif font family'))
fonts('sans_family', default='Verdana' if iswindows else 'Liberation Sans',
help=_('The sans-serif font family'))
fonts('mono_family', default='Courier New' if iswindows else 'Liberation Mono',
help=_('The monospaced font family'))
fonts('default_font_size', default=20, help=_('The standard font size in px'))
fonts('mono_font_size', default=16, help=_('The monospaced font size in px'))
fonts('standard_font', default='serif', help=_('The standard font type'))

View File

@ -4,14 +4,10 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
Backend that implements storage of ebooks in an sqlite database.
'''
import sqlite3 as sqlite
import datetime, re, os, cPickle, traceback, sre_constants
import datetime, re, cPickle, sre_constants
from zlib import compress, decompress
from calibre import sanitize_file_name
from calibre.ebooks.metadata.meta import set_metadata, metadata_from_formats
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks import BOOK_EXTENSIONS
from calibre.web.feeds.recipes import migrate_automatic_profile_to_automatic_recipe
class Concatenate(object):
@ -1389,227 +1385,16 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
def all_ids(self):
return [i[0] for i in self.conn.get('SELECT id FROM books')]
def export_to_dir(self, dir, indices, byauthor=False, single_dir=False,
index_is_id=False, callback=None):
if not os.path.exists(dir):
raise IOError('Target directory does not exist: '+dir)
by_author = {}
count = 0
for index in indices:
id = index if index_is_id else self.id(index)
au = self.conn.get('SELECT author_sort FROM books WHERE id=?',
(id,), all=False)
if not au:
au = self.authors(index, index_is_id=index_is_id)
if not au:
au = _('Unknown')
au = au.split(',')[0]
if not by_author.has_key(au):
by_author[au] = []
by_author[au].append(index)
for au in by_author.keys():
apath = os.path.join(dir, sanitize_file_name(au))
if not single_dir and not os.path.exists(apath):
os.mkdir(apath)
for idx in by_author[au]:
title = re.sub(r'\s', ' ', self.title(idx, index_is_id=index_is_id))
tpath = os.path.join(apath, sanitize_file_name(title))
id = idx if index_is_id else self.id(idx)
id = str(id)
if not single_dir and not os.path.exists(tpath):
os.mkdir(tpath)
name = au + ' - ' + title if byauthor else title + ' - ' + au
name += '_'+id
base = dir if single_dir else tpath
mi = self.get_metadata(idx, index_is_id=index_is_id)
cover = self.cover(idx, index_is_id=index_is_id)
if cover is not None:
cname = sanitize_file_name(name) + '.jpg'
cpath = os.path.join(base, cname)
open(cpath, 'wb').write(cover)
mi.cover = cname
f = open(os.path.join(base, sanitize_file_name(name)+'.opf'), 'wb')
if not mi.authors:
mi.authors = [_('Unknown')]
opf = OPFCreator(base, mi)
opf.render(f)
f.close()
fmts = self.formats(idx, index_is_id=index_is_id)
if not fmts:
fmts = ''
for fmt in fmts.split(','):
data = self.format(idx, fmt, index_is_id=index_is_id)
if not data:
continue
fname = name +'.'+fmt.lower()
fname = sanitize_file_name(fname)
f = open(os.path.join(base, fname), 'w+b')
f.write(data)
f.flush()
f.seek(0)
try:
set_metadata(f, mi, fmt.lower())
except:
print 'Error setting metadata for book:', mi.title
traceback.print_exc()
f.close()
count += 1
if callable(callback):
if not callback(count, mi.title):
return
def import_book(self, mi, formats):
series_index = 1 if mi.series_index is None else mi.series_index
if not mi.authors:
mi.authors = [_('Unknown')]
aus = mi.author_sort if mi.author_sort else ', '.join(mi.authors)
obj = self.conn.execute('INSERT INTO books(title, uri, series_index, author_sort) VALUES (?, ?, ?, ?)',
(mi.title, None, series_index, aus))
id = obj.lastrowid
self.conn.commit()
self.set_metadata(id, mi)
for path in formats:
ext = os.path.splitext(path)[1][1:].lower()
stream = open(path, 'rb')
stream.seek(0, 2)
usize = stream.tell()
stream.seek(0)
data = sqlite.Binary(compress(stream.read()))
try:
self.conn.execute('INSERT INTO data(book, format, uncompressed_size, data) VALUES (?,?,?,?)',
(id, ext, usize, data))
except sqlite.IntegrityError:
self.conn.execute('UPDATE data SET uncompressed_size=?, data=? WHERE book=? AND format=?',
(usize, data, id, ext))
self.conn.commit()
def import_book_directory_multiple(self, dirpath, callback=None):
dirpath = os.path.abspath(dirpath)
duplicates = []
books = {}
for path in os.listdir(dirpath):
if callable(callback):
callback('.')
path = os.path.abspath(os.path.join(dirpath, path))
if os.path.isdir(path) or not os.access(path, os.R_OK):
continue
ext = os.path.splitext(path)[1]
if not ext:
continue
ext = ext[1:].lower()
if ext not in BOOK_EXTENSIONS:
continue
key = os.path.splitext(path)[0]
if not books.has_key(key):
books[key] = []
books[key].append(path)
for formats in books.values():
mi = metadata_from_formats(formats)
if mi.title is None:
continue
if self.has_book(mi):
duplicates.append((mi, formats))
continue
self.import_book(mi, formats)
if callable(callback):
if callback(mi.title):
break
return duplicates
def import_book_directory(self, dirpath, callback=None):
dirpath = os.path.abspath(dirpath)
formats = []
for path in os.listdir(dirpath):
if callable(callback):
callback('.')
path = os.path.abspath(os.path.join(dirpath, path))
if os.path.isdir(path) or not os.access(path, os.R_OK):
continue
ext = os.path.splitext(path)[1]
if not ext:
continue
ext = ext[1:].lower()
if ext not in BOOK_EXTENSIONS:
continue
formats.append(path)
if not formats:
return
mi = metadata_from_formats(formats)
if mi.title is None:
return
if self.has_book(mi):
return [(mi, formats)]
self.import_book(mi, formats)
if callable(callback):
callback(mi.title)
def has_id(self, id):
return self.conn.get('SELECT id FROM books where id=?', (id,), all=False) is not None
    def recursive_import(self, root, single_book_per_directory=True, callback=None):
        root = os.path.abspath(root)
        duplicates = []
        for dirpath in os.walk(root):
            res = self.import_book_directory(dirpath[0], callback=callback) if \
                  single_book_per_directory else \
                  self.import_book_directory_multiple(dirpath[0], callback=callback)
            if res is not None:
                duplicates.extend(res)
            if callable(callback):
                if callback(''):
                    break
        return duplicates
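
A hedged usage sketch for the walker above; db stands for an already opened library database object and the path is made up. The callbacks receive '.' as an activity tick, a book title after each import, and '' between directories; returning a true value from the '' call stops the walk.

def progress(arg):
    if arg not in ('.', ''):
        print 'Imported:', arg
    return False                      # keep walking

duplicates = db.recursive_import('/home/user/ebooks',
                                 single_book_per_directory=True,
                                 callback=progress)
for mi, formats in duplicates:
    print 'Already in library:', mi.title, '->', formats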
    def export_single_format_to_dir(self, dir, indices, format,
                                    index_is_id=False, callback=None):
        # Write the requested format of every listed book into dir, returning
        # a list of (id, title) pairs for books that have no such format.
        dir = os.path.abspath(dir)
        if not index_is_id:
            indices = map(self.id, indices)
        failures = []
        for count, id in enumerate(indices):
            try:
                data = self.format(id, format, index_is_id=True)
                if not data:
                    failures.append((id, self.title(id, index_is_id=True)))
                    continue
            except:
                failures.append((id, self.title(id, index_is_id=True)))
                continue
            title = self.title(id, index_is_id=True)
            au = self.authors(id, index_is_id=True)
            if not au:
                au = _('Unknown')
            fname = '%s - %s.%s'%(title, au, format.lower())
            fname = sanitize_file_name(fname)
            if not os.path.exists(dir):
                os.makedirs(dir)
            f = open(os.path.join(dir, fname), 'w+b')
            f.write(data)
            f.seek(0)
            try:
                set_metadata(f, self.get_metadata(id, index_is_id=True), stream_type=format.lower())
            except:
                pass
            f.close()
            if callable(callback):
                if not callback(count, title):
                    break
        return failures
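
A similar hedged sketch for the exporter above; db is an open library database, and the ids and target directory are made up. The callback returns a true value to continue and a false value to stop the export early.

def report(count, title):
    print 'Exported %d: %s' % (count, title)
    return True

failed = db.export_single_format_to_dir('/tmp/epub-export', [12, 15, 42], 'epub',
                                        index_is_id=True, callback=report)
for book_id, title in failed:
    print 'No EPUB stored for', book_id, title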
class SearchToken(object):

File diff suppressed because it is too large Load Diff

View File

@ -236,7 +236,9 @@ Donors per day: %(dpd).2f
ml = mdates.MonthLocator() # every month
fig = plt.figure(1, (8, 4), 96)#, facecolor, edgecolor, frameon, FigureClass)
ax = fig.add_subplot(111)
average = sum(y)/len(y)
ax.bar(x, y, align='center', width=20, color='g')
ax.hlines([average], x[0], x[-1])
ax.xaxis.set_major_locator(ml)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %y'))
ax.set_xlim(_months[0].min-timedelta(days=15), _months[-1].min+timedelta(days=15))
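
This hunk adds a horizontal line at the mean to the monthly donations bar chart. A self-contained sketch of the same matplotlib pattern, with made-up x and y series standing in for the data the surrounding script computes:

from datetime import date, timedelta
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

x = [date(2008, m, 1) for m in range(1, 13)]          # one bar per month
y = [120, 95, 130, 110, 150, 140, 160, 155, 170, 165, 180, 175]

fig = plt.figure(1, (8, 4), 96)
ax = fig.add_subplot(111)
average = sum(y) / float(len(y))
ax.bar(x, y, align='center', width=20, color='g')
ax.hlines([average], x[0], x[-1])                     # mean line across the bars
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %y'))
ax.set_xlim(x[0] - timedelta(days=15), x[-1] + timedelta(days=15))
fig.savefig('donations.png')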

View File

@ -30,11 +30,12 @@ class Distribution(object):
('libusb', '0.1.12', None, None, None),
('Qt', '4.4.0', 'qt', 'libqt4-core libqt4-gui', 'qt4'),
('PyQt', '4.4.2', 'PyQt4', 'python-qt4', 'PyQt4'),
('mechanize for python', '0.1.8', 'dev-python/mechanize', 'python-mechanize', 'python-mechanize'),
('mechanize for python', '0.1.11', 'dev-python/mechanize', 'python-mechanize', 'python-mechanize'),
('ImageMagick', '6.3.5', 'imagemagick', 'imagemagick', 'ImageMagick'),
('xdg-utils', '1.0.2', 'xdg-utils', 'xdg-utils', 'xdg-utils'),
('dbus-python', '0.82.2', 'dbus-python', 'python-dbus', 'dbus-python'),
('lxml', '2.0.5', 'lxml', 'python-lxml', 'python-lxml'),
('python-dateutil', '1.4.1', 'python-dateutil', 'python-dateutil', 'python-dateutil'),
('BeautifulSoup', '3.0.5', 'beautifulsoup', 'python-beautifulsoup', 'python-BeautifulSoup'),
('help2man', '1.36.4', 'help2man', 'help2man', 'help2man'),
]
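
python-dateutil joins the dependency table here. As a rough illustration only (not a call site from this tree), its parser copes with the loosely formatted dates that web feeds tend to carry:

from dateutil.parser import parse

print parse('Sun, 15 Feb 2009 08:30:00 -0500')   # RFC 2822 style feed date
print parse('2009-02-15T08:30:00')               # ISO 8601 style date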

File diff suppressed because it is too large Load Diff

View File

@ -91,7 +91,7 @@ class BasicNewsRecipe(object, LoggingInterface):
#: If True stylesheets are not downloaded and processed
no_stylesheets = False
#: Convenient flag to strip all javascripts tags from the downloaded HTML
#: Convenient flag to strip all javascript tags from the downloaded HTML
remove_javascript = True
#: If True the GUI will ask the user for a username and password
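
A minimal hedged sketch of a recipe that sets the flags documented above; the class name, title, and feed URL are invented for illustration:

from calibre.web.feeds.news import BasicNewsRecipe

class ExampleRecipe(BasicNewsRecipe):
    title                 = 'Example News'
    oldest_article        = 2
    max_articles_per_feed = 100
    no_stylesheets        = True   # do not download or process stylesheets
    remove_javascript     = True   # strip javascript tags from the downloaded HTML
    use_embedded_content  = False
    feeds = [(u'All news', u'http://example.com/rss.xml')]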

View File

@ -27,6 +27,8 @@ recipe_modules = ['recipe_' + r for r in (
'shacknews', 'teleread', 'granma', 'juventudrebelde', 'juventudrebelde_english',
'la_tercera', 'el_mercurio_chile', 'la_cuarta', 'lanacion_chile', 'la_segunda',
'jb_online', 'estadao', 'o_globo', 'vijesti', 'elmundo', 'the_oz',
'honoluluadvertiser', 'starbulletin', 'exiled', 'indy_star', 'dna',
'pobjeda',
)]
import re, imp, inspect, time, os

View File

@ -1,31 +1,38 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
'''
ambito.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds.news import BasicNewsRecipe
class Ambito(BasicNewsRecipe):
title = 'Ambito.com'
__author__ = 'Darko Miletic'
description = 'Informacion Libre las 24 horas'
description = 'Informacion Libre las 24 horas'
publisher = 'Ambito.com'
category = 'news, politics, Argentina'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'iso--8859-1'
language = _('Spanish')
encoding = 'iso-8859-1'
cover_url = 'http://www.ambito.com/img/logo_.jpg'
remove_javascript = True
use_embedded_content = False
html2lrf_options = [
'--comment' , description
, '--category' , 'news, Argentina'
, '--publisher' , title
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='div', attrs={'align':'justify'})]
remove_tags = [dict(name=['object','link'])]
feeds = [
(u'Principales Noticias', u'http://www.ambito.com/rss/noticiasp.asp' )
@ -43,3 +50,12 @@ class Ambito(BasicNewsRecipe):
def print_version(self, url):
return url.replace('http://www.ambito.com/noticia.asp?','http://www.ambito.com/noticias/imprimir.asp?')
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Spanish')
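
The change above is the pattern repeated throughout these recipe updates: publisher and category become class attributes and the converter options are built from them. With the values this recipe uses, the two option attributes come out as follows (shown as a standalone snippet):

description = 'Informacion Libre las 24 horas'
publisher   = 'Ambito.com'
category    = 'news, politics, Argentina'

html2lrf_options  = ['--comment', description, '--category', category, '--publisher', publisher]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
# html2epub_options expands to three lines:
#   publisher="Ambito.com"
#   comments="Informacion Libre las 24 horas"
#   tags="news, politics, Argentina"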

View File

@ -7,25 +7,33 @@ b92.net
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds.news import BasicNewsRecipe
class B92(BasicNewsRecipe):
title = u'B92'
title = 'B92'
__author__ = 'Darko Miletic'
language = _('Serbian')
description = 'Dnevne vesti iz Srbije i sveta'
oldest_article = 7
oldest_article = 2
publisher = 'B92.net'
category = 'news, politics, Serbia'
max_articles_per_feed = 100
remove_javascript = True
no_stylesheets = True
use_embedded_content = False
cover_url = 'http://static.b92.net/images/fp/logo.gif'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
keep_only_tags = [ dict(name='div', attrs={'class':'sama_vest'}) ]
html2lrf_options = [
'--comment', description
, '--base-font-size', '10'
, '--category', 'news, Serbia'
, '--publisher', 'B92'
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
feeds = [
(u'Vesti', u'http://www.b92.net/info/rss/vesti.xml')
@ -44,3 +52,16 @@ class B92(BasicNewsRecipe):
if biz:
nurl = 'http://www.b92.net/mobilni/biz/index.php?nav_id=' + article_id
return nurl
def preprocess_html(self, soup):
soup.html['xml:lang'] = 'sr-Latn'
soup.html['lang'] = 'sr-Latn'
mtag = '<meta http-equiv="Content-Language" content="sr-Latn"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll(name='img',align=True):
del item['align']
item.insert(0,'<br /><br />')
return soup
language = _('Serbian')
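
Nearly every recipe in this batch gains the same preprocess_html cleanup: declare the content language in the head and drop inline style attributes. A standalone sketch of that soup manipulation, using a made-up HTML snippet:

from calibre.ebooks.BeautifulSoup import BeautifulSoup

html = '<html><head></head><body><p style="color:red">Vesti</p></body></html>'
soup = BeautifulSoup(html)
soup.head.insert(0, '<meta http-equiv="Content-Language" content="sr-Latn"/>')
for item in soup.findAll(style=True):
    del item['style']
print soup    # the paragraph keeps its text but loses the style attribute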

View File

@ -5,31 +5,49 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
'''
blic.rs
'''
import string,re
from calibre.web.feeds.news import BasicNewsRecipe
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Blic(BasicNewsRecipe):
title = u'Blic'
__author__ = 'Darko Miletic'
description = 'Blic.rs online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja'
oldest_article = 7
__author__ = u'Darko Miletic'
description = u'Blic.co.yu online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja'
publisher = 'RINGIER d.o.o.'
category = 'news, politics, Serbia'
oldest_article = 2
max_articles_per_feed = 100
remove_javascript = True
no_stylesheets = True
use_embedded_content = False
cover_url = 'http://www.blic.rs/resources/images/header_back_tile.png'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
html2lrf_options = [
'--comment', description
, '--base-font-size', '10'
, '--category', 'news, Serbia'
, '--publisher', 'Blic'
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
keep_only_tags = [ dict(name='div', attrs={'class':'single_news'}) ]
keep_only_tags = [dict(name='div', attrs={'class':'single_news'})]
feeds = [ (u'Vesti', u'http://www.blic.rs/rssall.php')]
feeds = [(u'Vesti', u'http://www.blic.rs/rssall.php')]
remove_tags = [dict(name=['object','link'])]
def print_version(self, url):
start_url, question, rest_url = url.partition('?')
return u'http://www.blic.rs/_print.php?' + rest_url
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="sr-Latn"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Serbian')

View File

@ -1,31 +1,35 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
'''
clarin.com
'''
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds.news import BasicNewsRecipe
class Clarin(BasicNewsRecipe):
title = 'Clarin'
__author__ = 'Darko Miletic'
description = 'Noticias de Argentina y mundo'
publisher = 'Grupo Clarin'
category = 'news, politics, Argentina'
oldest_article = 2
max_articles_per_feed = 100
language = _('Spanish')
use_embedded_content = False
no_stylesheets = True
cover_url = strftime('http://www.clarin.com/diario/%Y/%m/%d/portada.jpg')
remove_javascript = True
html2lrf_options = [
'--comment', description
, '--base-font-size', '10'
, '--category', 'news, Argentina'
, '--publisher', 'Grupo Clarin'
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
remove_tags = [
dict(name='a' , attrs={'class':'Imp' })
@ -49,3 +53,12 @@ class Clarin(BasicNewsRecipe):
rest = artl.partition('-0')[-1]
lmain = rest.partition('.')[0]
return 'http://www.servicios.clarin.com/notas/jsp/clarin/v9/notas/imprimir.jsp?pagid=' + lmain
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Spanish')

View File

@ -5,37 +5,47 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
'''
danas.rs
'''
import string,re
from calibre.web.feeds.news import BasicNewsRecipe
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Danas(BasicNewsRecipe):
title = 'Danas'
title = u'Danas'
__author__ = 'Darko Miletic'
description = 'Dnevne novine sa vestima iz sveta, politike, ekonomije, kulture, sporta, Beograda, Novog Sada i cele Srbije.'
description = 'Vesti'
publisher = 'Danas d.o.o.'
category = 'news, politics, Serbia'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
no_stylesheets = False
remove_javascript = True
use_embedded_content = False
cover_url = 'http://www.danas.rs/images/basic/danas.gif'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
html2lrf_options = [
'--comment', description
, '--base-font-size', '10'
, '--category', 'news, Serbia'
, '--publisher', 'Danas'
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
keep_only_tags = [ dict(name='div', attrs={'id':'left'}) ]
keep_only_tags = [dict(name='div', attrs={'id':'left'})]
remove_tags = [
dict(name='div', attrs={'class':'width_1_4' })
,dict(name='div', attrs={'class':'metaClanka' })
,dict(name='div', attrs={'id':'comments' })
,dict(name='div', attrs={'class':'baner' })
,dict(name='div', attrs={'class':'slikaClanka'})
dict(name='div', attrs={'class':['width_1_4','metaClanka','baner']})
,dict(name='div', attrs={'id':'comments'})
,dict(name=['object','link'])
]
feeds = [(u'Vesti', u'http://www.danas.rs/rss/rss.asp')]
feeds = [ (u'Vesti', u'http://www.danas.rs/rss/rss.asp')]
def print_version(self, url):
return url + '&action=print'
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="sr-Latn"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Serbian')

View File

@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class DeStandaard(BasicNewsRecipe):
title = u'De Standaard'
__author__ = u'Darko Miletic'
language = _('French')
language = _('Dutch')
description = u'News from Belgium'
oldest_article = 7
max_articles_per_feed = 100

View File

@ -13,6 +13,7 @@ class DeMorganBe(BasicNewsRecipe):
__author__ = u'Darko Miletic'
description = u'News from Belgium'
oldest_article = 7
language = _('Dutch')
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False

View File

@ -0,0 +1,41 @@
'''
dnaindia.com
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class DNAIndia(BasicNewsRecipe):
title = 'DNA India'
description = 'Mumbai news, India news, World news, breaking news'
__author__ = 'Kovid Goyal'
language = _('English')
feeds = [
('Top News', 'http://www.dnaindia.com/syndication/rss_topnews.xml'),
('Popular News', 'http://www.dnaindia.com/syndication/rss_popular.xml'),
('Recent Columns', 'http://www.dnaindia.com/syndication/rss_column.xml'),
('Mumbai', 'http://www.dnaindia.com/syndication/rss,catid-1.xml'),
('India', 'http://www.dnaindia.com/syndication/rss,catid-2.xml'),
('World', 'http://www.dnaindia.com/syndication/rss,catid-9.xml'),
('Money', 'http://www.dnaindia.com/syndication/rss,catid-4.xml'),
('Sports', 'http://www.dnaindia.com/syndication/rss,catid-6.xml'),
('After Hours', 'http://www.dnaindia.com/syndication/rss,catid-7.xml'),
('Digital Life', 'http://www.dnaindia.com/syndication/rss,catid-1089741.xml'),
]
remove_tags = [{'id':'footer'}, {'class':['bottom', 'categoryHead']}]
def print_version(self, url):
match = re.search(r'newsid=(\d+)', url)
if not match:
return url
return 'http://www.dnaindia.com/dnaprint.asp?newsid='+match.group(1)
def postprocess_html(self, soup, first_fetch):
for t in soup.findAll(['table', 'tr', 'td']):
t.name = 'div'
a = soup.find(href='http://www.3dsyndication.com/')
if a is not None:
a.parent.extract()
return soup
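
The print_version above swaps the article page for the printer-friendly endpoint by extracting the newsid. A quick illustration with a made-up article URL:

import re

def print_version(url):
    match = re.search(r'newsid=(\d+)', url)
    if not match:
        return url
    return 'http://www.dnaindia.com/dnaprint.asp?newsid=' + match.group(1)

print print_version('http://www.dnaindia.com/report.asp?newsid=1234567')
# -> http://www.dnaindia.com/dnaprint.asp?newsid=1234567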

View File

@ -5,31 +5,36 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
emol.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds.news import BasicNewsRecipe
class ElMercurio(BasicNewsRecipe):
title = 'El Mercurio online'
language = _('Spanish')
__author__ = 'Darko Miletic'
description = 'El sitio de noticias online de Chile'
description = 'El sitio de noticias online de Chile'
publisher = 'El Mercurio'
category = 'news, politics, Chile'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'cp1252'
cover_url = 'http://www.emol.com/especiales/logo_emol/logo_emol.gif'
remove_javascript = True
use_embedded_content = False
html2lrf_options = [
'--comment' , description
, '--category' , 'news, Chile'
, '--publisher' , title
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [
dict(name='div', attrs={'class':'despliegue-txt_750px'})
,dict(name='div', attrs={'id':'div_cuerpo_participa'})
]
remove_tags = [
dict(name='div', attrs={'class':'contenedor_despliegue-col-left300'})
@ -45,4 +50,12 @@ class ElMercurio(BasicNewsRecipe):
,(u'Tecnologia', u'http://www.emol.com/rss20/rss.asp?canal=5')
,(u'La Musica', u'http://www.emol.com/rss20/rss.asp?canal=7')
]
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="es-CL"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Spanish')

View File

@ -1,30 +1,34 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
'''
elargentino.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds.news import BasicNewsRecipe
class ElArgentino(BasicNewsRecipe):
title = 'ElArgentino.com'
__author__ = 'Darko Miletic'
description = 'Informacion Libre las 24 horas'
language = _('Spanish')
description = 'Informacion Libre las 24 horas'
publisher = 'ElArgentino.com'
category = 'news, politics, Argentina'
oldest_article = 2
max_articles_per_feed = 100
remove_javascript = True
no_stylesheets = True
use_embedded_content = False
encoding = 'utf8'
cover_url = 'http://www.elargentino.com/TemplateWeb/MediosFooter/tapa_elargentino.png'
html2lrf_options = [
'--comment' , description
, '--category' , 'news, Argentina'
, '--publisher' , 'ElArgentino.com'
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
remove_tags = [
dict(name='div', attrs={'id':'noprint' })
@ -50,7 +54,10 @@ class ElArgentino(BasicNewsRecipe):
return u'http://www.elargentino.com/Impresion.aspx?Id=' + article_id
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n<meta http-equiv="Content-Language" content="es-AR"/>\n'
soup.head.insert(0,mtag)
soup.prettify()
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Spanish')

View File

@ -6,41 +6,55 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
elmundo.es
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds.news import BasicNewsRecipe
class ElMundo(BasicNewsRecipe):
title = 'El Mundo'
__author__ = 'Darko Miletic'
description = 'News from Spain'
language = _('Spanish')
publisher = 'El Mundo'
category = 'news, politics, Spain'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'iso8859_15'
cover_url = 'http://estaticos02.cache.el-mundo.net/papel/imagenes/v2.0/logoverde.gif'
remove_javascript = True
html2lrf_options = [
'--comment' , description
, '--category' , 'news, Spain'
, '--publisher' , title
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='div', attrs={'class':'noticia'})]
keep_only_tags = [
dict(name='div', attrs={'id':['bloqueprincipal','noticia']})
,dict(name='div', attrs={'class':['contenido_noticia_01']})
]
remove_tags = [
dict(name='div', attrs={'class':['herramientas','publicidad_google','video','herramientasarriba','contenido_noticia_02']})
dict(name='div', attrs={'class':['herramientas','publicidad_google']})
,dict(name='div', attrs={'id':'modulo_multimedia' })
,dict(name=['object','script','link', 'a'])
,dict(name='ul', attrs={'class':'herramientas'})
,dict(name='ul', attrs={'class':'herramientas' })
,dict(name=['object','link'])
]
feeds = [
(u'Portada' , u'http://rss.elmundo.es/rss/descarga.htm?data2=4' )
,(u'Television' , u'http://rss.elmundo.es/rss/descarga.htm?data2=76')
,(u'Espana' , u'http://rss.elmundo.es/rss/descarga.htm?data2=8' )
,(u'Internacional' , u'http://rss.elmundo.es/rss/descarga.htm?data2=9' )
,(u'Cultura' , u'http://rss.elmundo.es/rss/descarga.htm?data2=6' )
,(u'Ciencia/Ecologia', u'http://rss.elmundo.es/rss/descarga.htm?data2=5' )
,(u'Comunicacion' , u'http://rss.elmundo.es/rss/descarga.htm?data2=26')
,(u'Television' , u'http://rss.elmundo.es/rss/descarga.htm?data2=76')
]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Spanish')

View File

@ -6,26 +6,29 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
estadao.com.br
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds.news import BasicNewsRecipe
class Estadao(BasicNewsRecipe):
title = 'O Estado de S. Paulo'
__author__ = 'Darko Miletic'
description = 'News from Brasil'
language = _('Spanish')
description = 'News from Brasil in Portugese'
publisher = 'O Estado de S. Paulo'
category = 'news, politics, Brasil'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'utf8'
cover_url = 'http://www.estadao.com.br/img/logo_estadao.png'
remove_javascript = True
html2lrf_options = [
'--comment' , description
, '--category' , 'news, Brasil'
, '--publisher' , title
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='div', attrs={'id':'c1'})]
@ -52,4 +55,8 @@ class Estadao(BasicNewsRecipe):
ifr = soup.find('iframe')
if ifr:
ifr.extract()
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Portugese')

View File

@ -0,0 +1,51 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
exiledonline.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Exiled(BasicNewsRecipe):
title = 'Exiled Online'
__author__ = 'Darko Miletic'
description = "Mankind's only alternative since 1997 - Formerly known as The eXile"
publisher = 'Exiled Online'
language = _('English')
category = 'news, politics, international'
oldest_article = 15
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'utf8'
remove_javascript = True
cover_url = 'http://exiledonline.com/wp-content/themes/exiledonline_theme/images/header-sm.gif'
html2lrf_options = [
'--comment' , description
, '--category' , category
, '--publisher' , publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='div', attrs={'id':'main'})]
remove_tags = [
dict(name=['object','link'])
,dict(name='div', attrs={'class':'info'})
,dict(name='div', attrs={'id':['comments','navig']})
]
feeds = [(u'Articles', u'http://exiledonline.com/feed/' )]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
mtag = '\n<meta http-equiv="Content-Language" content="en"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n'
soup.head.insert(0,mtag)
return soup

View File

@ -7,27 +7,30 @@ granma.cubaweb.cu
'''
import urllib
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds.news import BasicNewsRecipe
class Granma(BasicNewsRecipe):
title = 'Diario Granma'
__author__ = 'Darko Miletic'
language = _('Spanish')
description = 'Organo oficial del Comite Central del Partido Comunista de Cuba'
publisher = 'Granma'
category = 'news, politics, Cuba'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'cp1252'
cover_url = 'http://www.granma.cubaweb.cu/imagenes/granweb229d.jpg'
remove_javascript = True
html2lrf_options = [
'--comment' , description
, '--category' , 'news, Cuba'
, '--publisher' , title
'--comment', description
, '--category', category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='table', attrs={'height':'466'})]
@ -35,9 +38,15 @@ class Granma(BasicNewsRecipe):
def preprocess_html(self, soup):
del soup.body.table['style']
rtag = soup.find('td', attrs={'height':'458'})
if rtag:
del rtag['style']
mtag = '<meta http-equiv="Content-Language" content="es-CU"/>'
soup.head.insert(0,mtag)
for item in soup.findAll('table'):
if item.has_key('width'):
del item['width']
if item.has_key('height'):
del item['height']
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Spanish')

View File

@ -1,62 +1,80 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
'''
harpers.org - paid subscription/ printed issue articles
This recipe only get's article's published in text format
images and pdf's are ignored
'''
from calibre import strftime
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
'''
harpers.org - paid subscription/ printed issue articles
This recipe only get's article's published in text format
images and pdf's are ignored
'''
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class Harpers_full(BasicNewsRecipe):
title = u"Harper's Magazine - articles from printed edition"
__author__ = u'Darko Miletic'
description = u"Harper's Magazine: Founded June 1850."
language = _('English')
oldest_article = 30
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
simultaneous_downloads = 1
delay = 1
needs_subscription = True
INDEX = strftime('http://www.harpers.org/archive/%Y/%m')
LOGIN = 'http://www.harpers.org'
cover_url = strftime('http://www.harpers.org/media/pages/%Y/%m/gif/0001.gif')
keep_only_tags = [ dict(name='div', attrs={'id':'cached'}) ]
remove_tags = [
dict(name='table', attrs={'class':'rcnt'})
,dict(name='table', attrs={'class':'rcnt topline'})
]
def get_browser(self):
br = BasicNewsRecipe.get_browser()
if self.username is not None and self.password is not None:
br.open(self.LOGIN)
br.select_form(nr=1)
br['handle' ] = self.username
br['password'] = self.password
br.submit()
return br
class Harpers_full(BasicNewsRecipe):
title = u"Harper's Magazine - articles from printed edition"
__author__ = u'Darko Miletic'
description = u"Harper's Magazine: Founded June 1850."
publisher = "Harpers's"
category = 'news, politics, USA'
oldest_article = 30
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
simultaneous_downloads = 1
delay = 1
needs_subscription = True
INDEX = strftime('http://www.harpers.org/archive/%Y/%m')
LOGIN = 'http://www.harpers.org'
cover_url = strftime('http://www.harpers.org/media/pages/%Y/%m/gif/0001.gif')
remove_javascript = True
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [ dict(name='div', attrs={'id':'cached'}) ]
remove_tags = [
dict(name='table', attrs={'class':'rcnt'})
,dict(name='table', attrs={'class':'rcnt topline'})
]
def get_browser(self):
br = BasicNewsRecipe.get_browser()
if self.username is not None and self.password is not None:
br.open(self.LOGIN)
br.select_form(nr=1)
br['handle' ] = self.username
br['password'] = self.password
br.submit()
return br
def parse_index(self):
articles = []
print 'Processing ' + self.INDEX
soup = self.index_to_soup(self.INDEX)
for item in soup.findAll('div', attrs={'class':'title'}):
text_link = item.parent.find('img',attrs={'alt':'Text'})
if text_link:
url = self.LOGIN + item.a['href']
title = item.a.contents[0]
date = strftime(' %B %Y')
articles.append({
'title' :title
,'date' :date
,'url' :url
,'description':''
})
return [(soup.head.title.string, articles)]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup
def parse_index(self):
articles = []
print 'Processing ' + self.INDEX
soup = self.index_to_soup(self.INDEX)
for item in soup.findAll('div', attrs={'class':'title'}):
text_link = item.parent.find('img',attrs={'alt':'Text'})
if text_link:
url = self.LOGIN + item.a['href']
title = item.a.contents[0]
date = strftime(' %B %Y')
articles.append({
'title' :title
,'date' :date
,'url' :url
,'description':''
})
return [(soup.head.title.string, articles)]
language = _('English')
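
For reference, the structure parse_index() assembles above is a list of (section title, article list) pairs, each article being a small dict. A hedged illustration with made-up values:

index = [
    ("Harper's Magazine", [
        {'title'      : 'A made-up article title',
         'date'       : ' February 2009',
         'url'        : 'http://www.harpers.org/archive/2009/02/0012345',
         'description': ''},
    ]),
]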

View File

@ -0,0 +1,59 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
honoluluadvertiser.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Honoluluadvertiser(BasicNewsRecipe):
title = 'Honolulu Advertiser'
__author__ = 'Darko Miletic'
description = "Latest national and local Hawaii sports news from The Honolulu Advertiser."
publisher = 'Honolulu Advertiser'
category = 'news, Honolulu, Hawaii'
oldest_article = 2
language = _('English')
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'cp1252'
remove_javascript = True
cover_url = 'http://www.honoluluadvertiser.com/graphics/branding.gif'
html2lrf_options = [
'--comment' , description
, '--category' , category
, '--publisher' , publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='td')]
remove_tags = [dict(name=['object','link'])]
feeds = [
(u'Breaking news', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS01&MIME=XML' )
,(u'Local news', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS02&MIME=XML' )
,(u'Sports', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS03&MIME=XML' )
,(u'Island life', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS05&MIME=XML' )
,(u'Entertainment', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS06&MIME=XML' )
,(u'Business', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS04&MIME=XML' )
]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
mtag = '\n<meta http-equiv="Content-Language" content="en"/>\n'
soup.head.insert(0,mtag)
return soup
def print_version(self, url):
ubody, sep, rest = url.rpartition('/-1/')
root, sep2, article_id = ubody.partition('/article/')
return u'http://www.honoluluadvertiser.com/apps/pbcs.dll/article?AID=/' + article_id + '&template=printart'

View File

@ -0,0 +1,15 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1234144423(BasicNewsRecipe):
title = u'Indianapolis Star'
oldest_article = 5
language = _('English')
__author__ = 'Owen Kelly'
max_articles_per_feed = 100
cover_url = u'http://www2.indystar.com/frontpage/images/today.jpg'
feeds = [(u'Community Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=LOCAL&template=rss&mime=XML'), (u'News Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=NEWS&template=rss&mime=XML'), (u'Business Headlines', u'http://www..indystar.com/apps/pbcs.dll/section?Category=BUSINESS&template=rss&mime=XML'), (u'Sports Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=SPORTS&template=rss&mime=XML'), (u'Lifestyle Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=LIVING&template=rss&mime=XML'), (u'Opinion Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=OPINION&template=rss&mime=XML')]
def print_version(self, url):
return url + '&template=printart'

View File

@ -1,34 +1,36 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
'''
infobae.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds.news import BasicNewsRecipe
class Infobae(BasicNewsRecipe):
title = 'Infobae.com'
__author__ = 'Darko Miletic'
description = 'Informacion Libre las 24 horas'
description = 'Informacion Libre las 24 horas'
publisher = 'Infobae.com'
category = 'news, politics, Argentina'
oldest_article = 2
language = _('Spanish')
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'iso-8859-1'
cover_url = 'http://www.infobae.com/imgs/header/header.gif'
html2lrf_options = [
'--comment' , description
, '--category' , 'news, Argentina'
, '--publisher' , 'Infobae.com'
]
remove_javascript = True
feeds = [
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
feeds = [
(u'Noticias' , u'http://www.infobae.com/adjuntos/html/RSS/hoy.xml' )
,(u'Salud' , u'http://www.infobae.com/adjuntos/html/RSS/salud.xml' )
,(u'Tecnologia', u'http://www.infobae.com/adjuntos/html/RSS/tecnologia.xml')
@ -37,5 +39,14 @@ class Infobae(BasicNewsRecipe):
def print_version(self, url):
main, sep, article_part = url.partition('contenidos/')
article_id, rsep, rrest = article_part.partition('-')
article_id, rsep, rrest = article_part.partition('-')
return u'http://www.infobae.com/notas/nota_imprimir.php?Idx=' + article_id
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n<meta http-equiv="Content-Language" content="es-AR"/>\n'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Spanish')

View File

@ -6,25 +6,29 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
jbonline.terra.com.br
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds.news import BasicNewsRecipe
class JBOnline(BasicNewsRecipe):
title = 'Jornal Brasileiro Online'
__author__ = 'Darko Miletic'
description = 'News from Brasil'
description = 'News from Brasil'
publisher = 'Jornal Brasileiro'
category = 'news, politics, Brasil'
oldest_article = 2
language = _('Spanish')
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'cp1252'
cover_url = 'http://jbonline.terra.com.br/img/logo_01.gif'
remove_javascript = True
html2lrf_options = [
'--comment' , description
, '--category' , 'news, Brasil'
, '--publisher' , title
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='div', attrs={'id':'corpoNoticia'})]
@ -36,7 +40,8 @@ class JBOnline(BasicNewsRecipe):
ifr = soup.find('iframe')
if ifr:
ifr.extract()
item = soup.find('div', attrs={'id':'corpoNoticia'})
if item:
del item['style']
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Portugese')

View File

@ -6,28 +6,36 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
jutarnji.hr
'''
import string, re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Jutarnji(BasicNewsRecipe):
title = 'Jutarnji'
__author__ = 'Darko Miletic'
description = 'Online izdanje Jutarnjeg lista'
title = u'Jutarnji'
__author__ = u'Darko Miletic'
description = u'Hrvatski portal'
publisher = 'Jutarnji.hr'
category = 'news, politics, Croatia'
oldest_article = 2
max_articles_per_feed = 100
simultaneous_downloads = 1
delay = 1
language = _('Croatian')
no_stylesheets = True
use_embedded_content = False
remove_javascript = True
encoding = 'cp1250'
cover_url = 'http://www.jutarnji.hr/EPHResources/Images/2008/06/05/jhrlogo.png'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
html2lrf_options = [
'--comment', description
, '--base-font-size', '10'
, '--category', 'news, Croatia'
, '--publisher', 'Europapress holding d.o.o.'
]
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
remove_tags = [
@ -49,11 +57,16 @@ class Jutarnji(BasicNewsRecipe):
def print_version(self, url):
main, split, rest = url.partition('.jl')
rmain, rsplit, rrest = main.rpartition(',')
return u'http://www.jutarnji.hr/ispis_clanka.jl?artid=' + rrest
return 'http://www.jutarnji.hr/ispis_clanka.jl?artid=' + rrest
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
soup.head.insert(0,mtag)
soup.prettify()
mtag = '<meta http-equiv="Content-Language" content="hr"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll(width=True):
del item['width']
return soup

View File

@ -7,26 +7,30 @@ juventudrebelde.cu
'''
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds.news import BasicNewsRecipe
class Juventudrebelde(BasicNewsRecipe):
title = 'Juventud Rebelde'
__author__ = 'Darko Miletic'
description = 'Diario de la Juventud Cubana'
description = 'Diario de la Juventud Cubana'
publisher = 'Juventud rebelde'
category = 'news, politics, Cuba'
oldest_article = 2
language = _('Spanish')
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'cp1252'
cover_url = strftime('http://www.juventudrebelde.cu/UserFiles/File/impreso/iportada-%Y-%m-%d.jpg')
remove_javascript = True
html2lrf_options = [
'--comment' , description
, '--category' , 'news, Cuba'
, '--publisher' , title
'--comment', description
, '--category', category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='div', attrs={'id':'noticia'})]
@ -40,4 +44,11 @@ class Juventudrebelde(BasicNewsRecipe):
,(u'Lectura', u'http://www.juventudrebelde.cu/rss/generales.php?seccion=lectura' )
]
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="es-CU"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Spanish')

View File

@ -5,30 +5,40 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
'''
juventudrebelde.co.cu
'''
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds.news import BasicNewsRecipe
class Juventudrebelde_english(BasicNewsRecipe):
title = 'Juventud Rebelde in english'
__author__ = 'Darko Miletic'
description = 'The newspaper of Cuban Youth'
language = _('English')
description = 'The newspaper of Cuban Youth'
publisher = 'Juventud Rebelde'
category = 'news, politics, Cuba'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'iso-8859-1'
remove_javascript = True
html2lrf_options = [
'--comment' , description
, '--category' , 'news, Cuba'
, '--publisher' , title
'--comment', description
, '--category', category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='div', attrs={'class':'read'})]
feeds = [(u'All news', u'http://www.juventudrebelde.cip.cu/rss/all/' )]
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="es-CU"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('English')

View File

@ -6,30 +6,33 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
lacuarta.cl
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds.news import BasicNewsRecipe
class LaCuarta(BasicNewsRecipe):
title = 'La Cuarta'
__author__ = 'Darko Miletic'
description = 'El sitio de noticias online de Chile'
description = 'La Cuarta Cibernetica: El Diario popular'
publisher = 'CODISA, Consorcio Digital S.A.'
category = 'news, politics, entertainment, Chile'
oldest_article = 2
language = _('Spanish')
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'cp1252'
remove_javascript = True
html2lrf_options = [
'--comment' , description
, '--category' , 'news, Chile'
, '--publisher' , title
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='div', attrs={'class':'articulo desplegado'}) ]
remove_tags = [
dict(name='script')
,dict(name='ul')
dict(name='ul')
,dict(name='div', attrs={'id':['toolbox','articleImageDisplayer','enviarAmigo']})
,dict(name='div', attrs={'class':['par ad-1','par ad-2']})
,dict(name='input')
@ -37,7 +40,14 @@ class LaCuarta(BasicNewsRecipe):
,dict(name='strong', text='PUBLICIDAD')
]
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="es-CL"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
feeds = [(u'Noticias', u'http://lacuarta.cl/app/rss?sc=TEFDVUFSVEE=')]
language = _('Spanish')

View File

@ -6,26 +6,29 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
lasegunda.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds.news import BasicNewsRecipe
class LaSegunda(BasicNewsRecipe):
title = 'La Segunda'
__author__ = 'Darko Miletic'
description = 'El sitio de noticias online de Chile'
language = _('Spanish')
description = 'El sitio de noticias online de Chile'
publisher = 'La Segunda'
category = 'news, politics, Chile'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'cp1252'
cover_url = 'http://www.lasegunda.com/imagenes/logotipo_lasegunda_Oli.gif'
remove_javascript = True
html2lrf_options = [
'--comment' , description
, '--category' , 'news, Chile'
, '--publisher' , title
, '--ignore-tables'
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='table')]
@ -45,4 +48,14 @@ class LaSegunda(BasicNewsRecipe):
def print_version(self, url):
rest, sep, article_id = url.partition('index.asp?idnoticia=')
return u'http://www.lasegunda.com/edicionOnline/include/secciones/_detalle_impresion.asp?idnoticia=' + article_id
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="es-CL"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(name='table', width=True):
del item['width']
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Spanish')

View File

@ -6,26 +6,30 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
latercera.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds.news import BasicNewsRecipe
class LaTercera(BasicNewsRecipe):
title = 'La Tercera'
__author__ = 'Darko Miletic'
description = 'El sitio de noticias online de Chile'
description = 'El sitio de noticias online de Chile'
publisher = 'La Tercera'
category = 'news, politics, Chile'
oldest_article = 2
language = _('Spanish')
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'cp1252'
remove_javascript = True
use_embedded_content = False
html2lrf_options = [
'--comment' , description
, '--category' , 'news, Chile'
, '--publisher' , title
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='div', attrs={'class':'span-16 articulo border'}) ]
keep_only_tags = [dict(name='div', attrs={'class':['span-16 articulo border','span-16 border','span-16']}) ]
remove_tags = [
dict(name='script')
@ -50,4 +54,11 @@ class LaTercera(BasicNewsRecipe):
,(u'Educacion', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=657')
]
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="es-CL"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Spanish')

View File

@ -1,29 +1,32 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
'''
lanacion.com.ar
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds.news import BasicNewsRecipe
class Lanacion(BasicNewsRecipe):
title = 'La Nacion'
__author__ = 'Darko Miletic'
description = 'Informacion actualizada las 24 horas, con noticias de Argentina y del mundo - Informate ya!'
description = 'Noticias de Argentina y el resto del mundo'
publisher = 'La Nacion'
category = 'news, politics, Argentina'
oldest_article = 2
language = _('Spanish')
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
remove_javascript = True
no_stylesheets = True
html2lrf_options = [
'--comment', description
, '--base-font-size', '10'
, '--category', 'news, Argentina'
, '--publisher', 'La Nacion SA'
]
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='div', attrs={'class':'nota floatFix'})]
remove_tags = [
@ -47,11 +50,11 @@ class Lanacion(BasicNewsRecipe):
,(u'Revista' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=494' )
]
def get_cover_url(self):
index = 'http://www.lanacion.com.ar'
cover_url = None
soup = self.index_to_soup(index)
cover_item = soup.find('img',attrs={'class':'logo'})
if cover_item:
cover_url = index + cover_item['src']
return cover_url
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Spanish')
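
The get_cover_url above scrapes the front page for the logo image instead of hard-coding a cover URL. A standalone sketch of that lookup, with a made-up stand-in for the page HTML:

from calibre.ebooks.BeautifulSoup import BeautifulSoup

index = 'http://www.lanacion.com.ar'
html  = '<html><body><img class="logo" src="/img/logo.gif"/></body></html>'
soup  = BeautifulSoup(html)
cover_url = None
cover_item = soup.find('img', attrs={'class': 'logo'})
if cover_item:
    cover_url = index + cover_item['src']
print cover_url    # -> http://www.lanacion.com.ar/img/logo.gif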

View File

@ -7,25 +7,29 @@ lanacion.cl
'''
import urllib
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds.news import BasicNewsRecipe
class LaNacionChile(BasicNewsRecipe):
title = 'La Nacion Chile'
__author__ = 'Darko Miletic'
description = 'El sitio de noticias online de Chile'
description = 'El sitio de noticias online de Chile'
publisher = 'La Nacion'
category = 'news, politics, Chile'
oldest_article = 2
language = _('Spanish')
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'cp1252'
cover_url = 'http://www.lanacion.cl/prontus_noticias_v2/imag/site/logo.gif'
remove_javascript = True
html2lrf_options = [
'--comment' , description
, '--category' , 'news, Chile'
, '--publisher' , title
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='div', attrs={'class':'bloque'})]
@ -41,5 +45,10 @@ class LaNacionChile(BasicNewsRecipe):
item = soup.find('a', attrs={'href':'javascript:window.close()'})
if item:
item.extract()
mtag = '<meta http-equiv="Content-Language" content="es-CL"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Spanish')

View File

@ -1,31 +1,35 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
'''
laprensa.com.ar
'''
import urllib
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds.news import BasicNewsRecipe
class LaPrensa(BasicNewsRecipe):
title = 'La Prensa'
__author__ = 'Darko Miletic'
description = 'Informacion Libre las 24 horas'
description = 'Informacion Libre las 24 horas'
publisher = 'La Prensa'
category = 'news, politics, Argentina'
oldest_article = 7
language = _('Spanish')
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'cp1252'
cover_url = 'http://www.laprensa.com.ar/imgs/logo.gif'
remove_javascript = True
html2lrf_options = [
'--comment' , description
, '--category' , 'news, Argentina'
, '--publisher' , title
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
feeds = [
(u'Politica' , u'http://www.laprensa.com.ar/Rss.aspx?Rss=4' )
@ -47,5 +51,10 @@ class LaPrensa(BasicNewsRecipe):
def preprocess_html(self, soup):
del soup.body['onload']
mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Spanish')

View File

@ -7,12 +7,15 @@ nin.co.yu
'''
import re, urllib
from calibre.web.feeds.news import BasicNewsRecipe
class Nin(BasicNewsRecipe):
from calibre.web.feeds.news import BasicNewsRecipe
class Nin(BasicNewsRecipe):
title = 'NIN online'
__author__ = 'Darko Miletic'
description = 'Nedeljne informativne novine'
publisher = 'NIN'
category = 'news, politics, Serbia'
no_stylesheets = True
oldest_article = 15
simultaneous_downloads = 1
@ -22,11 +25,17 @@ class Nin(BasicNewsRecipe):
PREFIX = 'http://www.nin.co.yu'
INDEX = PREFIX + '/?change_lang=ls'
LOGIN = PREFIX + '/?logout=true'
remove_javascript = True
use_embedded_content = False
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
html2lrf_options = [
'--comment' , description
, '--category' , 'news, politics, Serbia'
, '--publisher' , 'NIN'
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -53,3 +62,12 @@ class Nin(BasicNewsRecipe):
if link_item:
cover_url = self.PREFIX + link_item['src']
return cover_url
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Serbian')

View File

@ -5,31 +5,45 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
'''
novosti.rs
'''
import string,re
from calibre.web.feeds.news import BasicNewsRecipe
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Novosti(BasicNewsRecipe):
title = 'Vecernje Novosti'
__author__ = 'Darko Miletic'
description = 'novosti, vesti, politika, dosije, drustvo, ekonomija, hronika, reportaze, svet, kultura, sport, beograd, regioni, mozaik, feljton, intrvju, pjer, fudbal, kosarka, podvig, arhiva, komentari, kolumne, srbija, republika srpska,Vecernje novosti'
title = u'Vecernje Novosti'
__author__ = u'Darko Miletic'
description = u'Vesti'
publisher = 'Kompanija Novosti'
category = 'news, politics, Serbia'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'utf8'
remove_javascript = True
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
html2lrf_options = [
'--comment', description
, '--base-font-size', '10'
, '--category', 'news, Serbia'
, '--publisher', 'Novosti AD'
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
keep_only_tags = [ dict(name='div', attrs={'class':'jednaVest'}) ]
remove_tags_after = dict(name='div', attrs={'class':'info_bottom'})
remove_tags = [
dict(name='div', attrs={'class':'info'})
,dict(name='div', attrs={'class':'info_bottom'})
]
keep_only_tags = [dict(name='div', attrs={'class':'jednaVest'})]
remove_tags = [dict(name='div', attrs={'class':['info','info_bottom','clip_div']})]
feeds = [ (u'Vesti', u'http://www.novosti.rs/php/vesti/rss.php')]
feeds = [(u'Vesti', u'http://www.novosti.rs/php/vesti/rss.php')]
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="sr-Latn"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Serbian')

View File

@ -6,35 +6,55 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
nspm.rs
'''
import string,re
from calibre.web.feeds.news import BasicNewsRecipe
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Nspm(BasicNewsRecipe):
title = u'Nova srpska politicka misao'
__author__ = 'Darko Miletic'
description = 'Casopis za politicku teoriju i drustvena istrazivanja'
publisher = 'NSPM'
category = 'news, politics, Serbia'
oldest_article = 7
language = _('Serbian')
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
INDEX = 'http://www.nspm.rs/?alphabet=l'
cover_url = 'http://nspm.rs/templates/jsn_epic_pro/images/logol.jpg'
encoding = 'utf8'
remove_javascript = True
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
html2lrf_options = [
'--comment', description
, '--base-font-size', '10'
, '--category', 'news, politics, Serbia'
, '--publisher', 'IIC NSPM'
'--comment', description
, '--category', category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
remove_tags = [dict(name='a')]
def get_browser(self):
br = BasicNewsRecipe.get_browser()
br.open(self.INDEX)
return br
feeds = [ (u'Nova srpska politicka misao', u'http://www.nspm.rs/feed/rss.html')]
feeds = [(u'Nova srpska politicka misao', u'http://www.nspm.rs/feed/rss.html')]
def print_version(self, url):
return url.replace('.html','/stampa.html')
def preprocess_html(self, soup):
soup.html['xml:lang'] = 'sr-Latn-RS'
soup.html['lang'] = 'sr-Latn-RS'
ftag = soup.find('meta',attrs={'http-equiv':'Content-Language'})
if ftag:
ftag['content'] = 'sr-Latn-RS'
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Serbian')
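
The Nspm recipe combines two hooks worth calling out: get_browser() opens INDEX once up front, where the '?alphabet=l' switch appears to select the Latin-alphabet edition for the rest of the session, and print_version() rewrites each article URL to its printer-friendly form. A trimmed-down sketch of just those two overrides, with the rest of the class body omitted and the class name invented for illustration:

    from calibre.web.feeds.news import BasicNewsRecipe

    class NspmLikeExample(BasicNewsRecipe):
        INDEX = 'http://www.nspm.rs/?alphabet=l'
        feeds = [(u'Nova srpska politicka misao', u'http://www.nspm.rs/feed/rss.html')]

        def get_browser(self):
            # Visit the index once so the site serves the chosen edition
            # before any article pages are fetched.
            br = BasicNewsRecipe.get_browser()
            br.open(self.INDEX)
            return br

        def print_version(self, url):
            # .../some-article.html -> .../some-article/stampa.html
            return url.replace('.html', '/stampa.html')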

View File

@@ -6,25 +6,29 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
oglobo.globo.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds.news import BasicNewsRecipe
class OGlobo(BasicNewsRecipe):
title = 'O Globo'
__author__ = 'Darko Miletic'
description = 'News from Brasil'
description = 'News from Brasil'
publisher = 'O Globo'
category = 'news, politics, Brasil'
oldest_article = 2
max_articles_per_feed = 100
language = _('Spanish')
no_stylesheets = True
use_embedded_content = False
encoding = 'cp1252'
cover_url = 'http://oglobo.globo.com/_img/o-globo.png'
remove_javascript = True
html2lrf_options = [
'--comment' , description
, '--category' , 'news, Brasil'
, '--publisher' , title
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='div', attrs={'id':'ltintb'})]
@@ -56,3 +60,10 @@ class OGlobo(BasicNewsRecipe):
,(u'Economia', u'http://oglobo.globo.com/rss/plantaoeconomia.xml')
,(u'Tecnologia', u'http://oglobo.globo.com/rss/plantaotecnologia.xml')
]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Portugese')

View File

@@ -13,11 +13,10 @@ class OutlookIndia(BasicNewsRecipe):
title = 'Outlook India'
__author__ = 'Kovid Goyal'
description = 'Weekly news magazine focussed on India.'
description = 'Weekly news magazine focused on India.'
language = _('English')
recursions = 1
match_regexp = r'full.asp.*&pn=\d+'
html2lrf_options = ['--ignore-tables']
remove_tags = [
dict(name='img', src="images/space.gif"),
@@ -81,5 +80,8 @@ class OutlookIndia(BasicNewsRecipe):
bad.append(table)
for b in bad:
b.extract()
soup = soup.findAll('html')[0]
for t in soup.findAll(['table', 'tr', 'td']):
t.name = 'div'
return soup
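
The Outlook India change scrubs the unwanted tables and then flattens whatever table markup remains by renaming table, tr and td tags to div, which is gentler on the downstream converters than dropping the tables outright. A self-contained sketch of the renaming step; it assumes the standalone BeautifulSoup 3 module (the version calibre shipped with at the time), and the sample markup is made up:

    from BeautifulSoup import BeautifulSoup

    html = '<html><body><table><tr><td>cell</td></tr></table></body></html>'
    soup = BeautifulSoup(html)
    for t in soup.findAll(['table', 'tr', 'td']):
        t.name = 'div'      # keep the text content, lose the table layout
    print(soup)             # same markup, with table/tr/td now emitted as div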

View File

@@ -1,31 +1,36 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
'''
pagina12.com.ar
'''
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds.news import BasicNewsRecipe
class Pagina12(BasicNewsRecipe):
title = u'Pagina/12'
__author__ = 'Darko Miletic'
description = 'Noticias de Argentina y el resto del mundo'
language = _('Spanish')
publisher = 'La Pagina S.A.'
category = 'news, politics, Argentina'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'cp1252'
cover_url = strftime('http://www.pagina12.com.ar/fotos/%Y%m%d/diario/TAPAN.jpg')
remove_javascript = True
use_embedded_content = False
html2lrf_options = [
'--comment' , description
, '--category' , 'news, Argentina'
, '--publisher' , 'La Pagina S.A.'
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
remove_tags = [
@@ -38,3 +43,12 @@ class Pagina12(BasicNewsRecipe):
def print_version(self, url):
return url.replace('http://www.pagina12.com.ar/','http://www.pagina12.com.ar/imprimir/')
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Spanish')
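
Pagina/12 leans on predictable URL patterns: the dated cover scan comes straight from strftime, and the print view is reached by splicing /imprimir/ into the article URL. A quick sketch of both transformations; the article path used here is hypothetical:

    from calibre import strftime

    # Today's front-page scan, following the strftime pattern used above.
    cover_url = strftime('http://www.pagina12.com.ar/fotos/%Y%m%d/diario/TAPAN.jpg')

    # Print-friendly rewrite of an article URL (made-up article path).
    url = 'http://www.pagina12.com.ar/diario/elpais/1-12345.html'
    print_url = url.replace('http://www.pagina12.com.ar/',
                            'http://www.pagina12.com.ar/imprimir/')
    # -> http://www.pagina12.com.ar/imprimir/diario/elpais/1-12345.html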

View File

@@ -6,30 +6,53 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
pescanik.net
'''
import string,re
from calibre.web.feeds.news import BasicNewsRecipe
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Pescanik(BasicNewsRecipe):
title = 'Pescanik'
__author__ = 'Darko Miletic'
description = 'Pescanik'
publisher = 'Pescanik'
category = 'news, politics, Serbia'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
html2lrf_options = ['--base-font-size', '10']
html2epub_options = 'base_font_size = "10pt"'
remove_javascript = True
encoding = 'utf8'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
cover_url = "http://pescanik.net/templates/ja_teline/images/logo.png"
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
remove_tags_after = dict(name='div', attrs={'class':'article_seperator'})
remove_tags = [dict(name='td' , attrs={'class':'buttonheading'})]
remove_tags = [
dict(name='td' , attrs={'class':'buttonheading'})
,dict(name='span', attrs={'class':'article_seperator'})
,dict(name=['object','link'])
]
feeds = [(u'Pescanik Online', u'http://pescanik.net/index.php?option=com_rd_rss&id=12')]
def print_version(self, url):
nurl = url.replace('http://pescanik.net/index.php','http://pescanik.net/index2.php')
nurl = url.replace('/index.php','/index2.php')
return nurl + '&pop=1&page=0'
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Serbian')
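
Several recipes in this diff (Nin, Novosti, Nspm, Pescanik, Pobjeda) share the preprocess_regexps entry that maps uppercase U+0110 (Đ) to U+00D0 (Ð), presumably a workaround for a missing glyph in the target reader fonts. Each entry is a (compiled pattern, replacement callable) pair; a standalone equivalent with a made-up sample string:

    import re

    # One (compiled pattern, replacement callable) pair, as used by the
    # preprocess_regexps attribute above.
    rule = (re.compile(u'\u0110'), lambda match: u'\u00D0')

    sample = u'\u0110urovi\u0107'        # made-up sample, "Djurovic" with diacritics
    pattern, repl = rule
    converted = pattern.sub(repl, sample)
    # converted is now u'\u00D0urovi\u0107': only the uppercase letter changes;
    # lowercase u'\u0111' would be left untouched by this rule.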

View File

@@ -0,0 +1,102 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
pobjeda.co.me
'''
import re
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class Pobjeda(BasicNewsRecipe):
title = 'Pobjeda Online'
__author__ = 'Darko Miletic'
description = 'News from Montenegro'
publisher = 'Pobjeda a.d.'
category = 'news, politics, Montenegro'
language = _('Serbian')
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
remove_javascript = True
encoding = 'utf8'
use_embedded_content = False
INDEX = u'http://www.pobjeda.co.me'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: serif1, serif}'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
keep_only_tags = [dict(name='div', attrs={'class':'vijest'})]
remove_tags = [dict(name=['object','link'])]
feeds = [
(u'Politika' , u'http://www.pobjeda.co.me/rubrika.php?rubrika=1' )
,(u'Ekonomija' , u'http://www.pobjeda.co.me/rubrika.php?rubrika=2' )
,(u'Drustvo' , u'http://www.pobjeda.co.me/rubrika.php?rubrika=3' )
,(u'Crna Hronika' , u'http://www.pobjeda.co.me/rubrika.php?rubrika=4' )
,(u'Kultura' , u'http://www.pobjeda.co.me/rubrika.php?rubrika=5' )
,(u'Hronika Podgorice' , u'http://www.pobjeda.co.me/rubrika.php?rubrika=7' )
,(u'Feljton' , u'http://www.pobjeda.co.me/rubrika.php?rubrika=8' )
,(u'Crna Gora' , u'http://www.pobjeda.co.me/rubrika.php?rubrika=9' )
,(u'Svijet' , u'http://www.pobjeda.co.me/rubrika.php?rubrika=202')
,(u'Ekonomija i Biznis', u'http://www.pobjeda.co.me/dodatak.php?rubrika=11' )
,(u'Djeciji Svijet' , u'http://www.pobjeda.co.me/dodatak.php?rubrika=12' )
,(u'Kultura i Drustvo' , u'http://www.pobjeda.co.me/dodatak.php?rubrika=13' )
,(u'Agora' , u'http://www.pobjeda.co.me/dodatak.php?rubrika=133')
,(u'Ekologija' , u'http://www.pobjeda.co.me/dodatak.php?rubrika=252')
]
def preprocess_html(self, soup):
soup.html['xml:lang'] = 'sr-Latn-ME'
soup.html['lang'] = 'sr-Latn-ME'
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-ME"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
def get_cover_url(self):
cover_url = None
soup = self.index_to_soup(self.INDEX)
cover_item = soup.find('img',attrs={'alt':'Naslovna strana'})
if cover_item:
cover_url = self.INDEX + cover_item.parent['href']
return cover_url
def parse_index(self):
totalfeeds = []
lfeeds = self.get_feeds()
for feedobj in lfeeds:
feedtitle, feedurl = feedobj
self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
articles = []
soup = self.index_to_soup(feedurl)
for item in soup.findAll('div', attrs={'class':'vijest'}):
description = self.tag_to_string(item.h2)
atag = item.h1.find('a')
if atag:
url = self.INDEX + '/' + atag['href']
title = self.tag_to_string(atag)
date = strftime(self.timefmt)
articles.append({
'title' :title
,'date' :date
,'url' :url
,'description':description
})
totalfeeds.append((feedtitle, articles))
return totalfeeds
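
Because Pobjeda has no per-section RSS, parse_index() scrapes each section page itself and must hand back the same structure that feed-based recipes get for free: a list of (section title, list of article dicts) pairs, each dict carrying title, url, date and description. A sketch of that shape with placeholder values:

    # The shape parse_index() returns (placeholder data, not real articles).
    totalfeeds = [
        (u'Politika', [
            {
                'title'       : u'Example headline',
                'url'         : u'http://www.pobjeda.co.me/example-article',   # placeholder
                'date'        : u'Wed, 11 Feb 2009',
                'description' : u'Summary text pulled from the section page',
            },
        ]),
        (u'Ekonomija', []),   # a section with nothing parsed simply stays empty
    ]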

Some files were not shown because too many files have changed in this diff.