Sync to trunk.

This commit is contained in:
John Schember 2009-08-18 18:52:10 -04:00
commit 5e3493eab2
52 changed files with 1790 additions and 822 deletions

View File

@ -75,7 +75,7 @@ def sanitize_file_name(name, substitute='_', as_unicode=False):
'''
Sanitize the filename `name`. All invalid characters are replaced by `substitute`.
The set of invalid characters is the union of the invalid characters in Windows,
OS X and Linux. Also removes leading an trailing whitespace.
OS X and Linux. Also removes leading and trailing whitespace.
**WARNING:** This function also replaces path separators, so only pass file names
and not full paths to it.
*NOTE:* This function always returns byte strings, not unicode objects. The byte strings

View File

@ -520,7 +520,8 @@ class Device(DeviceConfig, DevicePlugin):
main, carda, cardb = self.find_device_nodes()
if main is None:
raise DeviceError(_('Unable to detect the %s disk drive.')
raise DeviceError(_('Unable to detect the %s disk drive. Your '
' kernel is probably exporting a deprecated version of SYSFS.')
%self.__class__.__name__)
self._linux_mount_map = {}

View File

@ -301,6 +301,15 @@ class MetaInformation(object):
def authors_from_string(self, raw):
self.authors = string_to_authors(raw)
def format_authors(self):
return authors_to_string(self.authors)
def format_tags(self):
return u', '.join([unicode(t) for t in self.tags])
def format_rating(self):
return unicode(self.rating)
def __unicode__(self):
ans = []
def fmt(x, y):

View File

@ -153,14 +153,17 @@ def read_metadata(paths, result_queue, chunk=50, spare_server=None):
t.start()
return t
###########################################################################
############ Saving #####################
###########################################################################
class SaveWorker(Thread):
def __init__(self, result_queue, db, ids, path, by_author=False,
single_dir=False, single_format=None, spare_server=None):
def __init__(self, result_queue, db, ids, path, opts, spare_server=None):
Thread.__init__(self)
self.daemon = True
self.path, self.by_author = path, by_author
self.single_dir, self.single_format = single_dir, single_format
self.path, self.opts = path, opts
self.ids = ids
self.library_path = db.library_path
self.canceled = False
@ -170,17 +173,22 @@ class SaveWorker(Thread):
self.start()
def run(self):
from calibre.library.save_to_disk import config
server = Server() if self.spare_server is None else self.spare_server
ids = set(self.ids)
tasks = server.split(list(ids))
jobs = set([])
c = config()
recs = {}
for pref in c.preferences:
recs[pref.name] = getattr(self.opts, pref.name)
for i, task in enumerate(tasks):
tids = [x[-1] for x in task]
job = ParallelJob('save_book',
'Save books (%d of %d)'%(i, len(tasks)),
lambda x,y:x,
args=[tids, self.library_path, self.path, self.single_dir,
self.single_format, self.by_author])
args=[tids, self.library_path, self.path, recs])
jobs.add(job)
server.add_job(job)
@ -192,9 +200,9 @@ class SaveWorker(Thread):
job.update(consume_notifications=False)
while True:
try:
id, title, ok = job.notifications.get_nowait()[0]
id, title, ok, tb = job.notifications.get_nowait()[0]
if id in ids:
self.result_queue.put((id, title, ok))
self.result_queue.put((id, title, ok, tb))
ids.remove(id)
except Empty:
break
@ -221,23 +229,18 @@ class SaveWorker(Thread):
pass
def save_book(task, library_path, path, single_dir, single_format,
by_author, notification=lambda x,y:x):
def save_book(task, library_path, path, recs, notification=lambda x,y:x):
from calibre.library.database2 import LibraryDatabase2
db = LibraryDatabase2(library_path)
from calibre.library.save_to_disk import config, save_to_disk
opts = config().parse()
for name in recs:
setattr(opts, name, recs[name])
def callback(id, title):
notification((id, title, True))
def callback(id, title, failed, tb):
notification((id, title, not failed, tb))
return True
if single_format is None:
failures = []
db.export_to_dir(path, task, index_is_id=True, byauthor=by_author,
callback=callback, single_dir=single_dir)
else:
failures = db.export_single_format_to_dir(path, task, single_format,
index_is_id=True, callback=callback)
for id, title in failures:
notification((id, title, False))
save_to_disk(db, task, path, opts, callback)

View File

@ -6,15 +6,19 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre.ebooks.oeb.base import OEB_DOCS, XPNSMAP
from calibre.ebooks.oeb.base import OEB_DOCS, XPath
class LinearizeTables(object):
def linearize(self, root):
for x in root.xpath('//h:table|//h:td|//h:tr|//h:th',
namespaces=XPNSMAP):
for x in XPath('//h:table|//h:td|//h:tr|//h:th|//h:caption|'
'//h:tbody|//h:tfoot|//h:thead|//h:colgroup|//h:col')(root):
x.tag = 'div'
for attr in ('valign', 'colspan', 'rowspan', 'width', 'halign'):
for attr in ('style', 'font', 'valign',
'colspan', 'width', 'height',
'rowspan', 'summary', 'align',
'cellspacing', 'cellpadding',
'frames', 'rules', 'border'):
if attr in x.attrib:
del x.attrib[attr]

View File

@ -74,6 +74,8 @@ def _config():
c.add_opt('search_as_you_type', default=True,
help='Start searching as you type. If this is disabled then search will '
'only take place when the Enter or Return key is pressed.')
c.add_opt('save_to_disk_template_history', default=[],
help='Previously used Save to Disk templates')
return ConfigProxy(c)
config = _config()

View File

@ -295,13 +295,13 @@ class Adder(QObject):
class Saver(QObject):
def __init__(self, parent, db, callback, rows, path,
by_author=False, single_dir=False, single_format=None,
def __init__(self, parent, db, callback, rows, path, opts,
spare_server=None):
QObject.__init__(self, parent)
self.pd = ProgressDialog(_('Saving...'), parent=parent)
self.spare_server = spare_server
self.db = db
self.opts = opts
self.pd.setModal(True)
self.pd.show()
self.pd.set_min(0)
@ -315,8 +315,8 @@ class Saver(QObject):
self.failures = set([])
from calibre.ebooks.metadata.worker import SaveWorker
self.worker = SaveWorker(self.rq, db, self.ids, path, by_author,
single_dir, single_format, spare_server=self.spare_server)
self.worker = SaveWorker(self.rq, db, self.ids, path, self.opts,
spare_server=self.spare_server)
self.connect(self.pd, SIGNAL('canceled()'), self.canceled)
self.timer = QTimer(self)
self.connect(self.timer, SIGNAL('timeout()'), self.update)
@ -344,15 +344,14 @@ class Saver(QObject):
return
try:
id, title, ok = self.rq.get_nowait()
id, title, ok, tb = self.rq.get_nowait()
except Empty:
return
self.pd.value += 1
self.ids.remove(id)
if not isinstance(title, unicode):
title = str(title).decode('utf-8', preferred_encoding)
title = str(title).decode(preferred_encoding, 'replace')
self.pd.set_msg(_('Saved')+' '+title)
if not ok:
self.failures.add(title)
self.failures.add((title, tb))

View File

@ -0,0 +1,66 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import textwrap
from PyQt4.Qt import QTabWidget
from calibre.gui2.dialogs.add_save_ui import Ui_TabWidget
from calibre.library.save_to_disk import config, FORMAT_ARG_DESCS
class AddSave(QTabWidget, Ui_TabWidget):
    '''
    Tab widget for editing the Save to Disk preferences.

    The widgets are created by ``Ui_TabWidget.setupUi``; this class fills
    them from the values returned by
    ``calibre.library.save_to_disk.config()`` and writes them back in
    :meth:`save_settings`.
    '''

    # Options edited through check boxes
    BOOLEAN_OPTIONS = ('asciiize', 'update_metadata', 'save_cover',
            'write_opf')
    # Options edited through plain text fields
    TEXT_OPTIONS = ('formats', 'timefmt')

    def __init__(self, parent=None):
        QTabWidget.__init__(self, parent)
        self.setupUi(self)
        conf = config()
        opts = conf.parse()

        def wrapped_help(name):
            # Help text of option `name`, re-flowed to at most 75 columns
            return '\n'.join(textwrap.wrap(conf.get_option(name).help, 75))

        for name in self.BOOLEAN_OPTIONS:
            widget = getattr(self, 'opt_'+name)
            widget.setChecked(getattr(opts, name))
            tip = wrapped_help(name)
            widget.setToolTip(tip)
            widget.setWhatsThis(tip)

        for name in self.TEXT_OPTIONS:
            widget = getattr(self, 'opt_'+name)
            widget.setText(getattr(opts, name))
            tip = wrapped_help(name)
            widget.setToolTip(tip)
            widget.setWhatsThis(tip)

        # The template widget is initialized with the stored template
        # history and the currently configured template
        tip = wrapped_help('template')
        self.opt_template.initialize('save_to_disk_template_history',
                opts.template, help=tip)

        # One HTML table row per template variable, sorted by name
        rows = [u'<tr><td>%s</td><td>%s</td></tr>'%
                    (var, FORMAT_ARG_DESCS[var])
                for var in sorted(FORMAT_ARG_DESCS.keys())]
        self.template_variables.setText(
                u'<table>%s</table>'%(u'\n'.join(rows)))

    def save_settings(self):
        '''
        Write the state of every option widget back to the Save to Disk
        configuration.
        '''
        conf = config()
        for name in self.BOOLEAN_OPTIONS:
            checkbox = getattr(self, 'opt_'+name)
            conf.set(name, checkbox.isChecked())
        for name in ('formats', 'template', 'timefmt'):
            field = getattr(self, 'opt_'+name)
            conf.set(name, unicode(field.text()).strip())
if __name__ == '__main__':
    # Manual test harness: show the widget on its own and store the
    # settings once the event loop has finished.
    from PyQt4.Qt import QApplication
    application = QApplication([])
    widget = AddSave()
    widget.show()
    application.exec_()
    widget.save_settings()

View File

@ -10,12 +10,11 @@ from PyQt4.Qt import QDialog, QMessageBox, QListWidgetItem, QIcon, \
QDialogButtonBox, QTabWidget, QBrush, QLineEdit
from calibre.constants import islinux, iswindows
from calibre.gui2.dialogs.config_ui import Ui_Dialog
from calibre.gui2.dialogs.config.config_ui import Ui_Dialog
from calibre.gui2 import qstring_to_unicode, choose_dir, error_dialog, config, \
ALL_COLUMNS, NONE, info_dialog, choose_files, \
warning_dialog
from calibre.utils.config import prefs
from calibre.gui2.widgets import FilenamePattern
from calibre.gui2.library import BooksModel
from calibre.ebooks import BOOK_EXTENSIONS
from calibre.ebooks.oeb.iterator import is_supported
@ -193,12 +192,12 @@ class CategoryModel(QStringListModel):
def __init__(self, *args):
QStringListModel.__init__(self, *args)
self.setStringList([_('General'), _('Interface'), _('Conversion'),
_('Email\nDelivery'),
_('Email\nDelivery'), _('Add/Save'),
_('Advanced'), _('Content\nServer'), _('Plugins')])
self.icons = list(map(QVariant, map(QIcon,
[':/images/dialog_information.svg', ':/images/lookfeel.svg',
':/images/convert.svg',
':/images/mail.svg', ':/images/view.svg',
':/images/mail.svg', ':/images/save.svg', ':/images/view.svg',
':/images/network-server.svg', ':/images/plugins.svg'])))
def data(self, index, role):
@ -373,9 +372,6 @@ class ConfigDialog(QDialog, Ui_Dialog):
self.connect(self.column_up, SIGNAL('clicked()'), self.up_column)
self.connect(self.column_down, SIGNAL('clicked()'), self.down_column)
self.filename_pattern = FilenamePattern(self)
self.metadata_box.layout().insertWidget(0, self.filename_pattern)
icons = config['toolbar_icon_size']
self.toolbar_button_size.setCurrentIndex(0 if icons == self.ICON_SIZES[0] else 1 if icons == self.ICON_SIZES[1] else 2)
self.show_toolbar_text.setChecked(config['show_text_in_toolbar'])
@ -408,7 +404,6 @@ class ConfigDialog(QDialog, Ui_Dialog):
for item in items:
self.language.addItem(item[1], QVariant(item[0]))
self.pdf_metadata.setChecked(prefs['read_file_metadata'])
exts = set([])
for ext in BOOK_EXTENSIONS:
@ -439,7 +434,6 @@ class ConfigDialog(QDialog, Ui_Dialog):
self.password.setText(opts.password if opts.password else '')
self.auto_launch.setChecked(config['autolaunch_server'])
self.systray_icon.setChecked(config['systray_icon'])
self.search_as_you_type.setChecked(config['search_as_you_type'])
self.sync_news.setChecked(config['upload_news_to_device'])
self.delete_news.setChecked(config['delete_news_from_library_on_upload'])
p = {'normal':0, 'high':1, 'low':2}[prefs['worker_process_priority']]
@ -683,6 +677,8 @@ class ConfigDialog(QDialog, Ui_Dialog):
return
if not self.conversion_options.commit():
return
if not self.add_save.save_settings():
return
config['use_roman_numerals_for_series_number'] = bool(self.roman_numerals.isChecked())
config['new_version_notification'] = bool(self.new_version_notification.isChecked())
prefs['network_timeout'] = int(self.timeout.value())
@ -697,11 +693,8 @@ class ConfigDialog(QDialog, Ui_Dialog):
config['show_text_in_toolbar'] = bool(self.show_toolbar_text.isChecked())
config['separate_cover_flow'] = bool(self.separate_cover_flow.isChecked())
config['disable_tray_notification'] = not self.systray_notifications.isChecked()
pattern = self.filename_pattern.commit()
prefs['filename_pattern'] = pattern
p = {0:'normal', 1:'high', 2:'low'}[self.priority.currentIndex()]
prefs['worker_process_priority'] = p
prefs['read_file_metadata'] = bool(self.pdf_metadata.isChecked())
prefs['output_format'] = unicode(self.output_format.currentText()).upper()
config['cover_flow_queue_length'] = self.cover_browse.value()
prefs['language'] = str(self.language.itemData(self.language.currentIndex()).toString())

View File

@ -0,0 +1,97 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import textwrap
from PyQt4.Qt import QTabWidget
from calibre.gui2.dialogs.config.add_save_ui import Ui_TabWidget
from calibre.library.save_to_disk import config, FORMAT_ARG_DESCS, \
preprocess_template
from calibre.gui2 import error_dialog
from calibre.utils.config import prefs
from calibre.gui2.widgets import FilenamePattern
class AddSave(QTabWidget, Ui_TabWidget):
    '''
    Preferences widget with the settings for adding books and for saving
    books to disk.

    The widgets are created by ``Ui_TabWidget.setupUi``. Save to Disk
    options are read from and written to
    ``calibre.library.save_to_disk.config()``; the metadata-from-filename
    settings live in ``calibre.utils.config.prefs``.
    '''

    # Options edited through check boxes
    BOOLEAN_OPTIONS = ('asciiize', 'update_metadata', 'save_cover',
            'write_opf')
    # Options edited through plain text fields
    TEXT_OPTIONS = ('formats', 'timefmt')

    def __init__(self, parent=None):
        QTabWidget.__init__(self, parent)
        self.setupUi(self)
        # Keep only the first two tabs, removing any that follow them
        while self.count() > 2:
            self.removeTab(2)
        conf = config()
        opts = conf.parse()

        def wrapped_help(name):
            # Help text of option `name`, re-flowed to at most 75 columns
            return '\n'.join(textwrap.wrap(conf.get_option(name).help, 75))

        for name in self.BOOLEAN_OPTIONS:
            widget = getattr(self, 'opt_'+name)
            widget.setChecked(getattr(opts, name))
            tip = wrapped_help(name)
            widget.setToolTip(tip)
            widget.setWhatsThis(tip)

        for name in self.TEXT_OPTIONS:
            widget = getattr(self, 'opt_'+name)
            widget.setText(getattr(opts, name))
            tip = wrapped_help(name)
            widget.setToolTip(tip)
            widget.setWhatsThis(tip)

        # The template widget is initialized with the stored template
        # history and the currently configured template
        tip = wrapped_help('template')
        self.opt_template.initialize('save_to_disk_template_history',
                opts.template, tip)

        # One HTML table row per template variable, sorted by name
        rows = [u'<tr><td>%s</td><td>%s</td></tr>'%
                    (var, FORMAT_ARG_DESCS[var])
                for var in sorted(FORMAT_ARG_DESCS.keys())]
        self.template_variables.setText(
                u'<table>%s</table>'%(u'\n'.join(rows)))

        # The check box is the inverse of the stored preference
        self.opt_read_metadata_from_filename.setChecked(
                not prefs['read_file_metadata'])
        self.filename_pattern = FilenamePattern(self)
        self.metadata_box.layout().insertWidget(0, self.filename_pattern)

    def validate(self):
        '''
        Check that the save template can be formatted.

        The preprocessed template is formatted with an empty string for
        every known template variable. If that raises, an error dialog is
        shown and `False` is returned, otherwise `True`.
        '''
        tmpl = preprocess_template(self.opt_template.text())
        blanks = dict.fromkeys(FORMAT_ARG_DESCS.keys(), '')
        try:
            tmpl.format(**blanks)
        except Exception as err:
            msg = '<p>'+_('The template %s is invalid:')%tmpl + \
                    '<br>'+str(err)
            error_dialog(self, _('Invalid template'), msg, show=True)
            return False
        return True

    def save_settings(self):
        '''
        Store all settings of this widget.

        Returns `False` without storing anything if the template does not
        validate, `True` once everything has been written.
        '''
        if not self.validate():
            return False
        conf = config()
        for name in self.BOOLEAN_OPTIONS:
            checkbox = getattr(self, 'opt_'+name)
            conf.set(name, checkbox.isChecked())
        for name in ('formats', 'template', 'timefmt'):
            field = getattr(self, 'opt_'+name)
            conf.set(name, unicode(field.text()).strip())
        self.opt_template.save_history('save_to_disk_template_history')
        # Stored preference is the inverse of the check box
        only_from_filename = self.opt_read_metadata_from_filename.isChecked()
        prefs['read_file_metadata'] = not only_from_filename
        prefs['filename_pattern'] = self.filename_pattern.commit()
        return True
if __name__ == '__main__':
    # Manual test harness: show the widget on its own and store the
    # settings once the event loop has finished.
    from PyQt4.Qt import QApplication
    application = QApplication([])
    widget = AddSave()
    widget.show()
    application.exec_()
    widget.save_settings()

View File

@ -0,0 +1,178 @@
<?xml version="1.0" encoding="UTF-8"?>
<ui version="4.0">
<class>TabWidget</class>
<widget class="QTabWidget" name="TabWidget">
<property name="geometry">
<rect>
<x>0</x>
<y>0</y>
<width>645</width>
<height>516</height>
</rect>
</property>
<property name="windowTitle">
<string>TabWidget</string>
</property>
<property name="currentIndex">
<number>0</number>
</property>
<widget class="QWidget" name="tab">
<attribute name="title">
<string>&amp;Adding books</string>
</attribute>
<layout class="QVBoxLayout" name="verticalLayout_2">
<item>
<widget class="QLabel" name="label_6">
<property name="text">
<string>Here you can control how calibre will read metadata from the files you add to it. calibre can either read metadata from the contents of the file, or from the filename.</string>
</property>
<property name="wordWrap">
<bool>true</bool>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="opt_read_metadata_from_filename">
<property name="text">
<string>Read metadata only from &amp;file name</string>
</property>
</widget>
</item>
<item>
<widget class="QGroupBox" name="metadata_box">
<property name="title">
<string>&amp;Configure metadata from file name</string>
</property>
<layout class="QVBoxLayout" name="verticalLayout">
<item>
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>363</height>
</size>
</property>
</spacer>
</item>
</layout>
</widget>
</item>
</layout>
</widget>
<widget class="QWidget" name="tab">
<attribute name="title">
<string>&amp;Saving books</string>
</attribute>
<layout class="QGridLayout" name="gridLayout">
<item row="0" column="0" colspan="2">
<widget class="QLabel" name="label">
<property name="text">
<string>Here you can control how calibre will save your books when you click the Save to Disk button:</string>
</property>
<property name="wordWrap">
<bool>true</bool>
</property>
</widget>
</item>
<item row="1" column="0" colspan="2">
<widget class="QCheckBox" name="opt_save_cover">
<property name="text">
<string>Save &amp;cover separately</string>
</property>
</widget>
</item>
<item row="2" column="0" colspan="2">
<widget class="QCheckBox" name="opt_update_metadata">
<property name="text">
<string>Update &amp;metadata in saved copies</string>
</property>
</widget>
</item>
<item row="3" column="0" colspan="2">
<widget class="QCheckBox" name="opt_write_opf">
<property name="text">
<string>Save metadata in &amp;OPF file</string>
</property>
</widget>
</item>
<item row="4" column="0" colspan="2">
<widget class="QCheckBox" name="opt_asciiize">
<property name="text">
<string>Convert non-English characters to &amp;English equivalents</string>
</property>
</widget>
</item>
<item row="5" column="0">
<widget class="QLabel" name="label_2">
<property name="text">
<string>Format &amp;dates as:</string>
</property>
<property name="buddy">
<cstring>opt_timefmt</cstring>
</property>
</widget>
</item>
<item row="5" column="1">
<widget class="QLineEdit" name="opt_timefmt"/>
</item>
<item row="6" column="0">
<widget class="QLabel" name="label_3">
<property name="text">
<string>File &amp;formats to save:</string>
</property>
<property name="buddy">
<cstring>opt_formats</cstring>
</property>
</widget>
</item>
<item row="6" column="1">
<widget class="QLineEdit" name="opt_formats"/>
</item>
<item row="7" column="0" colspan="2">
<widget class="QGroupBox" name="groupBox">
<property name="title">
<string>Save &amp;template</string>
</property>
<layout class="QGridLayout" name="gridLayout_2">
<item row="0" column="0">
<widget class="QLabel" name="label_4">
<property name="text">
<string>By adjusting the template below, you can control what folders the files are saved in and what filenames they are given. You can use the / character to indicate sub-folders. Available metadata variables are described below. If a particular book does not have some metadata, the variable will be replaced by the empty string.</string>
</property>
<property name="wordWrap">
<bool>true</bool>
</property>
</widget>
</item>
<item row="2" column="0">
<widget class="QLabel" name="label_5">
<property name="text">
<string>Available variables:</string>
</property>
</widget>
</item>
<item row="3" column="0">
<widget class="QTextBrowser" name="template_variables"/>
</item>
<item row="1" column="0">
<widget class="HistoryBox" name="opt_template"/>
</item>
</layout>
</widget>
</item>
</layout>
</widget>
</widget>
<customwidgets>
<customwidget>
<class>HistoryBox</class>
<extends>QComboBox</extends>
<header>calibre/gui2/dialogs/config/history.h</header>
</customwidget>
</customwidgets>
<resources/>
<connections/>
</ui>

View File

@ -15,7 +15,7 @@
<string>Preferences</string>
</property>
<property name="windowIcon">
<iconset resource="../images.qrc">
<iconset resource="../../images.qrc">
<normaloff>:/images/config.svg</normaloff>:/images/config.svg</iconset>
</property>
<layout class="QGridLayout">
@ -115,7 +115,7 @@
<string>...</string>
</property>
<property name="icon">
<iconset resource="../images.qrc">
<iconset resource="../../images.qrc">
<normaloff>:/images/mimetypes/dir.svg</normaloff>:/images/mimetypes/dir.svg</iconset>
</property>
</widget>
@ -131,19 +131,6 @@
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="pdf_metadata">
<property name="toolTip">
<string>If you disable this setting, metadata is guessed from the filename instead. This can be configured in the Advanced section.</string>
</property>
<property name="text">
<string>Read &amp;metadata from files</string>
</property>
<property name="checked">
<bool>true</bool>
</property>
</widget>
</item>
<item>
<layout class="QGridLayout" name="gridLayout_2">
<item row="1" column="0">
@ -258,7 +245,7 @@
<string>...</string>
</property>
<property name="icon">
<iconset resource="../images.qrc">
<iconset resource="../../images.qrc">
<normaloff>:/images/arrow-up.svg</normaloff>:/images/arrow-up.svg</iconset>
</property>
</widget>
@ -282,7 +269,7 @@
<string>...</string>
</property>
<property name="icon">
<iconset resource="../images.qrc">
<iconset resource="../../images.qrc">
<normaloff>:/images/arrow-down.svg</normaloff>:/images/arrow-down.svg</iconset>
</property>
</widget>
@ -339,7 +326,7 @@
<string>...</string>
</property>
<property name="icon">
<iconset resource="../images.qrc">
<iconset resource="../../images.qrc">
<normaloff>:/images/plus.svg</normaloff>:/images/plus.svg</iconset>
</property>
</widget>
@ -366,7 +353,7 @@
<string>...</string>
</property>
<property name="icon">
<iconset resource="../images.qrc">
<iconset resource="../../images.qrc">
<normaloff>:/images/list_remove.svg</normaloff>:/images/list_remove.svg</iconset>
</property>
</widget>
@ -543,7 +530,7 @@
<string>...</string>
</property>
<property name="icon">
<iconset resource="../images.qrc">
<iconset resource="../../images.qrc">
<normaloff>:/images/arrow-up.svg</normaloff>:/images/arrow-up.svg</iconset>
</property>
</widget>
@ -567,7 +554,7 @@
<string>...</string>
</property>
<property name="icon">
<iconset resource="../images.qrc">
<iconset resource="../../images.qrc">
<normaloff>:/images/arrow-down.svg</normaloff>:/images/arrow-down.svg</iconset>
</property>
</widget>
@ -627,7 +614,7 @@
<string>&amp;Add email</string>
</property>
<property name="icon">
<iconset resource="../images.qrc">
<iconset resource="../../images.qrc">
<normaloff>:/images/plus.svg</normaloff>:/images/plus.svg</iconset>
</property>
<property name="iconSize">
@ -654,7 +641,7 @@
<string>&amp;Remove email</string>
</property>
<property name="icon">
<iconset resource="../images.qrc">
<iconset resource="../../images.qrc">
<normaloff>:/images/minus.svg</normaloff>:/images/minus.svg</iconset>
</property>
<property name="iconSize">
@ -687,6 +674,14 @@
</item>
</layout>
</widget>
<widget class="QWidget" name="page_7">
<layout class="QVBoxLayout" name="verticalLayout_9">
<item>
<widget class="AddSave" name="add_save">
</widget>
</item>
</layout>
</widget>
<widget class="QWidget" name="page_2">
<layout class="QVBoxLayout">
<item>
@ -729,28 +724,6 @@
</item>
</layout>
</item>
<item>
<widget class="QGroupBox" name="metadata_box">
<property name="title">
<string>&amp;Metadata from file name</string>
</property>
<layout class="QVBoxLayout">
<item>
<spacer>
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>40</height>
</size>
</property>
</spacer>
</item>
</layout>
</widget>
</item>
</layout>
</widget>
<widget class="QWidget" name="page_4">
@ -1020,7 +993,7 @@
<string>...</string>
</property>
<property name="icon">
<iconset resource="../images.qrc">
<iconset resource="../../images.qrc">
<normaloff>:/images/document_open.svg</normaloff>:/images/document_open.svg</iconset>
</property>
</widget>
@ -1079,9 +1052,15 @@
<header>calibre/gui2/wizard/send_email.h</header>
<container>1</container>
</customwidget>
<customwidget>
<class>AddSave</class>
<extends>QTabWidget</extends>
<header>calibre/gui2/dialogs/config/add_save.h</header>
<container>1</container>
</customwidget>
</customwidgets>
<resources>
<include location="../images.qrc"/>
<include location="../../images.qrc"/>
</resources>
<connections>
<connection>

View File

@ -0,0 +1,40 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from PyQt4.Qt import QComboBox, QStringList, Qt
from calibre.gui2 import config as gui_conf
class HistoryBox(QComboBox):
    '''
    Editable combo box that remembers previously used values.

    The history is read from and written to the GUI configuration
    (``calibre.gui2.config``) under an option name supplied by the caller.
    '''

    # Maximum number of entries written back to the configuration
    MAX_HISTORY = 10

    def __init__(self, parent=None):
        QComboBox.__init__(self, parent)
        self.setEditable(True)

    def initialize(self, opt_name, default, help=None):
        '''
        Fill the box with the stored history and select `default`.

        :param opt_name: Name of the GUI configuration option holding the
                         history list.
        :param default: Value to select. It is added to the list of
                        entries if the history does not contain it.
        :param help: Optional text used as tool tip and "What's This" help.
        '''
        # Work on a copy, so that the list object handed out by the
        # configuration is never modified as a side effect
        history = list(gui_conf[opt_name])
        if default not in history:
            history.append(default)
        self.addItems(QStringList(history))
        # MatchFixedString on its own compares case-insensitively, which
        # would select an earlier entry that differs from `default` only
        # in case
        self.setCurrentIndex(self.findText(default,
            Qt.MatchFixedString|Qt.MatchCaseSensitive))
        if help is not None:
            self.setToolTip(help)
            self.setWhatsThis(help)

    def save_history(self, opt_name):
        '''
        Store the entries of the box as the new history under `opt_name`.

        The current text is always placed first, so that it cannot be lost
        when the list is cut down to `MAX_HISTORY` entries.
        '''
        current = self.text()
        entries = [unicode(self.itemText(i)) for i in range(self.count())]
        history = [current] + [x for x in entries if x != current]
        gui_conf[opt_name] = history[:self.MAX_HISTORY]

    def text(self):
        'Return the current text as a unicode string without surrounding whitespace'
        return unicode(self.currentText()).strip()

Binary file not shown.

After

Width:  |  Height:  |  Size: 858 B

View File

@ -1121,13 +1121,14 @@ class SearchBox(QLineEdit):
def normalize_state(self):
self.setText('')
self.setPalette(self.default_palette)
self.setStyleSheet('QLineEdit { background-color: white; }')
def clear_to_help(self):
self.setPalette(self.gray)
self.setText(self.help_text)
self.home(False)
self.initial_state = True
self.setStyleSheet("background-color: white")
self.setStyleSheet('QLineEdit { background-color: white; }')
self.emit(SIGNAL('cleared()'))
def clear(self):
@ -1135,8 +1136,8 @@ class SearchBox(QLineEdit):
self.emit(SIGNAL('search(PyQt_PyObject, PyQt_PyObject)'), '', False)
def search_done(self, ok):
col = 'rgba(0,255,0,25%)' if ok else 'rgb(255,0,0,25%)'
self.setStyleSheet('background-color: '+col)
col = 'rgba(0,255,0,20%)' if ok else 'rgb(255,0,0,20%)'
self.setStyleSheet('QLineEdit { background-color: %s; }' % col)
def keyPressEvent(self, event):
if self.initial_state:

View File

@ -1064,11 +1064,14 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
if self.current_view() is self.library_view:
from calibre.gui2.add import Saver
from calibre.library.save_to_disk import config
opts = config().parse()
if single_format is not None:
opts.formats = single_format
if single_dir:
opts.template = '{title} - {authors}'
self._saver = Saver(self, self.library_view.model().db,
Dispatcher(self._books_saved), rows, path,
by_author=self.library_view.model().by_author,
single_dir=single_dir,
single_format=single_format,
Dispatcher(self._books_saved), rows, path, opts,
spare_server=self.spare_server)
else:
@ -1078,19 +1081,20 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
def _books_saved(self, path, failures, error):
single_format = self._saver.worker.single_format
self._saver = None
if error:
return error_dialog(self, _('Error while saving'),
_('There was an error while saving.'),
error, show=True)
if failures and single_format:
single_format = single_format.upper()
if failures:
failures = [u'%s\n\t%s'%
(title, '\n\t'.join(err.splitlines())) for title, err in
failures]
warning_dialog(self, _('Could not save some books'),
_('Could not save some books') + ', ' +
(_('as the %s format is not available for them.')%single_format) +
_('Click the show details button to see which ones.'),
'\n'.join(failures), show=True)
u'\n\n'.join(failures), show=True)
QDesktopServices.openUrl(QUrl.fromLocalFile(path))
def books_saved(self, job):

View File

@ -6,7 +6,7 @@ import traceback
from PyQt4.QtCore import QThread, SIGNAL
import mechanize
from calibre.constants import __version__
from calibre.constants import __version__, iswindows, isosx
from calibre import browser
URL = 'http://status.calibre-ebook.com/latest'
@ -18,6 +18,8 @@ class CheckForUpdates(QThread):
br = browser()
req = mechanize.Request(URL)
req.add_header('CALIBRE_VERSION', __version__)
req.add_header('CALIBRE_OS',
'win' if iswindows else 'osx' if isosx else 'oth')
version = br.open(req).read().strip()
if version and version != __version__:
self.emit(SIGNAL('update_found(PyQt_PyObject)'), version)

View File

@ -11,7 +11,7 @@ import sys, os, cStringIO
from textwrap import TextWrapper
from urllib import quote
from calibre import terminal_controller, preferred_encoding
from calibre import terminal_controller, preferred_encoding, prints
from calibre.utils.config import OptionParser, prefs
try:
from calibre.utils.single_qt_application import send_message
@ -488,10 +488,18 @@ show_metadata command.
do_set_metadata(get_db(dbpath, opts), id, opf)
return 0
def do_export(db, ids, dir, single_dir, by_author):
def do_export(db, ids, dir, opts):
if ids is None:
ids = list(db.all_ids())
db.export_to_dir(dir, ids, byauthor=by_author, single_dir=single_dir, index_is_id=True)
from calibre.library.save_to_disk import save_to_disk
failures = save_to_disk(db, ids, dir, opts=opts)
if failures:
prints('Failed to save the following books:')
for id, title, tb in failures:
prints(str(id)+':', title)
prints('\t'+'\n\t'.join(tb.splitlines()))
prints(' ')
def command_export(args, dbpath):
parser = get_parser(_('''\
@ -507,8 +515,21 @@ an opf file). You can get id numbers from the list command.
help=(_('Export books to the specified directory. Default is')+' %default'))
parser.add_option('--single-dir', default=False, action='store_true',
help=_('Export all books into a single directory'))
parser.add_option('--by-author', default=False, action='store_true',
help=_('Create file names as author - title instead of title - author'))
from calibre.library.save_to_disk import config
c = config()
for pref in ['asciiize', 'update_metadata', 'write_opf', 'save_cover']:
opt = c.get_option(pref)
switch = '--dont-'+pref.replace('_', '-')
parser.add_option(switch, default=True, action='store_false',
help=opt.help+' '+_('Specifying this switch will turn '
'this behavior off.'), dest=pref)
for pref in ['timefmt', 'template', 'formats']:
opt = c.get_option(pref)
switch = '--'+pref
parser.add_option(switch, default=opt.default,
help=opt.help, dest=pref)
opts, args = parser.parse_args(sys.argv[1:]+args)
if (len(args) < 2 and not opts.all):
parser.print_help()
@ -517,7 +538,7 @@ an opf file). You can get id numbers from the list command.
return 1
ids = None if opts.all else map(int, args[1].split(','))
dir = os.path.abspath(os.path.expanduser(opts.to_dir))
do_export(get_db(dbpath, opts), ids, dir, opts.single_dir, opts.by_author)
do_export(get_db(dbpath, opts), ids, dir, opts)
return 0
def main(args=sys.argv):

View File

@ -850,20 +850,14 @@ class LibraryDatabase2(LibraryDatabase):
return None
ans = []
for format in formats:
_format = ('.' + format.lower()) if format else ''
if os.access(os.path.join(path, name+_format), os.R_OK|os.W_OK):
if self.format_abspath(id, format, index_is_id=True) is not None:
ans.append(format)
if not ans:
return None
return ','.join(ans)
def has_format(self, index, format, index_is_id=False):
id = index if index_is_id else self.id(index)
name = self.conn.get('SELECT name FROM data WHERE book=? AND format=?', (id, format), all=False)
if name:
path = os.path.join(self.library_path, self.path(id, index_is_id=True))
format = ('.' + format.lower()) if format else ''
path = os.path.join(path, name+format)
return os.access(path, os.R_OK|os.W_OK)
return False
return self.format_abspath(index, format, index_is_id) is not None
def format_abspath(self, index, format, index_is_id=False):
'Return absolute path to the ebook file of format `format`'
@ -872,9 +866,13 @@ class LibraryDatabase2(LibraryDatabase):
if name:
path = os.path.join(self.library_path, self.path(id, index_is_id=True))
format = ('.' + format.lower()) if format else ''
path = os.path.join(path, name+format)
if os.access(path, os.R_OK|os.W_OK):
return path
fmt_path = os.path.join(path, name+format)
if os.path.exists(fmt_path):
return fmt_path
candidates = glob.glob(os.path.join(path, '*'+format))
if format and candidates and os.path.exists(candidates[0]):
shutil.copyfile(candidates[0], fmt_path)
return fmt_path
def format(self, index, format, index_is_id=False, as_file=False, mode='r+b'):
'''
@ -886,9 +884,10 @@ class LibraryDatabase2(LibraryDatabase):
path = self.format_abspath(index, format, index_is_id=index_is_id)
if path is not None:
f = open(path, mode)
return f if as_file else f.read()
if self.has_format(index, format, index_is_id):
self.remove_format(id, format, index_is_id=True)
ret = f if as_file else f.read()
if not as_file:
f.close()
return ret
def add_format_with_hooks(self, index, format, fpath, index_is_id=False,
path=None, notify=True):
@ -944,11 +943,9 @@ class LibraryDatabase2(LibraryDatabase):
def remove_format(self, index, format, index_is_id=False, notify=True):
id = index if index_is_id else self.id(index)
path = os.path.join(self.library_path, *self.path(id, index_is_id=True).split(os.sep))
name = self.conn.get('SELECT name FROM data WHERE book=? AND format=?', (id, format), all=False)
if name:
ext = ('.' + format.lower()) if format else ''
path = os.path.join(path, name+ext)
path = self.format_abspath(id, format, index_is_id=True)
try:
delete_file(path)
except:
@ -1488,8 +1485,9 @@ class LibraryDatabase2(LibraryDatabase):
yield record
def all_ids(self):
x = FIELD_MAP['id']
for i in iter(self):
yield i['id']
yield i[x]
def get_data_as_dict(self, prefix=None, authors_as_string=False):
'''

View File

@ -0,0 +1,226 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, traceback, cStringIO
from calibre.utils.config import Config, StringConfig
from calibre.utils.filenames import shorten_components_to, supports_long_names, \
ascii_filename, sanitize_file_name
from calibre.ebooks.metadata.opf2 import metadata_to_opf
from calibre.ebooks.metadata.meta import set_metadata
from calibre.constants import preferred_encoding, filesystem_encoding
from calibre import strftime
# Template used when the user does not supply one: a directory named after
# the author sort string, containing files named "<title> - <authors>".
DEFAULT_TEMPLATE = '{author_sort}/{title} - {authors}'
# Every field name that may appear inside braces in a template, mapped to
# its translated, human readable description.
FORMAT_ARG_DESCS = dict(
        title=_('The title'),
        authors=_('The authors'),
        author_sort=_('The author sort string'),
        tags=_('The tags'),
        series=_('The series'),
        series_index=_('The series number'),
        rating=_('The rating'),
        isbn=_('The ISBN'),
        publisher=_('The publisher'),
        timestamp=_('The date'),
        pubdate=_('The published date'),
        id=_('The calibre internal id')
        )
# The same field names, each mapped to the empty string. get_components()
# copies this mapping so that fields missing from a book expand to nothing.
FORMAT_ARGS = {}
for x in FORMAT_ARG_DESCS:
    FORMAT_ARGS[x] = ''
def config(defaults=None):
    '''
    Build the configuration object that describes the save to disk options.

    :param defaults: If ``None`` a ``Config`` named ``save_to_disk`` is
        created, otherwise ``defaults`` is handed to ``StringConfig``.
    :return: The configuration object with the options ``update_metadata``,
        ``write_opf``, ``save_cover``, ``formats``, ``template``,
        ``asciiize`` and ``timefmt`` added to it.
    '''
    if defaults is None:
        c = Config('save_to_disk', _('Options to control saving to disk'))
    else:
        c = StringConfig(defaults)

    x = c.add_opt
    x('update_metadata', default=True,
            help=_('Normally, calibre will update the metadata in the saved files from what is'
            ' in the calibre library. Makes saving to disk slower.'))
    x('write_opf', default=True,
            help=_('Normally, calibre will write the metadata into a separate OPF file along with the'
                ' actual e-book files.'))
    x('save_cover', default=True,
            help=_('Normally, calibre will save the cover in a separate file along with the '
                'actual e-book file(s).'))
    # This option selects formats, not books, so the help text says so.
    x('formats', default='all',
            help=_('Comma separated list of formats to save for each book.'
                ' By default all available formats are saved.'))
    x('template', default=DEFAULT_TEMPLATE,
            help=_('The template to control the filename and directory structure of the saved files. '
                'Default is "%s" which will save books into a per-author '
                'subdirectory with filenames containing title and author. '
                'Available controls are: {%s}')%(DEFAULT_TEMPLATE, ', '.join(FORMAT_ARGS)))
    x('asciiize', default=True,
            help=_('Normally, calibre will convert all non English characters into English equivalents '
                'for the file names. '
                'WARNING: If you turn this off, you may experience errors when '
                'saving, depending on how well the filesystem you are saving '
                'to supports unicode.'))
    x('timefmt', default='%b, %Y',
            help=_('The format in which to display dates. %d - day, %b - month, '
                '%Y - year. Default is: %b, %Y'))
    return c
def preprocess_template(template):
    '''
    Normalize a user supplied template before it is expanded.

    Doubled slashes are collapsed once, the singular aliases ``{author}``
    and ``{tag}`` are rewritten to ``{authors}`` and ``{tags}``, and the
    result is returned as a unicode object (byte strings are decoded with
    ``preferred_encoding``, replacing undecodable bytes).
    '''
    substitutions = (
            ('//', '/'),
            ('{author}', '{authors}'),
            ('{tag}', '{tags}'),
            )
    for old, new in substitutions:
        template = template.replace(old, new)
    if isinstance(template, unicode):
        return template
    return template.decode(preferred_encoding, 'replace')
def get_components(template, mi, id, timefmt='%b %Y', length=250, sanitize_func=ascii_filename):
    '''
    Expand ``template`` for one book into a list of path components.

    :param template: Template string whose ``/`` separated parts contain
        ``{field}`` placeholders (see ``FORMAT_ARG_DESCS``).
    :param mi: Metadata object of the book. Fields that are unset are
        expanded to the empty string.
    :param id: The id of the book, used for ``{id}`` and as the only
        component if the template expands to nothing.
    :param timefmt: ``strftime`` format used for ``{timestamp}`` and
        ``{pubdate}``.
    :param length: Passed to ``shorten_components_to`` together with the
        components.
    :param sanitize_func: Callable applied to every non empty component.
    :return: The result of ``shorten_components_to(length, components)``.
    '''
    format_args = dict(**FORMAT_ARGS)
    if mi.title:
        format_args['title'] = mi.title
    if mi.authors:
        format_args['authors'] = mi.format_authors()
    if mi.author_sort:
        format_args['author_sort'] = mi.author_sort
    if mi.tags:
        format_args['tags'] = mi.format_tags()
    if mi.series:
        format_args['series'] = mi.series
    if mi.series_index is not None:
        format_args['series_index'] = mi.format_series_index()
    if mi.rating is not None:
        format_args['rating'] = mi.format_rating()
    if mi.isbn:
        format_args['isbn'] = mi.isbn
    if mi.publisher:
        format_args['publisher'] = mi.publisher
    if hasattr(mi.timestamp, 'timetuple'):
        format_args['timestamp'] = strftime(timefmt, mi.timestamp.timetuple())
    if hasattr(mi.pubdate, 'timetuple'):
        # The publication date must fill {pubdate}; storing it under
        # 'timestamp' left {pubdate} empty and clobbered {timestamp}.
        format_args['pubdate'] = strftime(timefmt, mi.pubdate.timetuple())
    format_args['id'] = str(id)
    # Empty parts are dropped both before and after expansion, so doubled
    # slashes and fields without a value do not create empty directories.
    components = [x.strip() for x in template.split('/') if x.strip()]
    components = [x.format(**format_args).strip() for x in components]
    components = [sanitize_func(x) for x in components if x]
    if not components:
        components = [str(id)]
    components = [x.encode(filesystem_encoding, 'replace') if isinstance(x,
        unicode) else x for x in components]

    return shorten_components_to(length, components)
def save_book_to_disk(id, db, root, opts, length):
    '''
    Save the book with the given ``id`` from ``db`` below ``root``.

    Depending on ``opts`` the cover (``.jpg``), an OPF file (``.opf``) and
    every requested format that is available are written next to each
    other, named according to ``opts.template``.

    :return: A tuple ``(failed, id, title)``. ``failed`` is True if none of
        the requested formats could be written.
    '''
    mi = db.get_metadata(id, index_is_id=True)

    # Work out which formats to save: the requested ones that exist.
    raw_formats = db.formats(id, index_is_id=True)
    available_formats = []
    if raw_formats:
        available_formats = [name.lower().strip() for name in
                raw_formats.split(',')]
    asked_formats = available_formats
    if opts.formats != 'all':
        asked_formats = [name.lower().strip() for name in
                opts.formats.split(',')]
    formats = set(available_formats) & set(asked_formats)
    if not formats:
        return True, id, mi.title

    # Compute the destination and make sure its directory exists.
    sanitize = ascii_filename if opts.asciiize else sanitize_file_name
    components = get_components(opts.template, mi, id, opts.timefmt, length,
            sanitize)
    base_path = os.path.join(root, *components)
    dirpath, base_name = os.path.split(base_path)
    if not os.path.exists(dirpath):
        os.makedirs(dirpath)

    cdata = db.cover(id, index_is_id=True)
    have_cover = cdata is not None
    if opts.save_cover:
        if have_cover:
            with open(base_path+'.jpg', 'wb') as cover_file:
                cover_file.write(cdata)
            # The OPF written below refers to the cover by file name.
            mi.cover = base_name+'.jpg'
        else:
            mi.cover = None

    if opts.write_opf:
        opf = metadata_to_opf(mi)
        with open(base_path+'.opf', 'wb') as opf_file:
            opf_file.write(opf)

    # From here on the cover travels as data, not as a file reference.
    if have_cover:
        mi.cover_data = ('jpg', cdata)
    mi.cover = None

    written = False
    for fmt in formats:
        data = db.format(id, fmt, index_is_id=True)
        if data is None:
            continue
        written = True
        if opts.update_metadata:
            # The stream is filled with write() rather than through the
            # constructor so that it stays writable for set_metadata.
            stream = cStringIO.StringIO()
            stream.write(data)
            stream.seek(0)
            try:
                set_metadata(stream, mi, fmt)
            except:
                # Best effort: the book is still saved if its metadata
                # cannot be updated.
                traceback.print_exc()
            stream.seek(0)
            data = stream.read()
        with open(base_path+'.'+str(fmt), 'wb') as fmt_file:
            fmt_file.write(data)

    return (not written), id, mi.title
def save_to_disk(db, ids, root, opts=None, callback=None):
    '''
    Save books from the database ``db`` to the path specified by ``root``.

    :param db: The database to read the books from.
    :param ids: Iterable of book ids to save from the database.
    :param root: Directory below which the books are saved.
    :param opts: Options as produced by ``config().parse()``. If ``None``
        the stored configuration is used.
    :param callback: Optional callable that is called after each book is
        processed with the arguments: id, title, failed, traceback.
        If the callback returns False, further processing is terminated and
        the function returns.
    :return: A list of failures. Each element of the list is a tuple
        (id, title, traceback)
    '''
    if opts is None:
        opts = config().parse()
    if isinstance(root, unicode):
        root = root.encode(filesystem_encoding)
    root = os.path.abspath(root)

    opts.template = preprocess_template(opts.template)
    # Whatever the root directory uses up is not available for the
    # components generated from the template.
    limit = 1000 if supports_long_names(root) else 250
    length = limit - len(root)
    if length < 5:
        raise ValueError('%r is too long.'%root)

    failures = []
    for book_id in ids:
        tb = ''
        try:
            failed, saved_id, title = save_book_to_disk(book_id, db, root,
                    opts, length)
            tb = _('Requested formats not available')
        except:
            failed, saved_id = True, book_id
            title = db.title(book_id, index_is_id=True)
            tb = traceback.format_exc()
        if failed:
            failures.append((saved_id, title, tb))
        if callable(callback) and not callback(int(saved_id), title, failed, tb):
            break

    return failures

File diff suppressed because it is too large Load Diff

View File

@ -216,6 +216,14 @@ class OptionSet(object):
return True
return False
def get_option(self, name_or_option_object):
    '''
    Return the option from ``self.preferences`` that matches
    ``name_or_option_object``.

    The argument is first looked up by equality against the stored
    options, then by comparing it to the ``name`` of each stored option.

    :return: The matching option or ``None`` if there is no such option.
    '''
    try:
        # list.index() signals a miss by raising ValueError, it never
        # returns -1, so the miss has to be caught for the lookup by name
        # below to be reachable.
        idx = self.preferences.index(name_or_option_object)
    except ValueError:
        pass
    else:
        return self.preferences[idx]
    for p in self.preferences:
        if p.name == name_or_option_object:
            return p
    return None
def add_group(self, name, description=''):
if name in self.group_list:
raise ValueError('A group by the name %s already exists in this set'%name)
@ -370,6 +378,8 @@ class ConfigInterface(object):
self.add_group = self.option_set.add_group
self.remove_opt = self.remove = self.option_set.remove_opt
self.parse_string = self.option_set.parse_string
self.get_option = self.option_set.get_option
self.preferences = self.option_set.preferences
def update(self, other):
self.option_set.update(other.option_set)
@ -381,6 +391,7 @@ class ConfigInterface(object):
def smart_update(self, opts1, opts2):
self.option_set.smart_update(opts1, opts2)
class Config(ConfigInterface):
'''
A file based configuration.

View File

@ -48,7 +48,7 @@ recipe_modules = ['recipe_' + r for r in (
'the_budget_fashionista', 'elperiodico_catalan',
'elperiodico_spanish', 'expansion_spanish', 'lavanguardia',
'marca', 'kellog_faculty', 'kellog_insight', 'noaa',
'7dias', 'buenosaireseconomico', 'huntechnet',
'7dias', 'buenosaireseconomico', 'huntechnet', 'cubadebate',
'diagonales', 'miradasalsur', 'newsweek_argentina', 'veintitres',
'gva_be', 'hln', 'tijd', 'degentenaar', 'inquirer_net', 'uncrate',
'fastcompany', 'accountancyage', 'laprensa_hn', 'latribuna',

View File

@ -9,6 +9,7 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class Cro24Sata(BasicNewsRecipe):
title = '24 Sata - Hr'
@ -22,18 +23,18 @@ class Cro24Sata(BasicNewsRecipe):
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = False
remove_javascript = True
language = _('Croatian')
lang = 'hr-HR'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -45,9 +46,11 @@ class Cro24Sata(BasicNewsRecipe):
feeds = [(u'Najnovije Vijesti', u'http://www.24sata.hr/index.php?cmd=show_rss&action=novo')]
def preprocess_html(self, soup):
soup.html['lang'] = 'hr-HR'
mtag = '<meta http-equiv="Content-Language" content="hr-HR"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
soup.head.insert(0,mtag)
soup.html['lang'] = self.lang
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
soup.head.insert(0,mlang)
soup.head.insert(1,mcharset)
for item in soup.findAll(style=True):
del item['style']
return soup

View File

@ -17,53 +17,51 @@ class Ser24Sata(BasicNewsRecipe):
description = '24 sata portal vesti iz Srbije'
publisher = 'Ringier d.o.o.'
category = 'news, politics, entertainment, Serbia'
oldest_article = 1
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = False
remove_javascript = True
language = _('Serbian')
lang = 'sr-Latn-RS'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
feeds = [(u'Vesti Dana', u'http://www.24sata.rs/rss.php')]
def cleanup_image_tags(self,soup):
for item in soup.findAll('img'):
for attrib in ['height','width','border','align']:
def preprocess_html(self, soup):
soup.html['xml:lang'] = self.lang
soup.html['lang'] = self.lang
attribs = [ 'style','font','valign'
,'colspan','width','height'
,'rowspan','summary','align'
,'cellspacing','cellpadding'
,'frames','rules','border'
]
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
item.name = 'div'
for attrib in attribs:
if item.has_key(attrib):
del item[attrib]
oldParent = item.parent
myIndex = oldParent.contents.index(item)
item.extract()
divtag = Tag(soup,'div')
brtag = Tag(soup,'br')
oldParent.insert(myIndex,divtag)
divtag.append(item)
divtag.append(brtag)
return soup
def preprocess_html(self, soup):
soup.html['xml:lang'] = 'sr-Latn-RS'
soup.html['lang'] = 'sr-Latn-RS'
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
soup.head.insert(0,mtag)
return self.cleanup_image_tags(soup)
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
soup.head.insert(0,mlang)
soup.head.insert(1,mcharset)
return self.adeify_images(soup)
def print_version(self, url):
article, sep, rest = url.partition('#')
article_base, sep2, article_id = article.partition('id=')
return 'http://www.24sata.co.rs/_print.php?id=' + article_id
article = url.partition('#')[0]
article_id = article.partition('id=')[2]
return 'http://www.24sata.rs/_print.php?id=' + article_id

View File

@ -14,23 +14,21 @@ class B92(BasicNewsRecipe):
description = 'Dnevne vesti iz Srbije i sveta'
publisher = 'B92'
category = 'news, politics, Serbia'
oldest_article = 1
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
remove_javascript = True
encoding = 'cp1250'
language = _('Serbian')
lang = 'sr-Latn-RS'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em}"'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -39,6 +37,7 @@ class B92(BasicNewsRecipe):
remove_tags = [
dict(name='ul', attrs={'class':'comment-nav'})
,dict(name=['embed','link','base'] )
,dict(name='div', attrs={'class':'udokum'} )
]
feeds = [
@ -51,14 +50,19 @@ class B92(BasicNewsRecipe):
def preprocess_html(self, soup):
del soup.body['onload']
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll(align=True):
del item['align']
for item in soup.findAll('font'):
item.name='p'
item.name='div'
if item.has_key('size'):
del item['size']
attribs = [ 'style','font','valign'
,'colspan','width','height'
,'rowspan','summary','align'
,'cellspacing','cellpadding'
,'frames','rules','border'
]
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
item.name = 'div'
for attrib in attribs:
if item.has_key(attrib):
del item[attrib]
return soup

View File

@ -26,14 +26,12 @@ class Blic(BasicNewsRecipe):
lang = 'sr-Latn-RS'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} '
html2lrf_options = [
'--comment' , description
, '--category' , category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} "'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -44,14 +42,21 @@ class Blic(BasicNewsRecipe):
remove_tags = [dict(name=['object','link'])]
def print_version(self, url):
start_url, question, rest_url = url.partition('?')
rest_url = url.partition('?')[2]
return u'http://www.blic.rs/_print.php?' + rest_url
def preprocess_html(self, soup):
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
soup.head.insert(0,mlang)
for item in soup.findAll(style=True):
del item['style']
attribs = [ 'style','font','valign'
,'colspan','width','height'
,'rowspan','summary','align'
,'cellspacing','cellpadding'
,'frames','rules','border'
]
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
item.name = 'div'
for attrib in attribs:
if item.has_key(attrib):
del item[attrib]
return self.adeify_images(soup)
def get_article_url(self, article):

View File

@ -17,24 +17,23 @@ class Borba(BasicNewsRecipe):
publisher = 'IP Novine Borba'
category = 'news, politics, Serbia'
language = _('Serbian')
oldest_article = 1
lang = _('sr-Latn-RS')
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'utf8'
remove_javascript = True
encoding = 'utf-8'
use_embedded_content = False
cover_url = 'http://www.borba.rs/images/stories/novine/naslovna_v.jpg'
INDEX = u'http://www.borba.rs/'
extra_css = '@font-face {font-family: "serif0";src:url(res:///Data/FONT/serif0.ttf)} @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif0, serif1, serif} .article_description{font-family: serif0, serif1, serif}'
extra_css = ' @font-face {font-family: "serif1"; src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif} .contentheading{font-size: x-large; font-weight: bold} .createdate{font-size: small; font-weight: bold} '
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -60,14 +59,17 @@ class Borba(BasicNewsRecipe):
]
def preprocess_html(self, soup):
soup.html['xml:lang'] = 'sr-Latn-ME'
soup.html['lang'] = 'sr-Latn-ME'
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-ME"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll(font=True):
del item['font']
attribs = [ 'style','font','valign'
,'colspan','width','height'
,'rowspan','summary','align'
,'cellspacing','cellpadding'
,'frames','rules','border'
]
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
item.name = 'div'
for attrib in attribs:
if item.has_key(attrib):
del item[attrib]
return soup
def parse_index(self):

View File

@ -0,0 +1,44 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
cubadebate.cu
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
class CubaDebate(BasicNewsRecipe):
    # Recipe for the articles published in the cubadebate.cu feed.

    # Identification of the publication
    title                 = 'CubaDebate'
    __author__            = 'Darko Miletic'
    description           = 'Contra el Terorismo Mediatico'
    publisher             = 'Cubadebate'
    category              = 'news, politics, Cuba'
    language              = _('Spanish')

    # What to download and how to treat it
    oldest_article        = 15
    max_articles_per_feed = 100
    encoding              = 'utf-8'
    no_stylesheets        = True
    use_embedded_content  = False
    extra_css             = ' #BlogTitle{font-size: x-large; font-weight: bold} '

    # Handed to the conversion of the downloaded content
    conversion_options = {
        'comments'    : description,
        'tags'        : category,
        'language'    : 'es',
        'publisher'   : publisher,
        'pretty_print': True,
    }

    # Cleanup of the article pages
    keep_only_tags    = [dict(name='div', attrs={'id':'Outline'})]
    remove_tags_after = dict(name='div',attrs={'id':'BlogContent'})
    remove_tags       = [dict(name='link')]

    feeds = [(u'Articulos', u'http://www.cubadebate.cu/feed/')]

    def print_version(self, url):
        # The print view of an article is the article URL plus 'print/'.
        return url + 'print/'

    def preprocess_html(self, soup):
        # All processing of the page is delegated to adeify_images().
        return self.adeify_images(soup)

View File

@ -7,9 +7,10 @@ danas.rs
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class Danas(BasicNewsRecipe):
title = u'Danas'
title = 'Danas'
__author__ = 'Darko Miletic'
description = 'Vesti'
publisher = 'Danas d.o.o.'
@ -17,19 +18,19 @@ class Danas(BasicNewsRecipe):
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = False
remove_javascript = True
use_embedded_content = False
language = _('Serbian')
lang = 'sr-Latn-RS'
direction = 'ltr'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
html2lrf_options = [
'--comment' , description
, '--category' , category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -44,8 +45,17 @@ class Danas(BasicNewsRecipe):
feeds = [ (u'Vesti', u'http://www.danas.rs/rss/rss.asp')]
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
soup.head.insert(0,mlang)
attribs = [ 'style','font','valign'
,'colspan','width','height'
,'rowspan','summary','align'
,'cellspacing','cellpadding'
,'frames','rules','border'
]
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
item.name = 'div'
for attrib in attribs:
if item.has_key(attrib):
del item[attrib]
return soup

View File

@ -9,6 +9,7 @@ dnevniavaz.ba
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class DnevniAvaz(BasicNewsRecipe):
title = 'Dnevni Avaz'
@ -25,16 +26,17 @@ class DnevniAvaz(BasicNewsRecipe):
cover_url = 'http://www.dnevniavaz.ba/img/logo.gif'
lang = 'bs-BA'
language = _('Bosnian')
direction = 'ltr'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -47,9 +49,20 @@ class DnevniAvaz(BasicNewsRecipe):
,(u'Najpopularnije', u'http://www.dnevniavaz.ba/rss/popularno')
]
def replace_tagname(self,soup,tagname,tagid,newtagname):
headtag = soup.find(tagname,attrs={'id':tagid})
if headtag:
headtag.name = newtagname
return
def preprocess_html(self, soup):
soup.html['xml:lang'] = self.lang
soup.html['lang'] = self.lang
mtag = '<meta http-equiv="Content-Language" content="bs-BA"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
soup.head.insert(0,mtag)
return soup
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
soup.head.insert(0,mlang)
soup.head.insert(1,mcharset)
self.replace_tagname(soup,'div','fullarticle-title' ,'h1')
self.replace_tagname(soup,'div','fullarticle-leading','h3')
self.replace_tagname(soup,'div','fullarticle-date' ,'h5')
return self.adeify_images(soup)

View File

@ -9,6 +9,7 @@ dnevnik.hr
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class DnevnikCro(BasicNewsRecipe):
title = 'Dnevnik - Hr'
@ -22,19 +23,18 @@ class DnevnikCro(BasicNewsRecipe):
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = False
remove_javascript = True
language = _('Croatian')
lang = 'hr-HR'
direction = 'ltr'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -51,10 +51,24 @@ class DnevnikCro(BasicNewsRecipe):
feeds = [(u'Vijesti', u'http://rss.dnevnik.hr/index.rss')]
def preprocess_html(self, soup):
soup.html['lang'] = 'hr-HR'
mtag = '<meta http-equiv="Content-Language" content="hr-HR"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
soup.html['lang'] = self.lang
soup.html['dir' ] = self.direction
attribs = [ 'style','font','valign'
,'colspan','width','height'
,'rowspan','summary','align'
,'cellspacing','cellpadding'
,'frames','rules','border'
]
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
item.name = 'div'
for attrib in attribs:
if item.has_key(attrib):
del item[attrib]
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
soup.head.insert(0,mlang)
soup.head.insert(1,mcharset)
return self.adeify_images(soup)

View File

@ -9,6 +9,7 @@ e-novine.com
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class E_novine(BasicNewsRecipe):
title = 'E-Novine'
@ -16,23 +17,22 @@ class E_novine(BasicNewsRecipe):
description = 'News from Serbia'
publisher = 'E-novine'
category = 'news, politics, Balcans'
oldest_article = 1
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'cp1250'
cover_url = 'http://www.e-novine.com/slike/slike_3/r1/g2008/m03/y3165525326702598.jpg'
remove_javascript = True
use_embedded_content = False
language = _('Serbian')
lang = 'sr'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -43,10 +43,10 @@ class E_novine(BasicNewsRecipe):
feeds = [(u'Sve vesti', u'http://www.e-novine.com/rss/e-novine.xml' )]
def preprocess_html(self, soup):
soup.html['xml:lang'] = 'sr-Latn-ME'
soup.html['lang'] = 'sr-Latn-ME'
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-ME"/>'
soup.head.insert(0,mtag)
soup.html['xml:lang'] = self.lang
soup.html['lang'] = self.lang
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
soup.head.insert(0,mlang)
for item in soup.findAll(style=True):
del item['style']
ftag = soup.find('div', attrs={'id':'css_47_0_2844H'})

View File

@ -9,6 +9,7 @@ glassrpske.com
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class GlasSrpske(BasicNewsRecipe):
title = 'Glas Srpske'
@ -21,7 +22,6 @@ class GlasSrpske(BasicNewsRecipe):
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = False
remove_javascript = True
cover_url = 'http://www.glassrpske.com/var/slike/glassrpske-logo.png'
lang = 'sr-BA'
language = _('Serbian')
@ -29,13 +29,13 @@ class GlasSrpske(BasicNewsRecipe):
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -64,8 +64,8 @@ class GlasSrpske(BasicNewsRecipe):
def preprocess_html(self, soup):
soup.html['xml:lang'] = self.lang
soup.html['lang'] = self.lang
mtag = '<meta http-equiv="Content-Language" content="sr-BA"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
soup.head.insert(0,mtag)
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
soup.head.insert(0,mlang)
return soup
def parse_index(self):

View File

@ -8,6 +8,7 @@ www.guardian.co.uk
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class Guardian(BasicNewsRecipe):
@ -16,14 +17,33 @@ class Guardian(BasicNewsRecipe):
language = _('English')
oldest_article = 7
max_articles_per_feed = 20
remove_javascript = True
timefmt = ' [%a, %d %b %Y]'
keep_only_tags = [
dict(name='div', attrs={'id':["content","article_header","main-article-info",]}),
]
remove_tags = [
dict(name='div', attrs={'class':["video-content","videos-third-column"]}),
dict(name='div', attrs={'id':["article-toolbox","subscribe-feeds",]}),
dict(name='ul', attrs={'class':["pagination"]}),
dict(name='ul', attrs={'id':["content-actions"]}),
]
use_embedded_content = False
remove_tags_before = dict(id='main-article-info')
remove_tags_after = dict(id='article-wrapper')
remove_tags_after = dict(id='content')
no_stylesheets = True
extra_css = 'h2 {font-size: medium;} \n h1 {text-align: left;}'
extra_css = '''
.article-attributes{font-size: x-small; font-family:Arial,Helvetica,sans-serif;}
.h1{font-size: large ;font-family:georgia,serif; font-weight:bold;}
.stand-first-alone{color:#666666; font-size:small; font-family:Arial,Helvetica,sans-serif;}
.caption{color:#666666; font-size:x-small; font-family:Arial,Helvetica,sans-serif;}
#article-wrapper{font-size:small; font-family:Arial,Helvetica,sans-serif;}
.main-article-info{font-family:Arial,Helvetica,sans-serif;}
#full-contents{font-size:small; font-family:Arial,Helvetica,sans-serif;}
#match-stats-summary{font-size:small; font-family:Arial,Helvetica,sans-serif;}
'''
feeds = [
('Front Page', 'http://www.guardian.co.uk/rss'),
@ -37,3 +57,21 @@ class Guardian(BasicNewsRecipe):
('Comment','http://www.guardian.co.uk/commentisfree/rss'),
]
def preprocess_html(self, soup):
    """Remove ``style`` and ``face`` attributes and turn list tags into divs.

    Every tag carrying a ``style`` attribute loses it, then every tag
    carrying a ``face`` attribute loses it, and finally all ``ul`` and
    ``li`` tags are renamed to ``div``. The same soup object is returned.
    """
    # Same order as before: all 'style' attributes first, then all 'face'.
    for attr_name in ('style', 'face'):
        for node in soup.findAll(**{attr_name: True}):
            del node[attr_name]
    for list_tag in soup.findAll(name=['ul','li']):
        list_tag.name = 'div'
    return soup

View File

@ -24,13 +24,13 @@ class HRT(BasicNewsRecipe):
lang = 'hr-HR'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

View File

@ -8,32 +8,32 @@ jutarnji.hr
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class Jutarnji(BasicNewsRecipe):
title = u'Jutarnji'
__author__ = u'Darko Miletic'
description = u'Hrvatski portal'
title = 'Jutarnji'
__author__ = 'Darko Miletic'
description = 'Hrvatski portal'
publisher = 'Jutarnji.hr'
category = 'news, politics, Croatia'
oldest_article = 1
oldest_article = 2
max_articles_per_feed = 100
simultaneous_downloads = 2
delay = 1
language = _('Croatian')
no_stylesheets = True
use_embedded_content = False
remove_javascript = True
encoding = 'cp1250'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
lang = 'hr-HR'
direction = 'ltr'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} .vijestnaslov{font-size: x-large; font-weight: bold}'
html2lrf_options = [
'--comment' , description
, '--category' , category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -59,11 +59,24 @@ class Jutarnji(BasicNewsRecipe):
return 'http://www.jutarnji.hr/ispis_clanka.jl?artid=' + rrest
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n<meta http-equiv="Content-Language" content="hr-HR"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll(width=True):
del item['width']
return soup
soup.html['lang'] = self.lang
soup.html['dir' ] = self.direction
attribs = [ 'style','font','valign'
,'colspan','width','height'
,'rowspan','summary','align'
,'cellspacing','cellpadding'
,'frames','rules','border'
]
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
item.name = 'div'
for attrib in attribs:
if item.has_key(attrib):
del item[attrib]
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
soup.head.insert(0,mlang)
soup.head.insert(1,mcharset)
return self.adeify_images(soup)

View File

@ -9,6 +9,7 @@ nacional.hr
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class NacionalCro(BasicNewsRecipe):
title = 'Nacional - Hr'
@ -22,18 +23,19 @@ class NacionalCro(BasicNewsRecipe):
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = False
remove_javascript = True
language = _('Croatian')
lang = 'hr-HR'
direction = 'ltr'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -42,9 +44,12 @@ class NacionalCro(BasicNewsRecipe):
feeds = [(u'Najnovije Vijesti', u'http://www.nacional.hr/rss')]
def preprocess_html(self, soup):
soup.html['lang'] = 'hr-HR'
mtag = '<meta http-equiv="Content-Language" content="hr-HR"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
soup.head.insert(0,mtag)
soup.html['lang'] = self.lang
soup.html['dir' ] = self.direction
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
soup.head.insert(0,mlang)
soup.head.insert(1,mcharset)
for item in soup.findAll(style=True):
del item['style']
return soup

View File

@ -26,21 +26,19 @@ class Nin(BasicNewsRecipe):
INDEX = PREFIX + '/?change_lang=ls'
LOGIN = PREFIX + '/?logout=true'
FEED = PREFIX + '/misc/rss.php?feed=RSS2.0'
remove_javascript = True
use_embedded_content = False
language = _('Serbian')
lang = 'sr-Latn-RS'
direction = 'ltr'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} .artTitle{font-size: x-large; font-weight: bold} .columnhead{font-size: small; font-weight: bold}'
html2lrf_options = [
'--comment' , description
, '--category' , category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -75,11 +73,19 @@ class Nin(BasicNewsRecipe):
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
soup.head.insert(0,mlang)
soup.head.insert(1,mcharset)
for item in soup.findAll(style=True):
del item['style']
attribs = [ 'style','font','valign'
,'colspan','width','height'
,'rowspan','summary','align'
,'cellspacing','cellpadding'
,'frames','rules','border'
]
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
item.name = 'div'
for attrib in attribs:
if item.has_key(attrib):
del item[attrib]
return soup
def get_article_url(self, article):
    """Return the article link with '.co.yu' rewritten to '.co.rs'.

    article: feed entry exposing a dict-style ``get``.

    Returns the value stored under 'link' with every '.co.yu' replaced by
    '.co.rs', or None when the entry has no 'link' value.
    """
    raw = article.get('link', None)
    if raw is None:
        # No link in the entry: report that instead of failing with
        # AttributeError on None.replace().
        return None
    return raw.replace('.co.yu','.co.rs')

View File

@ -8,30 +8,30 @@ novosti.rs
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class Novosti(BasicNewsRecipe):
title = u'Vecernje Novosti'
__author__ = u'Darko Miletic'
description = u'Vesti'
title = 'Vecernje Novosti'
__author__ = 'Darko Miletic'
description = 'Vesti'
publisher = 'Kompanija Novosti'
category = 'news, politics, Serbia'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'utf8'
remove_javascript = True
encoding = 'utf-8'
language = _('Serbian')
lang = 'sr-Latn-RS'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
html2lrf_options = [
'--comment' , description
, '--category' , category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -41,8 +41,17 @@ class Novosti(BasicNewsRecipe):
feeds = [(u'Vesti', u'http://www.novosti.rs/php/vesti/rss.php')]
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
soup.head.insert(0,mlang)
attribs = [ 'style','font','valign'
,'colspan','width','height'
,'rowspan','summary','align'
,'cellspacing','cellpadding'
,'frames','rules','border'
]
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
item.name = 'div'
for attrib in attribs:
if item.has_key(attrib):
del item[attrib]
return soup

View File

@ -21,19 +21,18 @@ class Nspm(BasicNewsRecipe):
no_stylesheets = True
use_embedded_content = False
INDEX = 'http://www.nspm.rs/?alphabet=l'
encoding = 'utf8'
remove_javascript = True
encoding = 'utf-8'
language = _('Serbian')
lang = 'sr-Latn-RS'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
html2lrf_options = [
'--comment' , description
, '--category' , category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
remove_tags = [
@ -51,28 +50,18 @@ class Nspm(BasicNewsRecipe):
def print_version(self, url):
    """Return the print-version URL: every '.html' becomes '/stampa.html'."""
    # Splitting on the marker and re-joining with the replacement is
    # equivalent to str.replace for a non-empty marker.
    pieces = url.split('.html')
    return '/stampa.html'.join(pieces)
def cleanup_image_tags(self,soup):
for item in soup.findAll('img'):
for attrib in ['height','width','border','align']:
def preprocess_html(self, soup):
soup.html['xml:lang'] = self.lang
soup.html['lang'] = self.lang
attribs = [ 'style','font','valign'
,'colspan','width','height'
,'rowspan','summary','align'
,'cellspacing','cellpadding'
,'frames','rules','border'
]
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
item.name = 'div'
for attrib in attribs:
if item.has_key(attrib):
del item[attrib]
oldParent = item.parent
myIndex = oldParent.contents.index(item)
item.extract()
divtag = Tag(soup,'div')
brtag = Tag(soup,'br')
oldParent.insert(myIndex,divtag)
divtag.append(item)
divtag.append(brtag)
return soup
def preprocess_html(self, soup):
lng = 'sr-Latn-RS'
soup.html['xml:lang'] = lng
soup.html['lang'] = lng
ftag = soup.find('meta',attrs={'http-equiv':'Content-Language'})
if ftag:
ftag['content'] = lng
for item in soup.findAll(style=True):
del item['style']
return self.cleanup_image_tags(soup)
return self.adeify_images(soup)

View File

@ -8,6 +8,7 @@ pescanik.net
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class Pescanik(BasicNewsRecipe):
title = 'Pescanik'
@ -19,20 +20,18 @@ class Pescanik(BasicNewsRecipe):
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
remove_javascript = True
encoding = 'utf8'
cover_url = "http://pescanik.net/templates/ja_teline/images/logo.png"
encoding = 'utf-8'
language = _('Serbian')
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
lang = 'sr-Latn-RS'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} .contentheading{font-size: x-large; font-weight: bold} .small{font-size: small} .createdate{font-size: x-small; font-weight: bold}'
html2lrf_options = [
'--comment' , description
, '--category' , category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -40,18 +39,27 @@ class Pescanik(BasicNewsRecipe):
remove_tags = [
dict(name='td' , attrs={'class':'buttonheading'})
,dict(name='span', attrs={'class':'article_seperator'})
,dict(name=['object','link','img','h4','ul'])
,dict(name=['object','link','h4','ul'])
]
feeds = [(u'Pescanik Online', u'http://pescanik.net/index.php?option=com_rd_rss&id=12')]
feeds = [(u'Pescanik Online', u'http://www.pescanik.net/index.php?option=com_rd_rss&id=12')]
def print_version(self, url):
    """Return the print-version URL for an article URL.

    Every '/index.php' in ``url`` is replaced by '/index2.php' and the
    query suffix '&pop=1&page=0' is appended to the result.
    """
    return url.replace('/index.php','/index2.php') + '&pop=1&page=0'
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
soup.head.insert(0,mlang)
attribs = [ 'style','font','valign'
,'colspan','width','height'
,'rowspan','summary','align'
,'cellspacing','cellpadding'
,'frames','rules','border'
]
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
item.name = 'div'
for attrib in attribs:
if item.has_key(attrib):
del item[attrib]
return self.adeify_images(soup)

View File

@ -19,22 +19,20 @@ class Pobjeda(BasicNewsRecipe):
publisher = 'Pobjeda a.d.'
category = 'news, politics, Montenegro'
no_stylesheets = True
remove_javascript = True
encoding = 'utf8'
remove_javascript = True
encoding = 'utf-8'
use_embedded_content = False
language = _('Serbian')
language = _('Montenegrin')
lang = 'sr-Latn-Me'
INDEX = u'http://www.pobjeda.co.me'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

View File

@ -1,15 +1,16 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
'''
politika.rs
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class Politika(BasicNewsRecipe):
title = u'Politika Online'
title = 'Politika Online'
__author__ = 'Darko Miletic'
description = 'Najstariji dnevni list na Balkanu'
publisher = 'Politika novine i Magazini d.o.o'
@ -21,15 +22,17 @@ class Politika(BasicNewsRecipe):
remove_javascript = True
encoding = 'utf8'
language = _('Serbian')
lang = 'sr-Latn-RS'
direction = 'ltr'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -55,11 +58,13 @@ class Politika(BasicNewsRecipe):
]
def preprocess_html(self, soup):
# Inserts a Content-Language meta entry at the front of <head>, sets the
# lang/dir attributes on <html>, strips every 'style' attribute, and drops
# the 'align' attribute from the 'content_center_border' div.
# NOTE(review): this span appears to hold two versions of the method merged
# line by line (the enclosing text is a diff rendering without +/- markers);
# the string-based 'mtag' pair and the Tag-based 'mlang' pair look like the
# before/after forms of the same step - confirm against the real file.
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
soup.head.insert(0,mtag)
soup.html['lang'] = self.lang
soup.html['dir' ] = self.direction
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
soup.head.insert(0,mlang)
for item in soup.findAll(style=True):
del item['style']
ftag = soup.find('div',attrs={'class':'content_center_border'})
# NOTE(review): presumably soup.find returns None when the page has no such
# div, in which case the has_key call below would raise - confirm and guard.
if ftag.has_key('align'):
del ftag['align']
# NOTE(review): two consecutive returns; the second is unreachable as written
# and looks like the replacement for the first - confirm.
return soup
return self.adeify_images(soup)

View File

@ -9,6 +9,7 @@ pressonline.rs
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class PressOnline(BasicNewsRecipe):
title = 'Press Online'
@ -19,20 +20,21 @@ class PressOnline(BasicNewsRecipe):
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'utf8'
encoding = 'utf-8'
use_embedded_content = True
cover_url = 'http://www.pressonline.rs/img/logo.gif'
language = _('Serbian')
lang = 'sr-Latn-RS'
direction = 'ltr'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -57,10 +59,8 @@ class PressOnline(BasicNewsRecipe):
]
def preprocess_html(self, soup):
soup.html['xml:lang'] = 'sr-Latn-RS'
soup.html['lang'] = 'sr-Latn-RS'
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
soup.head.insert(0,mtag)
for img in soup.findAll('img', align=True):
del img['align']
return soup
soup.html['lang'] = self.lang
soup.html['dir' ] = self.direction
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
soup.head.insert(0,mlang)
return self.adeify_images(soup)

View File

@ -24,13 +24,13 @@ class RTS(BasicNewsRecipe):
lang = 'sr-Latn-RS'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
'''
spiegel.de
'''
@ -9,21 +9,25 @@ spiegel.de
from calibre.web.feeds.news import BasicNewsRecipe
class Spiegel_int(BasicNewsRecipe):
title = u'Spiegel Online International'
title = 'Spiegel Online International'
__author__ = 'Darko Miletic'
description = "News and POV from Europe's largest newsmagazine"
oldest_article = 7
max_articles_per_feed = 100
language = _('English')
language = _('English')
no_stylesheets = True
use_embedded_content = False
cover_url = 'http://www.spiegel.de/static/sys/v8/headlines/spiegelonline.gif'
html2lrf_options = [
'--comment', description
, '--base-font-size', '10'
, '--category', 'news, politics, Germany'
, '--publisher', 'SPIEGEL ONLINE GmbH'
]
publisher = 'SPIEGEL ONLINE GmbH'
category = 'news, politics, Germany'
lang = 'en'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : lang
,'publisher' : publisher
,'pretty_print': True
}
remove_tags_after = dict(name='div', attrs={'id':'spArticleBody'})

View File

@ -7,6 +7,7 @@ tanjug.rs
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class Tanjug(BasicNewsRecipe):
title = 'Tanjug'
@ -14,21 +15,22 @@ class Tanjug(BasicNewsRecipe):
description = 'Novinska agencija TANJUG - Dnevne vesti iz Srbije i sveta'
publisher = 'Tanjug'
category = 'news, politics, Serbia'
oldest_article = 1
oldest_article = 2
max_articles_per_feed = 100
use_embedded_content = True
encoding = 'utf-8'
lang = 'sr-Latn-RS'
language = _('Serbian')
direction = 'ltr'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em}"'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -37,7 +39,7 @@ class Tanjug(BasicNewsRecipe):
def preprocess_html(self, soup):
soup.html['xml:lang'] = self.lang
soup.html['lang' ] = self.lang
soup.html['dir' ] = "ltr"
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>'
soup.head.insert(0,mtag)
return soup
soup.html['dir' ] = self.direction
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
soup.head.insert(0,mlang)
return self.adeify_images(soup)

View File

@ -20,14 +20,15 @@ class Twitchfilm(BasicNewsRecipe):
publisher = 'Twitch'
category = 'twitch, twitchfilm, movie news, movie reviews, cult cinema, independent cinema, anime, foreign cinema, geek talk'
language = _('English')
lang = 'en-US'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
remove_tags = [dict(name='div', attrs={'class':'feedflare'})]
@ -36,6 +37,6 @@ class Twitchfilm(BasicNewsRecipe):
def preprocess_html(self, soup):
# Inserts a Content-Type meta tag at the front of <head> and sets the 'lang'
# attribute on <html>.
# NOTE(review): the second attribute pair below is named 'context'; sibling
# recipes in this file build the same meta tag with a 'content' attribute, so
# this looks like a typo - confirm and fix.
mtag = Tag(soup,'meta',[('http-equiv','Content-Type'),('context','text/html; charset=utf-8')])
soup.head.insert(0,mtag)
soup.html['lang'] = 'en-US'
# NOTE(review): the statements after this return are unreachable as written;
# they look like the replacement for the two statements above (the enclosing
# text is a diff rendering without +/- markers) - confirm against the real file.
return soup
soup.html['lang'] = self.lang
return self.adeify_images(soup)

View File

@ -9,6 +9,7 @@ www.vecernji.hr
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class VecernjiList(BasicNewsRecipe):
title = 'Vecernji List'
@ -18,23 +19,23 @@ class VecernjiList(BasicNewsRecipe):
category = 'news, politics, Croatia'
oldest_article = 2
max_articles_per_feed = 100
delay = 4
delay = 1
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = False
remove_javascript = True
language = _('Croatian')
lang = 'hr-HR'
direction = 'ltr'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -46,13 +47,16 @@ class VecernjiList(BasicNewsRecipe):
feeds = [(u'Vijesti', u'http://www.vecernji.hr/rss/')]
def preprocess_html(self, soup):
soup.html['lang'] = 'hr-HR'
mtag = '<meta http-equiv="Content-Language" content="hr-HR"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
soup.html['lang'] = self.lang
soup.html['dir' ] = self.direction
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
soup.head.insert(0,mlang)
soup.head.insert(1,mcharset)
return self.adeify_images(soup)
def print_version(self, url):
return url.replace('/index.do','/print.do')
artid = url.rpartition('-')[2]
return 'http://www.vecernji.hr/index.php?cmd=show_clanak&action=print_popup&clanak_id='+artid

View File

@ -20,22 +20,19 @@ class Vijesti(BasicNewsRecipe):
oldest_article = 2
max_articles_per_feed = 150
no_stylesheets = True
remove_javascript = True
encoding = 'cp1250'
cover_url = 'http://www.vijesti.me/img/logo.gif'
remove_javascript = True
use_embedded_content = False
language = _('Serbian')
language = _('Montenegrin')
lang ='sr-Latn-Me'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

View File

@ -22,22 +22,20 @@ class Vreme(BasicNewsRecipe):
needs_subscription = True
INDEX = 'http://www.vreme.com'
LOGIN = 'http://www.vreme.com/account/login.php?url=%2F'
remove_javascript = True
use_embedded_content = False
encoding = 'utf-8'
language = _('Serbian')
lang = 'sr-Latn-RS'
direction = 'ltr'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .heading1{font-family: sans1, sans-serif; font-size: x-large; font-weight: bold} .heading2{font-family: sans1, sans-serif; font-size: large; font-weight: bold} .toc-heading{font-family: sans1, sans-serif; font-size: small} .column-heading2{font-family: sans1, sans-serif; font-size: large} .column-heading1{font-family: sans1, sans-serif; font-size: x-large} .column-normal{font-family: sans1, sans-serif; font-size: medium} .large{font-family: sans1, sans-serif; font-size: large} '
extra_css = ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .heading1{font-family: sans1, sans-serif; font-size: x-large; font-weight: bold} .heading2{font-family: sans1, sans-serif; font-size: large; font-weight: bold} .toc-heading{font-family: sans1, sans-serif; font-size: small} .column-heading2{font-family: sans1, sans-serif; font-size: large} .column-heading1{font-family: sans1, sans-serif; font-size: x-large} .column-normal{font-family: sans1, sans-serif; font-size: medium} .large{font-family: sans1, sans-serif; font-size: large} '
html2lrf_options = [
'--comment' , description
, '--category' , category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -84,12 +82,21 @@ class Vreme(BasicNewsRecipe):
del soup.body['text' ]
del soup.body['bgcolor']
del soup.body['onload' ]
for item in soup.findAll(face=True):
del item['face']
for item in soup.findAll(size=True):
del item['size']
soup.html['lang'] = self.lang
soup.html['dir' ] = self.direction
attribs = [ 'style','font','valign'
,'colspan','width','height'
,'rowspan','summary','align'
,'cellspacing','cellpadding'
,'frames','rules','border'
]
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
item.name = 'div'
for attrib in attribs:
if item.has_key(attrib):
del item[attrib]
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
soup.head.insert(0,mlang)