GRiker 2013-03-03 08:46:49 -08:00
commit 61084c2392
13 changed files with 332 additions and 159 deletions

View File

@@ -37,7 +37,7 @@ nbproject/
calibre_plugins/
recipes/.git
recipes/.gitignore
recipes/README
recipes/README.md
recipes/katalog_egazeciarz.recipe
recipes/tv_axnscifi.recipe
recipes/tv_comedycentral.recipe

View File

@@ -1,7 +1,7 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2010-2012, Tomasz Dlugosz <tomek3d@gmail.com>'
__copyright__ = u'2010-2013, Tomasz Dlugosz <tomek3d@gmail.com>'
'''
fronda.pl
'''
@@ -68,6 +68,7 @@ class Fronda(BasicNewsRecipe):
article_url = 'http://www.fronda.pl' + article_a['href']
article_title = self.tag_to_string(article_a)
articles[genName].append( { 'title' : article_title, 'url' : article_url, 'date' : article_date })
if articles[genName]:
feeds.append((genName, articles[genName]))
return feeds
@@ -82,8 +83,10 @@ class Fronda(BasicNewsRecipe):
dict(name='h3', attrs={'class':'block-header article comments'}),
dict(name='ul', attrs={'class':'comment-list'}),
dict(name='ul', attrs={'class':'category'}),
dict(name='ul', attrs={'class':'tag-list'}),
dict(name='p', attrs={'id':'comments-disclaimer'}),
dict(name='div', attrs={'style':'text-align: left; margin-bottom: 15px;'}),
dict(name='div', attrs={'style':'text-align: left; margin-top: 15px;'}),
dict(name='div', attrs={'style':'text-align: left; margin-top: 15px; margin-bottom: 30px;'}),
dict(name='div', attrs={'class':'related-articles content'}),
dict(name='div', attrs={'id':'comment-form'})
]
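The new 'if articles[genName]:' guard keeps categories that yielded no articles out of the feed list, so empty sections never reach the generated e-book. A minimal standalone sketch of the same pattern, with made-up category names and article data:

# Sketch: only register a category once it actually collected articles.
articles = {'Polityka': [{'title': 'A', 'url': 'http://example.com/a', 'date': ''}],
            'Kultura': []}  # nothing was scraped for this category
feeds = []
for genName in ('Polityka', 'Kultura'):
    if articles[genName]:
        feeds.append((genName, articles[genName]))
assert [name for name, _ in feeds] == ['Polityka']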

View File

@@ -2,7 +2,8 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2011, Piotr Kontek, piotr.kontek@gmail.com'
__copyright__ = '2011, Piotr Kontek, piotr.kontek@gmail.com \
2013, Tomasz Długosz, tomek3d@gmail.com'
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import PersistentTemporaryFile
@@ -12,9 +13,9 @@ import re
class GN(BasicNewsRecipe):
EDITION = 0
__author__ = 'Piotr Kontek'
title = u'Gość niedzielny'
description = 'Weekly magazine'
__author__ = 'Piotr Kontek, Tomasz Długosz'
title = u'Gość Niedzielny'
description = 'Ogólnopolski tygodnik katolicki'
encoding = 'utf-8'
no_stylesheets = True
language = 'pl'
@@ -38,17 +39,25 @@ class GN(BasicNewsRecipe):
first = True
for p in main_section.findAll('p', attrs={'class':None}, recursive=False):
if first and p.find('img') != None:
article = article + '<p>'
article = article + str(p.find('img')).replace('src="/files/','src="http://www.gosc.pl/files/')
article = article + '<font size="-2">'
article += '<p>'
article += str(p.find('img')).replace('src="/files/','src="http://www.gosc.pl/files/')
article += '<font size="-2">'
for s in p.findAll('span'):
article = article + self.tag_to_string(s)
article = article + '</font></p>'
article += self.tag_to_string(s)
article += '</font></p>'
else:
article = article + str(p).replace('src="/files/','src="http://www.gosc.pl/files/')
article += str(p).replace('src="/files/','src="http://www.gosc.pl/files/')
first = False
limiter = main_section.find('p', attrs={'class' : 'limiter'})
if limiter:
article += str(limiter)
html = unicode(title) + unicode(authors) + unicode(article)
html = unicode(title)
# Sometimes authors are not filled in:
if authors:
html += unicode(authors) + unicode(article)
else:
html += unicode(article)
self.temp_files.append(PersistentTemporaryFile('_temparse.html'))
self.temp_files[-1].write(html)
@@ -65,7 +74,8 @@ class GN(BasicNewsRecipe):
if img != None:
a = img.parent
self.EDITION = a['href']
self.title = img['alt']
# This was preventing Kindles from moving old issues to the 'Back Issues' category:
#self.title = img['alt']
self.cover_url = 'http://www.gosc.pl' + img['src']
if year != date.today().year or not first:
break
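The rewritten block above guards against pages without an author line. Under Python 2, unicode(None) yields the literal text u'None', so the old unconditional concatenation could embed 'None' in the generated article. A tiny demonstration (the markup strings are placeholders):

# Hypothetical: the scrape found no author element, so authors is None.
authors = None
html = u'<h1>Title</h1>'
if authors:  # the guard added above; without it, unicode(None) == u'None'
    html += unicode(authors)
assert u'None' not in html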

View File

@@ -30,6 +30,11 @@ class tvn24(BasicNewsRecipe):
feeds = [(u'Najnowsze', u'http://www.tvn24.pl/najnowsze.xml'), ]
#(u'Polska', u'www.tvn24.pl/polska.xml'), (u'\u015awiat', u'http://www.tvn24.pl/swiat.xml'), (u'Sport', u'http://www.tvn24.pl/sport.xml'), (u'Biznes', u'http://www.tvn24.pl/biznes.xml'), (u'Meteo', u'http://www.tvn24.pl/meteo.xml'), (u'Micha\u0142ki', u'http://www.tvn24.pl/michalki.xml'), (u'Kultura', u'http://www.tvn24.pl/kultura.xml')]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup
def preprocess_html(self, soup):
for alink in soup.findAll('a'):
if alink.string is not None:

View File

@@ -217,6 +217,8 @@ class Cache(object):
field.series_field = self.fields[name[:-len('_index')]]
elif name == 'series_index':
field.series_field = self.fields['series']
elif name == 'authors':
field.author_sort_field = self.fields['author_sort']
@read_api
def field_for(self, name, book_id, default_value=None):

View File

@@ -402,6 +402,13 @@ class AuthorsField(ManyToManyField):
def category_sort_value(self, item_id, book_ids, lang_map):
return self.table.asort_map[item_id]
def db_author_sort_for_book(self, book_id):
return self.author_sort_field.for_book(book_id)
def author_sort_for_book(self, book_id):
return ' & '.join(self.table.asort_map[k] for k in
self.table.book_col_map[book_id])
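author_sort_for_book joins the per-author sort strings with ' & ', calibre's convention for multi-author books; the writing tests later in this commit assert exactly this format. A toy illustration, with stand-ins for the table maps:

# Toy stand-ins for table.asort_map and table.book_col_map.
asort_map = {10: 'Goyal, Kovid', 11: 'Layog, Divok'}
book_col_map = {3: (10, 11)}
author_sort = ' & '.join(asort_map[k] for k in book_col_map[3])
assert author_sort == 'Goyal, Kovid & Layog, Divok'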
class FormatsField(ManyToManyField):
def for_book(self, book_id, default_value=None):

View File

@@ -168,7 +168,7 @@ class AuthorsTable(ManyToManyTable):
self.asort_map = {}
for row in db.conn.execute(
'SELECT id, name, sort, link FROM authors'):
self.id_map[row[0]] = row[1]
self.id_map[row[0]] = self.unserialize(row[1])
self.asort_map[row[0]] = (row[2] if row[2] else
author_to_author_sort(row[1]))
self.alink_map[row[0]] = row[3]
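The new unserialize call decodes author names as they are loaded: calibre cannot store raw commas in the authors table because ',' separates authors at the UI level, so names go into SQLite with commas replaced by '|' (the INSERT in write.py further down does val.replace(',', '|')). A sketch of the round trip, assuming unserialize is the simple inverse:

# Assumed round trip between UI form and stored form of an author name.
def serialize(name):   # what the INSERT in write.py effectively does
    return name.replace(',', '|')
def unserialize(raw):  # assumed inverse applied while loading the table
    return raw.replace('|', ',')
assert unserialize(serialize('An, Author')) == 'An, Author'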

View File

@@ -203,10 +203,63 @@ class WritingTest(BaseTest):
# }}}
def test_many_many_basic(self): # {{{
'Test the different code paths for writing to a many-many field'
cl = self.cloned_library
cache = self.init_cache(cl)
ae, af, sf = self.assertEqual, self.assertFalse, cache.set_field
# Tags
ae(sf('#tags', {1:cache.field_for('tags', 1), 2:cache.field_for('tags', 2)}),
{1, 2})
for name in ('tags', '#tags'):
f = cache.fields[name]
af(sf(name, {1:('tag one', 'News')}, allow_case_change=False))
ae(sf(name, {1:'tag one, News'}), {1, 2})
ae(sf(name, {3:('tag two', 'sep,sep2')}), {2, 3})
ae(len(f.table.id_map), 4)
ae(sf(name, {1:None}), set([1]))
cache2 = self.init_cache(cl)
for c in (cache, cache2):
ae(c.field_for(name, 3), ('tag two', 'sep;sep2'))
ae(len(c.fields[name].table.id_map), 3)
ae(c.field_for(name, 1), ())
ae(c.field_for(name, 2), ('tag one', 'tag two'))
del cache2
# Authors
ae(sf('#authors', {k:cache.field_for('authors', k) for k in (1,2,3)}),
{1,2,3})
for name in ('authors', '#authors'):
f = cache.fields[name]
ae(len(f.table.id_map), 3)
af(cache.set_field(name, {3:None if name == 'authors' else 'Unknown'}))
ae(cache.set_field(name, {3:'Kovid Goyal & Divok Layog'}), set([3]))
ae(cache.set_field(name, {1:'', 2:'An, Author'}), {1,2})
cache2 = self.init_cache(cl)
for c in (cache, cache2):
ae(len(c.fields[name].table.id_map), 4 if name =='authors' else 3)
ae(c.field_for(name, 3), ('Kovid Goyal', 'Divok Layog'))
ae(c.field_for(name, 2), ('An, Author',))
ae(c.field_for(name, 1), ('Unknown',) if name=='authors' else ())
ae(c.field_for('author_sort', 1), 'Unknown')
ae(c.field_for('author_sort', 2), 'An, Author')
ae(c.field_for('author_sort', 3), 'Goyal, Kovid & Layog, Divok')
del cache2
ae(cache.set_field('authors', {1:'KoviD GoyaL'}), {1, 3})
ae(cache.field_for('author_sort', 1), 'GoyaL, KoviD')
ae(cache.field_for('author_sort', 3), 'GoyaL, KoviD & Layog, Divok')
# TODO: identifiers, languages
# }}}
def tests():
return unittest.TestLoader().loadTestsFromTestCase(WritingTest)
tl = unittest.TestLoader()
# return tl.loadTestsFromName('writing.WritingTest.test_many_many_basic')
return tl.loadTestsFromTestCase(WritingTest)
def run():
unittest.TextTestRunner(verbosity=2).run(tests())
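The commented loadTestsFromName line is the hook for running a single test while iterating on it. A usage sketch (the dotted name assumes the module is importable as plain 'writing', as in the comment above):

# Run just the many-many test instead of the full WritingTest suite.
import unittest
tl = unittest.TestLoader()
suite = tl.loadTestsFromName('writing.WritingTest.test_many_many_basic')
unittest.TextTestRunner(verbosity=2).run(suite)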

View File

@@ -12,8 +12,11 @@ from functools import partial
from datetime import datetime
from calibre.constants import preferred_encoding, ispy3
from calibre.ebooks.metadata import author_to_author_sort
from calibre.utils.date import (parse_only_date, parse_date, UNDEFINED_DATE,
isoformat)
from calibre.utils.icu import strcmp
if ispy3:
unicode = str
@@ -45,15 +48,20 @@ def get_series_values(val):
pass
return (val, None)
def multiple_text(sep, x):
if x is None:
def multiple_text(sep, ui_sep, x):
if not x:
return ()
if isinstance(x, bytes):
x = x.decode(preferred_encoding, 'replace')
if isinstance(x, unicode):
x = x.split(sep)
x = (y.strip() for y in x if y.strip())
return (' '.join(y.split()) for y in x if y)
else:
x = (y.decode(preferred_encoding, 'replace') if isinstance(y, bytes)
else y for y in x)
ui_sep = ui_sep.strip()
repsep = ',' if ui_sep == ';' else ';'
x = (y.strip().replace(ui_sep, repsep) for y in x if y.strip())
return tuple(' '.join(y.split()) for y in x if y)
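multiple_text now receives both separators: a plain string is split on the ui_to_list separator, while an already-split collection has any embedded list_to_ui separator rewritten to ';' (or to ',' when the UI separator is itself ';') so one item cannot later be mistaken for two. The writing test above pins this down ('sep,sep2' comes back as 'sep;sep2'). A usage sketch with the tag separators, assuming the function above is in scope:

# Tags use ui_to_list=',' and list_to_ui=', '.
assert tuple(multiple_text(',', ', ', 'tag one, News')) == ('tag one', 'News')
assert tuple(multiple_text(',', ', ', ('tag two', 'sep,sep2'))) == ('tag two', 'sep;sep2')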
def adapt_datetime(x):
if isinstance(x, (unicode, bytes)):
@@ -92,7 +100,8 @@ def get_adapter(name, metadata):
dt = metadata['datatype']
if dt == 'text':
if metadata['is_multiple']:
ans = partial(multiple_text, metadata['is_multiple']['ui_to_list'])
m = metadata['is_multiple']
ans = partial(multiple_text, m['ui_to_list'], m['list_to_ui'])
else:
ans = single_text
elif dt == 'series':
@@ -132,7 +141,7 @@ def get_adapter(name, metadata):
def one_one_in_books(book_id_val_map, db, field, *args):
'Set a one-one field in the books table'
if book_id_val_map:
sequence = tuple((sqlite_datetime(v), k) for k, v in book_id_val_map.iteritems())
sequence = ((sqlite_datetime(v), k) for k, v in book_id_val_map.iteritems())
db.conn.executemany(
'UPDATE books SET %s=? WHERE id=?'%field.metadata['column'], sequence)
field.table.book_col_map.update(book_id_val_map)
@@ -150,7 +159,7 @@ def one_one_in_other(book_id_val_map, db, field, *args):
if updated:
db.conn.executemany('INSERT OR REPLACE INTO %s(book,%s) VALUES (?,?)'%(
field.metadata['table'], field.metadata['column']),
tuple((k, sqlite_datetime(v)) for k, v in updated.iteritems()))
((k, sqlite_datetime(v)) for k, v in updated.iteritems()))
field.table.book_col_map.update(updated)
return set(book_id_val_map)
@@ -178,6 +187,44 @@ def safe_lower(x):
except (TypeError, ValueError, KeyError, AttributeError):
return x
def get_db_id(val, db, m, table, kmap, rid_map, allow_case_change,
case_changes, val_map, is_authors=False):
''' Get the db id for the value val, inserting val into the db if it is
not already present. '''
kval = kmap(val)
item_id = rid_map.get(kval, None)
if item_id is None:
if is_authors:
aus = author_to_author_sort(val)
db.conn.execute('INSERT INTO authors(name,sort) VALUES (?,?)',
(val.replace(',', '|'), aus))
else:
db.conn.execute('INSERT INTO %s(%s) VALUES (?)'%(
m['table'], m['column']), (val,))
item_id = rid_map[kval] = db.conn.last_insert_rowid()
table.id_map[item_id] = val
table.col_book_map[item_id] = set()
if is_authors:
table.asort_map[item_id] = aus
table.alink_map[item_id] = ''
elif allow_case_change and val != table.id_map[item_id]:
case_changes[item_id] = val
val_map[val] = item_id
def change_case(case_changes, dirtied, db, table, m, is_authors=False):
if is_authors:
vals = ((val.replace(',', '|'), item_id) for item_id, val in
case_changes.iteritems())
else:
vals = ((val, item_id) for item_id, val in case_changes.iteritems())
db.conn.executemany(
'UPDATE %s SET %s=? WHERE id=?'%(m['table'], m['column']), vals)
for item_id, val in case_changes.iteritems():
table.id_map[item_id] = val
dirtied.update(table.col_book_map[item_id])
if is_authors:
table.asort_map[item_id] = author_to_author_sort(val)
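get_db_id and change_case together implement case-insensitive matching with optional case correction: a value that differs from an existing item only in case reuses that item's id and, when allow_case_change is set, renames the stored item, dirtying every book that references it. A toy model of the rename step:

# One shared item renamed once in id_map; all referencing books are dirtied.
id_map = {7: 'Kovid Goyal'}
col_book_map = {7: {1, 3}}
case_changes = {7: 'KoviD GoyaL'}
dirtied = set()
for item_id, val in case_changes.items():
    id_map[item_id] = val
    dirtied.update(col_book_map[item_id])
assert dirtied == {1, 3}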
def many_one(book_id_val_map, db, field, allow_case_change, *args):
dirtied = set()
m = field.metadata
@@ -185,108 +232,61 @@ def many_one(book_id_val_map, db, field, allow_case_change, *args):
dt = m['datatype']
is_custom_series = dt == 'series' and table.name.startswith('#')
# Map values to their canonical form for later comparison
# Map values to db ids, including any new values
kmap = safe_lower if dt in {'text', 'series'} else lambda x:x
rid_map = {kmap(item):item_id for item_id, item in table.id_map.iteritems()}
val_map = {None:None}
case_changes = {}
for val in book_id_val_map.itervalues():
if val is not None:
get_db_id(val, db, m, table, kmap, rid_map, allow_case_change,
case_changes, val_map)
if case_changes:
change_case(case_changes, dirtied, db, table, m)
book_id_item_id_map = {k:val_map[v] for k, v in book_id_val_map.iteritems()}
# Ignore those items whose value is the same as the current value
no_changes = {k:nval for k, nval in book_id_val_map.iteritems() if
kmap(nval) == kmap(field.for_book(k, default_value=None))}
for book_id in no_changes:
del book_id_val_map[book_id]
book_id_item_id_map = {k:v for k, v in book_id_item_id_map.iteritems()
if v != table.book_col_map.get(k, None)}
dirtied |= set(book_id_item_id_map)
# If we are allowed case changes check that none of the ignored items are
# case changes. If they are, update the item's case in the db.
if allow_case_change:
for book_id, nval in no_changes.iteritems():
if nval is not None and nval != field.for_book(
book_id, default_value=None):
# Change of case
item_id = table.book_col_map[book_id]
db.conn.execute('UPDATE %s SET %s=? WHERE id=?'%(
m['table'], m['column']), (nval, item_id))
table.id_map[item_id] = nval
dirtied |= table.col_book_map[item_id]
deleted = {k:v for k, v in book_id_val_map.iteritems() if v is None}
updated = {k:v for k, v in book_id_val_map.iteritems() if v is not None}
link_table = table.link_table
# Update the book->col and col->book maps
deleted = set()
updated = {}
for book_id, item_id in book_id_item_id_map.iteritems():
old_item_id = table.book_col_map.get(book_id, None)
if old_item_id is not None:
table.col_book_map[old_item_id].discard(book_id)
if item_id is None:
table.book_col_map.pop(book_id, None)
deleted.add(book_id)
else:
table.book_col_map[book_id] = item_id
table.col_book_map[item_id].add(book_id)
updated[book_id] = item_id
# Update the db link table
if deleted:
db.conn.executemany('DELETE FROM %s WHERE book=?'%link_table,
tuple((book_id,) for book_id in deleted))
for book_id in deleted:
item_id = table.book_col_map.pop(book_id, None)
if item_id is not None:
table.col_book_map[item_id].discard(book_id)
dirtied |= set(deleted)
db.conn.executemany('DELETE FROM %s WHERE book=?'%table.link_table,
((k,) for k in deleted))
if updated:
rid_map = {kmap(v):k for k, v in table.id_map.iteritems()}
book_id_item_id_map = {k:rid_map.get(kmap(v), None) for k, v in
book_id_val_map.iteritems()}
# items that don't yet exist
new_items = {k:v for k, v in updated.iteritems() if
book_id_item_id_map[k] is None}
# items that already exist
changed_items = {k:book_id_item_id_map[k] for k in updated if
book_id_item_id_map[k] is not None}
def sql_update(imap):
sql = (
'DELETE FROM {0} WHERE book=?; INSERT INTO {0}(book,{1},extra) VALUES(?, ?, 1.0)'
if is_custom_series else
'DELETE FROM {0} WHERE book=?; INSERT INTO {0}(book,{1}) VALUES(?, ?)'
)
db.conn.executemany(sql.format(link_table, m['link_column']),
tuple((book_id, book_id, item_id) for book_id, item_id in
imap.iteritems()))
if new_items:
item_ids = {}
val_map = {}
for val in set(new_items.itervalues()):
lval = kmap(val)
if lval in val_map:
item_id = val_map[lval]
else:
db.conn.execute('INSERT INTO %s(%s) VALUES (?)'%(
m['table'], m['column']), (val,))
item_id = val_map[lval] = db.conn.last_insert_rowid()
item_ids[val] = item_id
table.id_map[item_id] = val
imap = {}
for book_id, val in new_items.iteritems():
item_id = item_ids[val]
old_item_id = table.book_col_map.get(book_id, None)
if old_item_id is not None:
table.col_book_map[old_item_id].discard(book_id)
if item_id not in table.col_book_map:
table.col_book_map[item_id] = set()
table.col_book_map[item_id].add(book_id)
table.book_col_map[book_id] = imap[book_id] = item_id
sql_update(imap)
dirtied |= set(imap)
if changed_items:
imap = {}
sql_update(changed_items)
for book_id, item_id in changed_items.iteritems():
old_item_id = table.book_col_map.get(book_id, None)
if old_item_id != item_id:
table.book_col_map[book_id] = item_id
table.col_book_map[item_id].add(book_id)
if old_item_id is not None:
table.col_book_map[old_item_id].discard(book_id)
imap[book_id] = item_id
sql_update(imap)
dirtied |= set(imap)
db.conn.executemany(sql.format(table.link_table, m['link_column']),
((book_id, book_id, item_id) for book_id, item_id in
updated.iteritems()))
# Remove no longer used items
remove = {item_id for item_id in table.id_map if not
table.col_book_map.get(item_id, False)}
if remove:
db.conn.executemany('DELETE FROM %s WHERE id=?'%m['table'],
tuple((item_id,) for item_id in remove))
((item_id,) for item_id in remove))
for item_id in remove:
del table.id_map[item_id]
table.col_book_map.pop(item_id, None)
@@ -294,6 +294,96 @@ def many_one(book_id_val_map, db, field, allow_case_change, *args):
return dirtied
# }}}
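many_one above and the new many_many below share the same three-phase shape: resolve each incoming value to an item id (creating rows as needed), drop books whose mapping is unchanged, then rewrite the link table and prune orphaned items. The unchanged-value filter is the part worth seeing in isolation:

# Phase two in isolation: only books whose resolved item id differs from
# the stored mapping survive, so no-op writes never touch the database.
book_id_item_id_map = {1: 7, 2: 8, 3: 9}
book_col_map = {1: 7, 2: 5}  # book 1 already has item 7
changed = {k: v for k, v in book_id_item_id_map.items()
           if v != book_col_map.get(k, None)}
assert changed == {2: 8, 3: 9}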
# Many-Many fields {{{
def many_many(book_id_val_map, db, field, allow_case_change, *args):
dirtied = set()
m = field.metadata
table = field.table
dt = m['datatype']
is_authors = field.name == 'authors'
# Map values to db ids, including any new values
kmap = safe_lower if dt == 'text' else lambda x:x
rid_map = {kmap(item):item_id for item_id, item in table.id_map.iteritems()}
val_map = {}
case_changes = {}
for vals in book_id_val_map.itervalues():
for val in vals:
get_db_id(val, db, m, table, kmap, rid_map, allow_case_change,
case_changes, val_map, is_authors=is_authors)
if case_changes:
change_case(case_changes, dirtied, db, table, m, is_authors=is_authors)
if is_authors:
for item_id, val in case_changes.iteritems():
for book_id in table.col_book_map[item_id]:
current_sort = field.db_author_sort_for_book(book_id)
new_sort = field.author_sort_for_book(book_id)
if strcmp(current_sort, new_sort) == 0:
# The sort strings differ only by case; update the db sort
field.author_sort_field.writer.set_books({book_id:new_sort}, db)
book_id_item_id_map = {k:tuple(val_map[v] for v in vals)
for k, vals in book_id_val_map.iteritems()}
# Ignore those items whose value is the same as the current value
book_id_item_id_map = {k:v for k, v in book_id_item_id_map.iteritems()
if v != table.book_col_map.get(k, None)}
dirtied |= set(book_id_item_id_map)
# Update the book->col and col->book maps
deleted = set()
updated = {}
for book_id, item_ids in book_id_item_id_map.iteritems():
old_item_ids = table.book_col_map.get(book_id, None)
if old_item_ids:
for old_item_id in old_item_ids:
table.col_book_map[old_item_id].discard(book_id)
if item_ids:
table.book_col_map[book_id] = item_ids
for item_id in item_ids:
table.col_book_map[item_id].add(book_id)
updated[book_id] = item_ids
else:
table.book_col_map.pop(book_id, None)
deleted.add(book_id)
# Update the db link table
if deleted:
db.conn.executemany('DELETE FROM %s WHERE book=?'%table.link_table,
((k,) for k in deleted))
if updated:
vals = (
(book_id, val) for book_id, vals in updated.iteritems()
for val in vals
)
db.conn.executemany('DELETE FROM %s WHERE book=?'%table.link_table,
((k,) for k in updated))
db.conn.executemany('INSERT INTO {0}(book,{1}) VALUES(?, ?)'.format(
table.link_table, m['link_column']), vals)
if is_authors:
aus_map = {book_id:field.author_sort_for_book(book_id) for book_id
in updated}
field.author_sort_field.writer.set_books(aus_map, db)
# Remove no longer used items
remove = {item_id for item_id in table.id_map if not
table.col_book_map.get(item_id, False)}
if remove:
db.conn.executemany('DELETE FROM %s WHERE id=?'%m['table'],
((item_id,) for item_id in remove))
for item_id in remove:
del table.id_map[item_id]
table.col_book_map.pop(item_id, None)
if is_authors:
table.asort_map.pop(item_id, None)
table.alink_map.pop(item_id, None)
return dirtied
# }}}
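Whenever an authors write changes a book, many_many rebuilds that book's author_sort through the author_sort field's own writer, which is what makes the author_sort assertions in the tests above hold. A toy recomputation using the real helper (book_col_map here stands in for the table's name tuples):

from calibre.ebooks.metadata import author_to_author_sort
book_col_map = {3: ('Kovid Goyal', 'Divok Layog')}  # toy stand-in
aus_map = {book_id: ' & '.join(author_to_author_sort(a) for a in names)
           for book_id, names in book_col_map.items()}
assert aus_map[3] == 'Goyal, Kovid & Layog, Divok'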
def dummy(book_id_val_map, *args):
return set()
@@ -311,9 +401,7 @@ class Writer(object):
elif self.name[0] == '#' and self.name.endswith('_index'):
self.set_books_func = custom_series_index
elif field.is_many_many:
# TODO: Implement this
pass
# TODO: Remember to change commas to | when writing authors to sqlite
self.set_books_func = many_many
elif field.is_many:
self.set_books_func = (self.set_books_for_enum if dt ==
'enumeration' else many_one)

View File

@@ -7,9 +7,10 @@ __docformat__ = 'restructuredtext en'
import cStringIO, ctypes, datetime, os, platform, re, shutil, sys, tempfile, time
from calibre.constants import __appname__, __version__, cache_dir, DEBUG as CALIBRE_DEBUG
from calibre import fit_image, confirm_config_name, strftime as _strftime
from calibre.constants import isosx, iswindows, cache_dir as _cache_dir
from calibre.constants import (
__appname__, __version__, DEBUG as CALIBRE_DEBUG, isosx, iswindows,
cache_dir as _cache_dir)
from calibre.devices.errors import OpenFeedback, UserFeedback
from calibre.devices.usbms.deviceconfig import DeviceConfig
from calibre.devices.interface import DevicePlugin
@@ -290,8 +291,6 @@ class ITUNES(DriverBase):
# Properties
cached_books = {}
cache_dir = os.path.join(_cache_dir(), 'itunes')
archive_path = os.path.join(cache_dir, "thumbs.zip")
calibre_library_path = prefs['library_path']
description_prefix = "added by calibre"
ejected = False
@@ -312,7 +311,7 @@
@property
def cache_dir(self):
return os.path.join(cache_dir(), 'itunes')
return os.path.join(_cache_dir(), 'itunes')
@property
def archive_path(self):
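The cache_dir fix above is a name-shadowing repair: the class defines its own cache_dir (formerly an attribute, now a property), so the module-level cache_dir() function had to be imported as _cache_dir to stay reachable. A self-contained illustration of the trap, with toy names:

def cache_dir():  # stands in for calibre.constants.cache_dir
    return '/tmp/calibre-cache'

class Driver(object):
    cache_dir = cache_dir() + '/itunes'  # the RHS still sees the function...
    # ...but from here on, cache_dir inside this class body is the string,
    # so a second cache_dir() call would raise TypeError. Aliasing the
    # import as _cache_dir keeps the function reachable everywhere.

print(Driver.cache_dir)  # /tmp/calibre-cache/itunes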

View File

@@ -88,7 +88,7 @@ class Container(object):
self.mime_map[name] = guess_type('a.opf')
if not hasattr(self, 'opf_name'):
raise InvalidBook('Book has no OPF file')
raise InvalidBook('Could not locate opf file: %r'%opfpath)
# Update mime map with data from the OPF
for item in self.opf_xpath('//opf:manifest/opf:item[@href and @media-type]'):

View File

@@ -10,6 +10,7 @@ assumes a prior call to the flatcss transform.
'''
import os, math, functools, collections, re, copy
from collections import OrderedDict
from lxml.etree import XPath as _XPath
from lxml import etree
@@ -106,8 +107,7 @@ class Split(object):
continue
for elem in selector(body[0]):
if elem not in body:
if before:
elem.set('pb_before', '1')
elem.set('pb_before', '1' if before else '0')
page_breaks.add(elem)
for i, elem in enumerate(item.data.iter()):
@@ -134,14 +134,12 @@ class Split(object):
id = 'calibre_pb_%d'%i
x.set('id', id)
xp = XPath('//*[@id=%r]'%id)
page_breaks_.append((xp,
x.get('pb_before', False)))
page_breaks_.append((xp, x.get('pb_before', '0') == '1'))
page_break_ids.append(id)
for elem in item.data.iter():
elem.attrib.pop('pb_order', False)
if elem.get('pb_before', False):
elem.attrib.pop('pb_before')
elem.attrib.pop('pb_before', False)
return page_breaks_, page_break_ids
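The pb_before handling changed because lxml attribute values are always strings: now that '0' is written as well as '1', reading the flag back with a plain truthiness test would be wrong, since bool('0') is True in Python. Hence the explicit == '1' comparison. In miniature:

# Non-empty strings are truthy, so '0' would read as 'break before'.
from lxml import etree
elem = etree.Element('div')
elem.set('pb_before', '0')  # lxml stores attributes as strings
assert bool(elem.get('pb_before', False)) is True    # the trap
assert (elem.get('pb_before', '0') == '1') is False  # the fix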
@@ -223,22 +221,27 @@ class FlowSplitter(object):
self.commit()
def split_on_page_breaks(self, orig_tree):
ordered_ids = []
for elem in orig_tree.xpath('//*[@id]'):
id = elem.get('id')
if id in self.page_break_ids:
ordered_ids.append(self.page_breaks[self.page_break_ids.index(id)])
ordered_ids = OrderedDict()
all_page_break_ids = frozenset(self.page_break_ids)
for elem_id in orig_tree.xpath('//*/@id'):
if elem_id in all_page_break_ids:
ordered_ids[elem_id] = self.page_breaks[
self.page_break_ids.index(elem_id)]
self.trees = []
tree = orig_tree
for pattern, before in ordered_ids:
self.trees = [orig_tree]
while ordered_ids:
pb_id, (pattern, before) = ordered_ids.iteritems().next()
del ordered_ids[pb_id]
for i in xrange(len(self.trees)-1, -1, -1):
tree = self.trees[i]
elem = pattern(tree)
if elem:
self.log.debug('\t\tSplitting on page-break at %s'%
self.log.debug('\t\tSplitting on page-break at id=%s'%
elem[0].get('id'))
before, after = self.do_split(tree, elem[0], before)
self.trees.append(before)
tree = after
before_tree, after_tree = self.do_split(tree, elem[0], before)
self.trees[i:i+1] = [before_tree, after_tree]
break
self.trees.append(tree)
trees, ids = [], set([])
for tree in self.trees:
@@ -289,7 +292,6 @@ class FlowSplitter(object):
if self.opts.verbose > 3 and npath != path:
self.log.debug('\t\t\tMoved split point %s to %s'%(path, npath))
return npath
def do_split(self, tree, split_point, before):
@@ -304,6 +306,10 @@
root = tree.getroot()
root2 = tree2.getroot()
body, body2 = map(self.get_body, (root, root2))
if before:
# We cannot adjust for after since moving an after split point to a
# parent will cause breakage if the parent contains any content
# after the original split point
path = self.adjust_split_point(root, path)
split_point = root.xpath(path)[0]
split_point2 = root2.xpath(path)[0]

View File

@@ -171,7 +171,7 @@ class ZshCompleter(object): # {{{
arg = ''
if opt.takes_value():
arg = ':"%s":'%h
if opt.dest in {'debug_pipeline', 'to_dir', 'outbox', 'with_library', 'library_path'}:
if opt.dest in {'extract_to', 'debug_pipeline', 'to_dir', 'outbox', 'with_library', 'library_path'}:
arg += "'_path_files -/'"
elif opt.choices:
arg += "(%s)"%'|'.join(opt.choices)