Sync to trunk.

This commit is contained in:
John Schember 2011-11-26 18:46:28 -05:00
commit d88183ed29
8 changed files with 85 additions and 32 deletions

View File

@ -39,7 +39,9 @@ class TheIndependentNew(BasicNewsRecipe):
encoding = 'utf-8' encoding = 'utf-8'
remove_tags =[ remove_tags =[
dict(attrs={'id' : ['RelatedArtTag','renderBiography']}), dict(attrs={'id' : ['RelatedArtTag','renderBiography']}),
dict(attrs={'class' : ['autoplay','openBiogPopup']}) dict(attrs={'class' : ['autoplay','openBiogPopup']}),
dict(name='img',attrs={'alt' : ['Get Adobe Flash player']}),
dict(attrs={'style' : re.compile('.*')}),
] ]
keep_only_tags =[dict(attrs={'id':'main'})] keep_only_tags =[dict(attrs={'id':'main'})]
@ -113,6 +115,7 @@ class TheIndependentNew(BasicNewsRecipe):
return None return None
items_to_extract = [] items_to_extract = []
slideshow_elements = []
for item in soup.findAll(attrs={'class' : re.compile("widget.*")}): for item in soup.findAll(attrs={'class' : re.compile("widget.*")}):
remove = True remove = True
@ -131,6 +134,7 @@ class TheIndependentNew(BasicNewsRecipe):
if (pattern.search(item['class'])) is not None: if (pattern.search(item['class'])) is not None:
if self._FETCH_IMAGES: if self._FETCH_IMAGES:
remove = False remove = False
slideshow_elements.append(item)
else: else:
remove = True remove = True
@ -148,28 +152,29 @@ class TheIndependentNew(BasicNewsRecipe):
items_to_extract = [] items_to_extract = []
if self._FETCH_IMAGES: if self._FETCH_IMAGES:
for item in soup.findAll('a',attrs={'href' : re.compile('.*')}): for element in slideshow_elements:
if item.img is not None: for item in element.findAll('a',attrs={'href' : re.compile('.*')}):
#use full size image if item.img is not None:
img = item.findNext('img') #use full size image
img = item.findNext('img')
img['src'] = item['href'] img['src'] = item['href']
#insert caption if available #insert caption if available
if img['title'] is not None and (len(img['title']) > 1): if img.get('title') and (len(img['title']) > 1):
tag = Tag(soup,'h3') tag = Tag(soup,'h3')
text = NavigableString(img['title']) text = NavigableString(img['title'])
tag.insert(0,text) tag.insert(0,text)
#picture before text #picture before text
img.extract() img.extract()
item.insert(0,img) item.insert(0,img)
item.insert(1,tag) item.insert(1,tag)
# remove link # remove link
item.name = "div" item.name = "div"
item["class"]='image' item["class"]='image'
del item["href"] del item["href"]
#remove empty subtitles #remove empty subtitles
@ -283,7 +288,7 @@ class TheIndependentNew(BasicNewsRecipe):
items_to_extract = [] items_to_extract = []
for item in soup.findAll('div', attrs={'class' : 'image'}): for item in soup.findAll('div', attrs={'class' : 'image'}):
img = item.findNext('img') img = item.findNext('img')
if img is not None and img['src'] is not None: if img and img.get('src'):
# broken images still point to remote url # broken images still point to remote url
pattern = re.compile('http://www.independent.co.uk.*') pattern = re.compile('http://www.independent.co.uk.*')
if pattern.match(img["src"]) is not None: if pattern.match(img["src"]) is not None:

View File

@ -232,7 +232,9 @@ per_language_title_sort_articles = {
'deu' : (r'Der\s+', r'Die\s+', r'Das\s+', r'Den\s+', r'Ein\s+', 'deu' : (r'Der\s+', r'Die\s+', r'Das\s+', r'Den\s+', r'Ein\s+',
r'Eine\s+', r'Einen\s+', ), r'Eine\s+', r'Einen\s+', ),
# Dutch # Dutch
'nld' : (r'De\s+', r'Het\s+', r'Een\s+', ), 'nld' : (r'De\s+', r'Het\s+', r'Een\s+', r"'n\s+", r"'s\s+", r'Ene\s+',
r'Ener\s+', r'Enes\s+', r'Den\s+', r'Der\s+', r'Des\s+',
r"'t\s+"),
# Swedish # Swedish
'swe' : (r'En\s+', r'Ett\s+', r'Det\s+', r'Den\s+', r'De\s+', ), 'swe' : (r'En\s+', r'Ett\s+', r'Det\s+', r'Den\s+', r'De\s+', ),
# Turkish # Turkish
@ -242,6 +244,8 @@ per_language_title_sort_articles = {
# Greek # Greek
'ell' : (r'O\s+', r'I\s+', r'To\s+', r'Ta\s+', r'Tus\s+', r'Tis\s+', 'ell' : (r'O\s+', r'I\s+', r'To\s+', r'Ta\s+', r'Tus\s+', r'Tis\s+',
r"'Enas\s+", r"'Mia\s+", r"'Ena\s+", r"'Enan\s+", ), r"'Enas\s+", r"'Mia\s+", r"'Ena\s+", r"'Enan\s+", ),
# Hungarian
'hun' : (r'A\s+', 'Az\s+', 'Egy\s+',),
} }
default_language_for_title_sort = None default_language_for_title_sort = None
title_sort_articles=r'^(A|The|An)\s+' title_sort_articles=r'^(A|The|An)\s+'

View File

@ -109,12 +109,16 @@ def get_title_sort_pat(lang=None):
q = canonicalize_lang(q) if q else q q = canonicalize_lang(q) if q else q
data = tweaks['per_language_title_sort_articles'] data = tweaks['per_language_title_sort_articles']
ans = data.get(q, None) ans = data.get(q, None)
if ans is None: try:
ans = data['eng'] ans = frozenset(ans) if ans else frozenset(data['eng'])
ans = frozenset(ans + data['eng']) except:
ans = frozenset((r'A\s+', r'The\s+', r'An\s+'))
ans = '|'.join(ans) ans = '|'.join(ans)
ans = '^(%s)'%ans ans = '^(%s)'%ans
ans = re.compile(ans, re.IGNORECASE) try:
ans = re.compile(ans, re.IGNORECASE)
except:
ans = re.compile(r'^(A|The|An)\s+', re.IGNORECASE)
_title_pats[lang] = ans _title_pats[lang] = ans
return ans return ans

View File

@ -0,0 +1,11 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

View File

@ -1409,8 +1409,8 @@ void PictureFlow::dataChanged() { d->dataChanged(); }
void PictureFlow::emitcurrentChanged(int index) { emit currentChanged(index); } void PictureFlow::emitcurrentChanged(int index) { emit currentChanged(index); }
int FlowImages::count() { return 0; } int FlowImages::count() { return 0; }
QImage FlowImages::image(int index) { index=0; return QImage(); } QImage FlowImages::image(int index) { Q_UNUSED(index); return QImage(); }
QString FlowImages::caption(int index) {index=0; return QString(); } QString FlowImages::caption(int index) { Q_UNUSED(index); return QString(); }
QString FlowImages::subtitle(int index) {index=0; return QString(); } QString FlowImages::subtitle(int index) { Q_UNUSED(index); return QString(); }
// }}} // }}}

View File

@ -15,7 +15,7 @@ from calibre.utils.config import tweaks, prefs
from calibre.utils.date import parse_date, now, UNDEFINED_DATE, clean_date_for_sort from calibre.utils.date import parse_date, now, UNDEFINED_DATE, clean_date_for_sort
from calibre.utils.search_query_parser import SearchQueryParser from calibre.utils.search_query_parser import SearchQueryParser
from calibre.utils.pyparsing import ParseException from calibre.utils.pyparsing import ParseException
from calibre.utils.localization import canonicalize_lang, lang_map from calibre.utils.localization import canonicalize_lang, lang_map, get_udc
from calibre.ebooks.metadata import title_sort, author_to_author_sort from calibre.ebooks.metadata import title_sort, author_to_author_sort
from calibre.ebooks.metadata.opf2 import metadata_to_opf from calibre.ebooks.metadata.opf2 import metadata_to_opf
from calibre import prints from calibre import prints
@ -217,6 +217,7 @@ class ResultCache(SearchQueryParser): # {{{
self.FIELD_MAP = FIELD_MAP self.FIELD_MAP = FIELD_MAP
self.db_prefs = db_prefs self.db_prefs = db_prefs
self.composites = {} self.composites = {}
self.udc = get_udc()
for key in field_metadata: for key in field_metadata:
if field_metadata[key]['datatype'] == 'composite': if field_metadata[key]['datatype'] == 'composite':
self.composites[field_metadata[key]['rec_index']] = key self.composites[field_metadata[key]['rec_index']] = key
@ -261,6 +262,15 @@ class ResultCache(SearchQueryParser): # {{{
# Search functions {{{ # Search functions {{{
def ascii_name(self, name):
    """Return the ``self.udc``-decoded form of ``name``, or ``False``.

    The result is meant to be used as an extra search candidate next to
    the original value, so a decoded form that is identical to ``name``
    is reported as ``False`` rather than returned as a duplicate.

    :param name: the text to decode (presumably an author name -- the
        visible caller only applies this to the authors column).
    :return: the decoded text when it differs from ``name``; ``False``
        when decoding fails or changes nothing. Callers can drop the
        ``False`` results with ``filter(None, ...)``.
    """
    try:
        decoded = self.udc.decode(name)
    except Exception:
        # Best effort: a value that cannot be decoded just contributes no
        # extra candidate.  Catch Exception rather than using a bare
        # ``except`` so KeyboardInterrupt/SystemExit still propagate.
        return False
    if decoded == name:
        return False
    return decoded
def universal_set(self): def universal_set(self):
return set([i[0] for i in self._data if i is not None]) return set([i[0] for i in self._data if i is not None])
@ -734,6 +744,8 @@ class ResultCache(SearchQueryParser): # {{{
else: else:
q = query q = query
au_loc = self.FIELD_MAP['authors']
for id_ in candidates: for id_ in candidates:
item = self._data[id_] item = self._data[id_]
if item is None: continue if item is None: continue
@ -776,6 +788,9 @@ class ResultCache(SearchQueryParser): # {{{
if loc not in exclude_fields: # time for text matching if loc not in exclude_fields: # time for text matching
if is_multiple_cols[loc] is not None: if is_multiple_cols[loc] is not None:
vals = [v.strip() for v in item[loc].split(is_multiple_cols[loc])] vals = [v.strip() for v in item[loc].split(is_multiple_cols[loc])]
if loc == au_loc:
vals += filter(None, map(self.ascii_name,
vals))
else: else:
vals = [item[loc]] ### make into list to make _match happy vals = [item[loc]] ### make into list to make _match happy
if _match(q, vals, matchkind): if _match(q, vals, matchkind):

View File

@ -342,7 +342,8 @@ def remove_option_parser():
Remove the books identified by ids from the database. ids should be a comma separated \ Remove the books identified by ids from the database. ids should be a comma separated \
list of id numbers (you can get id numbers by using the list command). For example, \ list of id numbers (you can get id numbers by using the list command). For example, \
23,34,57-85 23,34,57-85 (when specifying a range, the last number in the range is not
included).
''')) '''))
def command_remove(args, dbpath): def command_remove(args, dbpath):

View File

@ -13,7 +13,7 @@ import threading, random
from itertools import repeat from itertools import repeat
from math import ceil from math import ceil
from calibre import prints from calibre import prints, force_unicode
from calibre.ebooks.metadata import (title_sort, author_to_author_sort, from calibre.ebooks.metadata import (title_sort, author_to_author_sort,
string_to_authors, authors_to_string, get_title_sort_pat) string_to_authors, authors_to_string, get_title_sort_pat)
from calibre.ebooks.metadata.opf2 import metadata_to_opf from calibre.ebooks.metadata.opf2 import metadata_to_opf
@ -33,7 +33,7 @@ from calibre import isbytestring
from calibre.utils.filenames import ascii_filename from calibre.utils.filenames import ascii_filename
from calibre.utils.date import utcnow, now as nowf, utcfromtimestamp from calibre.utils.date import utcnow, now as nowf, utcfromtimestamp
from calibre.utils.config import prefs, tweaks, from_json, to_json from calibre.utils.config import prefs, tweaks, from_json, to_json
from calibre.utils.icu import sort_key, strcmp from calibre.utils.icu import sort_key, strcmp, lower
from calibre.utils.search_query_parser import saved_searches, set_saved_searches from calibre.utils.search_query_parser import saved_searches, set_saved_searches
from calibre.ebooks import BOOK_EXTENSIONS, check_ebook_format from calibre.ebooks import BOOK_EXTENSIONS, check_ebook_format
from calibre.utils.magick.draw import save_cover_data_to from calibre.utils.magick.draw import save_cover_data_to
@ -1003,6 +1003,19 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
return bool(self.conn.get('SELECT id FROM books where title=?', (title,), all=False)) return bool(self.conn.get('SELECT id FROM books where title=?', (title,), all=False))
return False return False
def books_with_same_title(self, mi, all_matches=True):
    """Return the ids of books whose title equals ``mi.title``.

    Both sides are passed through ``lower()`` before being compared;
    ``mi.title`` is first passed through ``force_unicode()``.

    :param mi: object exposing a ``title`` attribute.
    :param all_matches: when true (the default) collect every matching
        id; when false stop scanning after the first match.
    :return: a set of matching book ids; empty when ``mi.title`` is
        empty or nothing matches.
    """
    matches = set()
    wanted = mi.title
    if not wanted:
        # Nothing to compare against.
        return matches
    wanted = lower(force_unicode(wanted))
    for candidate_id in self.all_ids():
        candidate_title = self.title(candidate_id, index_is_id=True)
        if lower(candidate_title) != wanted:
            continue
        matches.add(candidate_id)
        if not all_matches:
            # Caller only needs to know whether at least one book matches.
            break
    return matches
def find_identical_books(self, mi): def find_identical_books(self, mi):
fuzzy_title_patterns = [(re.compile(pat, re.IGNORECASE) if fuzzy_title_patterns = [(re.compile(pat, re.IGNORECASE) if
isinstance(pat, basestring) else pat, repl) for pat, repl in isinstance(pat, basestring) else pat, repl) for pat, repl in