KG revisions

This commit is contained in:
GRiker 2010-02-20 11:36:43 -07:00
commit 10da9fccb6
6 changed files with 159 additions and 83 deletions

View File

@ -1,17 +1,41 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class HoustonChronicle(BasicNewsRecipe): class HoustonChronicle(BasicNewsRecipe):
title = u'The Houston Chronicle' title = u'The Houston Chronicle'
description = 'News from Houston, Texas' description = 'News from Houston, Texas'
__author__ = 'Kovid Goyal' __author__ = 'Kovid Goyal and Sujata Raman'
language = 'en' language = 'en'
timefmt = ' [%a, %d %b, %Y]' timefmt = ' [%a, %d %b, %Y]'
no_stylesheets = True no_stylesheets = True
keep_only_tags = [dict(id=['story-head', 'story'])] keep_only_tags = [
remove_tags = [dict(id=['share-module', 'resource-box', dict(id=['story-head', 'story'])
'resource-box-header'])] ]
remove_tags = [
dict(id=['share-module', 'resource-box',
'resource-box-header'])
]
extra_css = '''
h1{font-family :Arial,Helvetica,sans-serif; font-size:large;}
h2{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#666666;}
h3{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;}
h4{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
p{font-family :Arial,Helvetica,sans-serif; font-size:x-small;}
#story-head h1{font-family :Arial,Helvetica,sans-serif; font-size: xx-large;}
#story-head h2{font-family :Arial,Helvetica,sans-serif; font-size: small; color:#000000;}
#story-head h3{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
#story-head h4{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
#story{font-family :Arial,Helvetica,sans-serif; font-size:xx-small;}
#Text-TextSubhed BoldCond PoynterAgateZero h3{color:#444444;font-family :Arial,Helvetica,sans-serif; font-size:small;}
.p260x p{font-family :Arial,Helvetica,serif; font-size:x-small;font-style:italic;}
.p260x h6{color:#777777;font-family :Arial,Helvetica,sans-serif; font-size:xx-small;}
'''
def parse_index(self): def parse_index(self):
soup = self.index_to_soup('http://www.chron.com/news/') soup = self.index_to_soup('http://www.chron.com/news/')
@ -64,3 +88,6 @@ class HoustonChronicle(BasicNewsRecipe):
feeds.append((current_section, current_articles)) feeds.append((current_section, current_articles))
return feeds return feeds

View File

@ -7,10 +7,11 @@ sfgate.com
''' '''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
import re
class SanFranciscoChronicle(BasicNewsRecipe): class SanFranciscoChronicle(BasicNewsRecipe):
title = u'San Francisco Chronicle' title = u'San Francisco Chronicle'
__author__ = u'Darko Miletic' __author__ = u'Darko Miletic and Sujata Raman'
description = u'San Francisco news' description = u'San Francisco news'
language = 'en' language = 'en'
@ -19,13 +20,56 @@ class SanFranciscoChronicle(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
remove_tags_before = {'class':'articleheadings'}
remove_tags_after = dict(name='div', attrs={'id':'articlecontent' })
remove_tags = [ remove_tags_before = {'id':'printheader'}
dict(name='div', attrs={'class':'tools tools_top'})
,dict(name='div', attrs={'id':'articlebox' }) remove_tags = [
] dict(name='div',attrs={'id':'printheader'})
,dict(name='a', attrs={'href':re.compile('http://ads\.pheedo\.com.*')})
,dict(name='div',attrs={'id':'footer'})
]
extra_css = '''
h1{font-family :Arial,Helvetica,sans-serif; font-size:large;}
h2{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#666666;}
h3{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;}
h4{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
p{font-family :Arial,Helvetica,sans-serif; font-size:x-small;}
.byline{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
.date{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
.dtlcomment{font-style:italic;}
.georgia h3{font-family :Arial,Helvetica,sans-serif; font-size:x-small; color:#000000;}
'''
feeds = [ feeds = [
(u'Top News Stories', u'http://www.sfgate.com/rss/feeds/news.xml') (u'Top News Stories', u'http://www.sfgate.com/rss/feeds/news.xml')
] ]
def print_version(self, url):
    '''
    Return the printer-friendly address for an article.

    The printable page is selected by appending a ``type=printable``
    query parameter. The parameter is joined with ``&``, so `url` is
    expected to already carry a query string, as the addresses built by
    :method:`get_article_url` do.

    An empty `url` is returned unchanged: :method:`get_article_url` uses
    the empty string for entries it rejects, and appending the parameter
    would turn that marker into the bogus address ``&type=printable``.
    '''
    if not url:
        return url
    return url + "&type=printable"
def get_article_url(self, article):
    '''
    Build the sfgate.com article address for a feed entry.

    `article` is the parsed feed entry. Its ``guid`` supplies the value of
    the ``f`` parameter of ``article.cgi`` and ``title_detail['value']``
    is the headline.

    Returns the empty string for entries whose headline contains
    "Presented By:" (presumably sponsored placements -- confirm against
    the feed) and for entries that carry no ``guid``.
    '''
    headline = article['title_detail']['value']
    # Test the headline as-is: coercing it with str() raises
    # UnicodeEncodeError on Python 2 as soon as it contains a non-ASCII
    # character. The debug print of the headline was dropped for the same
    # reason.
    if headline and "Presented By:" in headline:
        return ''
    guid = article.get('guid', None)
    if not guid:
        # Without a guid there is nothing to build the address from;
        # concatenating None would raise TypeError.
        return ''
    return "http://www.sfgate.com/cgi-bin/article.cgi?f=" + guid

View File

@ -4,8 +4,7 @@ __license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>' __copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import os import os, shutil, time
import shutil
from calibre.devices.errors import PathError from calibre.devices.errors import PathError
@ -55,6 +54,7 @@ class CLI(object):
shutil.copyfileobj(infile, dest) shutil.copyfileobj(infile, dest)
except IOError: except IOError:
print 'WARNING: First attempt to send file to device failed' print 'WARNING: First attempt to send file to device failed'
time.sleep(0.2)
infile.seek(0) infile.seek(0)
dest.seek(0) dest.seek(0)
dest.truncate() dest.truncate()

View File

@ -131,9 +131,9 @@ class RtfTokenParser():
if isString(self.tokens[i].name, "\\'"): if isString(self.tokens[i].name, "\\'"):
i = i + 1 i = i + 1
if not isinstance(self.tokens[i], tokenData): if not isinstance(self.tokens[i], tokenData):
raise BaseException('Error: token8bitChar without data.') raise Exception('Error: token8bitChar without data.')
if len(self.tokens[i].data) < 2: if len(self.tokens[i].data) < 2:
raise BaseException('Error: token8bitChar without data.') raise Exception('Error: token8bitChar without data.')
newTokens.append(token8bitChar(self.tokens[i].data[0:2])) newTokens.append(token8bitChar(self.tokens[i].data[0:2]))
if len(self.tokens[i].data) > 2: if len(self.tokens[i].data) > 2:
newTokens.append(tokenData(self.tokens[i].data[2:])) newTokens.append(tokenData(self.tokens[i].data[2:]))
@ -195,7 +195,7 @@ class RtfTokenParser():
i = i + 1 i = i + 1
j = j + 1 j = j + 1
continue continue
raise BaseException('Error: incorect utf replacement.') raise Exception('Error: incorect utf replacement.')
#calibre rtf2xml does not support utfreplace #calibre rtf2xml does not support utfreplace
replace = [] replace = []
@ -248,7 +248,7 @@ class RtfTokenizer():
if isChar(self.rtfData[i], '\\'): if isChar(self.rtfData[i], '\\'):
if i + 1 >= len(self.rtfData): if i + 1 >= len(self.rtfData):
raise BaseException('Error: Control character found at the end of the document.') raise Exception('Error: Control character found at the end of the document.')
if lastDataStart > -1: if lastDataStart > -1:
self.tokens.append(tokenData(self.rtfData[lastDataStart : i])) self.tokens.append(tokenData(self.rtfData[lastDataStart : i]))
@ -269,7 +269,7 @@ class RtfTokenizer():
i = i + 1 i = i + 1
if not consumed: if not consumed:
raise BaseException('Error (at:%d): Control Word without end.'%(tokenStart)) raise Exception('Error (at:%d): Control Word without end.'%(tokenStart))
#we have numeric argument before delimiter #we have numeric argument before delimiter
if isChar(self.rtfData[i], '-') or isDigit(self.rtfData[i]): if isChar(self.rtfData[i], '-') or isDigit(self.rtfData[i]):
@ -283,10 +283,10 @@ class RtfTokenizer():
l = l + 1 l = l + 1
i = i + 1 i = i + 1
if l > 10 : if l > 10 :
raise BaseException('Error (at:%d): Too many digits in control word numeric argument.'%[tokenStart]) raise Exception('Error (at:%d): Too many digits in control word numeric argument.'%[tokenStart])
if not consumed: if not consumed:
raise BaseException('Error (at:%d): Control Word without numeric argument end.'%[tokenStart]) raise Exception('Error (at:%d): Control Word without numeric argument end.'%[tokenStart])
separator = '' separator = ''
if isChar(self.rtfData[i], ' '): if isChar(self.rtfData[i], ' '):

View File

@ -1,10 +1,11 @@
from calibre.ebooks.metadata import authors_to_string
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import os, textwrap, traceback, re, shutil import os, textwrap, traceback, re, shutil
from operator import attrgetter from operator import attrgetter
from math import cos, sin, pi from math import cos, sin, pi
from contextlib import closing
from PyQt4.QtGui import QTableView, QAbstractItemView, QColor, \ from PyQt4.QtGui import QTableView, QAbstractItemView, QColor, \
QItemDelegate, QPainterPath, QLinearGradient, QBrush, \ QItemDelegate, QPainterPath, QLinearGradient, QBrush, \
QPen, QStyle, QPainter, \ QPen, QStyle, QPainter, \
@ -22,7 +23,8 @@ from calibre.gui2 import NONE, TableView, qstring_to_unicode, config, \
from calibre.gui2.widgets import EnLineEdit, TagsLineEdit from calibre.gui2.widgets import EnLineEdit, TagsLineEdit
from calibre.utils.search_query_parser import SearchQueryParser from calibre.utils.search_query_parser import SearchQueryParser
from calibre.ebooks.metadata.meta import set_metadata as _set_metadata from calibre.ebooks.metadata.meta import set_metadata as _set_metadata
from calibre.ebooks.metadata import string_to_authors, fmt_sidx from calibre.ebooks.metadata import string_to_authors, fmt_sidx, \
authors_to_string
from calibre.utils.config import tweaks from calibre.utils.config import tweaks
from calibre.utils.date import dt_factory, qt_to_dt, isoformat from calibre.utils.date import dt_factory, qt_to_dt, isoformat
@ -469,9 +471,10 @@ class BooksModel(QAbstractTableModel):
break break
if format is not None: if format is not None:
pt = PersistentTemporaryFile(suffix='.'+format) pt = PersistentTemporaryFile(suffix='.'+format)
src = self.db.format(id, format, index_is_id=True, as_file=True) with closing(self.db.format(id, format, index_is_id=True,
shutil.copyfileobj(src, pt) as_file=True)) as src:
pt.flush() shutil.copyfileobj(src, pt)
pt.flush()
pt.seek(0) pt.seek(0)
if set_metadata: if set_metadata:
_set_metadata(pt, self.db.get_metadata(id, get_cover=True, index_is_id=True), _set_metadata(pt, self.db.get_metadata(id, get_cover=True, index_is_id=True),
@ -505,8 +508,10 @@ class BooksModel(QAbstractTableModel):
break break
if format is not None: if format is not None:
pt = PersistentTemporaryFile(suffix='.'+format) pt = PersistentTemporaryFile(suffix='.'+format)
pt.write(self.db.format(row, format)) with closing(self.db.format(row, format, as_file=True)) as src:
pt.flush() shutil.copyfileobj(src, pt)
pt.flush()
pt.seek(0)
if set_metadata: if set_metadata:
_set_metadata(pt, self.db.get_metadata(row, get_cover=True), _set_metadata(pt, self.db.get_metadata(row, get_cover=True),
format) format)

View File

@ -6,14 +6,14 @@ __docformat__ = 'restructuredtext en'
''' '''
A parser for search queries with a syntax very similar to that used by A parser for search queries with a syntax very similar to that used by
the Google search engine. the Google search engine.
For details on the search query syntax see :class:`SearchQueryParser`. For details on the search query syntax see :class:`SearchQueryParser`.
To use the parser, subclass :class:`SearchQueryParser` and implement the To use the parser, subclass :class:`SearchQueryParser` and implement the
methods :method:`SearchQueryParser.universal_set` and methods :method:`SearchQueryParser.universal_set` and
:method:`SearchQueryParser.get_matches`. See for example :class:`Tester`. :method:`SearchQueryParser.get_matches`. See for example :class:`Tester`.
If this module is run, it will perform a series of unit tests. If this module is run, it will perform a series of unit tests.
''' '''
import sys, string, operator import sys, string, operator
@ -24,26 +24,26 @@ from calibre.utils.pyparsing import Keyword, Group, Forward, CharsNotIn, Suppres
class SearchQueryParser(object): class SearchQueryParser(object):
''' '''
Parses a search query. Parses a search query.
A search query consists of tokens. The tokens can be combined using A search query consists of tokens. The tokens can be combined using
the `or`, `and` and `not` operators as well as grouped using parentheses. the `or`, `and` and `not` operators as well as grouped using parentheses.
When no operator is specified between two tokens, `and` is assumed. When no operator is specified between two tokens, `and` is assumed.
Each token is a string of the form `location:query`. `location` is a string Each token is a string of the form `location:query`. `location` is a string
from :member:`LOCATIONS`. It is optional. If it is omitted, it is assumed to from :member:`LOCATIONS`. It is optional. If it is omitted, it is assumed to
be `all`. `query` is an arbitrary string that must not contain parentheses. be `all`. `query` is an arbitrary string that must not contain parentheses.
If it contains whitespace, it should be quoted by enclosing it in `"` marks. If it contains whitespace, it should be quoted by enclosing it in `"` marks.
Examples:: Examples::
* `Asimov` [search for the string "Asimov" in location `all`] * `Asimov` [search for the string "Asimov" in location `all`]
* `comments:"This is a good book"` [search for "This is a good book" in `comments`] * `comments:"This is a good book"` [search for "This is a good book" in `comments`]
* `author:Asimov tag:unread` [search for books by Asimov that have been tagged as unread] * `author:Asimov tag:unread` [search for books by Asimov that have been tagged as unread]
* `author:Asimov or author:Hardy` [search for books by Asimov or Hardy] * `author:Asimov or author:Hardy` [search for books by Asimov or Hardy]
* `(author:Asimov or author:Hardy) and not tag:read` [search for unread books by Asimov or Hardy] * `(author:Asimov or author:Hardy) and not tag:read` [search for unread books by Asimov or Hardy]
''' '''
LOCATIONS = [ LOCATIONS = [
'tag', 'tag',
'title', 'title',
@ -57,12 +57,12 @@ class SearchQueryParser(object):
'isbn', 'isbn',
'all', 'all',
] ]
@staticmethod @staticmethod
def run_tests(parser, result, tests): def run_tests(parser, result, tests):
failed = [] failed = []
for test in tests: for test in tests:
print '\tTesting:', test[0], print '\tTesting:', test[0],
res = parser.parseString(test[0]) res = parser.parseString(test[0])
if list(res.get(result, None)) == test[1]: if list(res.get(result, None)) == test[1]:
print 'OK' print 'OK'
@ -70,7 +70,7 @@ class SearchQueryParser(object):
print 'FAILED:', 'Expected:', test[1], 'Got:', list(res.get(result, None)) print 'FAILED:', 'Expected:', test[1], 'Got:', list(res.get(result, None))
failed.append(test[0]) failed.append(test[0])
return failed return failed
def __init__(self, test=False): def __init__(self, test=False):
self._tests_failed = False self._tests_failed = False
# Define a token # Define a token
@ -95,50 +95,50 @@ class SearchQueryParser(object):
('title:"one two"', ['title', 'one two']), ('title:"one two"', ['title', 'one two']),
) )
) )
Or = Forward() Or = Forward()
Parenthesis = Group( Parenthesis = Group(
Suppress('(') + Or + Suppress(')') Suppress('(') + Or + Suppress(')')
).setResultsName('parenthesis') | Token ).setResultsName('parenthesis') | Token
Not = Forward() Not = Forward()
Not << (Group( Not << (Group(
Suppress(Keyword("not", caseless=True)) + Not Suppress(Keyword("not", caseless=True)) + Not
).setResultsName("not") | Parenthesis) ).setResultsName("not") | Parenthesis)
And = Forward() And = Forward()
And << (Group( And << (Group(
Not + Suppress(Keyword("and", caseless=True)) + And Not + Suppress(Keyword("and", caseless=True)) + And
).setResultsName("and") | Group( ).setResultsName("and") | Group(
Not + OneOrMore(~oneOf("and or") + And) Not + OneOrMore(~oneOf("and or", caseless=True) + And)
).setResultsName("and") | Not) ).setResultsName("and") | Not)
Or << (Group( Or << (Group(
And + Suppress(Keyword("or", caseless=True)) + Or And + Suppress(Keyword("or", caseless=True)) + Or
).setResultsName("or") | And) ).setResultsName("or") | And)
if test: if test:
Or.validate() Or.validate()
self._tests_failed = bool(failed) self._tests_failed = bool(failed)
self._parser = Or self._parser = Or
#self._parser.setDebug(True) #self._parser.setDebug(True)
#self.parse('(tolstoy)') #self.parse('(tolstoy)')
self._parser.setDebug(False) self._parser.setDebug(False)
def parse(self, query): def parse(self, query):
res = self._parser.parseString(query)[0] res = self._parser.parseString(query)[0]
return self.evaluate(res) return self.evaluate(res)
def method(self, group_name): def method(self, group_name):
return getattr(self, 'evaluate_'+group_name) return getattr(self, 'evaluate_'+group_name)
def evaluate(self, parse_result): def evaluate(self, parse_result):
return self.method(parse_result.getName())(parse_result) return self.method(parse_result.getName())(parse_result)
def evaluate_and(self, argument): def evaluate_and(self, argument):
return self.evaluate(argument[0]).intersection(self.evaluate(argument[1])) return self.evaluate(argument[0]).intersection(self.evaluate(argument[1]))
@ -150,27 +150,27 @@ class SearchQueryParser(object):
def evaluate_parenthesis(self, argument): def evaluate_parenthesis(self, argument):
return self.evaluate(argument[0]) return self.evaluate(argument[0])
def evaluate_token(self, argument): def evaluate_token(self, argument):
return self.get_matches(argument[0], argument[1]) return self.get_matches(argument[0], argument[1])
def get_matches(self, location, query): def get_matches(self, location, query):
''' '''
Should return the set of matches for :param:`location` and :param:`query`. Should return the set of matches for :param:`location` and :param:`query`.
:param:`location` is one of the items in :member:`SearchQueryParser.LOCATIONS`. :param:`location` is one of the items in :member:`SearchQueryParser.LOCATIONS`.
:param:`query` is a string literal. :param:`query` is a string literal.
''' '''
return set([]) return set([])
def universal_set(self): def universal_set(self):
''' '''
Should return the set of all matches. Should return the set of all matches.
''' '''
return set([]) return set([])
class Tester(SearchQueryParser): class Tester(SearchQueryParser):
texts = { texts = {
1: [u'Eugenie Grandet', u'Honor\xe9 de Balzac', u'manybooks.net', u'lrf'], 1: [u'Eugenie Grandet', u'Honor\xe9 de Balzac', u'manybooks.net', u'lrf'],
2: [u'Fanny Hill', u'John Cleland', u'manybooks.net', u'lrf'], 2: [u'Fanny Hill', u'John Cleland', u'manybooks.net', u'lrf'],
@ -459,30 +459,30 @@ class Tester(SearchQueryParser):
u'Washington Square Press', u'Washington Square Press',
u'lrf,rar'] u'lrf,rar']
} }
tests = { tests = {
'Dysfunction' : set([348]), 'Dysfunction' : set([348]),
'title:Dysfunction' : set([348]), 'title:Dysfunction' : set([348]),
'title:Dysfunction or author:Laurie': set([348, 444]), 'title:Dysfunction OR author:Laurie': set([348, 444]),
'(tag:txt or tag:pdf)': set([33, 258, 354, 305, 242, 51, 55, 56, 154]), '(tag:txt or tag:pdf)': set([33, 258, 354, 305, 242, 51, 55, 56, 154]),
'(tag:txt or tag:pdf) and author:Tolstoy': set([55, 56]), '(tag:txt OR tag:pdf) and author:Tolstoy': set([55, 56]),
'Tolstoy txt': set([55, 56]), 'Tolstoy txt': set([55, 56]),
'Hamilton Amsterdam' : set([]), 'Hamilton Amsterdam' : set([]),
u'Beär' : set([91]), u'Beär' : set([91]),
'dysfunc or tolstoy': set([348, 55, 56]), 'dysfunc or tolstoy': set([348, 55, 56]),
'tag:txt and not tolstoy': set([33, 258, 354, 305, 242, 154]), 'tag:txt AND NOT tolstoy': set([33, 258, 354, 305, 242, 154]),
'not tag:lrf' : set([305]), 'not tag:lrf' : set([305]),
'london:thames': set([13]), 'london:thames': set([13]),
'publisher:london:thames': set([13]), 'publisher:london:thames': set([13]),
'"(1977)"': set([13]), '"(1977)"': set([13]),
} }
fields = {'title':0, 'author':1, 'publisher':2, 'tag':3} fields = {'title':0, 'author':1, 'publisher':2, 'tag':3}
_universal_set = set(texts.keys()) _universal_set = set(texts.keys())
def universal_set(self): def universal_set(self):
return self._universal_set return self._universal_set
def get_matches(self, location, query): def get_matches(self, location, query):
location = location.lower() location = location.lower()
if location in self.fields.keys(): if location in self.fields.keys():
@ -491,19 +491,19 @@ class Tester(SearchQueryParser):
getter = lambda y: ''.join(x if x else '' for x in y) getter = lambda y: ''.join(x if x else '' for x in y)
else: else:
getter = lambda x: '' getter = lambda x: ''
if not query: if not query:
return set([]) return set([])
query = query.lower() query = query.lower()
return set(key for key, val in self.texts.items() \ return set(key for key, val in self.texts.items() \
if query and query in getattr(getter(val), 'lower', lambda : '')()) if query and query in getattr(getter(val), 'lower', lambda : '')())
def run_tests(self): def run_tests(self):
failed = [] failed = []
for query in self.tests.keys(): for query in self.tests.keys():
print 'Testing query:', query, print 'Testing query:', query,
res = self.parse(query) res = self.parse(query)
if res != self.tests[query]: if res != self.tests[query]:
print 'FAILED', 'Expected:', self.tests[query], 'Got:', res print 'FAILED', 'Expected:', self.tests[query], 'Got:', res
@ -511,7 +511,7 @@ class Tester(SearchQueryParser):
else: else:
print 'OK' print 'OK'
return failed return failed
def main(args=sys.argv): def main(args=sys.argv):
tester = Tester(test=True) tester = Tester(test=True)
@ -519,7 +519,7 @@ def main(args=sys.argv):
if tester._tests_failed or failed: if tester._tests_failed or failed:
print '>>>>>>>>>>>>>> Tests Failed <<<<<<<<<<<<<<<' print '>>>>>>>>>>>>>> Tests Failed <<<<<<<<<<<<<<<'
return 1 return 1
return 0 return 0
if __name__ == '__main__': if __name__ == '__main__':