KG revisions

This commit is contained in:
GRiker 2010-02-20 11:36:43 -07:00
commit 10da9fccb6
6 changed files with 159 additions and 83 deletions

View File

@ -1,17 +1,41 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class HoustonChronicle(BasicNewsRecipe): class HoustonChronicle(BasicNewsRecipe):
title = u'The Houston Chronicle' title = u'The Houston Chronicle'
description = 'News from Houston, Texas' description = 'News from Houston, Texas'
__author__ = 'Kovid Goyal' __author__ = 'Kovid Goyal and Sujata Raman'
language = 'en' language = 'en'
timefmt = ' [%a, %d %b, %Y]' timefmt = ' [%a, %d %b, %Y]'
no_stylesheets = True no_stylesheets = True
keep_only_tags = [dict(id=['story-head', 'story'])] keep_only_tags = [
remove_tags = [dict(id=['share-module', 'resource-box', dict(id=['story-head', 'story'])
'resource-box-header'])] ]
remove_tags = [
dict(id=['share-module', 'resource-box',
'resource-box-header'])
]
extra_css = '''
h1{font-family :Arial,Helvetica,sans-serif; font-size:large;}
h2{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#666666;}
h3{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;}
h4{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
p{font-family :Arial,Helvetica,sans-serif; font-size:x-small;}
#story-head h1{font-family :Arial,Helvetica,sans-serif; font-size: xx-large;}
#story-head h2{font-family :Arial,Helvetica,sans-serif; font-size: small; color:#000000;}
#story-head h3{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
#story-head h4{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
#story{font-family :Arial,Helvetica,sans-serif; font-size:xx-small;}
#Text-TextSubhed BoldCond PoynterAgateZero h3{color:#444444;font-family :Arial,Helvetica,sans-serif; font-size:small;}
.p260x p{font-family :Arial,Helvetica,serif; font-size:x-small;font-style:italic;}
.p260x h6{color:#777777;font-family :Arial,Helvetica,sans-serif; font-size:xx-small;}
'''
def parse_index(self): def parse_index(self):
soup = self.index_to_soup('http://www.chron.com/news/') soup = self.index_to_soup('http://www.chron.com/news/')
@ -64,3 +88,6 @@ class HoustonChronicle(BasicNewsRecipe):
feeds.append((current_section, current_articles)) feeds.append((current_section, current_articles))
return feeds return feeds

View File

@ -7,10 +7,11 @@ sfgate.com
''' '''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
import re
class SanFranciscoChronicle(BasicNewsRecipe): class SanFranciscoChronicle(BasicNewsRecipe):
title = u'San Francisco Chronicle' title = u'San Francisco Chronicle'
__author__ = u'Darko Miletic' __author__ = u'Darko Miletic and Sujata Raman'
description = u'San Francisco news' description = u'San Francisco news'
language = 'en' language = 'en'
@ -19,13 +20,56 @@ class SanFranciscoChronicle(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
remove_tags_before = {'class':'articleheadings'}
remove_tags_after = dict(name='div', attrs={'id':'articlecontent' })
remove_tags_before = {'id':'printheader'}
remove_tags = [ remove_tags = [
dict(name='div', attrs={'class':'tools tools_top'}) dict(name='div',attrs={'id':'printheader'})
,dict(name='div', attrs={'id':'articlebox' }) ,dict(name='a', attrs={'href':re.compile('http://ads\.pheedo\.com.*')})
,dict(name='div',attrs={'id':'footer'})
] ]
extra_css = '''
h1{font-family :Arial,Helvetica,sans-serif; font-size:large;}
h2{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#666666;}
h3{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;}
h4{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
p{font-family :Arial,Helvetica,sans-serif; font-size:x-small;}
.byline{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
.date{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
.dtlcomment{font-style:italic;}
.georgia h3{font-family :Arial,Helvetica,sans-serif; font-size:x-small; color:#000000;}
'''
feeds = [ feeds = [
(u'Top News Stories', u'http://www.sfgate.com/rss/feeds/news.xml') (u'Top News Stories', u'http://www.sfgate.com/rss/feeds/news.xml')
] ]
def print_version(self, url):
    """Return the printer-friendly variant of an article URL.

    The ``type=printable`` query parameter is appended to ``url``.

    :param url: article URL as a string.
    :return: ``url`` with ``type=printable`` added to its query string.
    """
    # Use '&' only when a query string already exists; otherwise the
    # parameter has to start the query with '?'.
    separator = "&" if "?" in url else "?"
    return url + separator + "type=printable"
def get_article_url(self, article):
    """Build the sfgate.com article URL for one feed entry.

    :param article: mapping for a feed entry; this method reads
        ``article['title_detail']['value']`` and ``article['guid']``.
    :return: ``http://www.sfgate.com/cgi-bin/article.cgi?f=<guid>``, or an
        empty string when the title contains "Presented By:" or the entry
        carries no guid.
    """
    title = (article.get('title_detail') or {}).get('value') or ''
    # Entries titled "Presented By:" get an empty URL (presumably these are
    # sponsored items -- confirm against the feed). The title is tested
    # directly rather than through str(), which can fail on non-ASCII
    # unicode titles under Python 2.
    if "Presented By:" in title:
        return ''
    guid = article.get('guid', None)
    if not guid:
        # Without a guid there is nothing to build the URL from; the
        # unguarded concatenation would raise TypeError on None.
        return ''
    return "http://www.sfgate.com/cgi-bin/article.cgi?f=" + guid

View File

@ -4,8 +4,7 @@ __license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>' __copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import os import os, shutil, time
import shutil
from calibre.devices.errors import PathError from calibre.devices.errors import PathError
@ -55,6 +54,7 @@ class CLI(object):
shutil.copyfileobj(infile, dest) shutil.copyfileobj(infile, dest)
except IOError: except IOError:
print 'WARNING: First attempt to send file to device failed' print 'WARNING: First attempt to send file to device failed'
time.sleep(0.2)
infile.seek(0) infile.seek(0)
dest.seek(0) dest.seek(0)
dest.truncate() dest.truncate()

View File

@ -131,9 +131,9 @@ class RtfTokenParser():
if isString(self.tokens[i].name, "\\'"): if isString(self.tokens[i].name, "\\'"):
i = i + 1 i = i + 1
if not isinstance(self.tokens[i], tokenData): if not isinstance(self.tokens[i], tokenData):
raise BaseException('Error: token8bitChar without data.') raise Exception('Error: token8bitChar without data.')
if len(self.tokens[i].data) < 2: if len(self.tokens[i].data) < 2:
raise BaseException('Error: token8bitChar without data.') raise Exception('Error: token8bitChar without data.')
newTokens.append(token8bitChar(self.tokens[i].data[0:2])) newTokens.append(token8bitChar(self.tokens[i].data[0:2]))
if len(self.tokens[i].data) > 2: if len(self.tokens[i].data) > 2:
newTokens.append(tokenData(self.tokens[i].data[2:])) newTokens.append(tokenData(self.tokens[i].data[2:]))
@ -195,7 +195,7 @@ class RtfTokenParser():
i = i + 1 i = i + 1
j = j + 1 j = j + 1
continue continue
raise BaseException('Error: incorect utf replacement.') raise Exception('Error: incorect utf replacement.')
#calibre rtf2xml does not support utfreplace #calibre rtf2xml does not support utfreplace
replace = [] replace = []
@ -248,7 +248,7 @@ class RtfTokenizer():
if isChar(self.rtfData[i], '\\'): if isChar(self.rtfData[i], '\\'):
if i + 1 >= len(self.rtfData): if i + 1 >= len(self.rtfData):
raise BaseException('Error: Control character found at the end of the document.') raise Exception('Error: Control character found at the end of the document.')
if lastDataStart > -1: if lastDataStart > -1:
self.tokens.append(tokenData(self.rtfData[lastDataStart : i])) self.tokens.append(tokenData(self.rtfData[lastDataStart : i]))
@ -269,7 +269,7 @@ class RtfTokenizer():
i = i + 1 i = i + 1
if not consumed: if not consumed:
raise BaseException('Error (at:%d): Control Word without end.'%(tokenStart)) raise Exception('Error (at:%d): Control Word without end.'%(tokenStart))
#we have numeric argument before delimiter #we have numeric argument before delimiter
if isChar(self.rtfData[i], '-') or isDigit(self.rtfData[i]): if isChar(self.rtfData[i], '-') or isDigit(self.rtfData[i]):
@ -283,10 +283,10 @@ class RtfTokenizer():
l = l + 1 l = l + 1
i = i + 1 i = i + 1
if l > 10 : if l > 10 :
raise BaseException('Error (at:%d): Too many digits in control word numeric argument.'%[tokenStart]) raise Exception('Error (at:%d): Too many digits in control word numeric argument.'%[tokenStart])
if not consumed: if not consumed:
raise BaseException('Error (at:%d): Control Word without numeric argument end.'%[tokenStart]) raise Exception('Error (at:%d): Control Word without numeric argument end.'%[tokenStart])
separator = '' separator = ''
if isChar(self.rtfData[i], ' '): if isChar(self.rtfData[i], ' '):

View File

@ -1,10 +1,11 @@
from calibre.ebooks.metadata import authors_to_string
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import os, textwrap, traceback, re, shutil import os, textwrap, traceback, re, shutil
from operator import attrgetter from operator import attrgetter
from math import cos, sin, pi from math import cos, sin, pi
from contextlib import closing
from PyQt4.QtGui import QTableView, QAbstractItemView, QColor, \ from PyQt4.QtGui import QTableView, QAbstractItemView, QColor, \
QItemDelegate, QPainterPath, QLinearGradient, QBrush, \ QItemDelegate, QPainterPath, QLinearGradient, QBrush, \
QPen, QStyle, QPainter, \ QPen, QStyle, QPainter, \
@ -22,7 +23,8 @@ from calibre.gui2 import NONE, TableView, qstring_to_unicode, config, \
from calibre.gui2.widgets import EnLineEdit, TagsLineEdit from calibre.gui2.widgets import EnLineEdit, TagsLineEdit
from calibre.utils.search_query_parser import SearchQueryParser from calibre.utils.search_query_parser import SearchQueryParser
from calibre.ebooks.metadata.meta import set_metadata as _set_metadata from calibre.ebooks.metadata.meta import set_metadata as _set_metadata
from calibre.ebooks.metadata import string_to_authors, fmt_sidx from calibre.ebooks.metadata import string_to_authors, fmt_sidx, \
authors_to_string
from calibre.utils.config import tweaks from calibre.utils.config import tweaks
from calibre.utils.date import dt_factory, qt_to_dt, isoformat from calibre.utils.date import dt_factory, qt_to_dt, isoformat
@ -469,7 +471,8 @@ class BooksModel(QAbstractTableModel):
break break
if format is not None: if format is not None:
pt = PersistentTemporaryFile(suffix='.'+format) pt = PersistentTemporaryFile(suffix='.'+format)
src = self.db.format(id, format, index_is_id=True, as_file=True) with closing(self.db.format(id, format, index_is_id=True,
as_file=True)) as src:
shutil.copyfileobj(src, pt) shutil.copyfileobj(src, pt)
pt.flush() pt.flush()
pt.seek(0) pt.seek(0)
@ -505,8 +508,10 @@ class BooksModel(QAbstractTableModel):
break break
if format is not None: if format is not None:
pt = PersistentTemporaryFile(suffix='.'+format) pt = PersistentTemporaryFile(suffix='.'+format)
pt.write(self.db.format(row, format)) with closing(self.db.format(row, format, as_file=True)) as src:
shutil.copyfileobj(src, pt)
pt.flush() pt.flush()
pt.seek(0)
if set_metadata: if set_metadata:
_set_metadata(pt, self.db.get_metadata(row, get_cover=True), _set_metadata(pt, self.db.get_metadata(row, get_cover=True),
format) format)

View File

@ -112,7 +112,7 @@ class SearchQueryParser(object):
And << (Group( And << (Group(
Not + Suppress(Keyword("and", caseless=True)) + And Not + Suppress(Keyword("and", caseless=True)) + And
).setResultsName("and") | Group( ).setResultsName("and") | Group(
Not + OneOrMore(~oneOf("and or") + And) Not + OneOrMore(~oneOf("and or", caseless=True) + And)
).setResultsName("and") | Not) ).setResultsName("and") | Not)
Or << (Group( Or << (Group(
@ -463,14 +463,14 @@ class Tester(SearchQueryParser):
tests = { tests = {
'Dysfunction' : set([348]), 'Dysfunction' : set([348]),
'title:Dysfunction' : set([348]), 'title:Dysfunction' : set([348]),
'title:Dysfunction or author:Laurie': set([348, 444]), 'title:Dysfunction OR author:Laurie': set([348, 444]),
'(tag:txt or tag:pdf)': set([33, 258, 354, 305, 242, 51, 55, 56, 154]), '(tag:txt or tag:pdf)': set([33, 258, 354, 305, 242, 51, 55, 56, 154]),
'(tag:txt or tag:pdf) and author:Tolstoy': set([55, 56]), '(tag:txt OR tag:pdf) and author:Tolstoy': set([55, 56]),
'Tolstoy txt': set([55, 56]), 'Tolstoy txt': set([55, 56]),
'Hamilton Amsterdam' : set([]), 'Hamilton Amsterdam' : set([]),
u'Beär' : set([91]), u'Beär' : set([91]),
'dysfunc or tolstoy': set([348, 55, 56]), 'dysfunc or tolstoy': set([348, 55, 56]),
'tag:txt and not tolstoy': set([33, 258, 354, 305, 242, 154]), 'tag:txt AND NOT tolstoy': set([33, 258, 354, 305, 242, 154]),
'not tag:lrf' : set([305]), 'not tag:lrf' : set([305]),
'london:thames': set([13]), 'london:thames': set([13]),
'publisher:london:thames': set([13]), 'publisher:london:thames': set([13]),