Sync to trunk.

This commit is contained in:
John Schember 2009-08-31 20:21:35 -04:00
commit b8166db5cc
4 changed files with 273 additions and 256 deletions

View File

@ -192,7 +192,7 @@ class PMLMLizer(object):
text += '\\w'
width = elem.get('width')
if width:
text += '="%s%"' % width
text += '="%s%%"' % width
else:
text += '="50%"'

View File

@ -17,13 +17,8 @@
<iconset resource="../images.qrc">
<normaloff>:/images/edit_input.svg</normaloff>:/images/edit_input.svg</iconset>
</property>
<layout class="QGridLayout" name="gridLayout_2">
<item row="0" column="0">
<widget class="QSplitter" name="splitter">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<widget class="QWidget" name="layoutWidget">
<layout class="QVBoxLayout" name="verticalLayout">
<item>
<layout class="QVBoxLayout">
<property name="spacing">
<number>6</number>
@ -50,6 +45,13 @@
</property>
</widget>
</item>
<item row="1" column="1">
<widget class="QCheckBox" name="auto_author_sort">
<property name="text">
<string>A&amp;utomatically set author sort</string>
</property>
</widget>
</item>
<item row="2" column="0">
<widget class="QLabel" name="label_8">
<property name="text">
@ -115,6 +117,13 @@
</property>
</widget>
</item>
<item row="4" column="1">
<widget class="EnComboBox" name="publisher">
<property name="editable">
<bool>true</bool>
</property>
</widget>
</item>
<item row="5" column="0">
<widget class="QLabel" name="label_4">
<property name="text">
@ -201,9 +210,6 @@
</property>
</widget>
</item>
<item row="8" column="1">
<widget class="QComboBox" name="remove_format"/>
</item>
<item row="8" column="0">
<widget class="QLabel" name="label_5">
<property name="text">
@ -214,12 +220,8 @@
</property>
</widget>
</item>
<item row="1" column="1">
<widget class="QCheckBox" name="auto_author_sort">
<property name="text">
<string>A&amp;utomatically set author sort</string>
</property>
</widget>
<item row="8" column="1">
<widget class="QComboBox" name="remove_format"/>
</item>
<item row="0" column="1">
<widget class="EnComboBox" name="authors">
@ -228,21 +230,12 @@
</property>
</widget>
</item>
<item row="4" column="1">
<widget class="EnComboBox" name="publisher">
<property name="editable">
<bool>true</bool>
</property>
</widget>
</item>
</layout>
</widget>
</item>
</layout>
</widget>
</widget>
</item>
<item row="1" column="0">
<item>
<widget class="QDialogButtonBox" name="button_box">
<property name="orientation">
<enum>Qt::Horizontal</enum>

View File

@ -41,7 +41,7 @@ class SearchBox2(QComboBox):
self.connect(self.line_edit, SIGNAL('key_pressed(PyQt_PyObject)'),
self.key_pressed, Qt.DirectConnection)
self.connect(self.line_edit, SIGNAL('mouse_released(PyQt_PyObject)'),
self.key_pressed, Qt.DirectConnection)
self.mouse_released, Qt.DirectConnection)
self.setEditable(True)
self.help_state = True
self.as_you_type = True

View File

@ -3,7 +3,6 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
Profile to download CNN
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class CNN(BasicNewsRecipe):
@ -11,28 +10,53 @@ class CNN(BasicNewsRecipe):
title = 'CNN'
description = 'Global news'
timefmt = ' [%d %b %Y]'
__author__ = 'Kovid Goyal'
__author__ = 'Kovid Goyal and Sujata Raman'
language = _('English')
no_stylesheets = True
use_embedded_content = False
oldest_article = 15
preprocess_regexps = [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in [
(r'<head>.*?<title', lambda match : '<head><title'),
(r'</title>.*?</head>', lambda match : '</title></head>'),
(r'<body.*?<\!\-\-Article.*?>', lambda match : '<body>'),
(r'<\!\-\-Article End\-\->.*?</body>', lambda match : '</body>'),
(r'(</h\d>)<ul>.*?</ul>', lambda match : match.group(1)), # drop story highlights
(r'<h2>(.*?)</h2><h1>(.*?)</h1>', lambda match : '<h1>' + match.group(1) + '</h1><h2>' + match.group(2) + '</h2>'), # sports uses h2 for main title and h1 for subtitle (???) switch these around
(r'<span class="cnnEmbeddedMosLnk">.*?</span>', lambda match : ''), # drop 'watch more' links
(r'(<div class="cnnstorybody">).*?(<p)', lambda match : match.group(1) + match.group(2)), # drop sports photos
(r'</?table.*?>|</?tr.*?>|</?td.*?>', lambda match : ''), # drop table formatting
(r'<div class="cnnendofstorycontent".*?>.*?</div>', lambda match : ''), # drop extra business links
(r'<a href="#TOP">.*?</a>', lambda match : '') # drop business 'to top' link
] ]
extra_css = '''
h1{font-family :Arial,Helvetica,sans-serif; font-size:large}
h2{font-family :Arial,Helvetica,sans-serif; font-size:x-small}
.cnnTxtCmpnt{font-family :Arial,Helvetica,sans-serif; font-size:x-small}
.cnnTMcontent{font-family :Arial,Helvetica,sans-serif; font-size:xx-small;color:#575757}
.storytext{font-family :Arial,Helvetica,sans-serif; font-size:x-small}
.storybyline{font-family :Arial,Helvetica,sans-serif; font-size:xx-small; color:#575757}
.credit{font-family :Arial,Helvetica,sans-serif; font-size:xx-small; color:#575757}
.storyBrandingBanner{font-family :Arial,Helvetica,sans-serif; font-size:xx-small; color:#575757}
.storytimestamp{font-family :Arial,Helvetica,sans-serif; font-size:xx-small; color:#575757}
.timestamp{font-family :Arial,Helvetica,sans-serif; font-size:xx-small; color:#575757}
.subhead p{font-family :Arial,Helvetica,sans-serif; font-size:xx-small;}
.cnnStoryContent{font-family :Arial,Helvetica,sans-serif; font-size:xx-small}
.cnnContentContainer{font-family :Arial,Helvetica,sans-serif; font-size:xx-small}
.col1{font-family :Arial,Helvetica,sans-serif; font-size:x-small; color:#666666;}
.col3{color:#333333; font-family :Arial,Helvetica,sans-serif; font-size:x-small;font-weight:bold;}
.cnnInlineT1Caption{font-family :Arial,Helvetica,sans-serif; font-size:xx-small;font-weight:bold;}
.cnnInlineT1Credit{font-family :Arial,Helvetica,sans-serif; font-size:xx-small;color:#333333;}
.col10{color:#5A637E}
.cnnTimeStamp{font-family :Arial,Helvetica,sans-serif; font-size:xx-small;color:#333333;}
.galleryhedDek{font-family :Arial,Helvetica,sans-serif; font-size:xx-small;color:#575757;}
.galleryWidgetHeader{font-family :Arial,Helvetica,sans-serif; font-size:xx-small;color:#004276;}
.article-content{font-family :Arial,Helvetica,sans-serif; font-size:xx-small}
.cnnRecapStory{font-family :Arial,Helvetica,sans-serif; font-size:xx-small}
'''
keep_only_tags = [
dict(name='div', attrs={'class':["cnnWCBoxContent","cnnContent","cnnMainBodySecs"]}),
dict(name='div', attrs={'id':["contentBody","content"]}),
dict(name='td', attrs={'id':["cnnRecapStory"]}),]
remove_tags = [
dict(name='div', attrs={'class':["storyLink","article-tools clearfix","widget video related-video vList","cnnFooterBox","scrollArrows","boxHeading","cnnInlineMailbag","mainCol_lastBlock","cnn_bookmarks","cnnFooterBox","cnnEndOfStory","cnnInlineSL","cnnStoryHighlights","cnnFooterClick","cnnSnapShotHeader","cnnStoryToolsFooter","cnnWsnr","cnnUGCBox","cnnTopNewsModule","cnnStoryElementBox","cnnStoryPhotoBoxNavigation"]}),
dict(name='span', attrs={'class':["cnnEmbeddedMosLnk"]}),
dict(name='div', attrs={'id':["cnnIncldHlder","articleCommentsContainer","featuredContent","superstarsWidget","shareMenuContainer","rssMenuContainer","storyBrandingBanner","cnnRightCol","siteFeatures","quigo628","rightColumn","clickIncludeBox","cnnHeaderRightCol","cnnSCFontLabel","cnnSnapShotBottomRight","cnnSCFontButtons","rightColumn"]}),
dict(name='p', attrs={'class':["cnnTopics"]}),
dict(name='td', attrs={'class':["cnnRightRail"]}),
dict(name='table', attrs={'class':["cnnTMbox"]}),
dict(name='ul', attrs={'id':["cnnTopNav","cnnBotNav","cnnSBNav"]}),
]
def print_version(self, url):
return 'http://www.printthis.clickability.com/pt/printThis?clickMap=printThis&fb=Y&url=' + url
# def print_version(self, url):
# return 'http://www.printthis.clickability.com/pt/printThis?clickMap=printThis&fb=Y&url=' + url
feeds = [
('Top News', 'http://rss.cnn.com/rss/cnn_topstories.rss'),