diff --git a/src/libprs500/ebooks/lrf/web/profiles/ap.py b/src/libprs500/ebooks/lrf/web/profiles/ap.py new file mode 100644 index 0000000000..d581767253 --- /dev/null +++ b/src/libprs500/ebooks/lrf/web/profiles/ap.py @@ -0,0 +1,38 @@ +import re +from libprs500.ebooks.lrf.web.profiles import DefaultProfile + + +class AssociatedPress(DefaultProfile): + + title = 'Associated Press' + max_recursions = 2 + max_articles_per_feed = 15 + html2lrf_options = ['--force-page-break-before-tag="chapter"'] + + + preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in +[ + (r'
.*?' , lambda match : ''), + (r'.*?', lambda match : ''), + (r'.*?', lambda match : ''), + (r'', lambda match : '
'), + (r'
', lambda match : '
'), + (r'Learn more about our Privacy Policy.*?', lambda match : '