My adventures with eBooks: A calibre recipe for the Cynthiana Democrat

…another poorly written recipe–made through copying, pasting, and a whole lot of trial and error–this time for the Cynthiana Democrat. (It doesn’t really work anymore…)


'''
Cynthiana Democrat Calibre Recipe
'''

# Import the regular expressions module.
import re, string, time

# Import the BasicNewsRecipe class which this class extends.
from calibre.web.feeds.recipes import BasicNewsRecipe

from calibre import strftime
from datetime import timedelta, date
from time import sleep
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, BeautifulStoneSoup

class AdvancedUserRecipe1371040942(BasicNewsRecipe):
    """Calibre recipe that builds an ebook of The Cynthiana Democrat.

    The recipe is purely declarative: it only sets class attributes that
    configure the base class (feeds to fetch, article limits, clean-up and
    metadata). No methods are overridden; an optional cover-fetching method
    is kept, disabled, at the bottom of the class.
    """

    title = u'The Cynthiana Democrat'

    # A brief description for the ebook.
    description = u'Cynthiana Democrat web site ebook created using rss feeds.'

    # The max number of articles which may be downloaded from each feed.
    max_articles_per_feed = 100

    # NOTE(review): 'filterDuplicates' does not look like an attribute the
    # base class reads -- 'ignore_duplicate_articles' below is presumably what
    # actually de-duplicates. Kept so the class is unchanged; confirm before
    # removing.
    filterDuplicates = True
    ignore_duplicate_articles = {'title', 'url'}
    scale_news_images_to_device = True

    # Do not follow links found inside the downloaded articles.
    recursions = 0

    # The max age of articles which may be downloaded from each feed. This is
    # specified in days - note fractions of days are allowed, Eg. 2.5 (2 and a
    # half days). Currently set to a full week. A short window (e.g. 1.5 days,
    # the last 36 hours) keeps only fresh 'news' but is not so good for the
    # blogs, technology, magazine, etc., and sports feeds. Watch out: ebook
    # creation time increases with this value. Setting this to 30 will get
    # everything (AFAICT) as long as max_articles_per_feed remains set high.
    oldest_article = 7

    # Number of simultaneous downloads. Speeds things up from the default of 5.
    # If you have a lot of feeds and/or have increased oldest_article above 2
    # then you may wish to try increasing simultaneous_downloads to 25-30,
    # Or, of course, if you are in a hurry. [I've not tried beyond 20.]
    simultaneous_downloads = 20

    # Timeout for fetching files from the server in seconds. The default of
    # 120 seconds, seems somewhat excessive.
    timeout = 30

    # The format string for the date shown on the ebook's first page.
    # List of all values: http://docs.python.org/library/time.html
    # Default in news.py has a leading space so that's mirrored here.
    # As with 'feeds' select/de-select by adding/removing the initial '#',
    # only one timefmt should be selected, here's a few to choose from.
    #
    # timefmt = ' [%a, %d %b %Y]'           # [Fri, 14 Nov 2011] (Calibre default)
    # timefmt = ' [%a, %d %b %Y %H:%M]'     # [Fri, 14 Nov 2011 18:30]
    timefmt = ' [%a, %d %b %Y %I:%M %p]'    # [Fri, 14 Nov 2011 06:30 PM]
    # timefmt = ' [%d %b %Y]'               # [14 Nov 2011]
    # timefmt = ' [%d %b %Y %H:%M]'         # [14 Nov 2011 18:30]
    # timefmt = ' [%Y-%m-%d]'               # [2011-11-14]
    # timefmt = ' [%Y-%m-%d-%H-%M]'         # [2011-11-14-18-30]

    # Let calibre strip page clutter automatically, but keep the elements
    # matched by this XPath (article content and slide-show wrapper classes).
    auto_cleanup = True
    auto_cleanup_keep = '//*[@class="content"]|//*[@class="slide-wrap"]'
    # Earlier, more specific variant kept for reference:
    # auto_cleanup_keep = '//div[@class="subsection-photo"]|//div[@class="top-news-pic"]|//div[@class="slide-wrap"]|//div[@class="source"]|//div[@class="news-sidebar"]'

    # Author of this recipe.
    __author__ = 'kg4vma'

    # Specify US English as the language of the RSS feeds.
    language = 'en_US'

    # Set tags.
    tags = 'news, sport, blog'

    # Set publisher and publication type.
    publisher = 'The Cynthiana Democrat'
    publication_type = 'newspaper'

    # Disable stylesheets from site.
    no_stylesheets = True
    masthead_url = 'http://www.cynthianademocrat.com/sites/www.cynthianademocrat.com/files/cynthianalogov2.png'

    # Specifies an override encoding for sites that have an incorrect charset
    # specified. Default of 'None' says to auto-detect.
    encoding = None

    # Sets whether a feed has full articles embedded in it.
    use_embedded_content = False

    # Removes empty feeds - why keep them!?
    remove_empty_feeds = True

    # (section title, feed URL) pairs, in the order they appear in the ebook.
    feeds = [
        (u'News', u'http://www.cynthianademocrat.com/todaysnews/rss.xml'),
        (u'Did you Know', u'http://www.cynthianademocrat.com/rss.xml'),
        (u'Features', u'http://www.cynthianademocrat.com/features/rss.xml'),
        (u'Obituaries', u'http://www.legacy.com/services/obitrss.asp?Source=cynthianademocrat'),
        (u'Opinion', u'http://www.cynthianademocrat.com/todaysopinions/rss.xml'),
        (u'Business', u'http://www.cynthianademocrat.com/news/business/rss.xml'),
        (u'Sports', u'http://www.cynthianademocrat.com/sports/rss.xml'),
    ]

    # Disabled: fetch a front-page image as the cover, trying today's image
    # first and then stepping back one day at a time for up to a week.
    # Indentation below was reconstructed; verify before re-enabling.
    #
    # cover_tag = 'KY_LHL'
    # def get_cover_url(self):
    #     from datetime import timedelta, date
    #     cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.cover_tag+'.jpg'
    #     br = BasicNewsRecipe.get_browser(self)
    #     daysback=1
    #     try:
    #         br.open(cover)
    #     except:
    #         while daysback<7:
    #             cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str((date.today() - timedelta(days=daysback)).day)+'/lg/'+self.cover_tag+'.jpg'
    #             br = BasicNewsRecipe.get_browser(self)
    #             try:
    #                 br.open(cover)
    #             except:
    #                 daysback = daysback+1
    #                 continue
    #             break
    #         if daysback==7:
    #             self.log("\nCover unavailable")
    #             cover = None
    #     return cover

One thought on “My adventures with eBooks: A calibre recipe for the Cynthiana Democrat”

Comments are closed.