00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
# Module metadata: title, author, purpose text and version of this grabber API.
__title__ = "tedtalks_api - Simple-to-use Python interface to the TedTalks videos (http://www.ted.com)"
__author__ = "R.D. Vaughan"
__purpose__ = '''
This python script is intended to perform a variety of utility functions to search and access text
meta data, video and image URLs from the TedTalks Web site. These routines process videos
provided by TedTalks (http://www.ted.com). The specific TedTalks RSS feeds that are processed are controled through a user XML preference file usually found at
"~/.mythtv/MythNetvision/userGrabberPrefs/tedtalks.xml"
'''

__version__ = "v0.1.0"
00024
00025
00026 import os, struct, sys, re, time, datetime, shutil, urllib
00027 from string import capitalize
00028 import logging
00029 from threading import Thread
00030 from copy import deepcopy
00031 from operator import itemgetter, attrgetter
00032
00033 from tedtalks_exceptions import (TedTalksUrlError, TedTalksHttpError, TedTalksRssError, TedTalksVideoNotFound, TedTalksConfigFileError, TedTalksUrlDownloadError)
00034
class OutStreamEncoder(object):
    """File-like wrapper that encodes unicode text before writing it out.

    Byte strings are passed through untouched, and every attribute other
    than ``write`` is looked up on the wrapped stream.
    """

    def __init__(self, outstream, encoding=None):
        """Store the target stream and the encoding applied to unicode text.

        outstream: object providing a ``write`` method.
        encoding:  codec name; when empty or None the filesystem encoding
                   reported by ``sys.getfilesystemencoding()`` is used.
        """
        self.out = outstream
        self.encoding = encoding or sys.getfilesystemencoding()

    def write(self, obj):
        """Write ``obj`` to the wrapped stream, encoding it first if unicode.

        An IOError raised by the underlying ``write`` is silently ignored.
        """
        payload = obj.encode(self.encoding) if isinstance(obj, unicode) else obj
        try:
            self.out.write(payload)
        except IOError:
            pass

    def __getattr__(self, attr):
        """Forward attributes not found on the wrapper to the wrapped stream."""
        return getattr(self.out, attr)
# Route everything written to stdout/stderr through UTF-8 encoding wrappers.
sys.stdout, sys.stderr = (
    OutStreamEncoder(sys.stdout, 'utf8'),
    OutStreamEncoder(sys.stderr, 'utf8'),
)
00062
00063
# StringIO and lxml are required; report the failure and exit(1) if either
# import raises.
try:
    from StringIO import StringIO
    from lxml import etree
except Exception as import_error:
    sys.stderr.write(
        u'\n! Error - Importing the "lxml" and "StringIO" python libraries failed on error(%s)\n'
        % import_error
    )
    sys.exit(1)
00070
00071
00072
00073
00074
# Dotted libxml version string (e.g. "2.9.10"), built from the integer
# components in etree.LIBXML_VERSION. Kept as the module-level name
# ``version`` and used in the error message below.
version = '.'.join(str(digit) for digit in etree.LIBXML_VERSION)

# Compare the numeric components, not the dotted strings: as strings,
# "2.10.0" sorts before "2.7.2", which would wrongly reject newer libxml
# releases.
if tuple(etree.LIBXML_VERSION) < (2, 7, 2):
    sys.stderr.write(u'''
! Error - The installed version of the "lxml" python library "libxml" version is too old.
At least "libxml" version 2.7.2 must be installed. Your version is (%s).
''' % version)
    sys.exit(1)
00085
00086
00087
# Import the python mashups support classes; report the failure and exit(1)
# if the import raises.
try:
    import nv_python_libs.mashups.mashups_api as mashups_api
except Exception as import_error:
    sys.stderr.write('''
The subdirectory "nv_python_libs/mashups" containing the modules mashups_api and
mashups_exceptions.py (v0.1.0 or greater),
They should have been included with the distribution of tedtalks.py.
Error(%s)
''' % import_error)
    sys.exit(1)

# NOTE(review): this is a plain string comparison of the version text, not a
# numeric one - confirm the format of mashups_api.__version__ before relying
# on it.
if mashups_api.__version__ < '0.1.0':
    sys.stderr.write(
        "\n! Error: Your current installed mashups_api.py version is (%s)\n"
        "You must at least have version (0.1.0) or higher.\n"
        % mashups_api.__version__
    )
    sys.exit(1)
00103
00104
00105 class Videos(object):
00106 """Main interface to http://www.ted.com
00107 This is done to support a common naming framework for all python Netvision plugins no matter their
00108 site target.
00109
00110 Supports search methods
    The apikey is not required to access http://www.ted.com
00112 """
def __init__(self,
            apikey,
            mythtv = True,
            interactive = False,
            select_first = False,
            debug = False,
            custom_ui = None,
            language = None,
            search_all_languages = False,
            ):
    """Set up the grabber configuration, channel details and mashups helper.

    apikey (str/unicode):
        Target site API key. Stored in the configuration only when it is
        not None.

    mythtv (True/False):
        Accepted for a common grabber signature; not used by this method.

    interactive (True/False):
        Stored in the configuration as 'interactive'.

    select_first (True/False):
        Stored in the configuration as 'select_first'.

    debug (True/False):
        Shows verbose debugging information. Stored as 'debug_enabled'
        and also set on the shared ``common`` object.

    custom_ui:
        Stored in the configuration as 'custom_ui'.

    language (2 character language abbreviation):
        When given, stored in the configuration as 'language' and passed
        on to the mashups helper. When omitted no language is stored.

    search_all_languages (True/False):
        Stored in the configuration as 'search_all_languages'.
    """
    self.config = {}

    # Only record a key when one was actually supplied.
    if apikey is not None:
        self.config['apikey'] = apikey

    self.config['debug_enabled'] = debug
    # ``common`` is a module-level object that is not defined in this
    # method; it must have been set on the module before instantiation.
    self.common = common
    self.common.debug = debug

    self.log_name = u'TedTalks_Grabber'
    self.common.logger = self.common.initLogger(path=sys.stderr, log_name=self.log_name)
    self.logger = self.common.logger

    self.config['custom_ui'] = custom_ui
    self.config['interactive'] = interactive
    self.config['select_first'] = select_first
    self.config['search_all_languages'] = search_all_languages

    # Store the language so that it can be handed to the mashups helper
    # below; previously the key was read without ever being set, which
    # raised KeyError whenever a language was passed.
    if language:
        self.config['language'] = language

    # Message templates used when raising or reporting TedTalks errors.
    self.error_messages = {
        'TedTalksUrlError': u"! Error: The URL (%s) cause the exception error (%s)\n",
        'TedTalksHttpError': u"! Error: An HTTP communications error with the TedTalks was raised (%s)\n",
        'TedTalksRssError': u"! Error: Invalid RSS meta data\nwas received from the TedTalks error (%s). Skipping item.\n",
        'TedTalksVideoNotFound': u"! Error: Video search with the TedTalks did not return any results (%s)\n",
        'TedTalksConfigFileError': u"! Error: tedtalks_config.xml file missing\nit should be located in and named as (%s).\n",
        'TedTalksUrlDownloadError': u"! Error: Downloading a RSS feed or Web page (%s).\n",
    }

    # Channel details; the three counters are updated by searchForVideos().
    self.channel = {
        'channel_title': u'TedTalks',
        'channel_link': u'http://www.ted.com',
        'channel_description': u"TED is a small nonprofit devoted to Ideas Worth Spreading.",
        'channel_numresults': 0,
        'channel_returned': 1,
        u'channel_startindex': 0,
    }

    self.channel_icon = u'%SHAREDIR%/mythnetvision/icons/tedtalks.png'

    self.config[u'image_extentions'] = ["png", "jpg", "bmp"]

    # Initialise the mashups helper so that it shares this grabber's common
    # object, channel details, debug setting and preference loader.
    mashups_api.common = self.common
    self.mashups_api = mashups_api.Videos(u'')
    self.mashups_api.channel = self.channel
    if language:
        self.mashups_api.config['language'] = self.config['language']
    self.mashups_api.config['debug_enabled'] = self.config['debug_enabled']
    self.mashups_api.getUserPreferences = self.getUserPreferences
00195
00196
00197
00198
00199
00200
00201
00202
def getTedTalksConfig(self):
    ''' Load the MNV TedTalks grabber "tedtalks_config.xml" configuration file
    into self.tedtalks_config.
    Raises TedTalksConfigFileError when the file does not exist and
    TedTalksUrlError when it cannot be parsed.
    return nothing
    '''
    # baseProcessingDir is a module-level name that is not defined in this method.
    configPath = u'%s/nv_python_libs/configs/XML/tedtalks_config.xml' % (baseProcessingDir, )
    configUrl = u'file://' + configPath

    if not os.path.isfile(configPath):
        raise TedTalksConfigFileError(self.error_messages['TedTalksConfigFileError'] % (configPath, ))

    if self.config['debug_enabled']:
        print(configUrl)
        print('')

    try:
        self.tedtalks_config = etree.parse(configUrl)
    except Exception as errormsg:
        raise TedTalksUrlError(self.error_messages['TedTalksUrlError'] % (configUrl, errormsg))
    return
00220
00221
00222
def getUserPreferences(self):
    '''Load tedtalks_config.xml and then the user preference file it names.
    A leading "~" in the configured path is replaced with the user's home
    directory. If the preference file exists it is parsed into
    self.userPrefs; either way updateTedTalks() is then called to create or
    refresh it.
    return nothing
    '''
    self.getTedTalksConfig()

    # Expand a leading "~" in place so later readers of the config element
    # see the absolute path.
    prefElement = self.tedtalks_config.find('userPreferenceFile')
    configuredPath = prefElement.text
    if configuredPath[0] == '~':
        prefElement.text = u"%s%s" % (os.path.expanduser(u"~"), configuredPath[1:])

    create = not os.path.isfile(prefElement.text)
    if not create:
        url = u'file://%s' % (prefElement.text, )
        if self.config['debug_enabled']:
            print(url)
            print('')
        try:
            self.userPrefs = etree.parse(url)
        except Exception as errormsg:
            raise TedTalksUrlError(self.error_messages['TedTalksUrlError'] % (url, errormsg))

    # Create the preference file when missing, otherwise merge and rewrite it.
    self.updateTedTalks(create)
    return
00253
00254
def updateTedTalks(self, create=False):
    ''' Create or update the tedtalks.xml user preferences file.

    The distributed default preference file is parsed first. Unless
    `create` is true, the "enabled" and "parameter" attributes of the
    user's current sourceURL elements are copied onto the matching
    defaults. The result is written to the user preference file, which is
    then re-parsed into self.userPrefs and shared with the mashups helper.

    Raises Exception when the default file is missing and TedTalksUrlError
    when either XML file cannot be parsed.
    return nothing
    '''
    # baseProcessingDir is a module-level name that is not defined in this method.
    userDefaultFile = u'%s/nv_python_libs/configs/XML/defaultUserPrefs/tedtalks.xml' % (baseProcessingDir, )
    if not os.path.isfile(userDefaultFile):
        # Format the path into the message rather than passing it as a
        # second, unformatted exception argument.
        raise Exception(u'!Error: The default TedTalk file is missing (%s)' % userDefaultFile)

    url = u'file://%s' % (userDefaultFile, )
    if self.config['debug_enabled']:
        print(url)
        print('')
    try:
        userTedTalks = etree.parse(url)
    except Exception as e:
        raise TedTalksUrlError(self.error_messages['TedTalksUrlError'] % (url, e))

    # Carry the user's existing settings over onto the fresh defaults.
    if not create:
        for showElement in self.userPrefs.xpath("//sourceURL"):
            showName = showElement.getparent().attrib['name']
            # NOTE(review): the lookup matches default sourceURL elements
            # against the *parent's* name; the element's own "name"
            # attribute was read here previously but never used - confirm
            # which of the two is the intended key.
            elements = userTedTalks.xpath("//sourceURL[@name=$showName]", showName=showName,)
            if elements:
                # Copy only attributes the user's element actually has, so
                # a hand-edited file missing one does not abort the merge.
                for attrName in ('enabled', 'parameter'):
                    if attrName in showElement.attrib:
                        elements[0].attrib[attrName] = showElement.attrib[attrName]

    if self.config['debug_enabled']:
        print("After any merging userTedTalks:")
        sys.stdout.write(etree.tostring(userTedTalks, encoding='UTF-8', pretty_print=True))
        print('')

    # Make sure the preference directory exists, then write the file.
    prefFile = self.tedtalks_config.find('userPreferenceFile').text
    prefDir = os.path.dirname(prefFile)
    if prefDir and not os.path.isdir(prefDir):
        os.makedirs(prefDir)
    fd = open(prefFile, 'w')
    try:
        fd.write(etree.tostring(userTedTalks, encoding='UTF-8', pretty_print=True))
    finally:
        # Close the handle even when serialising or writing fails.
        fd.close()

    # Re-read what was just written so that self.userPrefs reflects the file.
    try:
        self.userPrefs = etree.parse(prefFile)
        self.mashups_api.userPrefs = self.userPrefs
    except Exception as errormsg:
        # Report the preference file, which is the one that failed to parse.
        raise TedTalksUrlError(self.error_messages['TedTalksUrlError'] % (prefFile, errormsg))
    return
00304
00305
00306
00307
00308
00309
00310
00311
def searchTitle(self, title, pagenumber, pagelen):
    '''Key word video search of the TedTalks web site.

    title:      the search text
    pagenumber: substituted for PAGENUM in the configured search URL
    pagelen:    accepted but not used by this method

    The first href under "searchURLS" in the configuration is used as the
    URL template; its SEARCHTERM and PAGENUM markers are filled in and the
    result is stored back into that href element.

    Raises TedTalksUrlDownloadError when fetching the results fails,
    TedTalksVideoNotFound when nothing matched, and Exception when the user
    preferences have no enabled search carrying a "parameter" attribute.
    return an array of matching item elements
    '''
    searchHref = self.tedtalks_config.find('searchURLS').xpath(".//href")[0]

    try:
        quotedTitle = urllib.quote_plus(title.encode("utf-8"))
    except UnicodeDecodeError:
        # Encoding a byte string that holds non-ASCII bytes fails in
        # Python 2; quote the bytes as they are. The URL template is left
        # intact here (it used to be replaced by a broken format string
        # that raised TypeError).
        quotedTitle = urllib.quote_plus(title)
    url = searchHref.text.replace(u'SEARCHTERM', quotedTitle)
    url = url.replace(u'PAGENUM', unicode(pagenumber))

    if self.config['debug_enabled']:
        print(url)
        print('')

    searchHref.text = url

    # Register the common XPath extension functions under the mnvXpath
    # prefix, always through this instance's common object.
    self.common.buildFunctionDict()
    mnvXpath = etree.FunctionNamespace('http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format')
    mnvXpath.prefix = 'mnvXpath'
    for key in self.common.functionDict.keys():
        mnvXpath[key] = self.common.functionDict[key]

    # The expression is absolute, so evaluate it on the preference tree
    # itself; a missing "search" section then produces the descriptive
    # error below instead of an AttributeError.
    paraMeter = self.userPrefs.xpath("//search//sourceURL[@enabled='true']/@parameter")
    if not paraMeter:
        raise Exception(u'TedTalks User preferences file "tedtalks.xml" does not have an enabled search with a "parameter" attribute.')
    etree.SubElement(self.tedtalks_config.find('searchURLS').xpath(".//url")[0], "parameter").text = paraMeter[0]

    try:
        resultTree = self.common.getUrlData(self.tedtalks_config.find('searchURLS'))
    except Exception as errormsg:
        raise TedTalksUrlDownloadError(self.error_messages['TedTalksUrlDownloadError'] % (errormsg))

    if resultTree is None:
        raise TedTalksVideoNotFound(u"No TedTalks Video matches found for search value (%s)" % title)

    searchResults = resultTree.xpath('//result//item')
    if not searchResults:
        raise TedTalksVideoNotFound(u"No TedTalks Video matches found for search value (%s)" % title)

    return searchResults
00361
00362
00363
def searchForVideos(self, title, pagenumber):
    """Run a video search and print the results as MNV RSS, then exit.

    Loads the user preferences, calls searchTitle() and writes the
    resulting XML document to stdout. The process always ends through
    sys.exit(): 0 after output or when no video was found, 1 on any other
    error (which is reported on stderr).
    """
    self.getUserPreferences()

    if self.config['debug_enabled']:
        print("self.tedtalks_config:")
        sys.stdout.write(etree.tostring(self.tedtalks_config, encoding='UTF-8', pretty_print=True))
        print('')

    # Any failure of the search itself ends the process here.
    try:
        data = self.searchTitle(title, pagenumber, self.page_limit)
    except TedTalksVideoNotFound as msg:
        sys.stderr.write(u"%s\n" % msg)
        sys.exit(0)
    except TedTalksUrlError as msg:
        sys.stderr.write(u'%s\n' % msg)
        sys.exit(1)
    except TedTalksHttpError as msg:
        sys.stderr.write(self.error_messages['TedTalksHttpError'] % msg)
        sys.exit(1)
    except TedTalksRssError as msg:
        sys.stderr.write(self.error_messages['TedTalksRssError'] % msg)
        sys.exit(1)
    except Exception as e:
        sys.stderr.write(u"! Error: Unknown error during a Video search (%s)\nError(%s)\n" % (title, e))
        sys.exit(1)

    # Root <rss> element built from the common RSS header text.
    rssTree = etree.XML(self.common.mnvRSS+u'</rss>')

    # Paging values: the item count and the number of items on earlier pages.
    itemCount = len(data)
    priorItems = self.page_limit*(int(pagenumber)-1)
    if itemCount == self.page_limit:
        pagingValues = (itemCount, itemCount+priorItems, itemCount+(priorItems+1))
    else:
        pagingValues = (itemCount+priorItems, itemCount, itemCount)
    self.channel['channel_returned'] = pagingValues[0]
    self.channel['channel_startindex'] = pagingValues[1]
    self.channel['channel_numresults'] = pagingValues[2]

    # Channel element holding every item that was found.
    channelTree = self.common.mnvChannelElement(self.channel)
    rssTree.append(channelTree)
    for item in data:
        channelTree.append(item)

    # Emit the finished document and stop.
    sys.stdout.write(u'<?xml version="1.0" encoding="UTF-8"?>\n')
    sys.stdout.write(etree.tostring(rssTree, encoding='UTF-8', pretty_print=True))
    sys.exit(0)
00422
00423
def displayTreeView(self):
    '''Hand the tree view over to the mashups helper and exit.
    Copies this grabber's page limit, titles and channel icon onto the
    helper, forces its mashup title to "tedtalks", clears its Search flag
    and calls its displayTreeView(). Exits with status 1 (after reporting on
    stderr) when that call raises an Exception, otherwise with status 0.
    '''
    treeApi = self.mashups_api
    for attrName in ('page_limit', 'grabber_title', 'mashup_title', 'channel_icon'):
        setattr(treeApi, attrName, getattr(self, attrName))
    # The helper's mashup title is always 'tedtalks', whatever was copied above.
    treeApi.mashup_title = u'tedtalks'

    try:
        treeApi.Search = False
        treeApi.displayTreeView()
    except Exception as e:
        sys.stderr.write(u"! Error: During a TedTalks Video treeview\nError(%s)\n" % (e))
        sys.exit(1)

    sys.exit(0)
00447
00448