00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
# Descriptive metadata for this function library.
__title__ = "tributeca_api - XPath and XSLT functions for the Tribute.ca grabber"
__author__ = "R.D. Vaughan"
__purpose__ = '''
This python script is intended to perform a variety of utility functions
for the conversion of data to the MNV standard RSS output format.
See this link for the specifications:
http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format
'''

__version__ = "v0.1.1"

# Names of the classes in this module that provide XPath extension functions.
__xpathClassList__ = ['xpathFunctions']

# Names of the classes in this module that provide XSLT extension elements
# (none are published).
__xsltExtentionList__ = []
00035
00036 import os, sys, re, time, datetime, shutil, urllib, string
00037 from copy import deepcopy
00038
00039
class OutStreamEncoder(object):
    """Stream proxy that encodes unicode text before handing it to the real stream"""

    def __init__(self, outstream, encoding=None):
        """Remember the wrapped stream and the codec name.

        outstream -- the object whose write() receives the data
        encoding  -- codec name; a false value selects sys.getfilesystemencoding()
        """
        self.out = outstream
        self.encoding = encoding or sys.getfilesystemencoding()

    def write(self, obj):
        """Write obj to the wrapped stream, encoding it first when it is a unicode string.

        An IOError raised by the wrapped stream's write() is ignored.
        """
        payload = obj.encode(self.encoding) if isinstance(obj, unicode) else obj
        try:
            self.out.write(payload)
        except IOError:
            pass

    def __getattr__(self, attr):
        """Forward every attribute not found on the wrapper to the wrapped stream"""
        return getattr(self.out, attr)
# Route everything written to stdout and stderr through the UTF-8 encoding wrapper.
sys.stdout = OutStreamEncoder(sys.stdout, 'utf8')
sys.stderr = OutStreamEncoder(sys.stderr, 'utf8')

# Both libraries are mandatory; report the failure and stop when either is missing.
try:
    from StringIO import StringIO
    from lxml import etree
except Exception as e:
    sys.stderr.write(u'\n! Error - Importing the "lxml" and "StringIO" python libraries failed on error(%s)\n' % e)
    sys.exit(1)

# Require libxml 2.7.2 or later.
# etree.LIBXML_VERSION is a tuple of integers. It has to be compared as a tuple:
# compared as dotted strings, '2.10.0' sorts *before* '2.7.2' and a new library
# would be rejected as too old. The dotted string is kept only for the message.
version = '.'.join([str(digit) for digit in etree.LIBXML_VERSION])
if tuple(etree.LIBXML_VERSION) < (2, 7, 2):
    sys.stderr.write(u'''
! Error - The installed version of the "lxml" python library "libxml" version is too old.
At least "libxml" version 2.7.2 must be installed. Your version is (%s).
''' % version)
    sys.exit(1)
00089
00090
class xpathFunctions(object):
    """XPath extension functions specific to the Tribute.ca grabber.

    Every public function takes the XPath evaluation context followed by the
    arguments of the XPath call. Values are handed from one call to a later
    one through the self.persistence dictionary.

    NOTE: 'common' is not defined or imported in this module; it is presumably
    injected by the code that loads this library -- confirm against the loader.
    NOTE: the title handling relies on Python 2 (urllib.quote_plus and byte strings).
    """
    def __init__(self):
        # Names of the methods offered as XPath functions (private helpers are not listed)
        self.functList = ['tributecaLinkGeneration', 'tributecaThumbnailLink', 'tributecaTopTenTitle', 'tributecaIsCustomHTML', 'tributecaCheckIfDBItem', 'tributecaDebug', 'tributecaGetAnchors', ]
        # Returns the string value of an element
        self.TextTail = etree.XPath("string()")
        # Returns every descendant anchor of an element
        self.anchorList = etree.XPath(".//a", namespaces=common.namespaces)
        # Values carried over between XPath function calls
        self.persistence = {}

    def _videoCode(self, title):
        '''Reduce a title to the code used in a Tribute.ca FLV file name: lower case, spaces and
        ASCII punctuation removed, UTF-8 encoded and URL quoted.
        return the code
        '''
        squeezed = title.lower().replace(u' ', u'').encode("utf-8")
        return urllib.quote_plus(re.sub('[%s]' % re.escape(string.punctuation), '', squeezed))

    def _isFlashVideo(self, url):
        '''Probe a url with common.checkURL().
        return True if the probe succeeded and reported a 'video/x-flv' Content-Type
        return False otherwise
        '''
        result = common.checkURL(url)
        # The header mapping is only looked at when the probe itself succeeded
        return bool(result[0]) and result[1]['Content-Type'] == u'video/x-flv'

    def tributecaLinkGeneration(self, context, *args):
        '''Generate a link for the Tribute.ca site. Significant massaging of the title is required.
        Call example: 'mnvXpath:tributecaLinkGeneration(position(), ..//a)'
        args[0] is the 1-based position of the anchor to use, args[1] the list of anchor elements.
        Calls work in pairs: the first call probes the site and leaves its result in
        self.persistence['tributecaLinkGeneration']; the following call consumes that value,
        without probing again, and returns the direct FLV url (or the Web page url when no FLV
        was found). The first call also stores the poster directory name in
        self.persistence['tributecaThumbnailLink'].
        return the url link
        '''
        downloadURL = u'http://www.tribute.ca/streamingflash/%s.flv'
        position = int(args[0])-1
        webURL = u'http://www.tribute.ca%s' % args[1][position].attrib['href'].strip()

        # Second call of a pair: hand back what the first call found and clear it
        pending = self.persistence.get('tributecaLinkGeneration')
        if pending is not None:
            self.persistence['tributecaLinkGeneration'] = None
            if pending != webURL:
                return downloadURL % pending
            return webURL

        currentTitle = self.TextTail(args[1][position]).strip()
        if position == 0:
            # There is no previous anchor (index -1 would wrap around to the last one)
            previousTitle = u''
        else:
            previousTitle = self.TextTail(args[1][position-1]).strip()

        # titleArray[0] is the current title, titleArray[1] the previous one
        titleArray = [currentTitle, previousTitle]
        if titleArray[0].startswith(u'IMAX:'):
            titleArray[0] = titleArray[0].replace(u'IMAX:', u'').strip()
        else:
            # Cut both titles at the first ": " and at the first " (". The "3D" test reads
            # the titles as they stand at that moment, so the statement order matters.
            for counter in range(len(titleArray)):
                index = titleArray[counter].find(": ")
                if index != -1:
                    titleArray[counter] = titleArray[counter][:index].strip()
                index = titleArray[counter].find(" (")
                if index != -1:
                    titleArray[counter] = titleArray[counter][:index].strip()
                if titleArray[0].startswith(titleArray[1]) and titleArray[1]:
                    index = titleArray[counter].find("3D")
                    if index != -1:
                        titleArray[counter] = titleArray[counter][:index].strip()

        # A current title that starts with the previous title is treated as trailer #2
        trailer2 = u''
        if titleArray[0].startswith(titleArray[1]) and titleArray[1]:
            trailer2 = u'tr2'
        if currentTitle.find(': An IMAX') != -1:
            trailer2 = u'tr2'
        titleArray[0] = titleArray[0].replace(u'&', u'and')
        self.persistence['tributecaThumbnailLink'] = urllib.quote_plus(
            titleArray[0].lower().replace(u' ', u'_').replace(u"'", u'').replace(u'-', u'_').replace(u'?', u'').replace(u'.', u'').encode("utf-8"))
        titleArray[0] = self._videoCode(titleArray[0])

        # Probe for the FLV file; fall back to the Web page link when no candidate exists
        videocode = u'%s%s' % (titleArray[0], trailer2)
        flvURL = downloadURL % videocode
        if not self._isFlashVideo(flvURL):
            if trailer2 != u'':
                # Drop the 'tr2' this time
                videocode = titleArray[0]
                flvURL = downloadURL % titleArray[0]
                if not self._isFlashVideo(flvURL):
                    flvURL = webURL
            else:
                # Add the 'tr2' this time
                videocode = titleArray[0]+u'tr2'
                flvURL = downloadURL % videocode
                if not self._isFlashVideo(flvURL):
                    if currentTitle.find(': An IMAX') == -1 and currentTitle.find(': ') != -1:
                        # Last attempt: the complete title, including the part after the ": "
                        titleArray[0] = self._videoCode(currentTitle.replace(u'&', u'and'))
                        videocode = titleArray[0]
                        flvURL = downloadURL % videocode
                        if not self._isFlashVideo(flvURL):
                            flvURL = webURL
                    else:
                        flvURL = webURL

        # Remember the outcome for the second call of the pair
        if flvURL != webURL:
            self.persistence['tributecaLinkGeneration'] = videocode
            return common.linkWebPage(u'dummycontext', 'tributeca')+videocode
        else:
            self.persistence['tributecaLinkGeneration'] = flvURL
            return flvURL

    def tributecaThumbnailLink(self, context, *args):
        '''Verify that the thumbnail actually exists. If it does not then use the site image.
        Call example: 'mnvXpath:tributecaThumbnailLink(string(.//img/@src))'
        The site image is also used when no poster name has been stored yet by
        tributecaLinkGeneration (formerly that case raised a KeyError).
        return the thumbnail url
        '''
        siteImage = u'http://www.tribute.ca/images/tribute_title.gif'
        posterName = self.persistence.get('tributecaThumbnailLink')
        if not len(args[0]) or not posterName:
            return siteImage

        if args[0].startswith(u'http:'):
            url = args[0].strip()
        else:
            url = u'http://www.tribute.ca/tribute_objects/images/movies/%s%s' % (posterName, u'/poster.jpg')
        resultCheckUrl = common.checkURL(url)
        if not resultCheckUrl[0] or resultCheckUrl[1]['Content-Type'] != u'image/jpeg':
            return siteImage

        return url

    def tributecaTopTenTitle(self, context, *args):
        '''Take a top ten title and add a leading '0' if less than 10 as it forces correct sort order
        Call example: 'mnvXpath:tributecaTopTenTitle(string(..))'
        return a replacement title
        '''
        if not len(args[0]):
            return args[0]

        # A '.' at index 1 means the rank in front of it is a single character
        if args[0].find('.') == 1:
            return u'0'+args[0]
        return args[0]

    def tributecaIsCustomHTML(self, context, *args):
        '''Check if the link stored by tributecaLinkGeneration is for a custom HTML
        Example call: mnvXpath:tributecaIsCustomHTML('dummy')
        return True if the link does not start with "http://"
        return False if the link starts with "http://" or if no link is stored
        (a missing entry formerly raised a KeyError)
        '''
        link = self.persistence.get('tributecaLinkGeneration')
        if link is None:
            return False
        return not link.startswith(u'http://')

    def tributecaCheckIfDBItem(self, context, *arg):
        '''Use a unique key value pairing to find out if the 'internetcontentarticles' table already
        has a matching item. This is done to save accessing the Internet when not required.
        Call example: 'mnvXpath:tributecaCheckIfDBItem(.)'
        arg[0] is the title (every 'Trailer' is removed), arg[1] the author, arg[2] the description.
        return True if a match was found
        return False if a match was not found
        '''
        return common.checkIfDBItem('dummy', {'feedtitle': 'Movie Trailers', 'title': arg[0].replace('Trailer', u'').strip(), 'author': arg[1], 'description': arg[2]})

    def tributecaGetAnchors(self, context, *arg):
        ''' Routine used to get specific anchor elements.
        Unfortunately position dependent.
        Call: mnvXpath:tributecaGetAnchors(//ul[@class='clump'], 3)
        arg[0] is a list of elements, arg[1] the 0-based index of the element to search.
        return the anchor elements found below that element
        '''
        return self.anchorList(arg[0][int(arg[1])])

    def tributecaDebug(self, context, *arg):
        ''' Routine only used for debugging. Prints out the node
        passed as an argument. Not to be used in production.
        Call example: mnvXpath:tributecaDebug(//a)
        return a fixed marker string
        '''
        testpath = etree.XPath(".//a", namespaces=common.namespaces)
        # sys.stdout.write is used instead of the print statement; the output is the same
        sys.stdout.write(u'%s\n' % (arg,))
        for count, x in enumerate(arg):
            sys.stdout.write(u'\nElement Count (%s):\n' % count)
            sys.stdout.write(u'testpath(%s)\n' % (testpath(x),))
        sys.stdout.write(u'\n')

        return u"========tributecaDebug Called========="
00280
00281
00282
00283
00284
00285
00286
00287
00288
00289
00290
00291
00292
00293
class xsltExtExample(etree.XSLTExtension):
    '''Example of an XSLT extension. This code must be changed to do anything useful!!!
    Converts the text of the first 'duration' child of the input node, a ':' separated
    value such as "mm:ss" or "hh:mm:ss", into a number of seconds and stores it as the
    text of the output parent.
    return nothing
    '''
    def execute(self, context, self_node, input_node, output_parent):
        copyItem = deepcopy(input_node)
        fields = copyItem.xpath('duration')[0].text.split(':')
        # Each field is worth 60 times the field to its right. The former weight,
        # 60*(fields to the right), multiplied the seconds field by 0 and hours by 120.
        seconds = 0
        for field in fields:
            seconds = seconds*60 + int(field)
        output_parent.text = u'%s' % seconds
00305
00306
00307
00308
00309
00310