00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
# Module metadata
__title__ = "youtubeXSL_api - XPath and XSLT functions for the mashup grabbers"
__author__ = "R.D. Vaughan"
__purpose__ = '''
This python script is intended to perform a variety of utility functions
for the conversion of data to the MNV standard RSS output format.
See this link for the specifications:
http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format
'''

__version__ = "v0.1.0"

# Names of the classes in this module that provide XPath extension functions
__xpathClassList__ = ['xpathFunctions']

# NOTE(review): presumably the names of XSLT extension classes; this module
# lists none. The identifier keeps its historical spelling ("Extention")
# because it may be read by name from outside this file.
__xsltExtentionList__ = []
00034
00035 import os, sys, re, time, datetime, shutil, urllib, string
00036 from copy import deepcopy
00037
00038
class OutStreamEncoder(object):
    """Proxy for an output stream that encodes Unicode text on the way out.

    Only ``write`` is intercepted; every other attribute lookup is forwarded
    to the wrapped stream.
    """

    def __init__(self, outstream, encoding=None):
        """Remember the wrapped stream and the encoding to apply.

        outstream -- object whose ``write`` method receives the output
        encoding  -- codec name; any falsy value selects
                     ``sys.getfilesystemencoding()``
        """
        self.out = outstream
        self.encoding = encoding or sys.getfilesystemencoding()

    def write(self, obj):
        """Write ``obj`` to the wrapped stream.

        ``unicode`` objects are encoded with ``self.encoding`` first; anything
        else is passed through untouched. An IOError raised by the wrapped
        stream's ``write`` is deliberately ignored.
        """
        payload = obj.encode(self.encoding) if isinstance(obj, unicode) else obj
        try:
            self.out.write(payload)
        except IOError:
            pass

    def __getattr__(self, attr):
        """Forward any attribute not found on the proxy to the wrapped stream."""
        return getattr(self.out, attr)
# Route both standard streams through the encoding wrapper so that Unicode
# text written to them is emitted as UTF-8.
sys.stdout = OutStreamEncoder(sys.stdout, 'utf8')
sys.stderr = OutStreamEncoder(sys.stderr, 'utf8')

# Both libraries are required: report the failure and quit if either import
# does not succeed.
try:
    from StringIO import StringIO
    from lxml import etree
except Exception as e:
    sys.stderr.write(u'\n! Error - Importing the "lxml" and "StringIO" python libraries failed on error(%s)\n' % e)
    sys.exit(1)

# Refuse to run against a libxml older than 2.7.2.
# etree.LIBXML_VERSION is a sequence of integers, so the check is made on the
# numbers themselves. Comparing the dotted strings would order them
# character by character and wrongly treat e.g. "2.10.0" as older than
# "2.7.2". The dotted string is still built because it is shown in the error
# message and remains available as the module-level name ``version``.
version = '.'.join([str(digit) for digit in etree.LIBXML_VERSION])
if tuple(etree.LIBXML_VERSION) < (2, 7, 2):
    sys.stderr.write(u'''
! Error - The installed version of the "lxml" python library "libxml" version is too old.
At least "libxml" version 2.7.2 must be installed. Your version is (%s).
''' % version)
    sys.exit(1)
00088
00089
class xpathFunctions(object):
    """XPath extension functions used for YouTube trailer searches.

    The methods named in ``self.functList`` are the extension entry points.
    They read, and in one case write, attributes of a module-level ``common``
    object that this file never defines.
    NOTE(review): ``common`` is presumably injected by whatever loads this
    module - confirm against the caller.
    """

    def __init__(self):
        # Names of the methods offered as XPath extension functions
        self.functList = ['youtubeTrailerFilter', 'youtubePaging', ]
        # Patterns that extract a trailer number from a normalized title.
        # The attribute keeps its historical spelling ("tailer") in case it is
        # referenced by name from outside this class.
        self.tailerNum_Patterns = [
            # "<something> trailer 2 ..."
            re.compile(u'''^.+?trailer\\ (?P<trailerNum>[0-9]+).*$''', re.UNICODE),
            # "<something> trailer #2 ..."
            re.compile(u'''^.+?trailer\\ \\#(?P<trailerNum>[0-9]+).*$''', re.UNICODE),
        ]

    def _trailerSortKey(self, title, searchTerm):
        """Return the sort key for one title, or None if it is not relevant.

        title      -- lower-cased title text with punctuation already removed
        searchTerm -- search term normalized the same way

        The key is a ranking prefix followed by the remaining title words, so
        that a plain string sort of the keys yields the display order. Prefix
        parts, in the order they are applied:
          "ZZ-Game"     title mentions "game" but the search term does not
          "1-"          title starts with the search term
          "Trailer #N"  trailer number found in the title (N < 20), otherwise
                        "Trailer #1" when the title mentions a trailer
          "HD-" / "YHD" title mentions hd/1080p/720p and is not a game title
        """
        if title.startswith(u'the '):
            title = title[4:].strip()
        # Drop filler words unless they are part of what was searched for
        for filler in (u'new ', u'official '):
            if filler not in searchTerm:
                title = title.replace(filler, u'')
        if searchTerm not in title:
            return None

        addOns = u''
        if u'game ' not in searchTerm and u'game' in title:
            addOns += u'ZZ-Game'
        isHD = any(tag in title for tag in (u'hd', u'1080p', u'720p'))
        if title.startswith(searchTerm):
            addOns += u'1-'

        for regexPattern in self.tailerNum_Patterns:
            match = regexPattern.match(title)
            if not match:
                continue
            trailerNum = match.group('trailerNum')
            if int(trailerNum) < 20:
                addOns += u'Trailer #%s' % trailerNum
                title = title.replace(u'trailer %s' % trailerNum, u'')
            else:
                # Numbers of 20 and above are not used as a trailer number
                addOns += u'Trailer #1'
            break
        else:
            # No pattern matched: the title may still mention a trailer
            if u'trailer' in title:
                addOns += u'Trailer #1'

        if isHD and not addOns.startswith(u'ZZ-Game'):
            if addOns:
                addOns = u'HD-' + addOns
            else:
                addOns = u'YHD'

        # Remove the classification words and collapse the whitespace they
        # leave behind, so titles differing only in those words share a key.
        for text in (u'hd', u'trailer', u'game', u'1080p', u'720p'):
            title = u' '.join(title.replace(text, u'').split())
        return (u'%s %s' % (addOns, title)).strip()

    def youtubeTrailerFilter(self, context, *args):
        '''Select, de-duplicate and order the entries relevant to the search term.

        Call example: 'mnvXpath:youtubeTrailerFilter(//atm:entry)'

        context -- XPath evaluation context (unused)
        args[0] -- iterable of "entry" elements; a call without arguments is
                   treated as an empty list

        Entries without a title element or with empty title text are skipped.
        Each returned element is a deep copy whose title text is prefixed with
        its two-digit position ("01. ", "02. ", ...), so the elements passed
        in are left unmodified.

        Side effect: sets common.numresults, common.returned and
        common.startindex to the number of returned elements (as a string).

        return the list of relevant "entry" elements
        '''
        searchTerm = common.removePunc('dummy', common.searchterm.lower())
        titleFilter = etree.XPath('.//atm:title', namespaces=common.namespaces)
        if searchTerm.startswith(u'the '):
            searchTerm = searchTerm[4:].strip()

        # Index entries by raw title; a later entry with the same title
        # replaces an earlier one.
        entries = args[0] if args else []
        titleDict = {}
        for entry in entries:
            titleNodes = titleFilter(entry)
            if not titleNodes or not titleNodes[0].text:
                continue
            titleDict[titleNodes[0].text] = entry

        # Keep only relevant titles; entries whose sort keys collide collapse
        # into a single result.
        filteredDict = {}
        for rawTitle, entry in titleDict.items():
            sortKey = self._trailerSortKey(
                common.removePunc('dummy', rawTitle.lower()), searchTerm)
            if sortKey is not None:
                filteredDict[sortKey] = entry

        # Number the results in sort-key order, working on copies
        finalElements = []
        for position, sortKey in enumerate(sorted(filteredDict)):
            element = deepcopy(filteredDict[sortKey])
            titleNode = titleFilter(element)[0]
            titleNode.text = u'%02d. %s' % (position + 1, titleNode.text)
            finalElements.append(element)

        # Set the paging values
        common.numresults = str(len(finalElements))
        common.returned = common.numresults
        common.startindex = common.numresults

        return finalElements

    def youtubePaging(self, context, args):
        '''Generate the page value used for a YouTube mashup search.

        Call example: 'mnvXpath:youtubePaging('dummy')'

        context -- XPath evaluation context (unused)
        args    -- unused placeholder argument

        return (common.pagenumber - 1) * common.page_limit + 1 as a string
        '''
        return str((int(common.pagenumber) - 1) * common.page_limit + 1)
00203
00204
00205
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215
00216
00217
00218
00219
00220
00221
00222