00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
# ---------------------------------------------------------------------------
# Script identification metadata.
# ---------------------------------------------------------------------------
__title__ = "mevio_api - XPath and XSLT functions for the www.mevio.com RSS/HTML"
__author__ = "R.D. Vaughan"
__purpose__ = '''
This python script is intended to perform a variety of utility functions
for the conversion of data to the MNV standard RSS output format.
See this link for the specifications:
http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format
'''

__version__ = "v0.1.1"

# Names of the classes in this module that supply XPath extension functions.
# NOTE(review): presumably read by whatever code loads this module - confirm
# against the loader before renaming anything here.
__xpathClassList__ = ['xpathFunctions']

# Names of the XSLT extension functions supplied by this module (none).
# The identifier's spelling ("Extention") is kept as-is because it is the
# module's public name.
__xsltExtentionList__ = []
00036
00037 import os, sys, re, time, datetime, shutil, urllib, string
00038 from copy import deepcopy
00039
00040
class OutStreamEncoder(object):
    """Proxy for an output stream that encodes unicode text before writing.

    Every attribute other than ``write`` (and the proxy's own ``out`` and
    ``encoding``) is looked up on the wrapped stream.
    """

    def __init__(self, outstream, encoding=None):
        """Remember the wrapped stream and the encoding to apply.

        outstream -- object whose ``write`` method receives the output
        encoding  -- codec name; when empty or None the filesystem encoding
                     reported by ``sys.getfilesystemencoding()`` is used
        """
        self.out = outstream
        self.encoding = encoding or sys.getfilesystemencoding()

    def write(self, obj):
        """Write obj to the wrapped stream, encoding it first if it is unicode.

        An IOError raised while writing is deliberately ignored (best-effort
        output); any other exception propagates.
        """
        try:
            if isinstance(obj, unicode):
                obj = obj.encode(self.encoding)
            self.out.write(obj)
        except IOError:
            pass

    def __getattr__(self, attr):
        """Forward any attribute not found on the proxy to the wrapped stream."""
        return getattr(self.out, attr)
# Route both standard streams through the encoder so that unicode text is
# written out as UTF-8.
sys.stdout = OutStreamEncoder(sys.stdout, 'utf8')
sys.stderr = OutStreamEncoder(sys.stderr, 'utf8')

# Both libraries are mandatory; report the failure and stop if either is missing.
try:
    from StringIO import StringIO
    from lxml import etree
except Exception as e:
    sys.stderr.write(u'\n! Error - Importing the "lxml" and "StringIO" python libraries failed on error(%s)\n' % e)
    sys.exit(1)

# Check that the libxml library behind lxml is current enough.
# `version` stays a dotted string because it is a module-level name and is
# used in the error message; the comparison itself is done on the integer
# tuple. Comparing the strings is wrong as soon as a component reaches two
# digits: '2.10.0' < '2.7.2' is True although 2.10.0 is the newer release.
version = '.'.join(str(digit) for digit in etree.LIBXML_VERSION)
if tuple(etree.LIBXML_VERSION) < (2, 7, 2):
    sys.stderr.write(u'''
! Error - The installed version of the "lxml" python library "libxml" version is too old.
At least "libxml" version 2.7.2 must be installed. Your version is (%s).
''' % version)
    sys.exit(1)
00090
00091
class xpathFunctions(object):
    '''XPath extension functions specific to the www.mevio.com RSS/HTML.

    NOTE(review): several methods use a global named `common`
    (`common.baseProcessingDir`, `common.checkIfDBItem`) that is neither
    defined nor imported in the visible part of this file; presumably it is
    injected by the code that loads this module - confirm.
    '''

    def __init__(self):
        '''Set up the exported function names, the title regular expressions,
        the XML namespaces and the media-id extraction filters.
        '''
        # Names of the methods offered as XPath extension functions
        self.functList = ['mevioLinkGeneration', 'mevioTitle', 'mevioEpisode', 'mevioCheckIfDBItem', ]
        # Patterns tried in order to find an episode number in a title
        self.episodeRegex = [
            # "... Episode 12 ..."
            re.compile(u'''^.+?Episode\\ (?P<episodeno>[0-9]+).*$''', re.UNICODE),
            # "... 12: ..."
            re.compile(u'''^.+?(?P<episodeno>[0-9]+)\\:.*$''', re.UNICODE),
            ]
        self.namespaces = {
            'atom10': u"http://www.w3.org/2005/Atom",
            'media': u"http://search.yahoo.com/mrss/",
            'itunes':"http://www.itunes.com/dtds/podcast-1.0.dtd",
            'xhtml': u"http://www.w3.org/1999/xhtml",
            'feedburner': u"http://rssnamespace.org/feedburner/ext/1.0",
            'mythtv': "http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format",
            'dc': "http://purl.org/dc/elements/1.1/",
            'fb': "http://www.facebook.com/2008/fbml/",
            }
        # Each entry pairs an XPath that selects a candidate attribute value
        # with the regular expression that pulls the video code out of it.
        self.mediaIdFilters = [
            [etree.XPath(".//embed/@flashvars", namespaces=self.namespaces), re.compile(u'''^.+?MediaId=(?P<videocode>[0-9]+).*$''', re.UNICODE)],
            [etree.XPath(".//div[@class='player_wrapper']/a/@href", namespaces=self.namespaces), re.compile(u'''^.+?\\'(?P<videocode>[0-9]+)\\'\\)\\;.*$''', re.UNICODE)]
            ]
    # end __init__()

    def _findEpisodeNumber(self, title):
        '''Return the episode number text found in title by the first matching
        pattern of self.episodeRegex, or an empty string when none matches.
        '''
        for regex in self.episodeRegex:
            match = regex.match(title)
            if match:
                return match.group('episodeno')
        return u''
    # end _findEpisodeNumber()

    def mevioLinkGeneration(self, context, *arg):
        '''Generate a link for the video.
        Call example: 'mnvXpath:mevioLinkGeneration(string(link))'
        The page at arg[0] is fetched and parsed as HTML, then each media-id
        filter is tried in order.
        return a file:// link to the local mevio.html player page carrying the
        video code when one is found
        return the original url when the page cannot be read or no filter matches
        '''
        webURL = arg[0]
        try:
            tmpHTML = etree.parse(webURL, etree.HTMLParser())
        except Exception as errmsg:
            sys.stderr.write(u"Error reading url(%s) error(%s)\n" % (webURL, errmsg))
            return webURL

        for xpathFilter, regex in self.mediaIdFilters:
            mediaId = xpathFilter(tmpHTML)
            if not len(mediaId):
                continue
            match = regex.match(mediaId[0])
            if match:
                return u'file://%s/nv_python_libs/configs/HTML/mevio.html?videocode=%s' % (common.baseProcessingDir, match.group('videocode'))
        # No filter produced a video code, fall back to the web page itself
        return webURL
    # end mevioLinkGeneration()

    def mevioTitle(self, context, arg):
        '''Parse the title string extract only the title text removing the redundant show name
        Call example: 'mnvXpath:mevioTitle(./title/text())'
        arg is the sequence of title text values; only arg[0] is examined.
        return the title text, prefixed with 'Ep <number>: ' when an episode
        number was found in the title
        return only the 'Ep <number>: ' prefix when there is an episode number
        but none of the separators occurs in the title
        return an empty string when arg is empty
        '''
        # An XPath text() selection may be empty; do not fail on arg[0]
        if not arg:
            return u''
        title = arg[0]

        episodeNumber = self._findEpisodeNumber(title)
        if episodeNumber:
            epText = u'Ep %s: ' % episodeNumber
        else:
            epText = u''

        # Separators are tried in this order. 'before' keeps the text ahead of
        # the LAST occurrence, 'after' keeps the text behind the FIRST one.
        for separator, keep in ((' | ', 'before'), (': ', 'after'), (' - ', 'before')):
            if keep == 'after':
                index = title.find(separator)
                if index != -1:
                    return u'%s%s' % (epText, title[index + len(separator):].strip())
            else:
                index = title.rfind(separator)
                if index != -1:
                    return u'%s%s' % (epText, title[:index].strip())

        # No separator present in the title
        if epText:
            return epText
        return title.strip()
    # end mevioTitle()

    def mevioEpisode(self, context, arg):
        '''Parse the title string and extract an episode number
        Call example: 'mnvXpath:mevioEpisode(./title/text())'
        arg is the sequence of title text values; only arg[0] is examined.
        return an <episode> element whose text is the episode number; the
        element is empty when no number is found or arg is empty
        '''
        if arg:
            episodeNumber = self._findEpisodeNumber(arg[0])
        else:
            episodeNumber = u''
        return etree.XML(u'<episode>%s</episode>' % episodeNumber)
    # end mevioEpisode()

    def mevioCheckIfDBItem(self, context, *arg):
        '''Use a unique key value pairing to find out if the 'internetcontentarticles' table already
        has a matching item. This is done to save accessing the Internet when not required.
        Call example: 'mnvXpath:mevioCheckIfDBItem(title, description)'
        The lookup itself is delegated to common.checkIfDBItem() with the feed
        title fixed to 'Technology', arg[0] as title and arg[1] as description.
        return True if a match was found
        return False if a match was not found
        '''
        return common.checkIfDBItem('dummy', {'feedtitle': 'Technology', 'title': arg[0], 'description': arg[1]})
    # end mevioCheckIfDBItem()
00199
00200
00201
00202
00203
00204
00205
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215
00216
00217