00001
00002
00003 """
00004 This Python script is intended to find the best possible poster/cover image
00005 for a video.
00006
00007 Scrapes the following web sites for the poster image (in this
00008 order):
00009
00010 movieposter.com
00011 imdb.com
00012
00013 Picks the largest (in pixels) vertical poster.
00014
00015 Written by Pekka Jääskeläinen (gmail: pekka.jaaskelainen) 2007
00016 """
00017
00018 import urllib
00019 import re
00020 import tempfile
00021 import os
00022 import optparse
00023 import sys
00024 import imdbpy
00025
# Optional third-party libraries. Each import is attempted once when the
# module is loaded; the flag next to it records whether the import succeeded.
# Only ImportError is handled, so Ctrl-C or an unrelated failure raised while
# importing is not mistaken for "library not installed".

# True when the BeautifulSoup module could be imported.
movie_poster_site = True
try:
    import BeautifulSoup
except ImportError:
    print("""BeautifulSoup class is required for parsing the MoviePoster site.

In Debian/Ubuntu it is packaged as 'python-beautifulsoup'.

http://www.crummy.com/software/BeautifulSoup/#Download/""")
    movie_poster_site = False

# True when the Python Imaging Library (module "Image") could be imported.
imaging_library = True
try:
    import Image
except ImportError:
    print("""Python Imaging Library is required for figuring out the sizes of
the fetched poster images.

In Debian/Ubuntu it is packaged as 'python-imaging'.

http://www.pythonware.com/products/pil/""")
    imaging_library = False
00048
class PosterImage:
    """
    Holds a single poster image.

    Records the location of the image file in the file system and its
    resolution in pixels. The resolution is read from the file when the
    object is created; when it cannot be determined, width and height
    keep their default of 0.
    """
    # Defaults that stay in effect when the image size cannot be read.
    width = 0
    height = 0
    file_name = None

    def __init__(self, file_name):
        """
        Remember file_name and try to read the image dimensions from it.

        Reading the size is best effort: any failure leaves the size at 0x0
        instead of raising.
        """
        self.file_name = file_name
        try:
            (self.width, self.height) = Image.open(file_name).size
        except Exception:
            # Deliberately tolerant: covers unreadable or non-image files as
            # well as the NameError raised when the Image module is not
            # available. KeyboardInterrupt and SystemExit are not Exception
            # subclasses and therefore still propagate.
            pass

    def is_vertical(self):
        """Return True when the image is taller than it is wide."""
        return self.width < self.height

    def pixels(self):
        """Return the image area (width times height) in pixels."""
        return self.width * self.height
00074
class PosterFetcher:
    """
    Base class for poster image fetchers.
    """
    def fetch(self, title_string, imdb_id = None):
        """
        Fetch movie posters for the given title and download them to local
        temporary files.

        Subclasses override this method. The base implementation finds
        nothing.

        Returns an empty list in case no images were found.
        """
        return []

    def download_image(self, image_url, extension=None):
        """
        Download image_url into a new temporary file.

        extension, when given, is used as the suffix of the temporary file
        name (for example ".jpg").

        Returns a PosterImage for the downloaded file. If the download
        fails, the temporary file is removed and the error propagates.
        """
        # mkstemp() appends the suffix to the generated name; on Python 2 a
        # None suffix raises TypeError, so fall back to an empty suffix.
        (fid, local_filename) = tempfile.mkstemp(extension or "")
        local_file = os.fdopen(fid, "wb")
        succeeded = False
        try:
            remote = urllib.urlopen(image_url)
            try:
                local_file.write(remote.read())
            finally:
                remote.close()
            succeeded = True
        finally:
            local_file.close()
            if not succeeded:
                # Do not leave a partial or empty temporary file behind.
                os.remove(local_filename)
        return PosterImage(local_filename)
00095
class MoviePosterPosterFetcher(PosterFetcher):
    """
    Fetches poster images from movieposter.com
    """
    def fetch(self, title_string, imdb_id = None):
        """
        Search movieposter.com for title_string and download the posters
        found.

        imdb_id is accepted for interface compatibility and is not used.

        Returns a list of PosterImage objects, at most four of them.
        """
        max_results = 4
        images = []

        for url in self.title_search(title_string):
            image_url = self.find_poster_image_url(url)
            if image_url is None:
                continue
            images.append(self.download_image(image_url, ".jpg"))
            if len(images) >= max_results:
                break
        return images

    def find_poster_image_url(self, poster_page_url):
        """
        Parses the given poster page and returns an URL pointing to the
        poster image.

        Returns None unless the page contains exactly one <img> whose src
        matches the poster archive path.
        """
        page = urllib.urlopen(poster_page_url)
        try:
            soup = BeautifulSoup.BeautifulSoup(page.read())
        finally:
            # Release the connection even when parsing fails.
            page.close()

        imgs = soup.findAll(
            'img', attrs={'src': re.compile('/posters/archive/main/.*')})

        if len(imgs) == 1:
            return "http://eu.movieposter.com/" + imgs[0]['src']
        return None

    def _ascii_title(self, title_string):
        """
        Return title_string as an ASCII byte string, with every character
        that is not ASCII replaced by '?'.
        """
        # Calling encode() directly on a Python 2 byte string first decodes
        # it as strict ASCII, which raises UnicodeDecodeError for titles such
        # as 'Am\xc3\xa9lie'. Decode leniently first.
        # NOTE(review): assumes byte-string titles are UTF-8 - confirm.
        if not isinstance(title_string, unicode):
            title_string = title_string.decode("utf-8", "replace")
        return title_string.encode("ascii", "replace")

    def title_search(self, title_string):
        """
        Executes a title search on movieposter.com.

        Returns a list of URLs leading to the page for the poster
        for the given title_string.
        """
        params = urllib.urlencode(
            {'ti': self._ascii_title(title_string),
             'pl': 'action',
             'th': 'y',
             'rs': '12',
             'size': 'any'})
        opener = urllib.URLopener()
        try:
            (filename, headers) = opener.retrieve(
                "http://eu.movieposter.com/cgi-bin/mpw8/search.pl",
                data=params)
            f = open(filename, 'r')
            try:
                results = f.read()
            finally:
                f.close()
        finally:
            # close() removes the temporary file created by retrieve().
            opener.close()

        return self.parse_title_search_results(results, title_string)

    def parse_title_search_results(self, result_page, title_string):
        """
        Parses the result page of a title search on movieposter.com.

        Looks at the links inside every <div class="pid">. A link is kept
        when its title attribute, lower-cased and with a trailing " poster"
        removed, equals the lower-cased title_string. mailto: links and
        links without an href or title attribute are skipped.

        Returns a list of URLs leading to a page with poster for the given
        title.
        """
        search = title_string.lower()
        suffix = " poster"
        soup = BeautifulSoup.BeautifulSoup(result_page)
        urls = []
        for div in soup.findAll('div', attrs={'class': 'pid'}):
            for link in div.findAll('a'):
                href = link.get('href')
                if href is None:
                    continue
                if href.lower().startswith("mailto:"):
                    continue
                title = link.get('title')
                if title is None:
                    continue
                title = title.lower()
                # Test for the same text that is stripped, so a title that
                # merely ends in "poster" is not truncated by one character
                # too many.
                if title.endswith(suffix):
                    title = title[:-len(suffix)]
                if title == search:
                    urls.append(href)
        return urls
00182
00183
class IMDbPosterFetcher(PosterFetcher):
    """
    Fetches poster images from imdb.com.
    """
    def fetch(self, title_string, imdb_id = None):
        """
        Download the poster that imdbpy reports for imdb_id.

        title_string is not used by this fetcher. Returns a list holding
        one PosterImage, or an empty list when no imdb_id was given or no
        poster URL was found.
        """
        if imdb_id is None:
            return []

        poster_url = imdbpy.find_poster_url(imdb_id)
        if poster_url is None:
            return []

        # Keep the extension of the last URL path component so the local
        # temporary file gets the same suffix.
        last_component = poster_url.rsplit("/", 1)[-1]
        extension = os.path.splitext(last_component)[1]
        return [self.download_image(poster_url, extension)]
00198
def find_best_posters(title, count=1, accept_horizontal=False, imdb_id=None):
    """
    Fetch candidate posters for a title and return the largest ones.

    title             -- the video title to search for.
    count             -- how many posters to return at most.
    accept_horizontal -- when False, posters that are not vertical are
                         deleted and ignored.
    imdb_id           -- optional IMDb id, passed on to the fetchers.

    Returns a list of at most count PosterImage objects ordered from the
    largest pixel area to the smallest. The temporary files of all posters
    that are not returned are removed.
    """
    # Only query movieposter.com when BeautifulSoup could be imported;
    # otherwise its fetcher fails with NameError before the IMDb fetcher
    # gets a chance to run.
    fetchers = []
    if movie_poster_site:
        fetchers.append(MoviePosterPosterFetcher())
    fetchers.append(IMDbPosterFetcher())

    # When imdbpy recognises the title as a series episode (series title,
    # season and episode all present), search with the series title only.
    (series_title, season, episode) = imdbpy.detect_series_title(title)
    if series_title is not None and season is not None and episode is not None:
        title = series_title.strip()
        if title.endswith(","):
            title = title[:-1]

    # Drop one leading article ("The ", "A ", ...) from the title.
    lowered = title.lower()
    for article in ("the", "a", "an", "die", "der"):
        prefix = article + " "
        if lowered.startswith(prefix):
            title = title[len(prefix):]
            break

    posters = []
    for fetcher in fetchers:
        for poster in fetcher.fetch(title, imdb_id):
            if not accept_horizontal and not poster.is_vertical():
                os.remove(poster.file_name)
                continue
            posters.append(poster)

    # Largest first. Sorting ascending and then reversing keeps the order
    # among equally sized posters the same as the former cmp-based sort.
    posters.sort(key=lambda poster: poster.pixels())
    posters.reverse()

    for small_poster in posters[count:]:
        os.remove(small_poster.file_name)

    return posters[:count]
00238
def main():
    """
    Command line entry point.

    Takes the video title as the single positional argument. When no single
    title argument is given but an IMDb id is passed with -P, the title is
    looked up through imdbpy instead. Prints one line per poster found.
    """
    parser = optparse.OptionParser()
    parser.add_option(
        '--number', '-n', action="store", type="int", default=1,
        help="the count of biggest posters to get")
    parser.add_option(
        '--all', '-a', action="store_true", default=False,
        help="accept all posters, even horizontal ones")
    parser.add_option(
        '--poster_search', '-P', metavar='IMDB_ID', default=None,
        dest="imdb_id",
        help="Displays a list of URL's to movie posters.  The lines are "
             "ranked by descending value. For MythVideo.")

    options, arguments = parser.parse_args()

    if len(arguments) == 1:
        title = arguments[0]
    elif options.imdb_id:
        # No usable title argument: take the title from the IMDb metadata.
        metadata = imdbpy.metadata_search(options.imdb_id)
        title = imdbpy.parse_meta(metadata, "Title")
    else:
        print("Please give a video title as argument.")
        sys.exit(1)

    posters = find_best_posters(title, options.number, options.all,
                                imdb_id=options.imdb_id)

    # With an IMDb id the output is a plain list of file:// URLs; otherwise
    # each line also shows the resolution and orientation.
    url_output = options.imdb_id is not None
    for poster in posters:
        if url_output:
            print("file://%s" % poster.file_name)
        else:
            print("%s [%dx%d] vertical: %s " %
                  (poster.file_name, poster.width,
                   poster.height, poster.is_vertical()))


if __name__ == '__main__':
    main()
00279