
##########################################################################
#                                                                        #
#  copyright:          (c) 2003 by Konrad Wojas <wojas@vvtp.tudelft.nl>  #
#                                                                        #
#  This program is free software; you can redistribute it and/or modify  #
#  it under the terms of the GNU General Public License as published by  #
#  the Free Software Foundation; either version 2 of the License, or     #
#  (at your option) any later version.                                   #
#                                                                        #
##########################################################################

"""
IMDb is a Python module for accessing the 
Internet Movie Database (http://www.imdb.com/)

Example:
 
  from IMDb.imdb import IMDb
  imdb = IMDb()
  results = imdb.search('some movie')
  for movie in results:
      print movie.title(), movie.year(), movie.rating()

"""

import urllib
import urllib2
from imdbparsers import *
from imdbcache import IMDbCache
import types

VERSION = (0,1,2)
VERSION_STRING = "0.1.2"

class _SingleMovieException(Exception):

	def __init__(self,url):
		self.url = str(url)
	
	def __str__(self):
		return self.url

class _RedirectHandler(urllib2.HTTPRedirectHandler):

	def http_error_302(self, req, fp, code, msg, hdrs):
		loc = hdrs.get("Location",None)
		raise _SingleMovieException, loc

# ---------------------------------------------------------------------

class IMDb:
	"""
	Main IMDb search class

	Sends title searches to imdb.com and wraps every hit in a Movie
	instance. Queries are looked up in, and stored to, an IMDbCache.
	"""
	
	def __init__(self,id=None,cachefile=None):
		"""
		id        : value of 'id' cookie (not implemented yet)
		cachefile : path to cache file, if any
		"""
		self.id = id
		self.cache = IMDbCache(cachefile)
	
	def _movies(self,rows):
		"""
		Turn result rows into Movie instances.
		rows  : sequence of rows indexed as (title, year, url)
		Result: list of Movie instances sharing this object's cache
		"""
		return [ Movie( url=row[2], title=row[0], year=row[1],
		                cache=self.cache ) for row in rows ]
	
	def search(self,query):
		"""
		Search for movies matching 'query'
		query : search query
		Result: list of Movie instances
		Note  : only the 'most popular title searches' are returned 

		A cached answer for 'query' is used when available. If the site
		answers with a redirect, the redirect target is returned as the
		single result (title and year unknown until details are fetched).
		"""
		if self.cache:
			cached = self.cache.getQuery(query)
			# An empty result list is a valid cached answer, so test
			# against None rather than truthiness.
			if cached is not None:
				return self._movies(cached)
		requrl = "http://imdb.com/Find?%s" % urllib.quote_plus(query)
		# Built outside the try block so the cleanup below can never
		# reference an unbound name.
		opener = urllib2.build_opener(_RedirectHandler)
		try:
			try:
				page = opener.open(requrl)
			except _SingleMovieException as e:
				# Redirected: only the URL of the match is known here.
				rows = [(None,None,str(e))]
			else:
				try:
					resp = page.read()
				finally:
					# Release the HTTP response even if reading fails.
					page.close()
				parser = IMDbSearchParser()
				parser.feed(resp)
				parser.close()
				rows = parser.getResults()
		finally:
			# Retained from the original code; documented by urllib2 as
			# existing only for backwards compatibility.
			opener.close()
		if self.cache:
			self.cache.addQuery(query,rows)
		return self._movies(rows)
		
# ---------------------------------------------------------------------

class Movie:
	"""
	This class contains the movie details like the
	title, year, genres and rating.

	Details that were not passed to the constructor or found in the
	cache are downloaded from 'url' the first time they are needed.

	Accessible attributes:
	- url    [read-only]
	- cached [read-only] 1 if the details came from the cache when the
	                     instance was created, otherwise 0
	"""

	def __init__(self,url,title=None,year=None,cache=None):
		"""
		Only 'url' is required, this is the 
		http://us.imdb.com/Title?nnnnnnnn URL at IMDb

		title, year : already known values, if any
		cache       : object providing getDetails(url) and
		              addDetails(url, data), or None for no caching
		"""
		self.url = url
		self.cached = 0
		self._cache = cache
		if cache:
			c = cache.getDetails(url)
			if c:
				# Cache hit: the stored details replace title/year.
				self._data = c
				self.cached = 1
				return
		self._data = {
			"title" : title,
			"year" : year,
			"_cached" : 0
		}

	def _get_details(self):
		"""
		Download and parse the details page at 'self.url', replace the
		current details with the parsed data and store them in the
		cache, if there is one.
		"""
		# Objects are created outside the try blocks so that a failing
		# constructor is reported as such, instead of being masked by a
		# NameError from the cleanup code.
		opener = urllib2.build_opener()
		try:
			page = opener.open(self.url)
			try:
				resp = page.read()
			finally:
				# Release the HTTP response even if reading fails.
				page.close()
		finally:
			opener.close()
		parser = IMDbDetailsParser()
		try:
			parser.feed(resp)
		finally:
			parser.close()
		details = parser.data
		# Mark as complete so _check_cache does not download again.
		details["_cached"] = 1
		self._data = details
		if self._cache:
			self._cache.addDetails(self.url,details)

	def _check_cache(self):
		"""
		Fetch the full details unless they are already present.
		"""
		# .get(): details missing the '_cached' flag count as incomplete.
		if not self._data.get("_cached"):
			self._get_details()

	def year(self): 
		"""
		Return the movie year as a string
		(None if it is not part of the available details)
		"""
		if not self._data.get("year"): self._check_cache()
		return self._data.get("year")
	
	def title(self): 
		"""
		Return the movie title as a string
		(None if it is not part of the available details)
		"""
		if not self._data.get("title"): self._check_cache()
		return self._data.get("title")
	
	def rating(self): 
		"""
		Return the movie rating as a string
		Example: "7.9"
		An empty string is returned when no rating is available.
		"""
		self._check_cache()
		return self._data.get("rating","")

	def genres(self): 
		"""
		Return the movie genres as a list
		Example: [ "Drama", "Action" ]
		An empty list is returned when no genres are available.
		"""
		self._check_cache()
		return self._data.get("genres",[])

	def ratingsUrl(self):
		"""
		Return the movie ratings URL as a string
		(the movie URL with 'Title' replaced by 'Ratings')
		"""
		return self.url.replace('Title','Ratings')

# ---------------------------------------------------------------------

if __name__=='__main__':
	import sys
	cachefile = "/tmp/imdbcache"
	imdb = IMDb(cachefile=cachefile)
	qq = sys.argv[1:]
	for q in qq:
		print "Query '%s'" % q 
		res = imdb.search(q)
		for r in res:
			print "\t" + r.title(), "-", r.year()
			print "\t\t" + r.url, r.ratingsUrl()
			print "\t\t" + r.rating(), r.genres()



