#! /usr/bin/env python
#
# FIXME: strip "TryExec" from the extracted menu files (and noDisplay)
#        
# TODO:
# - emacs21 ships its icon in emacs-data, deal with this
# - some stuff needs to be blacklisted (e.g. gnome-about)
# - lots of packages have their desktop file in "-data", "-common" (e.g. anjuta)
# - lots of packages have multiple desktop files for the same application
#   (e.g. abiword, abiword-gnome, abiword-gtk)

import os
import tarfile
import sys
import apt
import apt_pkg
import apt_inst
#import xdg.Menu
import os.path
import re
import tempfile
import subprocess
import string
import shutil
import urllib
import logging


# these globals here suck
tmpdir =  tempfile.mkdtemp()
blacklist = "blacklist.cfg"
blacklist_desktop = "blacklist_desktop.cfg"
renamecfg = "rename.cfg"

# packages we have already seen 
pkgs_seen = set()

# pkgs that shouldn't be written (e.g. gnome-about)
pkgs_blacklisted = set()

# desktop files that are not wanted
desktop_blacklist = set()

# a mapping to transform certain detections to a new name
# (e.g.abiword-plugins -> abiword-gnome)
pkgs_transform = {}


# a dictionary with "$arch" -> set() of packages mapping
# this makes it easy to analyse what packages are only
# available in certain arches
pkgs_per_arch = {}


def getMemberFromAr(arFile, member):
  """ helper to extract the files """
  dataPath = tmpdir
  olddir = os.getcwd()
  os.chdir(dataPath)
  command = ["ar","x",arFile, member]
  subprocess.call(command)
  memberpath = dataPath+"/"+member
  os.chdir(olddir)
  if os.path.exists(memberpath):
    return memberpath
  else:
    return None

def getFiles(dataPath, pkgname, section, outputdir):
  """ actually extract the files from a data.tar.gz of a package """
  try:
    os.makedirs(os.path.join(outputdir, "icons"))
  except OSError:
    pass

  try:
    dataFile = tarfile.open(dataPath)
  except:
    logging.error("Couldn't open tarball. Package: '%s'" % dataPath)
    return

  desktopPaths = []
  iconPaths = []
  # get the available desktop files
  try:
    for filename in dataFile.getnames():
      if filename.endswith(".desktop") and \
         not os.path.basename(filename) in desktop_blacklist:
        desktopPaths.append(filename)
      if "usr/share/pixmaps" in filename:
        iconPaths.append(filename)
  except:
    logging.error("Choked on tarball. Package: '%s'" % package)
    return # too many bad debs. take this line out if you fix that

  for path in [x for x in desktopPaths if ("usr/share/applications" in x) or
                                          ("usr/share/gnome/apps" in x)
               ]:
    try:
      desktopfile = dataFile.extractfile(path)
    except KeyError, e:
      logging.error("Failed to extract '%s' from '%s' (%s)" % (path,
                                                               dataFile.name,
                                                               e))
      continue
    desktop = os.path.join(outputdir, os.path.split(path)[-1])

    # extract the icon
    iconName = None
    newIconName = None
    for line in desktopfile.readlines():
      line = string.strip(line)
      if line.startswith("Icon="):
        iconName = line[line.index("=")+1:]
        logging.debug("Package '%s' needs icon '%s'" % (pkgname, iconName))

    if iconName != None:
      if os.path.split(iconName)[0] != '':
        newIconName = iconName.replace("/", "_")
        try:
          if iconName.startswith('/'):
            extractName = iconName[1:]
          else:
            extractName = "usr/share/pixmaps/" + iconName
          iconFile = dataFile.extractfile(extractName)
          outicon = open(os.path.join(outputdir, "icons", iconName.replace("/", "_")), "w")
          outicon.write(iconFile.read())
          outicon.close()
          iconFile.close()
          logging.debug("wrote iconfile '%s' for '%s' " % (os.path.basename(outicon.name), pkgname))
        except Exception, e:
          logging.error("ERROR: Icon '%s' for '%s' could not be obtained. (%s)" % (iconName,pkgname,e))
      # the trouble here is that it extracts extraceName*
      # this gives loads of stupid stuff for e.g. kwin-16.png where
      # we only want kwin. we change the extract name to add a "."
      # so we get effectively "$extractname.*"
      if not "." in iconName:
        iconName += "."
      for iconPath in [ipath for ipath in iconPaths if (iconName in ipath) or
                                                       (not "usr/share/pixmaps" in ipath)
                       ]:
        try:
          iconFile = dataFile.extractfile(iconPath)
          outicon = open(os.path.join(outputdir, "icons", os.path.split(iconPath)[-1]), "w")
          outicon.write(iconFile.read())
          outicon.close()
          iconFile.close()
          logging.debug("wrote iconfile '%s' for '%s' (from iconpath)" % (os.path.basename(outicon.name), pkgname))
        except Exception,e:
          logging.error("Icon for '%s' could not be obtained (%s)" % (pkgname,e))

    # now check for supicious pkgnames (FIXME: make this not hardcoded)
    if "-common" in pkgname or "-data" in pkgname:
      logging.warning("'%s' looks wrong, trying to correct" % pkgname)
      pkg = cache[pkgname]
      parentpkg = pkgname[0:pkgname.rindex("-")]
      if cache.has_key(parentpkg):
        logging.warning("Corrected to '%s'" % parentpkg)
        pkgname = parentpkg

    # now write out the file
    logging.debug("Wrote desktop file '%s' for arch '%s'" % (desktopfile.name,
                                                             arch))
    outfile = open(desktop, "w")
    desktopfile.seek(0)
    for line in desktopfile.readlines():
      if newIconName != None and line.startswith("Icon="):
        line = "Icon=%s\n" % newIconName
      if not line.endswith("\n"):
        line += "\n"
      outfile.write(line)
    outfile.write("X-AppInstall-Package=%s\n" % pkgname)
    outfile.write("X-AppInstall-Section=%s\n" % section)
    outfile.close()

    # close the desktop file
    desktopfile.close()


def processDeb(debPath, pkgname, section,
               outputdir=os.path.join(os.getcwd(), "menu-data")):
  """ extract the desktop file and the icons from a deb """
  logging.debug("processing: %s" % debPath)
  datafile = getMemberFromAr(debPath, "data.tar.gz")
  if datafile == None:
    logging.error("error geting data.tar.gz from %s" % debPath)
    return
  getFiles(datafile, pkgname, section, outputdir)
  os.remove(datafile)

def inspectDeb(filename):
  """ check if the deb is interessting for us (our arch etc) """
  logging.debug("inspectDeb '%s'"% filename)
  m = re.match(".*/(.*)_(.*)_(.*).deb", filename)
  pkgname = m.group(1)
  ver = m.group(2)
  # fix the quoting
  ver = urllib.unquote(ver)
  pkgarch = m.group(3)
  
  # certain pkgs are blacklisted
  if pkgname in pkgs_blacklisted:
    logging.warning("skipping blacklisted pkg: '%s'" % pkgname)
    return
  if pkgs_transform.has_key(pkgname):
    logging.warning("transforming '%s' to '%s'" % (pkgname, pkgs_transform[pkgname]))
    pkgname = pkgs_transform[pkgname]

  # not for our arch
  if pkgarch != "all" and arch != pkgarch:
    logging.debug("Skipping because of not-for-us arch '%s'" % pkgarch)
    return
    
  # check if the deb is in the current distro at all
  candVer = "xxx"
  if cache.has_key(pkgname):
    candVer = cache[pkgname].candidateVersion
    # strip the epoch
    if candVer and ":" in candVer:
      candVer = candVer.split(":")[1]
  if candVer != ver:
    logging.debug("Skipping because '%s' it's not in our distro release"%pkgname)
    return

  # add it to the arch-table now, even if it might be known already
  pkgs_per_arch[pkgarch].add(pkgname)
  # we have seen this package already (probably for a different arch)    
  if pkgname in pkgs_seen:
    logging.debug("Skipping because we have it in pkgs_seen")
    return
  
  # valid deb
  section = cache[pkgname].section
  if not "/" in section:
    component = "main"
  else:
    component = section[0:section.find("/")]
  #print "%s in: %s" % (filename, component)
  logging.debug("Found interessting deb '%s' in section '%s'" % (filename, component))

  # found somethat worth looking at
  processDeb(filename, pkgname, component)
  pkgs_seen.add(pkgname)

def dir_walk(cache, dirname, names):
  #print "Looking at: %s" % dirname
  logging.debug("Entering dir: '%s' " % dirname)
  for filename in names:
    if filename.endswith(".deb"):
      inspectDeb(dirname+"/"+filename)



if __name__ == "__main__":

  logging.basicConfig(level=logging.DEBUG,
                      filename="menu-data-extract.log",
                      format='%(asctime)s %(levelname)s %(message)s',
                      filemode='w')

  try:
    pooldir = sys.argv[1]
  except:
    print "Usage: getMenuData.py pooldir"
    sys.exit()

  # run this once for each arch, it will skip packages already seen
  pkgs_per_arch["all"] = set()
    
  if os.path.exists(blacklist):
    logging.info("using blacklist: '%s'" % blacklist)
    for line in open(blacklist).readlines():
      line = line.strip()
      if line != "" and not line.startswith("#"):
        logging.debug("blacklisting: '%s'" % line.strip())
        pkgs_blacklisted.add(line.strip())

  if os.path.exists(blacklist_desktop):
    logging.info("using blacklist desktop: '%s'" % blacklist_desktop)
    for line in open(blacklist_desktop).readlines():
      line = line.strip()
      if line != "" and not line.startswith("#"):
        logging.debug("blacklisting (desktop file): '%s'" % line.strip())
        desktop_blacklist.add(line.strip())

  if os.path.exists(renamecfg):
    logging.info("using rename: '%s'" % renamecfg)
    for line in open(renamecfg).readlines():
      line = line.strip()
      if line != "" and not line.startswith("#"):
        (oldname,newname) = string.split(string.strip(line))
        logging.debug("renaming: %s -> %s" % (oldname,newname))
        pkgs_transform[oldname] = newname
    
  for arch in ["i386","amd64", "powerpc"]:

    # fake a $arch machine
    # FIXME: we want arch-independent stuff in the long run
    #        e.g. for Mac-On-Linux (MoL)
    apt_pkg.Config.Set("APT::Architecture",arch)
    apt_pkg.Config.Set("Dir::state","./apt/")
    apt_pkg.Config.Set("Dir::Etc","./apt")

    # init the set for the given arch
    pkgs_per_arch[arch] = set()

    try:
      os.makedirs("apt/lists/partial")
    except OSError:
      pass

    logging.info("Starting extraction in %s for %s" % (pooldir,arch))
    cache = apt.Cache(apt.progress.OpTextProgress())
    try:
      prog = apt.progress.TextFetchProgress() 
    except:
      prog = apt.progress.FetchProgress()

    # update the cache
    cache.update(prog)
    cache.open(apt.progress.OpTextProgress())


    # now do the postmans walk!
    os.path.walk(pooldir, dir_walk, cache)

  # now analyze the result
  #logging.debug(pkgs_per_arch)
  only_in_i386 = pkgs_per_arch["i386"] - (pkgs_per_arch["powerpc"]|pkgs_per_arch["amd64"])
  logging.debug("only on i386: %s" % only_in_i386)
  only_in_amd64 = pkgs_per_arch["amd64"] - (pkgs_per_arch["powerpc"]|pkgs_per_arch["i386"])
  logging.debug("only on amd64: %s" % only_in_amd64)
  only_in_powerpc = pkgs_per_arch["powerpc"] - (pkgs_per_arch["amd64"]|pkgs_per_arch["i386"])
  logging.debug("only on powerpc: %s" % only_in_powerpc)
    
  # be nice, clean-up
  if tmpdir != None and tmpdir != "":
    shutil.rmtree(tmpdir)
