## This file is part of mlpy.
## FSSun

## Yijun Sun, S. Todorovic, and S. Goodison.
## A Feature Selection Algorithm Capable of Handling Extremely Large
## Data Dimensionality. In Proc. 8th SIAM International Conference on
## Data Mining (SDM08), pp. 530-540, April 2008.
    
## This code is written by Davide Albanese, <albanese@fbk.eu>.
## (C) 2009 Fondazione Bruno Kessler - Via Santa Croce 77, 38100 Trento, ITALY.

## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.

## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.

## You should have received a copy of the GNU General Public License
## along with this program.  If not, see <http://www.gnu.org/licenses/>.

__all__ = ['SigmaErrorFS', 'FSSun']

import numpy as np


class SigmaErrorFS(Exception):
    """Error signalling that the sigma (kernel) parameter is too small.
    """


def norm_w(x, w):
    """Return the weighted L1 norm of x, i.e. sum_i( w[i] * |x[i]| ).
    """

    magnitudes = np.abs(x)
    weighted = w * magnitudes
    return weighted.sum()


def norm(x, n):
    """Return the n-norm of x: (sum_i |x[i]|**n) ** (1/n).
    """

    powered = np.abs(x)**n
    total = np.sum(powered)
    return total**(1.0/n)


def kernel(d, sigma):
    """Exponential kernel: exp(-d / sigma), applied element-wise to d.

    See page 532.
    """

    exponent = -d/sigma
    return np.exp(exponent)


def compute_M_H(y):
    """
    Build, for every sample n, the index lists of its "misses" and "hits".

    M[n] = {i:1<=i<=N, y[i]!=y[n]}  (samples with a different label)
    H[n] = {i:1<=i<=N, y[i]==y[n], i!=n}  (same label, n itself excluded)

    Returns the tuple (M, H), each a list of lists of indices.
    """

    misses, hits = [], []
    for n in range(y.shape[0]):
        label = y[n]
        other_class = np.nonzero(y != label)[0]
        misses.append(other_class.tolist())
        same_class = np.nonzero(y == label)[0]
        # drop the sample's own index from its hit set
        hits.append(same_class[same_class != n].tolist())
    return (misses, hits)
    

def compute_distance_kernel(x, w, sigma):
    """Compute matrix dk[i][j] = f(||x[i] - x[j]||_w).

    The distance is the weighted L1 distance sum_k( w[k] * |x[i][k] - x[j][k]| )
    and f is the exponential kernel exp(-d / sigma) (same form as `kernel`).

    See step 3 in Figure 2 at page 534.

    Parameters:
     x: data [2darray, samples x feats]
     w: feature weights [1darray, one weight per feature]
     sigma: kernel width [float]

    Returns:
     dk: symmetric [samples x samples] 2darray; the diagonal is exp(0) = 1
    """

    n_samples = x.shape[0]
    # The np.float alias was removed in NumPy 1.24; the builtin float
    # selects the same float64 dtype on every NumPy version.
    d = np.zeros((n_samples, n_samples), dtype=float)
    for i in range(n_samples - 1):
        # Weighted L1 distance from sample i to all later samples at once;
        # only the upper triangle is computed, then mirrored.
        row = np.dot(np.abs(x[i + 1:] - x[i]), w)
        d[i, i + 1:] = row
        d[i + 1:, i] = row

    return np.exp(-d / sigma)


def compute_prob(x, dist_k, i, n, indices):
    """
    Return dist_k[n][i] normalised by the sum of dist_k[n] over `indices`.

    See eqs. (2.4), (2.5) at page 532.

    Parameters:
     x: unused, kept for interface compatibility
     dist_k: kernel matrix [2darray]
     i: column index of the numerator entry
     n: row index (reference sample)
     indices: column indices summed to form the denominator

    Raises:
     SigmaErrorFS: if the denominator is exactly 0.0
    """

    den = dist_k[n][indices].sum()
    if den == 0.0:
        # The original raised the undefined name `SigmaError` (a NameError);
        # the exception class defined and exported by this module is SigmaErrorFS.
        raise SigmaErrorFS("sigma (kernel parameter) too small")

    return dist_k[n][i] / den


def fun(z, v, lmbd):
    """Objective: sum_n log(1 + exp(-sum_k v[k]^2 * z[n][k])) + lmbd * ||v||_2^2.

    See eq. (2.8) at page 533.

    Parameters:
     z: z [2darray, samples x feats]
     v: v [1darray, one entry per feature]
     lmbd: regularization parameter [float]

    Returns:
     the objective value [float]
    """

    margins = np.dot(z, v**2)
    # log(1 + exp(-m)) written as logaddexp(0, -m): exp(-m) overflows to inf
    # for large negative margins, whereas logaddexp stays finite.
    loss = np.logaddexp(0.0, -margins).sum()

    # ||v||_2^2 is just v.v; no need to take a square root and square it again
    return loss + (lmbd * np.dot(v, v))
    

def grad_fun(z, v, lmbd):
    """Descent direction term: (lmbd - sum_n s_n * z[n]) * v, where
    s_n = exp(-m_n) / (1 + exp(-m_n)) and m_n = sum_k v[k]^2 * z[n][k].

    See eq. (2.9) at page 533.

    Parameters:
     z: z [2darray, samples x feats]
     v: v [1darray, one entry per feature]
     lmbd: regularization parameter [float]

    Returns:
     1darray with one entry per feature
    """

    margins = np.dot(z, v**2)
    # exp(-m) / (1 + exp(-m)) == 1 / (1 + exp(m)) == exp(-logaddexp(0, m)).
    # The direct form yields inf / inf = NaN once exp(-m) overflows.
    coeff = np.exp(-np.logaddexp(0.0, margins))

    return (lmbd - np.dot(coeff, z)) * v


def update_w(w, z, lmbd, eps, alpha0, c, rho):
    """
    Minimise `fun` over v (with w = v^2) by steepest descent with a
    backtracking line search, starting from v = sqrt(w).

    See eq. 2.8, 2.9 at Page 533.

    Parameters:
     w: v^2 [1darray]
     z: z [2darray]
     lmbd: regularization parameter [float]
     eps: termination tolerance for Steepest Descent [0 < eps << 1]
     alpha0: initial step length [usually 1.0] for line search
     c: constant [0 < c < 1/2] for line search
     rho: alpha coefficient [0 < rho < 1] for line search

    Returns:
     the updated weights v_new^2 [1darray]

    Note: there is no iteration cap; the descent stops only when the
    2-norm of `grad_fun` at the new point is <= eps.

    Steepest Descent Method
    -----------------------

    Wenyu Sun, Ya-xiang Yuan.
    Optimization theory and methods: nonlinear programming. Page 120.

    Backtracking Line Search
    ------------------------

    J. Nocedal, S. J. Wright. Numerical Optimization.
    Page 41, 42 [Procedure 3.1].
    """

    v = np.sqrt(w)

    # Steepest (Gradient) Descent Method

    delta = grad_fun(z, v, lmbd)
    # Objective at the current point; evaluated once here and then carried
    # over from the accepted line-search trial instead of being recomputed.
    f_v = fun(z, v, lmbd)
    while True:

        fa = c * np.inner(-delta, delta)

        # Backtracking Line Search: shrink alpha until the
        # sufficient-decrease condition holds.
        alpha = alpha0
        f_new = fun(z, v - (alpha * delta), lmbd)
        while not f_new <= (f_v + (alpha * fa)):
            alpha *= rho
            f_new = fun(z, v - (alpha * delta), lmbd)

        v_new = v - (alpha * delta)
        delta = grad_fun(z, v_new, lmbd)

        if norm(delta, 2) <= eps:
            break

        # v_new is a fresh array that is never modified in place,
        # so rebinding is enough (no copy needed).
        v, f_v = v_new, f_new

    return v_new**2
    

def _weighted_abs_diff(x, dist_k, n, indices):
    """Kernel-weighted average of |x[n] - x[i]| over i in `indices`.

    Weights are dist_k[n][i] normalised to sum to one over `indices`.
    An empty index set contributes a zero vector.

    Raises:
     SigmaErrorFS: if the kernel values over `indices` sum to exactly 0.0
    """

    if len(indices) == 0:
        return np.zeros(x.shape[1], dtype=float)

    k = dist_k[n][indices]
    # The normaliser is computed once per set rather than once per neighbour.
    den = k.sum()
    if den == 0.0:
        raise SigmaErrorFS("sigma (kernel parameter) too small")

    return np.dot(k / den, np.abs(x[n] - x[indices]))


def compute_w(x, y, w, M, H, sigma, lmbd, eps, alpha0, c, rho):
    """
    Build z from the current weights and return the updated weights.

    For each sample n, z[n] is the kernel-weighted average absolute
    difference to the samples in M[n] minus the same quantity over H[n].
    The result of `update_w` on z is returned.

    See Step 3, 4, 5 and 6 in Figure 2 at page 534.

    Parameters:
     x: data [2darray, samples x feats]
     y: labels (unused here; M and H already encode them)
     w: current feature weights [1darray]
     M, H: per-sample index lists as returned by `compute_M_H`
     sigma: kernel width [float]
     lmbd, eps, alpha0, c, rho: forwarded to `update_w`

    Raises:
     SigmaErrorFS: if a kernel normaliser is exactly 0.0
    """

    # np.float was removed in NumPy 1.24; builtin float is the same dtype.
    z = np.empty((x.shape[0], x.shape[1]), dtype=float)
    dist_k = compute_distance_kernel(x, w, sigma)
    for n in range(x.shape[0]):
        m_n = _weighted_abs_diff(x, dist_k, n, M[n])
        h_n = _weighted_abs_diff(x, dist_k, n, H[n])
        z[n] = m_n - h_n

    return update_w(w, z, lmbd, eps, alpha0, c, rho)


def compute_fssun(x, y, T, sigma, theta, lmbd, eps, alpha0, c, rho):
    """
    Iterate the weight update until the weights stop moving or T loops ran.

    Starts from all-ones weights and repeatedly calls `compute_w`; stops
    early once the 2-norm of the change in weights falls below theta.

    Figure 2 at page 534.

    Returns the tuple (w, loops): the last computed weights and the number
    of loops performed.
    """

    previous = np.ones(x.shape[1])
    M, H = compute_M_H(y)

    for t in range(T):
        w = compute_w(x, y, previous, M, H, sigma, lmbd, eps, alpha0, c, rho)
        change = norm(w - previous, 2)
        if change < theta:
            break
        previous = w

    loops = t + 1
    return (w, loops)


class FSSun:
    """Sun Algorithm for feature weighting/selection
    """

    def __init__(self, T=1000, sigma=1.0, theta=0.001, lmbd=1.0, eps=0.001, alpha0=1.0, c=0.01, rho=0.5):
        """
        Initialize the FSSun class

        :Parameters:
          T : int (> 0)
            max loops
          sigma : float (> 0.0)
            kernel width
          theta : float (> 0.0)
            convergence parameter
          lmbd : float
            regularization parameter
          eps : float (0 < eps << 1)
            termination tolerance for steepest descent method
          alpha0 : float (> 0.0)
            initial step length (usually 1.0) for line search
          c : float (0 < c < 1/2)
            constant for line search
          rho : float (0 < rho < 1)
            alpha coefficient for line search

        :Raises:
          ValueError
            if T, sigma, theta, eps, alpha0 or rho is outside its range
        """

        if T <= 0:
            raise ValueError("T (max loops) must be > 0")
        if sigma <= 0.0:
            raise ValueError("sigma (kernel parameter) must be > 0.0")
        if theta <= 0.0:
            raise ValueError("theta (convergence parameter) must be > 0.0")
        # The checks below reject values for which the steepest descent /
        # backtracking loops have no way to terminate or to shrink the step.
        if eps <= 0.0:
            raise ValueError("eps (termination tolerance) must be > 0.0")
        if alpha0 <= 0.0:
            raise ValueError("alpha0 (initial step length) must be > 0.0")
        if not 0.0 < rho < 1.0:
            raise ValueError("rho (alpha coefficient) must be in (0, 1)")

        self.__T = T
        self.__sigma = sigma
        self.__theta = theta
        self.__lmbd = lmbd
        self.__eps = eps
        self.__alpha0 = alpha0
        self.__c = c
        self.__rho = rho

        # number of loops performed by the last call to weights()
        self.loops = None

    def weights(self, x, y):
        """
        Compute the feature weights

        :Parameters:
          x : 2d ndarray float (samples x feats)
            training data
          y : 1d ndarray integer
            classes (exactly two distinct values)

        :Returns:
          fw : 1d ndarray float
            feature weights

        :Attributes:
          FSSun.loops : int
            number of loops

        :Raises:
          ValueError
            if x is not 2d, if y is not 1d with one label per sample,
            or if y does not contain exactly two distinct classes
          SigmaErrorFS
            the module's error type for a sigma parameter that is too small
        """

        # Accept array-likes (lists, tuples): the computation below relies
        # on ndarray attributes such as .shape.
        x = np.asarray(x)
        y = np.asarray(y)

        if x.ndim != 2:
            raise ValueError("x must be a 2d array (samples x feats)")
        if y.ndim != 1 or y.shape[0] != x.shape[0]:
            raise ValueError("y must be a 1d array with one class label per sample of x")
        if np.unique(y).shape[0] != 2:
            raise ValueError("FSSun algorithm works only for two-classes problems")

        w, self.loops = compute_fssun(x, y, self.__T, self.__sigma, self.__theta, self.__lmbd,
                                      self.__eps, self.__alpha0, self.__c, self.__rho)
        return w


