

//////////////////////////////////////////////////////////////////
//                                                              //
//           PLINK (c) 2005-2007 Shaun Purcell                  //
//                                                              //
// This file is distributed under the GNU General Public        //
// License, Version 2.  Please see the file COPYING for more    //
// details                                                      //
//                                                              //
//////////////////////////////////////////////////////////////////


#include <iostream>
#include <iomanip>
#include <fstream>
#include <sstream>
#include <cmath>
#include <vector>
#include <map>

#include "plink.h"
#include "options.h"
#include "helper.h"

extern ofstream LOG;
using namespace std;

void Plink::scoreIndividuals()
{
  
  printLOG("Reading set of predictors from [ " + par::score_risk_file + " ]\n");
  
  checkFileExists(par::score_risk_file);
  ifstream PROFIN;
  PROFIN.open( par::score_risk_file.c_str(), ios::in );
  
  map<string,int> mlocus;
  for(int l=0; l<nl_all;l++)
    mlocus.insert(make_pair(locus[l]->name,l));


  map<int,double> scores;
  map<int,bool> allele1;

  int cnt1 = 0, cnt2 = 0;

  while ( ! PROFIN.eof() )
    {
      
      // Format assumed: SNP allele score
      
      string snp, allele, sscore;
      double score;
      
      PROFIN >> snp >> allele >> sscore;

      if ( sscore=="" )
	continue;

      if ( ! from_string<double>( score, sscore , std::dec))
	error("Problem reading predictors: non-numeric value found for {SNP} {allele} {value}:\n{"+snp+"} {"+allele+"} {"+sscore+"}");
      
      ++cnt1;

      map<string,int>::iterator ilocus = mlocus.find(snp);
      
      // SNP not found
      if ( ilocus == mlocus.end() )
	continue;

      int l = ilocus->second;

      // Allele not found
      if ( allele == locus[l]->allele1 )
	{
	  scores.insert(make_pair(l,score));
	  allele1.insert(make_pair(l,false));
	  ++cnt2;
	}
      else if ( allele == locus[l]->allele2 )
	{
	  scores.insert(make_pair(l,score));
	  allele1.insert(make_pair(l,true));
	  ++cnt2;
	}

    }

  PROFIN.close();

  printLOG("Read " + int2str(cnt1) + " predictors, of which " + int2str(cnt2) + " mapped to present SNPs\n");

  // Now score risk for each individual

  printLOG("Writing profiles to [ " + par::output_file_name + ".profile ]");;


  ///////////////////////////////
  // Calculate

  vector_t profile;
  vector<int> cnt;
  
  calculateProfile(scores,allele1,profile,cnt);
  

  ///////////////////////////////
  // Report 

  ofstream PROFOUT;
  string f = par::output_file_name + ".profile";
  PROFOUT.open( f.c_str(), ios::out );

  PROFOUT << setw(par::pp_maxfid) << "FID" << " " 
	  << setw(par::pp_maxiid) << "IID" << " "
	  << setw(6) << "PHENO" << " " 
	  << setw(6) << "CNT" << " "
	  << setw(8) << "SCORE" << "\n";
  
  for ( int i=0; i<n; i++ )
    {
      Individual * person = sample[i];

      PROFOUT << setw(par::pp_maxfid) << person->fid << " " 
	      << setw(par::pp_maxiid) << person->iid << " "
	      << setw(6) << person->phenotype << " " 
	      << setw(6) << cnt[i] << " "
	      << setw(8) << profile[i] << "\n";
    }
  
  PROFOUT.close();

}


// Generate a vector of scores, one for each individual, given the
// scoring set (and allele direction) for a set of SNPs
//
//   scores  : locus index -> weight added for each scored allele carried
//   allele1 : locus index -> allele direction; when true a genotype bit
//             that is set counts as a scored allele, when false a bit
//             that is clear does.  Expected to hold the same keys as
//             'scores', as the two maps are walked in step.
//   profile : (output) resized to n; the individual's summed weight
//             divided by the number of alleles counted (left as 0 if
//             no alleles were counted)
//   count   : (output) resized to n; number of alleles counted, i.e.
//             1 per single-copy genotype and 2 per diploid genotype,
//             skipping missing genotypes

void Plink::calculateProfile(map<int,double> & scores,
                             map<int,bool> & allele1,
                             vector_t & profile,
                             vector<int> & count)
{

  profile.resize(n);
  count.resize(n);

  for (int i=0; i<n; i++)
    {

      Individual * person = sample[i];

      double score = 0;
      int cnt = 0;

      map<int,double>::iterator i1 = scores.begin();
      map<int,bool>::iterator i2 = allele1.begin();

      // Walk both maps together; also stop if 'allele1' is exhausted
      // first so that i2 is never dereferenced past the end
      for ( ; i1 != scores.end() && i2 != allele1.end() ; ++i1, ++i2 )
        {

          int l = i1->first;
          double weight = i1->second;
          bool a1 = i2->second;

          bool s1 = par::SNP_major ? SNP[l]->one[i] : person->one[l];
          bool s2 = par::SNP_major ? SNP[l]->two[i] : person->two[l];

          // Individual is missing this genotype
          if ( s1 && ! s2 )
            continue;


          // Currently, just an allelic scoring: we could extend this
          // to genotypes, dominant/recessive models,

          // The haploid / sex-chromosome tables are looked up by the
          // chromosome code of this SNP rather than by its locus index
          // (the latter picks the wrong entry and can read out of bounds).
          // NOTE(review): assumes par::chr_haploid and par::chr_sex are
          // per-chromosome tables and that Locus exposes 'chr' -- confirm
          // against options.h / plink.h

          int chr = locus[l]->chr;

          if ( par::chr_haploid[chr] ||
               ( par::chr_sex[chr] && person->sex ) )
            {
              // A single copy: only the first genotype bit is examined
              if ( s1 == a1 )
                score += weight;

              ++cnt;
            }
          else // .. autosomal
            {
              // Two copies: each genotype bit matching the direction
              // contributes one scored allele
              if ( s1 == a1 )
                score += weight;
              if ( s2 == a1 )
                score += weight;

              cnt += 2;
            }

        }


      // Get average per seen allele

      if ( cnt>0 )
        score /= (double)cnt;

      // Save for this individual

      profile[i] = score;
      count[i] = cnt;

    }


  return;
}
