// $Id: SmilesWrite.cpp 2004 2012-03-26 03:12:37Z glandrum $
//
//  Copyright (C) 2002-2010 Greg Landrum and Rational Discovery LLC
//
//   @@ All Rights Reserved @@
//  This file is part of the RDKit.
//  The contents are covered by the terms of the BSD license
//  which is included in the file license.txt, found at the root
//  of the RDKit source tree.
//
#include "SmilesWrite.h"
#include <GraphMol/RDKitBase.h>
#include <RDGeneral/types.h>
#include <GraphMol/Canon.h>
#include <boost/lexical_cast.hpp>
#include <boost/foreach.hpp>
#include <sstream>
#include <map>
#include <list>

namespace RDKit{

  namespace SmilesWrite{
    // Atomic numbers of the SMILES "organic subset" (B, C, N, O, F, P, S, Cl,
    // Br, I), sorted in ascending order and terminated by a -1 sentinel.
    const int atomicSmiles[] = {5,6,7,8,9,15,16,17,35,53,-1};

    // Returns true if atomicNumber is one of the entries of atomicSmiles.
    bool inOrganicSubset(int atomicNumber){
      // The table is sorted, so walk forward until we hit either the sentinel
      // or the first entry that is not smaller than the query; the query is in
      // the table only if that entry matches it.
      const int *entry=atomicSmiles;
      for(;*entry!=-1 && *entry<atomicNumber;++entry){
      }
      return *entry==atomicNumber;
    }


    std::string GetAtomSmiles(const Atom *atom,bool doKekule,const Bond *bondIn){
      PRECONDITION(atom,"bad atom");
      INT_VECT atomicSmilesVect(atomicSmiles,
                                atomicSmiles+(sizeof(atomicSmiles)-1)/sizeof(atomicSmiles[0]));
      std::stringstream res;
      int fc = atom->getFormalCharge();
      int num = atom->getAtomicNum();
      double massDiff=fabs(PeriodicTable::getTable()->getAtomicWeight(num) -
                           atom->getMass());
      static double massTol=0.001;

      bool needsBracket=false;
      std::string symb;
      symb = atom->getSymbol();
      if(inOrganicSubset(num)){
        // it's a member of the organic subset
        //if(!doKekule && atom->getIsAromatic() && symb[0] < 'a') symb[0] -= ('A'-'a');

        // -----
        // figure out if we need to put a bracket around the atom,
        // the conditions for this are:
        //   - formal charge specified
        //   - the atom has a nonstandard valence
        //   - chirality present and writing isomeric smiles
        //   - non-default isotope and writing isomeric smiles
        //   - atom-map information present
        const INT_VECT &defaultVs=PeriodicTable::getTable()->getValenceList(num);
        int totalValence= atom->getExplicitValence()+atom->getImplicitValence();
        bool nonStandard;
        nonStandard = std::find(defaultVs.begin(),defaultVs.end(),
                                totalValence)==defaultVs.end();
        // another type of "nonstandard" valence is an aromatic N with
        // explicit Hs indicated:
        if(num==7 && atom->getIsAromatic() && atom->getNumExplicitHs()){
          nonStandard=true;
        }

        if(atom->getNumRadicalElectrons()){
          nonStandard=true;
        }

        if(fc || nonStandard){
          needsBracket=true;
        }
        if(atom->getOwningMol().hasProp("_doIsoSmiles")){
          if( atom->getChiralTag()!=Atom::CHI_UNSPECIFIED ){
            needsBracket = true;
          } else if(massDiff>massTol){
            needsBracket=true;
          }
        }
        if(atom->hasProp("molAtomMapNumber")){
          needsBracket=true;
        }
      } else {
        needsBracket = true;
      }
      if( needsBracket ) res << "[";

      if(massDiff>massTol && atom->getOwningMol().hasProp("_doIsoSmiles")){
        int iMass=static_cast<int>(atom->getMass()+.1);
        res <<iMass;
      }
      // this was originally only done for the organic subset,
      // applying it to other atom-types is a fix for Issue 3152751: 
      if(!doKekule && atom->getIsAromatic() && symb[0]>='A' && symb[0] <= 'Z'){
        symb[0] -= ('A'-'a');
      }
      res << symb;

      bool chiralityIncluded=false;
      if(atom->getOwningMol().hasProp("_doIsoSmiles") &&
         atom->getChiralTag()!=Atom::CHI_UNSPECIFIED ){
        INT_LIST trueOrder;
        atom->getProp("_TraversalBondIndexOrder",trueOrder);
#ifdef VERBOSE_CANON
        std::cout << "\tatom: " << atom->getIdx() << " | ";
        std::copy(trueOrder.begin(),trueOrder.end(),
                  std::ostream_iterator<int>(std::cout,", "));
        std::cout << std::endl;
        std::cout << "\t ---- | " ;
        ROMol::OEDGE_ITER beg,end;
        boost::tie(beg,end) = atom->getOwningMol().getAtomBonds(atom);
        ROMol::GRAPH_MOL_BOND_PMAP::type pMap = atom->getOwningMol().getBondPMap();
        while(beg!=end){
          std::cout <<pMap[*beg]->getIdx()<<", ";
          ++beg;
        }
        std::cout << std::endl;
#endif    
        int nSwaps;
#if 0
        if( !atom->hasProp("_CIPCode") && atom->hasProp("_CIPRank") ) {
          // this is a special case where the atom has stereochem indicated
          // but isn't a chiral center. This can happen in ring stereochem
          // situations. Instead of using the bond indices to collect
          // perturbation order (as is normal), we use the priorities of the
          // atoms at the end of the bonds
          INT_LIST ref;
          ROMol::OEDGE_ITER beg,end;
          boost::tie(beg,end) = atom->getOwningMol().getAtomBonds(atom);
          ROMol::GRAPH_MOL_BOND_PMAP::type pMap = atom->getOwningMol().getBondPMap();
          while(beg!=end){
            const Atom *endAtom=pMap[*beg]->getOtherAtom(atom);
            int cipRank=0;
            if(endAtom->hasProp("_CIPRank")){
              endAtom->getProp("_CIPRank",cipRank);
            }
            ref.push_back(cipRank);
            ++beg;
          }
          BOOST_FOREACH(int &oIdx,trueOrder){
            const Atom *endAtom=atom->getOwningMol().getBondWithIdx(oIdx)->getOtherAtom(atom);
            int cipRank=0;
            if(endAtom->hasProp("_CIPRank")){
              endAtom->getProp("_CIPRank",cipRank);
            }
            oIdx=cipRank;
          }
#if 0
          BOOST_LOG(rdErrorLog)<<" ****"<<std::endl;
          std::copy(ref.begin(),ref.end(),std::ostream_iterator<int>(std::cerr," "));
          std::cerr<<std::endl;
          std::copy(trueOrder.begin(),trueOrder.end(),std::ostream_iterator<int>(std::cerr," "));
          std::cerr<<std::endl;
          BOOST_LOG(rdErrorLog)<<" ****"<<std::endl;
#endif
          nSwaps=static_cast<int>(countSwapsToInterconvert(ref,trueOrder));
        } else {
          nSwaps =  atom->getPerturbationOrder(trueOrder);
        }
#else
        if( !atom->hasProp("_CIPCode") && atom->hasProp("_CIPRank") &&
            !atom->getOwningMol().hasProp("_ringSteroWarning") ){
          BOOST_LOG(rdWarningLog)<<"Warning: ring stereochemistry detected. The output SMILES is not canonical."<<std::endl;
          atom->getOwningMol().setProp("_ringStereoWarning",true,true);
        }
        nSwaps =  atom->getPerturbationOrder(trueOrder);
#endif
        if(atom->getDegree()==3 && !bondIn){
          // This is a special case. Here's an example:
          //   Our internal representation of a chiral center is equivalent to:
          //     [C@](F)(O)(C)[H]
          //   we'll be dumping it without the H, which entails a reordering:
          //     [C@@H](F)(O)C
          ++nSwaps;
        }
        //BOOST_LOG(rdErrorLog)<<">>>> "<<atom->getIdx()<<" "<<nSwaps<<" "<<atom->getChiralTag()<<std::endl;
        std::string atStr="";
        switch(atom->getChiralTag()){
        case Atom::CHI_TETRAHEDRAL_CW:
          if(!(nSwaps%2))
            atStr = "@@";
          else
            atStr = "@";
          chiralityIncluded=true;
          break;
        case Atom::CHI_TETRAHEDRAL_CCW:
          if(!(nSwaps%2))
            atStr = "@";
          else
            atStr = "@@";
          chiralityIncluded=true;
          break;
        default:
          break;
        }
        res << atStr;
      }

      if(needsBracket){
        unsigned int totNumHs=atom->getTotalNumHs();
        if(totNumHs > 0){
          res << "H";
          if(totNumHs > 1) res << totNumHs;
        }
        if(fc > 0){
          res << "+";
          if(fc > 1) res << fc;
        } else if(fc < 0) {
          res << "-";
          if(fc < -1) res << -fc;
        }
    
        if(atom->hasProp("molAtomMapNumber")){
          int mapNum;
          atom->getProp("molAtomMapNumber",mapNum);
          res<<":"<<mapNum;
        }
        res << "]";
      }

      // If the atom has this property, the contained string will
      // be inserted directly in the SMILES:
      if(atom->hasProp("_supplementalSmilesLabel")){
        std::string label;
        atom->getProp("_supplementalSmilesLabel",label);
        res << label;
      }

      return res.str();
    }

    std::string GetBondSmiles(const Bond *bond,int atomToLeftIdx,bool doKekule,bool allBondsExplicit){
      PRECONDITION(bond,"bad bond");
      if(atomToLeftIdx<0) atomToLeftIdx=bond->getBeginAtomIdx();

      std::stringstream res;
      bool aromatic=false;
      if( !doKekule &&
          (bond->getBondType() == Bond::SINGLE ||
           bond->getBondType() == Bond::DOUBLE ||
           bond->getBondType() == Bond::AROMATIC) ){
        Atom *a1,*a2;
        a1 = bond->getOwningMol().getAtomWithIdx(atomToLeftIdx);
        a2 = bond->getOwningMol().getAtomWithIdx(bond->getOtherAtomIdx(atomToLeftIdx));
        if((a1->getIsAromatic() && a2->getIsAromatic()) &&
           (a1->getAtomicNum()||a2->getAtomicNum())) aromatic=true;
      }

      Bond::BondDir dir= bond->getBondDir();

      if(bond->hasProp("_TraversalRingClosureBond")){
        if(dir==Bond::ENDDOWNRIGHT) dir=Bond::ENDUPRIGHT;
        else if(dir==Bond::ENDUPRIGHT) dir=Bond::ENDDOWNRIGHT;
        bond->clearProp("_TraversalRingClosureBond");
      }
  
      switch(bond->getBondType()){
      case Bond::SINGLE:
        if( dir != Bond::NONE && dir != Bond::UNKNOWN ){
          switch(dir){
          case Bond::ENDDOWNRIGHT:
            if(bond->getOwningMol().hasProp("_doIsoSmiles"))  res << "\\";
            break;
          case Bond::ENDUPRIGHT:
            if(bond->getOwningMol().hasProp("_doIsoSmiles"))  res << "/";
            break;
          default:
            break;
          }
        } else {
          // if the bond is marked as aromatic and the two atoms
          //  are aromatic, we need no marker (this arises in kekulized
          //  molecules).
          // FIX: we should be able to dump kekulized smiles
          //   currently this is possible by removing all
          //   isAromatic flags, but there should maybe be another way
          if(allBondsExplicit) res<<"-";
          else if( aromatic && !bond->getIsAromatic() ) res << "-";
        }
        break;
      case Bond::DOUBLE:
        // see note above
        if( !aromatic || !bond->getIsAromatic() ) res << "=";
        break;
      case Bond::TRIPLE: res << "#"; break;
      case Bond::AROMATIC:
        if ( dir != Bond::NONE && dir != Bond::UNKNOWN ){
          switch(dir){
          case Bond::ENDDOWNRIGHT:
            if(bond->getOwningMol().hasProp("_doIsoSmiles"))  res << "\\";
            break;
          case Bond::ENDUPRIGHT:
            if(bond->getOwningMol().hasProp("_doIsoSmiles"))  res << "/";
            break;
          default:
            break;
          }
        }
        if(allBondsExplicit) res << ":";
        else if(!aromatic) res << ":";
        break;
      case Bond::DATIVE:
        if(atomToLeftIdx>=0 &&
           bond->getBeginAtomIdx()==static_cast<unsigned int>(atomToLeftIdx) ) res << ">";
        else res << "<";
        break;
      default:
        res << "?";
      }
      return res.str();
    }

    // Builds the SMILES for the fragment that is traversed starting at atom
    // atomIdx.
    //
    // Canon::canonicalizeFragment() fills a stack of atom / bond / ring /
    // branch elements for the fragment; each element is then converted to
    // text in order.  colors and ranks are handed straight to
    // canonicalizeFragment(); doKekule and allBondsExplicit are handed to the
    // atom and bond writers.
    //
    // Side effect: when canonical is false the "_StereochemDone" property is
    // set on mol before the traversal.
    std::string FragmentSmilesConstruct(ROMol &mol,int atomIdx,
                                        std::vector<Canon::AtomColors> &colors,
                                        INT_VECT &ranks,bool doKekule,bool canonical,
                                        bool allBondsExplicit){
      Canon::MolStack molStack;
      // try to prevent excessive reallocation
      molStack.reserve(mol.getNumAtoms()+mol.getNumBonds());

      if(!canonical){
        mol.setProp("_StereochemDone",1);
      }
      Canon::canonicalizeFragment(mol,atomIdx,colors,ranks,molStack);

      std::stringstream smi;
      // maps the ring index used on the stack to the ring-closure number that
      // is currently written for it in the output
      std::map<int,int> openRings;
      // the most recently written bond (null until a bond has been seen)
      Bond *lastBond=0;

      BOOST_FOREACH(const Canon::MolStackElem &elem,molStack){
        switch(elem.type){
        case Canon::MOL_STACK_ATOM:
          smi << GetAtomSmiles(elem.obj.atom,doKekule,lastBond);
          break;
        case Canon::MOL_STACK_BOND:
          lastBond = elem.obj.bond;
          smi << GetBondSmiles(lastBond,elem.number,doKekule,allBondsExplicit);
          break;
        case Canon::MOL_STACK_RING: {
          const int ringIdx=elem.number;
          int closureNum;
          std::map<int,int>::iterator known=openRings.find(ringIdx);
          if(known!=openRings.end()){
            // second time we see this ring: it is being closed, so reuse its
            // number and release it
            closureNum=known->second;
            openRings.erase(known);
          } else {
            // first time we see this ring: it is being opened, so pick the
            // smallest positive number that no open ring is using.
            // EFF: there's got to be a more efficient way to do this
            closureNum=1;
            std::map<int,int>::iterator scan=openRings.begin();
            while(scan!=openRings.end()){
              if(scan->second==closureNum){
                // taken: try the next number, starting the scan over
                ++closureNum;
                scan=openRings.begin();
              } else {
                ++scan;
              }
            }
            openRings[ringIdx]=closureNum;
          }
          // closure numbers with more than one digit get a '%' prefix
          if(closureNum>=10){
            smi << "%";
          }
          smi << closureNum;
          break;
        }
        case Canon::MOL_STACK_BRANCH_OPEN:
          smi << "(";
          break;
        case Canon::MOL_STACK_BRANCH_CLOSE:
          smi << ")";
          break;
        default:
          break;
        }
      }
      return smi.str();
    }

  } // end of namespace SmilesWrite


  // Returns a SMILES string for mol, or an empty string if mol has no atoms.
  // Disconnected fragments are written one after another, separated by ".".
  //
  //   doIsomericSmiles - when true the "_doIsoSmiles" property is set on the
  //                      working copy and MolOps::assignStereochemistry() is
  //                      run on it; when false the property is removed
  //   doKekule         - passed on to the fragment writer
  //   rootedAtAtom     - if >=0, the first fragment is started at this atom;
  //                      must be smaller than the number of atoms
  //   canonical        - when true atoms are ranked with MolOps::rankAtoms(),
  //                      otherwise each atom's index is used as its rank
  //   allBondsExplicit - passed on to the fragment writer
  //
  // NOTE: the missing const on mol is deliberate.  Producing SMILES for a
  // molecule can change it (for example, the directionality of bonds may be
  // changed during canonicalization).  In this implementation all of that
  // work is done on a local copy (tmol); mol itself is only read.
  std::string MolToSmiles(ROMol &mol,bool doIsomericSmiles,
                          bool doKekule,int rootedAtAtom,bool canonical,
                          bool allBondsExplicit){
    PRECONDITION(rootedAtAtom<0||static_cast<unsigned int>(rootedAtAtom)<mol.getNumAtoms(),
                 "rootedAtomAtom must be less than the number of atoms");
    if(!mol.getNumAtoms()) return "";

    // everything below operates on a copy of the input
    ROMol tmol(mol);

    // the atom and bond writers look at "_doIsoSmiles" to decide whether
    // stereo and isotope information is written
    if(doIsomericSmiles){
      tmol.setProp("_doIsoSmiles",1);
    } else if(tmol.hasProp("_doIsoSmiles")){
      tmol.clearProp("_doIsoSmiles");
    }

    // start without a ring-stereo warning flag left over on the copy
    if(tmol.hasProp("_ringStereoWarning")){
      tmol.clearProp("_ringStereoWarning");
    }

#if 0
    std::cout << "----------------------------" << std::endl;
    std::cout << "MolToSmiles:"<< std::endl;
    tmol.debugMol(std::cout);
    std::cout << "----------------------------" << std::endl;
#endif  
    std::string res;

    ROMol::AtomIterator atIt=tmol.beginAtoms();
    while(atIt!=tmol.endAtoms()){
      (*atIt)->updatePropertyCache(false);
      atIt++;
    }

    unsigned int nAtoms=tmol.getNumAtoms();
    INT_VECT ranks(nAtoms,-1);

    // clean up the chirality on any atom that is marked as chiral,
    // but that should not be:
    if(doIsomericSmiles){
      MolOps::assignStereochemistry(tmol,true);
    }
    if(!canonical){
      // no canonicalization requested: rank the atoms by their index
      for(unsigned int i=0;i<tmol.getNumAtoms();++i){
        ranks[i]=i;
      }
    } else {
      MolOps::rankAtoms(tmol,ranks);
    }
#ifdef VERBOSE_CANON
    for(unsigned int tmpI=0;tmpI<ranks.size();tmpI++){
      std::cout << tmpI << " " << ranks[tmpI] << " " << *(tmol.getAtomWithIdx(tmpI)) << std::endl;
    }
#endif

    // An atom stays WHITE_NODE until a traversal has visited it.  We always
    // have at least one atom here, so there is at least one fragment to
    // write; keep going for as long as unvisited atoms remain.
    std::vector<Canon::AtomColors> colors(nAtoms,Canon::WHITE_NODE);
    bool fragmentsRemain=true;
    while(fragmentsRemain){
      // find the next atom for a traverse
      int nextAtomIdx=-1;
      if(rootedAtAtom>=0){
        // the requested root is only used for the first fragment
        nextAtomIdx=rootedAtAtom;
        rootedAtAtom=-1;
      } else {
        // otherwise start from the unvisited atom with the lowest rank
        int lowestRank = nAtoms+1;
        for(unsigned int idx=0;idx<nAtoms;idx++){
          if(colors[idx]!=Canon::WHITE_NODE) continue;
          if(ranks[idx]<lowestRank){
            lowestRank = ranks[idx];
            nextAtomIdx = idx;
          }
        }
      }
      CHECK_INVARIANT(nextAtomIdx>=0,"no start atom found");

      res += SmilesWrite::FragmentSmilesConstruct(tmol, nextAtomIdx, colors,
                                                  ranks,doKekule,canonical,allBondsExplicit);

      fragmentsRemain =
        std::find(colors.begin(),colors.end(),Canon::WHITE_NODE)!=colors.end();
      if(fragmentsRemain){
        res += ".";
      }
    }

    return res;
  } // end of MolToSmiles()
}
