# -------------------------------------------------------------------------
#     This file is part of mMass - the spectrum analysis tool for MS.
#     Copyright (C) 2005-07 Martin Strohalm <mmass@biographics.cz>

#     This program is free software; you can redistribute it and/or modify
#     it under the terms of the GNU General Public License as published by
#     the Free Software Foundation; either version 2 of the License, or
#     (at your option) any later version.

#     This program is distributed in the hope that it will be useful,
#     but WITHOUT ANY WARRANTY; without even the implied warranty of
#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#     GNU General Public License for more details.

#     Complete text of GNU GPL can be found in the file LICENSE in the
#     main directory of the program
# -------------------------------------------------------------------------

# Function: Format document data to mMass format.

# load libs
import sys
import string
import re
import struct
import zlib
import base64


class formatMMass:
    """ Format document data to mMass format.

    The formatter reads everything from the document object given to the
    constructor and serializes it to the mSD (version 1.0) XML text returned
    by formatData().

    The document object has to provide the accessors used by this class:
    getDescription(name), getPeaks(), getSequenceString(),
    getSequenceTitle(), getModifications(), getSpectrum() and
    getSpectrumXRange().
    """

    # description items written as '<name value="..." />', in output order
    _DESCRIPTION_FIELDS = ('date', 'operator', 'contact', 'institution', 'instrument')

    # replacements applied by clearText(); '&' has to stay first, otherwise
    # the ampersands of already inserted entities would be escaped again
    _XML_ESCAPES = (
        ('&', '&amp;'),
        ('"', '&quot;'),
        ("'", '&#39;'),
        ('<', '&lt;'),
        ('>', '&gt;'),
    )

    # modification formula: gain formula optionally followed by '-' and loss formula
    _FORMULA_PATTERN = re.compile(r'^(([A-Z][a-z\*]?[0-9]*)*)(-(([A-Z][a-z\*]?[0-9]*)*))?$')

    # struct byte-order prefixes for the values of sys.byteorder
    _BYTE_ORDERS = {'little': '<', 'big': '>'}

    # ----
    def __init__(self, document):
        """ Remember the document whose data will be formatted. """

        self.docData = document
    # ----


    # ----
    def formatData(self):
        """ Format document data to mMass format.

        Returns the whole document as one string: XML declaration, the
        <mSD> root and the description, peaklist, sequences and spectrum
        sections in this order.
        """

        parts = [
            '<?xml version="1.0" encoding="utf-8" ?>\n',
            '<mSD version="1.0">\n',
            self.formatDescription(),
            self.formatPeaklist(),
            self.formatSequence(),
            self.formatSpectrum(),
            '</mSD>\n',
        ]

        return ''.join(parts)
    # ----


    # ----
    def formatDescription(self):
        """ Format description.

        Returns the <description> section. Every value is read by
        docData.getDescription() and escaped by clearText().
        """

        getDescription = self.docData.getDescription

        # items stored in 'value' attribute
        lines = ['    <description>\n']
        for name in self._DESCRIPTION_FIELDS:
            value = self.clearText(getDescription(name))
            lines.append('        <%s value="%s" />\n' % (name, value))

        # notes are stored as element text
        notes = self.clearText(getDescription('notes'))
        lines.append('        <notes>%s</notes>\n' % notes)
        lines.append('    </description>\n')

        return ''.join(lines)
    # ----


    # ----
    def formatSpectrum(self):
        """ Format spectrum.

        Returns the <spectrum> section. The section is left empty if the
        document has no spectrum points; the points are converted and the
        X range is requested only when there is something to write.
        """

        spectrum = self.docData.getSpectrum()

        lines = ['    <spectrum>\n']
        if spectrum:

            # arrays are packed in byte order of this machine
            endian = sys.byteorder
            mzArray, intArray = self.convertSpectrum(spectrum, endian)
            xRange = self.docData.getSpectrumXRange()

            lines.append('        <points value="%d" />\n' % len(spectrum))
            lines.append('        <startPoint value="%f" />\n' % xRange[0])
            lines.append('        <endPoint value="%f" />\n' % xRange[1])
            lines.append('        <mzArray compression="gz" endian="%s">%s</mzArray>\n' % (endian, mzArray))
            lines.append('        <intArray compression="gz" endian="%s">%s</intArray>\n' % (endian, intArray))
        lines.append('    </spectrum>\n')

        return ''.join(lines)
    # ----


    # ----
    def formatPeaklist(self):
        """ Format peaklist.

        Returns the <peaklist> section with one <peak> element for each
        item of docData.getPeaks(). Each item is indexed as
        (mass, intensity, annotation); the annotation is escaped.
        """

        lines = ['    <peaklist>\n']
        for peak in self.docData.getPeaks():
            annots = self.clearText(peak[2])
            lines.append('        <peak mass="%f" intens="%f" annots="%s" />\n' % (peak[0], peak[1], annots))
        lines.append('    </peaklist>\n')

        return ''.join(lines)
    # ----


    # ----
    def formatSequence(self):
        """ Format sequence.

        Returns the <sequences> section. The section is left empty if the
        document has no sequence. Title and modifications are processed only
        when they are really written, so modifications pointing into
        a missing sequence cannot break the export.
        """

        sequence = self.docData.getSequenceString()

        lines = ['    <sequences>\n']
        if sequence:
            title = self.clearText(self.docData.getSequenceTitle())
            modifications = self.formatModifications(sequence, self.docData.getModifications())

            lines.append('        <sequence id="seq1">\n')
            lines.append('            <title>%s</title>\n' % title)
            lines.append('            <seq>%s</seq>\n' % sequence)
            lines.append('            <modifications>\n')
            lines.append(modifications)
            lines.append('            </modifications>\n')
            lines.append('        </sequence>\n')
        lines.append('    </sequences>\n')

        return ''.join(lines)
    # ----


    # ----
    def formatModifications(self, sequence, modifications):
        """ Format modifications.

        sequence: sequence string used to look up the modified amino acid
        modifications: iterable of items indexed as (target, formula, name),
            or an empty value. An integer target is a 0-based index into
            the sequence (residual modification, written with 1-based
            position), a target of length 1 is written as global
            modification of that amino acid. Other targets are skipped.

        Returns the <modification> elements as one string ('' if there are
        no modifications). Raises ValueError if a formula does not have the
        'gain' or 'gain-loss' form.
        """

        if not modifications:
            return ''

        residualFormat = '                <modification name="%s" type="residual" amino="%s" position="%d" gain="%s" loss="%s" />\n'
        globalFormat = '                <modification name="%s" type="global" amino="%s" gain="%s" loss="%s" />\n'

        lines = []
        for mod in modifications:
            target = mod[0]
            formula = mod[1]

            # get formula gain and loss
            parts = self._FORMULA_PATTERN.match(formula)
            if parts is None:
                raise ValueError('Invalid modification formula: %r' % (formula,))
            gain = parts.group(1) or ''
            loss = parts.group(4) or ''

            # name goes to an XML attribute so it must be escaped
            name = self.clearText(mod[2])

            # residual
            if isinstance(target, int):
                lines.append(residualFormat % (name, sequence[target], target + 1, gain, loss))

            # global
            elif len(target) == 1:
                lines.append(globalFormat % (name, target, gain, loss))

        return ''.join(lines)
    # ----


    # ----
    def clearText(self, text):
        """ Clear special characters such as <> etc.

        Strips surrounding whitespace and replaces the characters
        & " ' < > by XML entities. None is treated as an empty text.
        """

        if text is None:
            return ''

        text = text.strip()
        for char, entity in self._XML_ESCAPES:
            text = text.replace(char, entity)

        return text
    # ----


    # ----
    def convertSpectrum(self, spectrum, endian):
        """ Convert spectrum parts to compressed binary format coded by base64.

        spectrum: iterable of points indexed as (m/z, intensity)
        endian: 'little' or 'big' - byte order of the packed values; any
            other value falls back to byte order of this machine

        Each value is packed as 4-byte float, the arrays are compressed by
        zlib and coded by base64. Returns tuple (mzArray, intArray) of
        native strings, so they can be put directly into the XML text.
        Works under both Python 2 (2.6+) and Python 3.
        """

        # '=' means native byte order with standard sizes, same as 'f' alone
        order = self._BYTE_ORDERS.get(endian, '=')
        pack = struct.Struct(order + 'f').pack

        # convert data to binary
        mzChunks = []
        intChunks = []
        for point in spectrum:
            mzChunks.append(pack(point[0]))
            intChunks.append(pack(point[1]))

        return self._encodeArray(mzChunks), self._encodeArray(intChunks)
    # ----


    # ----
    def _encodeArray(self, chunks):
        """ Join binary chunks, compress them by zlib and code by base64. """

        data = base64.b64encode(zlib.compress(b''.join(chunks)))

        # base64 gives bytes under Python 3, text formatting needs str
        if not isinstance(data, str):
            data = data.decode('ascii')

        return data
    # ----
