########################################################################
# $Source: /var/local/cvsroot/4Suite/Ft/Xml/Catalog.py,v $ $Revision: 1.22 $ $Date: 2004/09/08 23:24:12 $
"""
Classes and functions that help implement OASIS XML and TR9401 Catalogs.
Resolution with Catalogs is handled via the Ft.Xml.InputSource module.

Based on a contribution to PyXML from Tarn Weisner Burton
<twburton@users.sf.net>. See
http://sourceforge.net/tracker/index.php?func=detail&aid=490069&group_id=6473&atid=306473

Copyright 2004 Fourthought, Inc. (USA).
Detailed license and copyright information: http://4suite.org/COPYRIGHT
Project home, documentation, distributions: http://4suite.org/
"""

import os
import re
import string
import sys
import warnings
from cStringIO import StringIO
import xml.sax
import xml.sax.handler

from Ft import DATADIR
from Ft.Lib import Uri
from Ft.Xml import READ_EXTERNAL_DTD, HAS_PYEXPAT


# Pattern for one entry of a TR9401 (plain text) catalog.
#
# Groups:
#   1 - the keyword (BASE, CATALOG, DELEGATE, PUBLIC, SYSTEM,
#       OVERRIDE YES or OVERRIDE NO), matched case-insensitively (re.I)
#   2 - the first double-quoted string (backslash escapes are allowed
#       inside the quotes but are not decoded here)
#   3 - the optional second double-quoted string
#
# re.M lets '^' match at the start of every line.  Because the pattern
# has three groups, findall() always yields 3-tuples; group 3 is '' when
# the second string is absent.
#
# NOTE(review): the first quoted string is mandatory, so a bare
# "OVERRIDE YES" / "OVERRIDE NO" entry with no quoted string after it
# will not match; unquoted arguments are not matched either.
TR9401 = re.compile(r'^\s*(BASE|CATALOG|DELEGATE|PUBLIC|SYSTEM|OVERRIDE\s+YES|OVERRIDE\s+NO)\s+"((?:[^"\\]|\\.)*)"(?:\s+"((?:[^"\\]|\\.)*)")?', re.M | re.I)

class Catalog(xml.sax.handler.ContentHandler):
    """
    Reads and provides access to a catalog, providing mappings of public
    and system IDs to URIs, etc.

    It is implemented as a SAX ContentHandler and is able to read
    OASIS TR 9401 Catalogs <http://www.oasis-open.org/specs/a401.htm>
    and OASIS XML Catalogs <http://www.oasis-open.org/committees/entity/spec.html>

    Data collected while parsing:

      systemIds       - dict: system ID -> absolute URI
      publicIds       - dict: public ID -> (absolute URI, prefer_public)
      uris            - dict: URI name -> absolute URI
      publicDelegates - list of (publicId prefix, (Catalog, prefer_public))
      systemDelegates - list of (systemId prefix, Catalog)
      uriDelegates    - list of (URI prefix, Catalog)
      systemRewrites  - list of (systemId prefix, absolute rewrite prefix)
      uriRewrites     - list of (URI prefix, absolute rewrite prefix)
      catalogs        - list of chained Catalog objects
    """
    def __init__(self, uri=None, quiet=1, bootstrap=0):
        """
        uri       - URI of the catalog to load; if given, the catalog is
                    read and parsed immediately. If None, the caller
                    must set self.isrc and call parse() itself.
        quiet     - if false, diagnostics are written to sys.stderr.
        bootstrap - if true, the catalog is opened with Uri.UrlOpen
                    instead of an Ft.Xml.InputSource, and no entity
                    resolver is installed on the XML parser.
        """
        xml.sax.handler.ContentHandler.__init__(self)
        self.systemIds = {}
        self.publicIds = {}
        self.uris = {}
        self.publicDelegates = []
        self.systemDelegates = []
        self.uriDelegates = []
        self.systemRewrites = []
        self.uriRewrites = []
        self.catalogs = []
        self.uri = uri
        self.quiet = quiet
        self.bootstrap = bootstrap
        # Always define isrc so that parse() can detect a missing source
        # with a ValueError rather than failing with AttributeError.
        self.isrc = None

        if uri is not None:
            if not Uri.IsAbsolute(uri):
                # Using a relative URI here makes it hard to reliably
                # locate the catalog. Also, if the catalog doesn't set
                # its own base URI with xml:base, then we won't be able
                # to resolve relative URI references within the catalog.
                # So we should warn that this situation is undesirable.
                warnings.warn("Catalog URI '%s' is not absolute." % uri,
                              RuntimeWarning, 2)
            if bootstrap:
                #Don't use a Ft.Xml.InputSource to avoid circular
                #logic before that module's import is complete
                self.isrc = Uri.UrlOpen(uri)
            else:
                # otherwise, set up an InputSource for the catalog URI,
                # then read and parse the catalog immediately
                from Ft.Xml.InputSource import NoCatalogFactory
                factory = NoCatalogFactory
                self.isrc = factory.fromUri(uri)
            self.parse()
        return

    def parse(self):
        """
        Parse the catalog that was referenced either by a URI at
        construction time or by the InputSource self.isrc later.

        Raises ValueError if self.isrc has not been set.
        """
        if self.isrc is None:
            raise ValueError("Catalog.isrc was not set before parsing.")
        if self.uri is None:
            self.uri = self.isrc.uri
        # make sure the source is closed even if reading fails
        try:
            data = self.isrc.read()
        finally:
            self.isrc.close()
        # NOTE: the regex search assumes data's encoding is ascii based
        if re.search(r'<.*catalog\s', data):
            # it's XML so set the default options and parse the file
            self.parseXmlCat(data)
        else:
            # probably a TR 9401 file
            self.parseTr9401(data)
        if not self.quiet:
            sys.stderr.write('Catalog contents:\n')
            for key in self.__dict__.keys():
                sys.stderr.write('  %s = %r\n' % (key, self.__dict__[key]))
            sys.stderr.flush()
        return

    def parseXmlCat(self, data):
        """
        Parse an XML Catalog, as specified in
        http://www.oasis-open.org/committees/entity/spec-2001-08-06.html.
        Partially implemented.

        data is the already-read content of the catalog document.
        Raises RuntimeError if no SAX parser is available.
        """
        # Stacks of the 'prefer' and base URI settings that are in
        # effect; 'group' elements push and pop entries while parsing.
        self.prefer_public = [1]
        self.base = [self.uri]
        try:
            try:
                p = xml.sax.make_parser()
            except xml.sax.SAXReaderNotAvailable:
                #FIXME: l10n
                # (string exceptions are not usable; raise a real one)
                raise RuntimeError("PyExpat must be installed in order to use 4Suite's XML/TR9401 Catalog support.")

            # Set the parser to use the globally defined external entity
            # parsing setting.
            try:
                p.setFeature(xml.sax.handler.feature_external_ges,
                             READ_EXTERNAL_DTD)
            except (xml.sax.SAXNotRecognizedException,
                    xml.sax.SAXNotSupportedException):
                pass

            if not self.bootstrap:
                p.setEntityResolver(self.isrc)
            p.setContentHandler(self)

            # Since we have the catalog data already, parse it.
            #
            # Since xml.sax uses buggy urllib for ext. entity resolution,
            # set the base URI of the catalog to one urllib can handle.
            uri = Uri.MakeUrllibSafe(self.uri)
            isrc = xml.sax.xmlreader.InputSource(system_id=uri)
            catalog_file = StringIO(data)
            isrc.setByteStream(catalog_file)
            p.parse(isrc)
        finally:
            # the stacks are parse-time state only; drop them even when
            # parsing fails so they never linger on the instance
            del self.prefer_public
            del self.base
        return

    def parseTr9401(self, data):
        """
        Parse a TR9401 Catalog, as specified in
        <http://www.oasis-open.org/specs/a401.htm>.
        Partially implemented.

        data is the already-read content of the catalog file.
        """
        prefer_public = 1
        base = self.uri
        # The TR9401 pattern has three groups, so findall() always
        # yields 3-tuples; a missing second string shows up as ''.
        for token, first, second in TR9401.findall(data):
            token = token.upper()
            if token == 'PUBLIC':
                if second:
                    self.publicIds[first] = (Uri.Absolutize(second, base), prefer_public)
            elif token == 'SYSTEM':
                if second:
                    self.systemIds[first] = Uri.Absolutize(second, base)
            elif token == 'BASE':
                base = first
            elif token[:8] == 'OVERRIDE':
                prefer_public = token[8:].strip() == 'YES'
            elif token == 'DELEGATE':
                if second:
                    # publicDelegates is a list; store the entry in the
                    # same shape that the XML 'delegatePublic' handler
                    # in startElement() uses.
                    delegate = Catalog(Uri.Absolutize(second, base), self.quiet)
                    self.publicDelegates.append((first, (delegate, prefer_public)))
            elif token == 'CATALOG':
                # CATALOG takes a single argument
                if not second:
                    self.catalogs.append(Catalog(Uri.Absolutize(first, base), self.quiet))
        return

    # methods used by the XML parser

    def __make_abs(self, attrs, attr_name):
        '''
        Return the value of attribute attr_name as an absolute URI,
        resolved against the element's own xml:base attribute if it has
        one, otherwise against the base URI currently in effect.
        '''
        if attrs.has_key('xml:base'):
            base = attrs['xml:base']
        else:
            base = self.base[-1]

        return Uri.Absolutize(attrs[attr_name], base)


    def startElement(self, name, attrs):
        """
        Handle an element start event for the XML parser.
        This is a SAX ContentHandler method.

        Catalog entry elements are recorded in the corresponding
        dict or list of this object; elements that lack a required
        attribute are ignored. Unknown elements are ignored.
        """
        if name == 'public':
            # a publicId lookup
            if self.__ensure_attrs(name, attrs, 'publicId', 'uri'):
                # save the state of prefer_public also
                self.publicIds[attrs['publicId']] = (self.__make_abs(attrs, 'uri'), self.prefer_public[-1])
        elif name == 'system':
            # a systemId lookup
            if self.__ensure_attrs(name, attrs, 'systemId', 'uri'):
                self.systemIds[attrs['systemId']] = self.__make_abs(attrs, 'uri')
        elif name == 'uri':
            # a URI lookup
            if self.__ensure_attrs(name, attrs, 'name', 'uri'):
                self.uris[attrs['name']] = self.__make_abs(attrs, 'uri')
        elif name == 'rewriteURI':
            # a URI rewrite
            if self.__ensure_attrs(name, attrs, 'uriStartString', 'rewritePrefix'):
                self.uriRewrites.append((attrs['uriStartString'], self.__make_abs(attrs, 'rewritePrefix')))
        elif name == 'rewriteSystem':
            # a systemId rewrite
            if self.__ensure_attrs(name, attrs, 'systemIdStartString', 'rewritePrefix'):
                self.systemRewrites.append((attrs['systemIdStartString'], self.__make_abs(attrs, 'rewritePrefix')))
        elif name == 'delegateSystem':
            # delegate systemId to specific catalog
            if self.__ensure_attrs(name, attrs, 'systemIdStartString', 'catalog'):
                self.systemDelegates.append((attrs['systemIdStartString'], Catalog(self.__make_abs(attrs, 'catalog'), self.quiet)))
        elif name == 'delegatePublic':
            # delegate publicId to specific catalog
            if self.__ensure_attrs(name, attrs, 'publicIdStartString', 'catalog'):
                # save the state of prefer_public also
                self.publicDelegates.append((attrs['publicIdStartString'], (Catalog(self.__make_abs(attrs, 'catalog'), self.quiet), self.prefer_public[-1])))
        elif name == 'delegateURI':
            # delegate URI to specific catalog
            if self.__ensure_attrs(name, attrs, 'uriStartString', 'catalog'):
                self.uriDelegates.append((attrs['uriStartString'], Catalog(self.__make_abs(attrs, 'catalog'), self.quiet)))
        elif name == 'nextCatalog':
            # the next catalog in a chain
            if self.__ensure_attrs(name, attrs, 'catalog'):
                self.catalogs.append(Catalog(self.__make_abs(attrs, 'catalog'), self.quiet))
        elif name == 'catalog':
            # look for prefer or xml:base attributes and setup the base and prefer stacks
            if attrs.has_key('prefer'):
                self.prefer_public = [attrs['prefer'] == 'public']
            if attrs.has_key('xml:base'):
                self.base = [attrs['xml:base']]
        elif name == 'group':
            # look for prefer or xml:base attributes and push them onto the right stack
            if attrs.has_key('prefer'):
                self.prefer_public.append(attrs['prefer'] == 'public')
            else:
                self.prefer_public.append(self.prefer_public[-1])

            if attrs.has_key('xml:base'):
                self.base.append(attrs['xml:base'])
            else:
                self.base.append(self.base[-1])


    def __ensure_attrs(self, name, attrs, *attr_names):
        '''
        Ensure that the right attributes exist, just in case the parser
        is a non-validating one.

        Returns 1 if every name in attr_names is present in attrs,
        otherwise 0. Unless self.quiet is set, the first missing
        attribute is reported on sys.stderr.
        '''
        for attr_name in attr_names:
            if not attrs.has_key(attr_name):
                if not self.quiet:
                    sys.stderr.write('%s: Malformed %s element, missing %s attribute\n' % (self.uri, name, attr_name))
                    sys.stderr.flush()
                return 0
        return 1


    def endElement(self, name):
        """
        Handle an element end event for the XML parser.
        This is a SAX ContentHandler method.
        """
        if name == 'group':
            # pop the entries pushed by the matching startElement
            self.prefer_public.pop()
            self.base.pop()
        return

    def startElementNS(self, name, qname, attrs):
        """
        Handle an element start event, with namespace support,
        for the XML parser. This is a SAX ContentHandler method.
        """
        # defer to the ordinary method, using the local part of the name
        # NOTE(review): attrs is passed through unchanged, while
        # startElement looks attributes up by plain string names --
        # confirm this works with namespace-aware attribute objects.
        return self.startElement(name[1], attrs)


    def endElementNS(self, name, qname):
        """
        Handle an element end event, with namespace support,
        for the XML parser. This is a SAX ContentHandler method.
        """
        # defer to the ordinary method, using the local part of the name
        return self.endElement(name[1])


def getEnvCatalogs(environ_name='XML_CATALOGS', quiet=1):
    """
    Read catalogs from a list of file paths in the environment variable
    named by environ_name (XML_CATALOGS by default), returning a single
    Catalog object: the first catalog listed, with each of the others
    appended to its 'catalogs' list.

    The paths are separated by os.pathsep; empty entries are skipped.
    Returns None if the variable is unset or lists no paths.
    If quiet is false, progress messages are written to sys.stderr.
    """
    catalog = None
    # honor the caller-supplied variable name
    value = os.environ.get(environ_name)
    if value:
        for path in value.split(os.pathsep):
            if not path:
                # e.g. a leading, trailing or doubled separator
                continue
            if not quiet:
                sys.stderr.write('Reading %s\n' % path)
                sys.stderr.flush()
            uri = Uri.OsPathToUri(path)
            #FIXME: Use dict merging rather than this inefficient cascading
            if catalog is not None:
                if not quiet:
                    sys.stderr.write('Appending %s\n' % path)
                    sys.stderr.flush()
                catalog.catalogs.append(Catalog(uri, quiet))
            else:
                catalog = Catalog(uri, quiet=quiet)
                if not quiet:
                    sys.stderr.write('Creating catalog from %s\n' % path)
                    sys.stderr.flush()
    if not quiet:
        sys.stderr.write('Done. Result is %r\n' % catalog)
        sys.stderr.flush()
    return catalog


# Set up default 4Suite catalog, if possible
#
# FT_CATALOG is None when PyExpat is unavailable or the default catalog
# cannot be loaded. If the XML_CATALOGS environment variable is set, the
# catalogs it lists are used; otherwise default.cat from DATADIR is.
if not HAS_PYEXPAT:
    FT_CATALOG = None
elif os.environ.get("XML_CATALOGS") is not None:
    FT_CATALOG = getEnvCatalogs()
else:
    catalog_path = os.path.join(DATADIR, "default.cat")
    catalog_uri = Uri.OsPathToUri(catalog_path)
    try:
        FT_CATALOG = Catalog(catalog_uri, bootstrap=1)
    except (KeyboardInterrupt, SystemExit):
        # never swallow an interrupt or exit request; swallowing it
        # would also leave FT_CATALOG undefined
        raise
    except Exception:
        # best effort: report the problem and carry on without a catalog
        import sys
        import traceback
        traceback.print_exc(1000, sys.stderr)
        sys.stderr.write("Default catalog %s not found\n" % catalog_path)
        sys.stderr.flush()
        FT_CATALOG = None
    del catalog_path, catalog_uri

