/***************************************************************************
	pseudoDtd.cpp
	copyright			: (C) 2001-2002 by Daniel Naber
	email				: daniel.naber@t-online.de
 ***************************************************************************/

/***************************************************************************
 This program is free software; you can redistribute it and/or
 modify it under the terms of the GNU General Public License
 as published by the Free Software Foundation; either version 2
 of the License, or ( at your option ) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 ***************************************************************************/

#include "pseudo_dtd.h"

#include <assert.h>

#include <tqdom.h>
#include <tqregexp.h>

#include <klocale.h>
#include <kmessagebox.h>

// Construct a PseudoDTD with SGML support switched on.
// "SGML support" only means case-insensitivity, because HTML is
// case-insensitive up to version 4.
// TODO: make this a run-time option ( maybe automatically set )
PseudoDTD::PseudoDTD()
  : m_sgmlSupport( true )
{
}

// Destructor: the body is empty, no explicit cleanup is performed here.
PseudoDTD::~PseudoDTD()
{
}

/**
 * Parse a meta DTD (XML) and rebuild the lookup tables for entities,
 * elements, attributes and attribute values from it.
 *
 * An error box is shown, and nothing is parsed, when the text is not
 * well-formed XML or when its doctype name is not "dtd". A progress dialog
 * is shown while parsing; if it is cancelled the function returns at once,
 * leaving the tables in whatever state the parse passes reached.
 *
 * @param metaDtdUrl  location of the meta DTD; only used in error messages
 * @param metaDtd     the XML text of the meta DTD
 */
void PseudoDTD::analyzeDTD( TQString &metaDtdUrl, TQString &metaDtd )
{
  TQDomDocument doc( "dtdIn_xml" );

  const bool wellFormed = doc.setContent( metaDtd );
  if ( !wellFormed )
  {
    KMessageBox::error(0, i18n("The file '%1' could not be parsed. "
        "Please check that the file is well-formed XML.").arg( metaDtdUrl ),
        i18n( "XML Plugin Error") );
    return;
  }

  const bool expectedFormat = ( doc.doctype().name() == "dtd" );
  if ( !expectedFormat )
  {
    KMessageBox::error(0, i18n("The file '%1' is not in the expected format. "
        "Please check that the file is of this type:\n"
            "-//Norman Walsh//DTD DTDParse V2.0//EN\n"
            "You can produce such files with dtdparse. "
            "See the Kate Plugin documentation for more information.").arg( metaDtdUrl ),
        i18n("XML Plugin Error") );
    return;
  }

  // One progress step per node visited by the parse passes below.
  const uint entityCount  = doc.elementsByTagName( "entity" ).count();
  const uint elementCount = doc.elementsByTagName( "element" ).count();
  const uint attlistCount = doc.elementsByTagName( "attlist" ).count();
  // <attlist> nodes count twice, as they are iterated twice ( TODO: optimize that? )
  const uint totalSteps = entityCount + elementCount + attlistCount * 2;

  TQProgressDialog progress( i18n("Analyzing meta DTD..."), i18n("Cancel"), totalSteps,
                            0, "progress", TRUE );
  progress.setMinimumDuration( 400 );
  progress.setProgress(0);

  // Get information from the meta DTD and put it in TQt data structures for
  // fast access. The passes run in this order and stop at the first one that
  // reports cancellation (short-circuit evaluation).
  const bool completed = parseEntities( &doc, &progress )
                      && parseElements( &doc, &progress )
                      && parseAttributes( &doc, &progress )
                      && parseAttributeValues( &doc, &progress );
  if( !completed )
    return;

  progress.setProgress( totalSteps );	// just to make sure the dialog disappears
}

// ========================================================================
// DOM stuff:

/**
 * Iterate through the XML to get a mapping of which sub-elements are allowed
 * for all elements, and store it in m_elementsList (cleared first).
 *
 * For each <element> node the names of all <element-name> nodes below its
 * first <content-model-expanded> child are collected as a set. If such a
 * content model exists and the element contains an <empty> node, the marker
 * "__EMPTY" is added. Names listed below the first <exclusions> node are then
 * removed again.
 *
 * @param doc       the parsed meta DTD
 * @param progress  progress dialog; advanced by one step per <element> node
 *                  and polled for cancellation
 * @return false if the dialog was cancelled, true otherwise
 */
bool PseudoDTD::parseElements( TQDomDocument *doc, TQProgressDialog *progress )
{
  m_elementsList.clear();

  // We only display a list, i.e. we pretend that the content model is just
  // a set, so we use a map. This is necessary e.g. for xhtml 1.0's head element,
  // which would otherwise display some elements twice.
  TQMap<TQString,bool> subelementList;	// the bool is not used

  TQDomNodeList list = doc->elementsByTagName( "element" );
  uint listLength = list.count();      // cached on purpose (noted as a real speedup by the original author)

  for( uint i = 0; i < listLength; i++ )
  {
    if( progress->wasCancelled() )
      return false;

    progress->setProgress( progress->progress()+1 );
    // FIXME!:
    //tqApp->processEvents();

    subelementList.clear();
    TQDomNode node = list.item( i );
    TQDomElement elem = node.toElement();
    if( elem.isNull() )
      continue;

    // Enter the expanded content model, which may also include stuff not allowed.
    // We do not care if it's a <sequence-group> or whatever.
    TQDomNodeList contentModelList = elem.elementsByTagName( "content-model-expanded" );
    TQDomElement contentModelElem = contentModelList.item(0).toElement();
    if( ! contentModelElem.isNull() )
    {
      // collect the sub elements:
      TQDomNodeList subList = contentModelElem.elementsByTagName( "element-name" );
      uint subListLength = subList.count();
      for( uint l = 0; l < subListLength; l++ )
      {
        TQDomElement subElem = subList.item(l).toElement();
        if( !subElem.isNull() )
          subelementList[subElem.attribute( "name" )] = true;
      }

      // anders: check if this is an EMPTY element, and put "__EMPTY" in the
      // sub list, so that we can insert tags in empty form if required.
      TQDomNodeList emptyList = elem.elementsByTagName( "empty" );
      if ( emptyList.count() )
        subelementList["__EMPTY"] = true;
    }

    // Now remove the elements not allowed (e.g. <a> is explicitly not allowed in <a>
    // in the HTML 4.01 Strict DTD):
    TQDomNodeList exclusionsList = elem.elementsByTagName( "exclusions" );
    if( exclusionsList.length() > 0 )
    {	// sometimes there are no exclusions ( e.g. in XML DTDs there are never exclusions )
      TQDomElement exclusionsElem = exclusionsList.item(0).toElement();
      if( ! exclusionsElem.isNull() )
      {
        TQDomNodeList subList = exclusionsElem.elementsByTagName( "element-name" );
        uint subListLength = subList.count();
        for( uint l = 0; l < subListLength; l++ )
        {
          TQDomElement subElem = subList.item(l).toElement();
          // remove() by key is a no-op when the name is not in the set
          if( !subElem.isNull() )
            subelementList.remove( subElem.attribute( "name" ) );
        }
      }
    }

    // turn the map into a list:
    TQStringList subelementListTmp;
    TQMap<TQString,bool>::Iterator it;
    for( it = subelementList.begin(); it != subelementList.end(); ++it )
      subelementListTmp.append( it.key() );

    m_elementsList.insert( elem.attribute( "name" ), subelementListTmp );

  } // end iteration over all <element> nodes
  return true;
}

/**
 * Look up which elements are allowed inside a parent element.
 *
 * The result is a plain list of names; neither the order nor the number of
 * occurrences permitted by the content model is taken into account. With
 * SGML support enabled the parent name is matched case-insensitively and the
 * first match in map order is returned. An empty list is returned when the
 * parent element is not known.
 */
TQStringList PseudoDTD::allowedElements( TQString parentElement )
{
  if( !m_sgmlSupport )
  {
    // exact, case-sensitive lookup
    if( !m_elementsList.contains(parentElement) )
      return TQStringList();
    return m_elementsList[parentElement];
  }

  // find the matching element, ignoring case:
  const TQString wanted = parentElement.lower();
  TQMap<TQString,TQStringList>::Iterator entry = m_elementsList.begin();
  while( entry != m_elementsList.end() )
  {
    if( entry.key().lower() == wanted )
      return entry.data();
    ++entry;
  }

  return TQStringList();
}

/**
 * Iterate through the XML to get a mapping of which attributes are allowed
 * inside all elements, and store it in m_attributesList (cleared first).
 *
 * For every <attlist> node, each <attribute> below it is filed under
 * "required" when its "type" attribute equals "#REQUIRED" and under
 * "optional" otherwise. The result is stored under the attlist's "name".
 *
 * @param doc       the parsed meta DTD
 * @param progress  progress dialog; advanced by one step per <attlist> node
 *                  and polled for cancellation
 * @return false if the dialog was cancelled, true otherwise
 */
bool PseudoDTD::parseAttributes( TQDomDocument *doc, TQProgressDialog *progress )
{
  m_attributesList.clear();

  TQDomNodeList attlists = doc->elementsByTagName( "attlist" );
  const uint attlistCount = attlists.count();

  for( uint idx = 0; idx < attlistCount; ++idx )
  {
    if( progress->wasCancelled() )
      return false;

    progress->setProgress( progress->progress()+1 );
    // FIXME!!
    //tqApp->processEvents();

    TQDomElement attlistElem = attlists.item( idx ).toElement();
    if( attlistElem.isNull() )
      continue;

    ElementAttributes collected;
    TQDomNodeList attrNodes = attlistElem.elementsByTagName( "attribute" );
    const uint attrCount = attrNodes.count();
    for( uint a = 0; a < attrCount; ++a )
    {
      TQDomElement attrElem = attrNodes.item( a ).toElement();
      if( attrElem.isNull() )
        continue;

      const bool required = ( attrElem.attribute("type") == "#REQUIRED" );
      if ( required )
        collected.requiredAttributes.append( attrElem.attribute("name") );
      else
        collected.optionalAttributes.append( attrElem.attribute("name") );
    }

    m_attributesList.insert( attlistElem.attribute("name"), collected );
  }

  return true;
}

/**
 * Look up which attributes are allowed for an element.
 *
 * The returned list holds the optional attributes followed by the required
 * ones. With SGML support enabled the element name is matched
 * case-insensitively and the first match in map order is used. An empty list
 * is returned when the element is not known.
 */
TQStringList PseudoDTD::allowedAttributes( TQString element )
{
  if( !m_sgmlSupport )
  {
    // exact, case-sensitive lookup
    if( !m_attributesList.contains(element) )
      return TQStringList();
    ElementAttributes &known = m_attributesList[element];
    return known.optionalAttributes + known.requiredAttributes;
  }

  // find the matching element, ignoring case:
  const TQString wanted = element.lower();
  TQMap<TQString,ElementAttributes>::Iterator entry = m_attributesList.begin();
  while( entry != m_attributesList.end() )
  {
    if( entry.key().lower() == wanted )
      return entry.data().optionalAttributes + entry.data().requiredAttributes;
    ++entry;
  }

  return TQStringList();
}

/**
 * Look up which attributes are required for an element.
 *
 * With SGML support enabled the element name is matched case-insensitively
 * and the first match in map order is used. An empty list is returned when
 * the element is not known.
 */
TQStringList PseudoDTD::requiredAttributes( const TQString &element ) const
{
  if ( !m_sgmlSupport )
  {
    // exact, case-sensitive lookup
    if( !m_attributesList.contains(element) )
      return TQStringList();
    return m_attributesList[element].requiredAttributes;
  }

  // find the matching element, ignoring case:
  const TQString wanted = element.lower();
  TQMap<TQString,ElementAttributes>::ConstIterator entry = m_attributesList.begin();
  while( entry != m_attributesList.end() )
  {
    if( entry.key().lower() == wanted )
      return entry.data().requiredAttributes;
    ++entry;
  }

  return TQStringList();
}

/**
 * Iterate through the XML to get a mapping of which attribute values are
 * allowed for all attributes inside all elements, and store it in
 * m_attributevaluesList (cleared first).
 *
 * For every <attlist> node, the "value" attribute of each <attribute> below
 * it is split at single spaces into the list of possible values.
 *
 * @param doc       the parsed meta DTD
 * @param progress  progress dialog; advanced by one step per <attlist> node
 *                  and polled for cancellation
 * @return false if the dialog was cancelled, true otherwise
 */
bool PseudoDTD::parseAttributeValues( TQDomDocument *doc, TQProgressDialog *progress )
{
  m_attributevaluesList.clear();						// 1 element : n possible attributes

  TQDomNodeList attlists = doc->elementsByTagName( "attlist" );
  const uint attlistCount = attlists.count();

  for( uint idx = 0; idx < attlistCount; ++idx )
  {
    if( progress->wasCancelled() )
      return false;

    progress->setProgress( progress->progress()+1 );
    // FIXME!
    //tqApp->processEvents();

    TQDomElement attlistElem = attlists.item( idx ).toElement();
    if( attlistElem.isNull() )
      continue;

    // 1 attribute : n possible values, rebuilt from scratch for each <attlist>
    TQMap<TQString,TQStringList> valuesByAttribute;

    // Enter the list of <attribute>:
    TQDomNodeList attrNodes = attlistElem.elementsByTagName( "attribute" );
    const uint attrCount = attrNodes.count();
    for( uint a = 0; a < attrCount; ++a )
    {
      TQDomElement attrElem = attrNodes.item( a ).toElement();
      if( attrElem.isNull() )
        continue;

      const TQString rawValues = attrElem.attribute( "value" );
      const TQStringList possibleValues = TQStringList::split( TQRegExp(" "), rawValues );
      valuesByAttribute.insert( attrElem.attribute("name"), possibleValues );
    }

    m_attributevaluesList.insert( attlistElem.attribute("name"), valuesByAttribute );
  }

  return true;
}

/**
 * Look up which attribute values are allowed for an attribute in an element
 * (the element is necessary because e.g. "href" inside <a> could be different
 * from an "href" inside <link>).
 *
 * With SGML support enabled both names are matched case-insensitively by
 * iterating over the tables; direct access would be faster but not always
 * correct in that mode. An empty list is returned when no predefined values
 * are available.
 */
TQStringList PseudoDTD::attributeValues( TQString element, TQString attribute )
{
  typedef TQMap<TQString,TQStringList> ValueMap;

  if( !m_sgmlSupport )
  {
    // exact, case-sensitive lookup
    if( m_attributevaluesList.contains(element) )
    {
      ValueMap valuesByAttr = m_attributevaluesList[element];
      if( valuesByAttr.contains(attribute) )
        return valuesByAttr[attribute];
    }
    return TQStringList();
  }

  const TQString elementLc = element.lower();
  const TQString attributeLc = attribute.lower();

  // first find the matching element, ignoring case:
  TQMap<TQString,ValueMap>::Iterator elemIt;
  for( elemIt = m_attributevaluesList.begin(); elemIt != m_attributevaluesList.end(); ++elemIt )
  {
    if( elemIt.key().lower() != elementLc )
      continue;

    // then find the matching attribute for that element, ignoring case:
    ValueMap valuesByAttr = elemIt.data();
    ValueMap::Iterator attrIt;
    for( attrIt = valuesByAttr.begin(); attrIt != valuesByAttr.end(); ++attrIt )
    {
      if( attrIt.key().lower() == attributeLc )
        return attrIt.data();
    }
    // No such attribute here: keep scanning, since another element key may
    // differ from this one only in case.
  }

  // no predefined values available:
  return TQStringList();
}

/**
 * Iterate through the XML to get a mapping of all entity names and their
 * expanded version, e.g. nbsp => &#160;, and store it in m_entityList
 * (cleared first). Entities whose "type" attribute is "param" are ignored.
 *
 * The expansion is the text of the entity's first <text-expanded> node; an
 * entity without such a node is stored with a null string.
 *
 * @param doc       the parsed meta DTD
 * @param progress  progress dialog; advanced by one step per <entity> node
 *                  and polled for cancellation
 * @return false if the dialog was cancelled, true otherwise
 */
bool PseudoDTD::parseEntities( TQDomDocument *doc, TQProgressDialog *progress )
{
  m_entityList.clear();

  TQDomNodeList entityNodes = doc->elementsByTagName( "entity" );
  const uint entityCount = entityNodes.count();

  for( uint idx = 0; idx < entityCount; ++idx )
  {
    if( progress->wasCancelled() )
      return false;

    progress->setProgress( progress->progress()+1 );
    //FIXME!!
    //tqApp->processEvents();

    TQDomElement entityElem = entityNodes.item( idx ).toElement();
    if( entityElem.isNull() )
      continue;
    if( entityElem.attribute( "type" ) == "param" )
      continue;
    // TODO: what's cdata <-> gen ?

    TQDomNodeList expandedNodes = entityElem.elementsByTagName( "text-expanded" );
    TQDomElement expandedElem = expandedNodes.item(0).toElement();

    TQString expansion;   // stays a null string when there is no <text-expanded>
    if( ! expandedElem.isNull() )
    {
      expansion = expandedElem.text();
      // TODO: support more than one &#...; in the expanded text
      /* TODO include do this when the unicode font problem is solved:
      if( exp.contains(TQRegExp("^&#x[a-zA-Z0-9]+;$")) ) {
        // hexadecimal numbers, e.g. "&#x236;"
        uint end = exp.find( ";" );
        exp = exp.mid( 3, end-3 );
        exp = TQChar();
      } else if( exp.contains(TQRegExp("^&#[0-9]+;$")) ) {
        // decimal numbers, e.g. "&#236;"
        uint end = exp.find( ";" );
        exp = exp.mid( 2, end-2 );
        exp = TQChar( exp.toInt() );
      }
      */
    }

    m_entityList.insert( entityElem.attribute("name"), expansion );
  }

  return true;
}

/**
 * Get a list of the names of all ( non-parameter ) entities whose name
 * starts with a certain string.
 *
 * @param start  prefix the entity name has to begin with
 * @return the matching entity names, in map order
 */
TQStringList PseudoDTD::entities( TQString start )
{
  TQStringList matches;
  TQMap<TQString,TQString>::Iterator it;
  for( it = m_entityList.begin(); it != m_entityList.end(); ++it )
  {
    // Compare against the key (the entity name). Dereferencing the iterator
    // yields the mapped value, i.e. the expansion text such as "&#160;",
    // which is not what the prefix is meant to be matched against.
    if( !it.key().startsWith(start) )
      continue;

    TQString str = it.key();
    /* TODO: show entities as unicode character
    if( !it.data().isEmpty() ) {
      //str += " -- " + it.data();
      TQRegExp re( "&#(\\d+);" );
      if( re.search(it.data()) != -1 ) {
        uint ch = re.cap( 1).toUInt();
        str += " -- " + TQChar( ch).decomposition();
      }
      //kdDebug() << "#" << it.data() << endl;
    }
    */
    matches.append( str );
    // TODO: later use a table view
  }
  return matches;
}

// kate: space-indent on; indent-width 2; replace-tabs on; mixed-indent off;
