#include "index.h"

#include "kvi_file.h"
#include <tqdir.h>
#include <tqstringlist.h>
#include "kvi_pointerhashtable.h"
#include <tqapplication.h>
#include <tqtextstream.h>
#include <ctype.h>


// Three-way comparison of two terms by their frequency field.
// Returns -1 when p1 is less frequent than p2, 0 when both frequencies are
// equal and 1 otherwise.
int kvi_compare(const Term * p1,const Term * p2)
{
	if(p1->frequency < p2->frequency)
		return -1;
	return (p1->frequency == p2->frequency) ? 0 : 1;
}

// Deserializes a Document from the stream: document number first, then
// frequency (the same order the matching operator<< writes them in).
// NOTE(review): the writer casts both fields to TQ_INT16; this reader relies
// on the declared field types having the same serialized width - confirm
// against the declaration of Document.
TQDataStream &operator>>( TQDataStream &s, Document &l )
{
    s >> l.docNumber >> l.frequency;
    return s;
}

// Serializes a Document to the stream as two TQ_INT16 values:
// the document number followed by the frequency.
TQDataStream &operator<<( TQDataStream &s, const Document &l )
{
    s << (TQ_INT16)l.docNumber << (TQ_INT16)l.frequency;
    return s;
}

// Creates an index whose document list is not known yet: docPath is set to
// dp and the list is gathered later by setupDocumentList() (see makeIndex()).
// The hp argument is accepted but not used by this constructor.
Index::Index( const TQString &dp, const TQString &hp )
    : TQObject( 0, 0 ), dict( 8999 ), docPath( dp )
{
    lastWindowClosed = FALSE;
    alreadyHaveDocList = FALSE;

    // Lets a running makeIndex() notice that the last window was closed.
    connect( tqApp, TQT_SIGNAL( lastWindowClosed() ), this, TQT_SLOT( setLastWinClosed() ) );
}



// Creates an index over an explicit list of documents: docList is copied
// from dl and flagged as already available, so makeIndex() will not call
// setupDocumentList(). The hp argument is accepted but not used here.
Index::Index( const TQStringList &dl, const TQString &hp )
    : TQObject( 0, 0 ), dict( 8999 )
{
    lastWindowClosed = FALSE;
    alreadyHaveDocList = TRUE;
    docList = dl;

    // Lets a running makeIndex() notice that the last window was closed.
    connect( tqApp, TQT_SIGNAL( lastWindowClosed() ), this, TQT_SLOT( setLastWinClosed() ) );
}



// Slot connected to the application's lastWindowClosed() signal in the
// constructors; raises the flag that makes makeIndex() stop and return -1.
void Index::setLastWinClosed()
{
    lastWindowClosed = TRUE;
}



// Sets the path of the file used by writeDict() and readDict().
void Index::setDictionaryFile( const TQString &f )
{
    dictFile = f;
}



// Sets the path of the document-list file used by writeDocumentList() and
// readDocumentList(); the titles are stored next to it in "<f>.titles".
void Index::setDocListFile( const TQString &f )
{
    docListFile = f;
}



int Index::makeIndex()
{
    if ( !alreadyHaveDocList )
	setupDocumentList();
    if ( docList.isEmpty() )
	return 1;
    dict.clear();
    TQStringList::Iterator it = docList.begin();
    int steps = docList.count() / 100;
    if ( !steps )
	steps++;
    int prog = 0;
    for ( int i = 0; it != docList.end(); ++it, ++i ) {
	if ( lastWindowClosed ) {
	    return -1;
	}
	parseDocument( *it, i );
	if ( i%steps == 0 ) {
	    prog++;
	    emit indexingProgress( prog );
	}
    }
    return 0;
}



void Index::setupDocumentList()

{
    docList.clear();
    titleList.clear();
    TQDir d( docPath );
    TQString szCur;
    TQStringList lst = d.entryList( "*.html" );
    TQStringList::ConstIterator it = lst.begin();
    for ( ; it != lst.end(); ++it )
    {
	szCur=docPath + "/" + *it;
	docList.append( szCur );
	titleList.append(getDocumentTitle( szCur ));
    }
}



void Index::insertInDict( const TQString &str, int docNum )
{
    if ( strcmp( str, "amp" ) == 0 || strcmp( str, "nbsp" ) == 0 )
	return;
    Entry *e = 0;
    if ( dict.count() )
	e = dict[ str ];

    if ( e ) {
	if ( e->documents.first().docNumber != docNum )
	    e->documents.prepend( Document( docNum, 1 ) );
	else
	    e->documents.first().frequency++;
    } else {
	dict.insert( str, new Entry( docNum ) );
    }
}



void Index::parseDocument( const TQString &filename, int docNum )
{
    KviFile file( filename );
    if ( !file.openForReading() ) {
	tqWarning( "can not open file %s", filename.ascii() );
	return;
    }
    TQTextStream s( &file );
    TQString text = s.read();
    if (text.isNull())
        return;
    bool valid = TRUE;
    const TQChar *buf = text.unicode();
    TQChar str[64];
    TQChar c = buf[0];
    int j = 0;
    int i = 0;
    while ( (uint)j < text.length() ) {
	if ( c == '<' || c == '&' ) {
	    valid = FALSE;
	    if ( i > 1 )
		insertInDict( TQString(str,i), docNum );
	    i = 0;
	    c = buf[++j];
	    continue;
	}
	if ( ( c == '>' || c == ';' ) && !valid ) {
	    valid = TRUE;
	    c = buf[++j];
	    continue;
	}

	if ( !valid ) {

	    c = buf[++j];

	    continue;

	}

	if ( ( c.isLetterOrNumber() || c == '_' ) && i < 63 ) {

	    str[i] = c.lower();

	    ++i;

	} else {

	    if ( i > 1 )

		insertInDict( TQString(str,i), docNum );

	    i = 0;

	}

	c = buf[++j];

    }

    if ( i > 1 )

	insertInDict( TQString(str,i), docNum );

    file.close();

}



void Index::writeDict()

{

    KviPointerHashTableIterator<TQString,Entry> it( dict );

    KviFile f( dictFile );

    if ( !f.openForWriting() )

	return;

    TQDataStream s( &f );

    for( ; it.current(); ++it ) {

        Entry *e = it.current();

	s << it.currentKey();

	s << e->documents;

    }

    f.close();

    writeDocumentList();

}



void Index::writeDocumentList()

{
    KviFile f( docListFile );
    if ( !f.openForWriting() )
	return;
    TQTextStream s( &f );
    TQString docs = docList.join("[#item#]");
    s << docs;
    
    KviFile f1( docListFile+".titles" );
    if ( !f1.openForWriting() )
	return;
    TQTextStream s1( &f1 );
    docs = titleList.join("[#item#]");
    s1 << docs;
}



void Index::readDict()

{
    KviFile f( dictFile );
    if ( !f.openForReading() )
	return;
    dict.clear();
    TQDataStream s( &f );
    TQString key;
    KviValueList<Document> docs;
    while ( !s.atEnd() ) {
	s >> key;
	s >> docs;
	dict.insert( key, new Entry( docs ) );
    }
    f.close();
    readDocumentList();
}



void Index::readDocumentList()
{
    //reading docs
    KviFile f( docListFile );
    if ( !f.openForReading() )
	return;
    TQTextStream s( &f );
    docList = TQStringList::split("[#item#]",s.read());
    
    //reading titles
    KviFile f1( docListFile+".titles" );
    if ( !f1.openForReading() )
	return;
    TQTextStream s1( &f1 );
    titleList = TQStringList::split("[#item#]",s1.read());
//    debug(titleList);
}



// Returns the paths of the documents that contain every term in terms.
//
// terms    - single search words; a word containing '*' is expanded through
//            getWildcardTerms() / setupDummyTerm().
// termSeq  - phrases; when not empty, each candidate document is additionally
//            checked with searchForPattern().
// seqWords - the individual words of the phrases, passed to
//            searchForPattern().
//
// An empty list is returned as soon as a non-wildcard term is not in the
// dictionary. The candidate set starts from the first term after sorting
// (see kvi_compare: ordering is by frequency) and is intersected with the
// documents of every other term, summing the frequencies.
//
// NOTE(review): the Term objects are allocated with new and handed to
// termList; whether the list deletes them is not visible here - confirm, the
// early returns would otherwise leak.
TQStringList Index::query( const TQStringList &terms, const TQStringList &termSeq, const TQStringList &seqWords )
{
    TermList termList;

    TQStringList::ConstIterator it;
    for ( it = terms.begin(); it != terms.end(); ++it ) {
	if ( (*it).contains( '*' ) ) {
	    KviValueList<Document> wcts = setupDummyTerm( getWildcardTerms( *it ) );
	    termList.append( new Term( "dummy", wcts.count(), wcts ) );
	} else {
	    // single dictionary lookup per term
	    Entry *e = dict[ *it ];
	    if ( !e )
		return TQStringList();
	    termList.append( new Term( *it, e->documents.count(), e->documents ) );
	}
    }
    if ( !termList.count() )
	return TQStringList();
    termList.sort();

    // Copy the documents BEFORE removing the term from the list: if the list
    // owns its items, removeFirst() destroys the Term and reading it
    // afterwards would be a use-after-free.
    KviValueList<Document> minDocs = termList.first()->documents;
    termList.removeFirst();

    KviValueList<Document>::iterator C;
    KviValueList<Document>::ConstIterator It;
    for ( Term *t = termList.first(); t; t = termList.next() ) {
	KviValueList<Document> docs = t->documents;
	C = minDocs.begin();
	while ( C != minDocs.end() ) {
	    bool found = FALSE;
	    for ( It = docs.begin(); It != docs.end(); ++It ) {
		if ( (*C).docNumber == (*It).docNumber ) {
		    (*C).frequency += (*It).frequency;
		    found = TRUE;
		    break;
		}
	    }
	    if ( !found )
		C = minDocs.remove( C );
	    else
		++C;
	}
    }

    TQStringList results;
#ifndef COMPILE_USE_QT4
    qHeapSort( minDocs );
#endif

    // Document numbers index into docList; skip any number that is out of
    // range (e.g. dictionary and document list files out of sync) instead
    // of indexing past the end of the list.
    const int docCount = (int)docList.count();
    const bool checkPhrases = !termSeq.isEmpty();
    for ( C = minDocs.begin(); C != minDocs.end(); ++C ) {
	const int docNum = (int)(*C).docNumber;
	if ( docNum < 0 || docNum >= docCount )
	    continue;
	const TQString fileName = docList[ docNum ];
	if ( !checkPhrases || searchForPattern( termSeq, seqWords, fileName ) )
	    results << fileName;
    }
    return results;
}



// Returns the text between the first "<title>" and the first "</title>" of
// the given file (tags matched case-insensitively).
// Returns the translated string "Untitled" when either tag is missing or
// nothing lies between them, and fileName itself (after a warning) when the
// file cannot be opened.
TQString Index::getDocumentTitle( const TQString &fileName )
{
    KviFile file( fileName );
    if ( !file.openForReading() ) {
	tqWarning( "cannot open file %s", fileName.ascii() );
	return fileName;
    }
    TQTextStream s( &file );
    TQString text = s.read();

    const int openTag = text.find( "<title>", 0, FALSE );
    const int closeTag = text.find( "</title>", 0, FALSE );

    // Check the search results before doing arithmetic on them: adding the
    // tag length to a failed search (-1) would yield a bogus start offset.
    if ( openTag == -1 || closeTag == -1 )
	return tr("Untitled");

    const int start = openTag + 7; // 7 == length of "<title>"
    if ( closeTag - start <= 0 )
	return tr("Untitled");

    return text.mid( start, closeTag - start );
}



TQStringList Index::getWildcardTerms( const TQString &term )

{

    TQStringList lst;

    TQStringList terms = split( term );

#ifdef COMPILE_USE_QT4
	TQStringList::Iterator iter;
#else
    KviValueList<TQString>::iterator iter;
#endif


    KviPointerHashTableIterator<TQString,Entry> it( dict );

    for( ; it.current(); ++it ) {

	int index = 0;

	bool found = FALSE;

	TQString text( it.currentKey() );

	for ( iter = terms.begin(); iter != terms.end(); ++iter ) {

	    if ( *iter == "*" ) {

		found = TRUE;

		continue;

	    }

	    if ( iter == terms.begin() && (*iter)[0] != text[0] ) {

		found = FALSE;

		break;

	    }

	    index = text.find( *iter, index );

	    if ( *iter == terms.last() && index != (int)text.length()-1 ) {

		index = text.findRev( *iter );

		if ( index != (int)text.length() - (int)(*iter).length() ) {

		    found = FALSE;

		    break;

		}

	    }

	    if ( index != -1 ) {

		found = TRUE;

		index += (*iter).length();

		continue;

	    } else {

		found = FALSE;

		break;

	    }

	}

	if ( found )

	    lst << text;

    }



    return lst;

}



// Splits a wildcard expression into its literal fragments and "*" markers,
// in order of appearance. Examples:
//   "foo*bar" -> "foo", "*", "bar"
//   "foo*"    -> "foo", "*"
//   "*foo"    -> "*", "foo"
//   "a**b"    -> "a", "*", "b"   (consecutive wildcards collapse)
//
// A leading '*' is kept as an explicit "*" item. Without it "*foo" would be
// indistinguishable from the plain word "foo", so getWildcardTerms() would
// anchor the literal at the start of the key and "*" alone would match
// nothing.
TQStringList Index::split( const TQString &str )
{
    TQStringList lst;
    int j = 0;
    int i = str.find( '*', j );

    if ( i == 0 )
	lst << "*";

    while ( i != -1 ) {
	// i > j: there is a non-empty literal between the previous wildcard
	// (or the start of the string) and this one
	if ( i > j && i <= (int)str.length() ) {
	    lst << str.mid( j, i - j );
	    lst << "*";
	}
	j = i + 1;
	i = str.find( '*', j );
    }

    // whatever follows the last wildcard (the whole string if there is none)
    const TQString tail = str.mid( j );
    if ( tail.length() > 0 )
	lst << tail;

    return lst;
}



// Builds the union of the document lists of all the given terms that exist
// in the dictionary; terms missing from the dictionary are ignored.
// The result starts as the document list of the last term after sorting
// (see kvi_compare: ordering is by frequency); documents of the other terms
// are appended when findIndex() does not find them in the result yet.
// Returns an empty list when none of the terms is known.
KviValueList<Document> Index::setupDummyTerm( const TQStringList &terms )
{
    TermList candidates;
    for ( TQStringList::ConstIterator word = terms.begin(); word != terms.end(); ++word ) {
	Entry *entry = dict[ *word ];
	if ( !entry )
	    continue;
	candidates.append( new Term( *word, entry->documents.count(), entry->documents ) );
    }
    candidates.sort();

    KviValueList<Document> merged;
    if ( !candidates.count() )
	return merged;

    merged = candidates.last()->documents;
    candidates.removeLast();

    for ( Term *term = candidates.first(); term; term = candidates.next() ) {
	KviValueList<Document> docs = term->documents;
	KviValueList<Document>::iterator doc;
	for ( doc = docs.begin(); doc != docs.end(); ++doc ) {
	    if ( merged.findIndex( *doc ) == -1 )
		merged.append( *doc );
	}
    }
    return merged;
}



void Index::buildMiniDict( const TQString &str )

{

    if ( miniDict[ str ] )

	miniDict[ str ]->positions.append( wordNum );

    ++wordNum;

}



bool Index::searchForPattern( const TQStringList &patterns, const TQStringList &words, const TQString &fileName )

{

    KviFile file( fileName );

    if ( !file.openForReading() ) {

	tqWarning( "cannot open file %s", fileName.ascii() );

	return FALSE;

    }



    wordNum = 3;

    miniDict.clear();

    TQStringList::ConstIterator cIt = words.begin();

    for ( ; cIt != words.end(); ++cIt )

	miniDict.insert( *cIt, new PosEntry( 0 ) );



    TQTextStream s( &file );

    TQString text = s.read();

    bool valid = TRUE;

    const TQChar *buf = text.unicode();

    TQChar str[64];

    TQChar c = buf[0];

    int j = 0;

    int i = 0;

    while ( (uint)j < text.length() ) {

	if ( c == '<' || c == '&' ) {

	    valid = FALSE;

	    if ( i > 1 )

		buildMiniDict( TQString(str,i) );

	    i = 0;

	    c = buf[++j];

	    continue;

	}

	if ( ( c == '>' || c == ';' ) && !valid ) {

	    valid = TRUE;

	    c = buf[++j];

	    continue;

	}

	if ( !valid ) {

	    c = buf[++j];

	    continue;

	}

	if ( ( c.isLetterOrNumber() || c == '_' ) && i < 63 ) {

	    str[i] = c.lower();

	    ++i;

	} else {

	    if ( i > 1 )

		buildMiniDict( TQString(str,i) );

	    i = 0;

	}

	c = buf[++j];

    }

    if ( i > 1 )

	buildMiniDict( TQString(str,i) );

    file.close();



    TQStringList::ConstIterator patIt = patterns.begin();

    TQStringList wordLst;

    KviValueList<uint> a, b;

    KviValueList<uint>::iterator aIt;

    for ( ; patIt != patterns.end(); ++patIt ) {

	wordLst = TQStringList::split( ' ', *patIt );

	a = miniDict[ wordLst[0] ]->positions;

	for ( int j = 1; j < (int)wordLst.count(); ++j ) {

	    b = miniDict[ wordLst[j] ]->positions;

	    aIt = a.begin();

	    while ( aIt != a.end() ) {

		if ( b.find( *aIt + 1 ) != b.end() ) {

		    (*aIt)++;

		    ++aIt;

		} else {

		    aIt = a.remove( aIt );

		}

	    }

	}

    }

    if ( a.count() )

	return TRUE;

    return FALSE;

}



#include "index.moc"
