/*
    This file is part of Akregator.

    Copyright (C) 2004 Teemu Rytilahti <tpr@d5k.net>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

    As a special exception, permission is given to link this program
    with any edition of Qt, and distribute the resulting executable,
    without including the source code for Qt in the source distribution.
*/
 
#include <tqregexp.h>
#include <tqstring.h>
#include <tqstringlist.h>
#include <tqvaluelist.h>
#include <kcharsets.h>

#include "feeddetector.h"


using namespace Akregator;

FeedDetectorEntryList FeedDetector::extractFromLinkTags(const TQString& s)	
{
    //reduce all sequences of spaces, newlines etc. to one space:
    TQString str = s.simplifyWhiteSpace();

    // extracts <link> tags
    TQRegExp reLinkTag("<[\\s]?LINK[^>]*REL[\\s]?=[\\s]?\\\"[\\s]?(ALTERNATE|SERVICE\\.FEED)[\\s]?\\\"[^>]*>", false);

    // extracts the URL (href="url")
    TQRegExp reHref("HREF[\\s]?=[\\s]?\\\"([^\\\"]*)\\\"", false);
    // extracts type attribute
    TQRegExp reType("TYPE[\\s]?=[\\s]?\\\"([^\\\"]*)\\\"", false);
    // extracts the title (title="title")
    TQRegExp reTitle("TITLE[\\s]?=[\\s]?\\\"([^\\\"]*)\\\"", false);

    int pos = 0;
    int matchpos = 0;

    // get all <link> tags
    TQStringList linkTags;
    //int strlength = str.length();
    while ( matchpos != -1 )
    {
        matchpos = reLinkTag.search(str, pos);
        if (matchpos != -1)
        {
            linkTags.append( str.mid(matchpos, reLinkTag.matchedLength()) );
            pos = matchpos + reLinkTag.matchedLength();
        }
    }

    FeedDetectorEntryList list;

    for ( TQStringList::Iterator it = linkTags.begin(); it != linkTags.end(); ++it )
    {
        TQString type;
        int pos = reType.search(*it, 0);
        if (pos != -1)
            type = reType.cap(1).lower();

        // we accept only type attributes indicating a feed
        if ( type != "application/rss+xml" && type != "application/rdf+xml"
	      && type != "application/atom+xml" && type != "text/xml" )
            continue;
                
        TQString title;
        pos = reTitle.search(*it, 0);
        if (pos != -1)
        title = reTitle.cap(1);

        title = KCharsets::resolveEntities(title);

        TQString url;
        pos = reHref.search(*it, 0);
        if (pos != -1)
            url = reHref.cap(1);

        url = KCharsets::resolveEntities(url);

        // if feed has no title, use the url as preliminary title (until feed is parsed)
        if ( title.isEmpty() )
            title = url;

        if ( !url.isEmpty() )
            list.append(FeedDetectorEntry(url, title) );		
    }


    return list;
}

TQStringList FeedDetector::extractBruteForce(const TQString& s)
{
    TQString str = s.simplifyWhiteSpace();
    
    TQRegExp reAhrefTag("<[\\s]?A[^>]?HREF=[\\s]?\\\"[^\\\"]*\\\"[^>]*>", false);
    
    // extracts the URL (href="url")
    TQRegExp reHref("HREF[\\s]?=[\\s]?\\\"([^\\\"]*)\\\"", false);

    TQRegExp rssrdfxml(".*(RSS|RDF|XML)", false);

    int pos = 0;
    int matchpos = 0;
    
    // get all <a href> tags and capture url
    TQStringList list;
    //int strlength = str.length();
    while ( matchpos != -1 )
    {
        matchpos = reAhrefTag.search(str, pos);
        if ( matchpos != -1 )
        {
            TQString ahref = str.mid(matchpos, reAhrefTag.matchedLength());
            int hrefpos = reHref.search(ahref, 0);
            if ( hrefpos != -1 )
            {
                TQString url = reHref.cap(1);

                url = KCharsets::resolveEntities(url);

                if ( rssrdfxml.exactMatch(url) )
                    list.append(url);
            }

            pos = matchpos + reAhrefTag.matchedLength();
        }
    }
    
    return list;
    
}
