///////
   //    Scheduler.cc
   //    Scheduler Class definitions
   //
   //    Class for managing the crawling process
   //
   //    Part of the ht://Check package
   //
   //    Copyright (c) 1999-2004 Comune di Prato - Prato - Italy
   //    Some Portions Copyright (c) 1995-2000 The ht://Dig Group <www.htdig.org>
   //    Some Portions Copyright (c) 2008 Devise.IT srl <http://www.devise.it/>
   //    Author: Gabriele Bartolini - Prato - Italy <angusgb@users.sourceforge.net>
   //
   //    For copyright details, see the file COPYING in your distribution
   //    or the GNU General Public License version 2 or later 
   //    <http://www.gnu.org/copyleft/gpl.html>
   //
   //    $Id: Scheduler.cc,v 1.88 2009/08/26 12:25:56 angusgb Exp $
   //
   //    G.Bartolini
   //    started: 13.09.1999
///////

#ifdef HAVE_CONFIG_H
#include "htconfig.h"
#endif /* HAVE_CONFIG_H */

#ifdef HAVE_STD
#include <iostream>
#include <sstream>
#include <iomanip>
#ifdef HAVE_NAMESPACES
using namespace std;
#endif
#else
#include <iostream.h>
#include <sstream.h>
#include <iomanip.h>
#endif /* HAVE_STD */

#include "_Server.h"
#include "Scheduler.h"
#include "StringList.h"
#include "WordType.h"
#include "HtmlParser.h"
#include "Htmysql.h"
#include "HtCookieMemJar.h"
#include "HtCookieInFileJar.h"

///////
   //    Construction
///////

Scheduler::Scheduler()
   : DB(0), servers(), CurrentServer(0), CurrentUrl(0), Proxy(0),
     Credentials(), ProxyCredentials(), AcceptLanguage(),
     TransportConnect(0), HTTPConnect(0), CurrentResponse(0),
     erase(0), stop(0),
     drop_database(true),
     deserialized(false), deserialized_cookies(false),
     _cookie_jar(0),
     argc(0), argv(0), options_list(),
     Config(0),
     ValidExtensions(), BadExtensions(),
     debug(0), stats(0), parsed_urls(0)
{
   // Every pointer member starts out null and every flag cleared, except
   // 'drop_database' which defaults to true.

   // Create the default cookie jar (an HtCookieMemJar) and register it
   // with the HTTP transport class. SetOptions() may later replace it
   // with a jar imported from a cookies file.
   _cookie_jar = new HtCookieMemJar();
   HtHTTP::SetCookieJar((HtCookieJar *)_cookie_jar);
}



///////
   //    Destruction
///////

Scheduler::~Scheduler ()
{
   // Tear-down order:
   //   1. store the servers kept in memory into the database (if not done yet)
   //   2. report / store / free the cookie jar
   //   3. free the proxy URL
   //   4. close and free the database object
   //   5. print the HTTP statistics (only when requested)

   const bool trace = (debug > 1);         // verbose destruction messages
   const bool report = (erase && stats);   // summaries requested?

   if (trace)
      cout << " ! Scheduler object is being destructed" << endl;

   // 1. Servers still only in memory: write them into the database
   if (!deserialized)
   {
      if (trace)
         cout << " ! Deserializing servers" << endl;

      DeserializeServers();
   }

   // 2. Cookie jar
   if (_cookie_jar)
   {
      // Show the cookies retrieved before anything gets released
      if (report)
         _cookie_jar->ShowSummary(cout);

      if (!deserialized_cookies)
      {
         if (trace)
            cout << " ! Deserializing cookies" << endl;

         DeserializeCookies();
      }

      if (trace)
         cout << " ! Freeing memory from cookies" << endl;

      delete _cookie_jar;
   }

   // 3. Proxy URL
   if (Proxy)
      delete Proxy;

   // 4. Database object: announce, close, release
   if (DB)
   {
      if (debug > 0)
         cout << "Database '" << DB->GetDB() << "' closed ..." << endl;

      DB->Close();
      delete DB;
   }

   // 5. HTTP statistics
   if (report)
      HtHTTP::ShowStatistics(cout);

   if (trace)
      cout << " ! Scheduler object now destructed" << endl;
}


///////
   //    Initial process of scheduling:
   //    1 - Database Creation
   //    2 - Urls insertion from the configuration <start_url> directive
///////

int Scheduler::Initial(const std::string &list)
{
   //
   // Schedule every URL contained in 'list' (URLs separated by spaces
   // or tabs) as a starting point, i.e. without a referring URL.
   //
   // Returns 1 when the whole list has been processed, 0 as soon as
   // AddUrl() reports a database error (the error is displayed first).
   // The other AddUrl() result codes do not stop the loop.
   //

   StringList tokens(list.c_str(), " \t");

   for (int i = 0; i < tokens.Count(); i++)
   {
      // AddUrl() hands back the schedule ID through this argument;
      // it is not needed here.
      unsigned int IDUrl = 0;

      if (AddUrl(tokens[i], IDUrl, false) == Scheduler_DBError)
      {
         DB->DisplayError();
         return 0;
      }
   }

   return 1;
}


///////
   //    Store a server in memory (dictionary)
///////

_Server *Scheduler::AddServer(_Url &u)
{
   // Build a server object from the host and port of the URL, register
   // it in the dictionary under the URL's signature, and give it the
   // next server ID. Returns the new object (0 if it was not created).

   _Server *created = new _Server(u.host().get(), u.port());

   if (created == 0)
      return 0;

   servers.insert(std::make_pair(u.signature(), created));
   created->SetID(_Server::IncrementTotServers());

   return created;
}


///////
   //    Search for a server in memory (dictionary)
///////

_Server *Scheduler::FindServer(const std::string &signature)
{
   // Look the signature up in the servers dictionary: the stored
   // server is returned on a hit, 0 when the signature is unknown.
   ServersDictionary::iterator hit = servers.find(signature);

   if (hit != servers.end())
      return hit->second;

   return 0;
}



///////
   //    Select an existing or create a new database and stores a new object
///////

Scheduler::Scheduler_Codes Scheduler::SelectDatabase(const std::string &db_name)
{
   //
   // Creates the database object for 'db_name', connects to the server
   // and then either selects the existing database or (re)creates it,
   // depending on the 'erase' and 'drop_database' flags:
   //
   //  - erase not set: the database is selected when it exists; when it
   //    does not, 'erase' is switched on so that it gets created.
   //  - erase set and drop_database not set: the structure can only be
   //    kept if the database exists; otherwise 'drop_database' is forced.
   //
   // Returns Scheduler_OK, Scheduler_MemoryError (no database object) or
   // Scheduler_DBError (connect / select / create failed).
   //
   // Requires the Config pointer to be already set (see SetOptions()).
   //

///////
   //   Set the database debug level according on Scheduler's one
///////

   Htmysql::SetDebugLevel(debug);

///////
   //    Creating the new database object
///////

#ifdef HAVE_LOAD_DEFAULTS
   const std::string mysql_conf_file_prefix ((*Config)["mysql_conf_file_prefix"].get());
#endif
   const std::string mysql_conf_group ((*Config)["mysql_conf_group"].get());
   const std::string mysql_client_charset ((*Config)["mysql_client_charset"].get());
   const std::string mysql_db_charset ((*Config)["mysql_db_charset"].get());

   DB = new HtmysqlDB (db_name,
#ifdef HAVE_LOAD_DEFAULTS
       mysql_conf_file_prefix,
#endif
       mysql_conf_group,
       mysql_client_charset,
       mysql_db_charset,
       argc, argv);

   if(! DB) return Scheduler_MemoryError;

///////
   //   Database connection
///////

   if (!DB->Connect())
      return Scheduler_DBError;

   if (!erase)
   {

      if (debug > 0)
         cout << "Looking if Database '" << DB->GetDBSignature()
            << "' exists ... ";

      if (! DB->Exists(DB->GetDB()) )
      {
         // The database called 'db_name' does not exist:
         // fall back to creating it from scratch

         if (debug > 0)
            cout << "Not found" << endl;

         erase = 1;

      }
      else
      {

         if (debug > 0)
            cout << "Found" << endl;

         // Let's select it
         // NOTE(review): unlike Connect() and CreateDatabase(), a non-zero
         // result is treated as a failure here - presumably SelectDB()
         // follows the MySQL C API convention (0 on success); confirm.

         if (DB->SelectDB(DB->GetDB()))
            return Scheduler_DBError;

         if (debug > 0)
         {
            cout << "Database '" << DB->GetDBSignature()
               << "' selected ...";
#ifdef HAVE_LOAD_DEFAULTS
            if (DB->GetUser().length())
               cout << " (user: '" << DB->GetUser() << "')";
#endif

            cout << endl;
         }

      }

   }
   else
   {
      // We have to erase the database. However, let's see if we have
      // to keep the structure. In order to do this, we have to check
      // if the database exists.

      if (! drop_database)
      {
         if (debug > 0)
            cout << "Looking if database '" << DB->GetDBSignature()
               << "' exists ... should we keep it? ";

         if (! DB->Exists(DB->GetDB()) )
         {
            // Nothing to keep: the database does not exist

            if (debug > 0)
               cout << "sorry, not found (can't keep it)" << endl;

            drop_database = true; // We gotta erase the database
         }
         else if (debug > 0)
         {
            // The answer is printed only when the question was printed
            cout << "yes" << endl;
         }
      }
   }

///////
   //   Database creation
///////

   if (erase)
   {
      // Before creating the database, let's set the
      // index length for Url fields
      DB->SetURL_Index_Length(Config->Value("url_index_length"));

      if (debug > 0)
      {
         cout << "Set the length of the index for the Url fields to: ";

         if (DB->GetURL_Index_Length()>0)
            cout << DB->GetURL_Index_Length();
         else cout << "unlimited";

         cout << endl;
      }

      // Set the available charsets list
      DB->LoadAvailableCharsets((*Config)["available_charsets"].get());

      // Set the 'drop_database' flag
      DB->SetDropDatabase(drop_database);

      if (!DB->CreateDatabase ())
         return Scheduler_DBError;

      if (debug > 0)
         cout << "Database '" << DB->GetDBSignature()
            << "' created ..." << endl;
   }

///////
   // Set the SQL Big Table Option
///////

   if (Config->Boolean("sql_big_table_option"))
   {
      if (!DB->SetSQLBigTableOption())
      {
         // Only a warning: the failure does not change the return code
         cout << "Setting SQL big table option failed. "
            << "Try to set 'sql_big_table_option' to false"
            << endl;
      }
   }

   // Set the SchedulerEntry debug level
   SchedulerEntry::SetDebugLevel(debug);

   return Scheduler_OK;

}


///////
   //    Restore a database
///////

Scheduler::Scheduler_Codes Scheduler::RestoreDatabase()
{
   // Nothing can be restored without a database object
   if (DB == 0)
      return Scheduler_MemoryError;

   // The restore steps themselves are not implemented in this function:
   //   1. load all the servers into memory;
   //   2. erase from the db all the entries of the tables related to URLs
   //      that still have the ToBeRetrieved flag set, i.e. URLs the
   //      program was retrieving when it stopped.

   return Scheduler_OK;
}


///////
   //    Add a new Url
///////

Scheduler::Scheduler_Codes Scheduler::AddUrl(const std::string &u, unsigned int &IDUrl, bool previous)
{
   //
   // Schedules the URL 'u'.
   //
   //   u        - the URL as found (it gets normalized here)
   //   IDUrl    - receives the schedule ID of the URL, whether it was
   //              already stored or it has just been created
   //   previous - false for a starting URL (no referrer), true when the
   //              URL has been found while processing CurrentSchedule
   //
   // Returns Scheduler_OK, Scheduler_DBError or Scheduler_MemoryError.
   //

   // Query result object kept across calls
   static HtmysqlQueryResult scheduletmp;

   // Normalized copy of the URL
   _Url candidate(u);
   candidate.normalize();

   // CurrentLinkSchedule holds the link currently examined by the
   // Scheduler; parsing code (HtmlParser for instance) relies on it too
   CurrentLinkSchedule.SetNewUrl(candidate.get().get());

   if (debug > 2)
      cout << "    > " << CurrentLinkSchedule << endl;

   // Is the URL already in the schedule?
   // -1 means a database error, otherwise it is the number of records found
   const int matches = DB->Search (CurrentLinkSchedule, scheduletmp);

   if (matches == -1)
      return Scheduler_DBError;

   if (matches > 0)
   {
      // Already scheduled: load the stored entry and report its ID
      if (! DB->GetNextElement(CurrentLinkSchedule, scheduletmp) )
         return Scheduler_DBError;

      if (debug > 2)
         cout << "    > Schedule entry found, not stored ("
            << CurrentLinkSchedule << " )" << endl;

      IDUrl = CurrentLinkSchedule.GetIDSchedule();

      return Scheduler_OK;
   }

   // From here on the URL is new

   _Server *server = 0;

   if (candidate.host().length())
   {
      // The URL has a host: bind it to a known server or to a new one
      server = FindServer(candidate.signature().get());

      if (server)
      {
         if (debug > 3)
            cout << "    > Server already stored: " << server->host() << " - port "
               << server->port() << " (" << server->GetID() << ")" << endl;
      }
      else
      {
         server = AddServer(candidate);

         if (!server) // Impossible to store it
            return Scheduler_MemoryError;

         if (debug > 1)
            cout << "    > New server: " << server->host() << " - port "
               << server->port() << " (" << server->GetID() << ")" << endl;
      }
   }
   else
   {
      // No host field - probably a malformed URL
      if (debug>3)
         cout << " > Warning! Possible HTTP malformed URL (empty host)" << endl;

      CurrentLinkSchedule.SetMalformed(true);
   }

   // Bind the server (possibly none) and a brand new ID to the entry
   CurrentLinkSchedule.SetServer(server);
   CurrentLinkSchedule.SetIDSchedule(_Url::IncrementTotUrls());

   // Hand the new ID back to the caller
   IDUrl = CurrentLinkSchedule.GetIDSchedule();

   if (previous)
   {
      // The referring URL is the one in CurrentSchedule: record it
      // and go one hop further
      CurrentLinkSchedule.SetIDReferer(CurrentSchedule.GetIDSchedule());
      CurrentLinkSchedule.SetHopCount((CurrentSchedule.GetHopCount()) + 1);

      // The status depends on what the configuration says about the URL
      switch (IsAValidURL(CurrentLinkSchedule))
      {
         case Scheduler_URL_Valid:
            // To be retrieved, internal
            CurrentLinkSchedule.SetStatus(SchedulerEntry::Url_ToBeRetrieved);
            CurrentLinkSchedule.SetDomain(SchedulerEntry::Url_Internal);
            ++runinfo.TotUrls;
            break;

         // All the remaining codes are rejections

         case Scheduler_URL_MaxHopCount:
            if (debug > 2)
               cout << "    > Rejected: max hop count reached" << endl;
            CurrentLinkSchedule.SetStatus(SchedulerEntry::Url_MaxHopCount);
            break;

         case Scheduler_URL_Excludes:
            // In the exclude list: only its existence may be checked
            if (debug > 2)
               cout << "    > Rejected: according to exclude list" << endl;
            CurrentLinkSchedule.SetStatus(SchedulerEntry::Url_CheckIfExists);
            if (Config->Boolean("check_external"))
               ++runinfo.TotUrls;
            break;

         case Scheduler_URL_BadQueryString:
            if (debug > 2)
               cout << "    > Rejected: according to the bad query string list" << endl;
            CurrentLinkSchedule.SetStatus(SchedulerEntry::Url_BadQueryString);
            break;

         case Scheduler_URL_BadExtension:
            if (debug > 2)
               cout << "    > Rejected: according to the bad extensions list" << endl;
            CurrentLinkSchedule.SetStatus(SchedulerEntry::Url_BadExtension);
            break;

         case Scheduler_URL_NotValidExtension:
            // Not in the valid extensions list: stored with the same
            // status as a bad extension
            if (debug > 2)
               cout << "    > Rejected: according to the valid extensions list" << endl;
            CurrentLinkSchedule.SetStatus(SchedulerEntry::Url_BadExtension);
            break;

         case Scheduler_URL_OutOfLimits:
            // Outside the limits: external, only its existence may be checked
            if (debug > 2)
               cout << "    > Rejected: according to the limits list" << endl;
            CurrentLinkSchedule.SetStatus(SchedulerEntry::Url_CheckIfExists);
            CurrentLinkSchedule.SetDomain(SchedulerEntry::Url_External);
            if (Config->Boolean("check_external"))
               ++runinfo.TotUrls;
            break;

         case Scheduler_URL_FileProtocol:
            if (debug > 2)
               cout << "    > Rejected: file:// call - error!" << endl;
            CurrentLinkSchedule.SetStatus(SchedulerEntry::Url_FileProtocol);
            break;

         case Scheduler_URL_EMail:
            if (debug > 2)
               cout << "    > Rejected: E_Mail address" << endl;
            CurrentLinkSchedule.SetStatus(SchedulerEntry::Url_EMail);
            break;

         case Scheduler_URL_Javascript:
            if (debug > 2)
               cout << "    > Rejected: Javascript pseudo-protocol" << endl;
            CurrentLinkSchedule.SetStatus(SchedulerEntry::Url_Javascript);
            break;

         case Scheduler_URL_NotValidService:
            if (debug > 2)
               cout << "    > Rejected: not a valid service for ht://Check" << endl;
            CurrentLinkSchedule.SetStatus(SchedulerEntry::Url_NotValidService);
            break;

         case Scheduler_URL_Malformed:
            if (debug > 2)
               cout << "    > Rejected: it's a malformed URL" << endl;
            CurrentLinkSchedule.SetStatus(SchedulerEntry::Url_Malformed);
            break;

         case Scheduler_URL_MaxUrlsCount:
            // Never occurs here
            break;
      }
   }
   else
   {
      // No referring URL: this is a starting URL, which is always
      // retrieved and always internal
      CurrentLinkSchedule.SetStatus(SchedulerEntry::Url_ToBeRetrieved);
      CurrentLinkSchedule.SetDomain(SchedulerEntry::Url_Internal);
   }

   ++runinfo.ScheduledUrls;

   // Store the entry into the database
   if (!DB->Insert(CurrentLinkSchedule))
      return Scheduler_DBError;

   if (debug > 1)
      cout << " > Schedule entry stored: " << CurrentLinkSchedule << endl;

   return Scheduler_OK;
}



///////
   //    Deserialize the servers dictionary and stores the info into the DB
///////

Scheduler::Scheduler_Codes Scheduler::DeserializeServers()
{
   // Inserts every server of the in-memory dictionary into the database.
   // Stops with Scheduler_DBError at the first failed insertion; the
   // 'deserialized' flag is raised only when all of them were stored.

   if (debug > 0)
      cout << "Updating database info about Servers seen." << endl;

   ServersDictionary::iterator entry = servers.begin();

   while (entry != servers.end())
   {
      _Server *current = entry->second;

      if (debug>3)
         cout << "Deserializing: " << current->host() << " - port "
            << current->port() << " (" << current->GetID() << ")" << endl;

      if (!DB->Insert(*current))
         return Scheduler_DBError;

      ++entry;
   }

   deserialized = true;

   return Scheduler_OK;
}


///////
   //    Deserialize the cookies dictionary and stores the info into the DB
///////

Scheduler::Scheduler_Codes Scheduler::DeserializeCookies()
{
   // Inserts every cookie of the cookie jar into the database.
   //
   // Returns Scheduler_MemoryError when there is no cookie jar or no
   // database object, Scheduler_DBError at the first failed insertion,
   // Scheduler_OK otherwise. The 'deserialized_cookies' flag is raised
   // only when all the cookies were stored.

   if (!_cookie_jar)
      return Scheduler_MemoryError;

   // The destructor calls this function whenever the cookies have not
   // been stored yet, which may happen before any database object has
   // been created (e.g. cookies imported by SetOptions() and no call to
   // SelectDatabase()): there is nowhere to store them in that case.
   if (!DB)
      return Scheduler_MemoryError;

   if (debug > 0)
      cout << "Updating database info about cookies found." << endl;

   _cookie_jar->ResetIterator();

   while (const HtCookie* cookie = _cookie_jar->NextCookie())
   {
      if(debug>3)
         cout << "Deserializing cookie: " << cookie->GetName() << endl;

      if (!DB->Insert(*cookie))
         return Scheduler_DBError;

   }

   deserialized_cookies = true;
   return Scheduler_OK;

}


///////
   //    Set the options of the scheduler from the configuration file
///////

void Scheduler::SetOptions(Configuration &config)
{
   //
   // Reads the crawling options from 'config': URL limits and exclusions,
   // valid/bad extensions, HTTP proxy, credentials, Accept-Language and
   // the optional cookies input file. The address of 'config' is kept in
   // the Config member, so the object must outlive the Scheduler's use
   // of it.
   //

   // Set the URL class static configuration variable
   // IMPORTANT! This must be set before any deal with a URL object
   URL::SetConfiguration(config);

   // Initialize the WordType class with the configuration
   WordType::Initialize(config);

   // Set the debug level for the other classes
   Transport::SetDebugLevel(debug);
   HtCookieJar::SetDebugLevel(debug);

   // Set the default parser content-type string
   Transport::SetDefaultParserContentType ("text/html");

   // Temporary variables
   StringList l;
   String t;
   String lowerp;
   char *p;

   // Set limits
   l.Create(config["limit_urls_to"], " \t");
   Limits.setEscaped(l);
   l.Release();

   // Set limits (normalized)
   l.Create(config["limit_normalized"], " \t");
   LimitsNormalized.setEscaped(l);
   l.Release();

   // Set Exclusion
   l.Create(config["exclude_urls"], " \t");
   Excludes.setEscaped(l);
   l.Release();

   // Set Bad query string
   l.Create(config["bad_querystr"], " \t");
   BadQueryString.setEscaped(l);
   l.Release();

   // Valid Extensions
   t = config["valid_extensions"];

   p = strtok(t, " \t");
   while (p)
   {
      // Extensions are case insensitive: they are stored in lower case
      lowerp = p;
      lowerp.lowercase();
      ValidExtensions.insert(lowerp.get());
      p = strtok(0, " \t");
   }

   // Bad Extensions
   t = config["bad_extensions"];

   p = strtok(t, " \t");
   while (p)
   {
      // Extensions are case insensitive: they are stored in lower case
      lowerp = p;
      lowerp.lowercase();
      BadExtensions.insert(lowerp.get());
      p = strtok(0, " \t");
   }

   // Set the Proxy (if exists)
   const std::string proxyURL (config["http_proxy"].get());
   if (proxyURL.length())
   {
      Proxy = new _Url(proxyURL);
      Proxy->normalize();

      if (debug>0)
         cout << "   Setting HTTP Proxy to "
            << Proxy->host() << ":" << Proxy->port() << endl;
   }

   // Set Proxy Exclusion
   l.Create(config["http_proxy_exclude"], " \t");
   ExcludeProxy.setEscaped(l);
   l.Release();

   // Set the credentials for the authentication
   Credentials = config["authorization"].get();

   // Set the credentials for the authentication of the HTTP Proxy
   ProxyCredentials = config["http_proxy_authorization"].get();

   // Set the Accept-Language directive to be sent via HTTP:
   // the configured languages are joined with commas
   l.Create(config["accept_language"], " \t");
   AcceptLanguage.clear(); // zeroes the contents (should be already empty)

   for (int i = 0; i < l.Count(); i++)
   {
      if (i>0)
         AcceptLanguage += ',';

      AcceptLanguage += l[i];
   }

   if (debug>0)
      cout << "   Setting language for negotiation: "
         << ((AcceptLanguage.length()>0)?
             AcceptLanguage: "servers default") << endl;

   if (!config.Boolean("disable_cookies"))
   {
      // Imports the cookies file
      const std::string CookiesInputFile(config["cookies_input_file"].get());
      if (CookiesInputFile.length())
      {
         if (debug>0)
            cout << "   Importing Cookies input file "
               << CookiesInputFile << endl;

         // Outcome of the import, passed to the jar constructor:
         // zero is treated as success
         int result = 0;
         if (HtCookieInFileJar* cookie_file = (new HtCookieInFileJar(CookiesInputFile.c_str(), result)))
         {
            if (!result)
            {
               if (debug>0)
                  cookie_file->ShowSummary();
               delete _cookie_jar;   // Deletes previous cookie jar
               _cookie_jar = (HtCookieJar*) cookie_file; // set the imported one
               HtHTTP::SetCookieJar(_cookie_jar);
            }
            else
            {
               cout << "! Import failed: " << CookiesInputFile << endl;

               // The jar is not going to be used: release it
               delete cookie_file;
            }
         }
      }
   }

   // Set the Configuration pointer
   Config = &config;

}




///////
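   //    Check whether a scheduled URL should be crawled: hop count, protocol,
   //    malformed flag, exclusion list, bad query strings, file extension and
   //    URL limits are tested in turn.
   //    Returns the Scheduler_URL_Validation code of the first test that
   //    fails, or Scheduler_URL_Valid when the URL passes them all.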
///////

Scheduler::Scheduler_URL_Validation Scheduler::IsAValidURL(const SchedulerEntry &s)
{
   //
   // Decides whether the schedule entry 's' has to be crawled. The tests
   // are applied in this order and the first one failing gives the result:
   // hop count, protocol (only 'http:' is accepted), malformed flag,
   // exclude list, bad query strings, bad/valid extensions, URL limits.
   // Scheduler_URL_Valid is returned when both the limits and the
   // normalized limits match.
   //
   // 'max_hop_count' is read from the configuration only once, during
   // the first call.
   //

   static std::string url;
   static unsigned int max_hop_count( (unsigned int) Config->Value("max_hop_count") );

   // MaxHopCount
   if (s.GetHopCount() > max_hop_count)
      return(Scheduler_URL_MaxHopCount);

   // Initialization of the string
   url = s.GetScheduleUrl();

   // Check the protocol
   if (url.compare(0, 5, "http:")) // not HTTP
   {
      if (! url.compare(0, 5, "file:"))
         return (Scheduler_URL_FileProtocol);
      else if (! url.compare(0, 7, "mailto:"))
         return (Scheduler_URL_EMail);
      else if (! url.compare(0, 11, "javascript:"))
         return (Scheduler_URL_Javascript);
      else
         return (Scheduler_URL_NotValidService);
   }

   // Check for malformed URLs
   if (s.IsMalformed())
      return (Scheduler_URL_Malformed);

   //
   // If the URL contains any of the patterns in the exclude list,
   // mark it as invalid
   //

   if (Excludes.match(url.c_str(), 0, 0) != 0)
      return(Scheduler_URL_Excludes);

   //
   // If the URL has a query string and it is in the bad query list
   // mark it as invalid
   //

   if (url.find_last_of('?') != std::string::npos && BadQueryString.match(url.c_str(), 0, 0) != 0)
      return(Scheduler_URL_BadQueryString);

   //
   // File extension: the text starting at the last '.' of the final
   // path component. The query string (if any) is left out, and a dot
   // followed by a '/' (a dot of the host name or of a directory)
   // does not start an extension.
   //

   const std::string::size_type path_end (url.find('?'));   // npos: no query string
   const std::string::size_type dot (url.find_last_of('.', path_end));

   if (dot != std::string::npos)
   {
      const std::string::size_type slash (url.find('/', dot));

      if (slash == std::string::npos || slash > path_end)
      {
         // Extensions are compared in lower case, dot included
         std::string lowerext (url.substr(dot, path_end - dot));

         for (std::string::size_type i = 0; i < lowerext.length(); ++i)
            lowerext[i] = (char) tolower((unsigned char) lowerext[i]);

         //
         // See if the file extension is in the list of invalid ones
         //
         if (BadExtensions.size() && BadExtensions.find(lowerext) != BadExtensions.end())
            return (Scheduler_URL_BadExtension);

         //
         // Or NOT in the list of valid ones
         //
         if (ValidExtensions.size() && ValidExtensions.find(lowerext) == ValidExtensions.end())
            return (Scheduler_URL_NotValidExtension);
      }
   }

   //
   // If any of the limits are met, we allow the URL
   //

   if ( Limits.match(url.c_str(), 1, 0) != 0)
   {

      URL aUrl (url.c_str());
      aUrl.normalize();

      if (LimitsNormalized.match(aUrl.get(), 1, 0) != 0)  // Yep! It's valid
         return(Scheduler_URL_Valid);

      if (debug>2)
         cout << url << ": Out of normalized limits" << endl;
   }
   else
   {
      if (debug>2)
         cout << url << ": Out of limits" << endl;
   }

   // Nooo ... out of limits
   return (Scheduler_URL_OutOfLimits);

}


///////
   //    Main crawling loop: gets the scheduled URLs one at a time and
   //    retrieves them, until none is left or the process is asked to stop.
///////

Scheduler::Scheduler_Codes Scheduler::Run()
{
   int NumRetries;
   int Result = 0;
   std::string SQLStatement;
   Transport::DocStatus DocumentStatus;
   HtmlParser HtmlParserOB;
   const int max_urls_count( (int) Config->Value("max_urls_count") );
   char crawling_sign = '+';

   // Get next schedule until the list of Urls to be retrieved is empty
   // or until we stop the running process (SIGINT)
   
   while ( !stop && (Result=GetNext()) > 0)
   {

      ++runinfo.RetrievedUrls;
      
      // Let's build a new _Url object
      CurrentUrl = new _Url(CurrentSchedule.GetScheduleUrl());
      CurrentUrl->SetID(CurrentSchedule.GetIDSchedule());
      CurrentUrl->SetIDServer(CurrentSchedule.GetIDServer());
      // CurrentUrl->normalize();

      // Shows info
      if (debug>1)
         cout << crawling_sign << ' ' << runinfo.RetrievedUrls << "/" << runinfo.TotUrls
            << " - " << CurrentUrl->get() << " ID: "
            << CurrentUrl->GetID() << endl;

      // Find the server in memory
      // Look for the server
      if (! (CurrentServer = (_Server *) FindServer(CurrentUrl->signature().get())) )
         return Scheduler_MemoryError;

      CurrentUrl->SetServer(CurrentServer);

      // Reset the counter

      NumRetries = 0;                  

      // Retrieve the URL
      do
      {
         DocumentStatus = Retrieve (CurrentSchedule, *CurrentUrl);
         if (NumRetries++)
            if (debug>1)
               cout << "    Unable to connect. Attempts n. " << NumRetries << endl;
      } while (ShouldWeRetry(DocumentStatus) &&
            NumRetries <= Config->Value("max_retries"));

      // Set the current transport response
      if (TransportConnect)
      {
         CurrentResponse = TransportConnect->GetResponse();

         // If we have a response we store the return status codes
         if(CurrentResponse)
         {
            // We have a response
            CurrentUrl->SetStatusCode(CurrentResponse->GetStatusCode());
            CurrentUrl->SetReasonPhrase(CurrentResponse->GetReasonPhrase().get());
            CurrentUrl->SetLastAccess(CurrentResponse->GetAccessTime());
            CurrentUrl->SetHTTPContentType(CurrentResponse->GetContentType().get());

            CurrentUrl->SetSize(CurrentResponse->GetContentLength());
            CurrentUrl->SetLastModified(CurrentResponse->GetModificationTime());

      	    // HTTP response specific matters
      	    if (HTTPConnect == TransportConnect)
	        {
               // Transfer Encoding
               CurrentUrl->SetTransferEncoding(
                  ((HtHTTP_Response *)CurrentResponse)->GetTransferEncoding().get());

               // Content Language
               CurrentUrl->SetContentLanguage(
                  ((HtHTTP_Response *)CurrentResponse)->GetContentLanguage().get());
	        }

            // We store the server info if it's the first request
            if (CurrentServer->GetRequests() == 0)
            {
			   CurrentServer->SetIPAddress(TransportConnect->GetHostIPAddress().get());
               if (HTTPConnect == TransportConnect)
               {
                  // HTTP Info

                  // Server               
                  CurrentServer->SetHttpServer(((HtHTTP_Response *)CurrentResponse)->GetServer().get());
                  // Server version
                  CurrentServer->SetHttpVersion(((HtHTTP_Response *)CurrentResponse)->GetVersion().get());
               
                  HTTPConnect->isPersistentConnectionPossible()?
                     CurrentServer->AllowPersistentConnection() :
                     CurrentServer->AvoidPersistentConnection();
               }
            }
          }

          // Add the requests number of the server
          CurrentServer->IncrementRequests();

      }

      if (debug>0)
      {
         cout << runinfo.RetrievedUrls << "/" << runinfo.TotUrls
            << " - " << CurrentUrl->get() << " ID: "
            << CurrentUrl->GetID();

         if (DocumentStatus == Transport::Document_ok)
            cout << " - Size: " << CurrentUrl->GetSize();

         cout << endl;
      }
               

      switch(DocumentStatus)
      {
         ///////
            //    Document found
         ///////

         case Transport::Document_ok:
            if(debug>2)
               cout << "   > Document found" << endl;

		CurrentUrl->SetConnStatus("OK");
		if (CurrentSchedule.GetStatus() != SchedulerEntry::Url_CheckIfExists)
		{
		    // If it is valid, we check the max urls count option
		    if (max_urls_count > 0 && parsed_urls >= max_urls_count) {
			CurrentSchedule.SetStatus(SchedulerEntry::Url_MaxUrlsCount);
			crawling_sign = '-';
		    }
		    else {
			++parsed_urls; // increment the number of parsed URLs
			switch(HtmlParserOB(*this))
			{
			    case HtmlParser::HtmlParser_StatementFailed:
			    case HtmlParser::HtmlParser_LinkFailed:
			    case HtmlParser::HtmlParser_AttributeFailed:
			    case HtmlParser::HtmlParser_AccessibilityCheckFailed:
				return Scheduler_DBError;
				break;

			    default:
				if (Config->Boolean("store_url_contents"))
                        CurrentUrl->SetContents(CurrentResponse->GetContents().get());
				    break;
			}
		    }
		}
		break;


         ///////
            //    Document not changed <tobedone>
         ///////
            
         case Transport::Document_not_changed:
            if(debug>2)
               cout << "   > Document not changed" << endl;
            CurrentUrl->SetConnStatus("OK");
            break;

            
         ///////
            //    Document not found
         ///////

         case Transport::Document_not_found:
            if(debug>0)
               cout << "   > Document not found" << endl;
            CurrentUrl->SetConnStatus("OK");
            // We don't wanna show the last modified value in this case
            CurrentUrl->HideLastModified();
            break;
            
         case Transport::Document_not_parsable:
            if(debug>1)
               cout << "   > Document found but not parsable" << endl;
            CurrentUrl->SetConnStatus("OK");
            break;
            

         ///////
            //    Redirection of the document
         ///////

         case Transport::Document_redirect:

            // Location must point to another URL <tobedone>

            if (TransportConnect == HTTPConnect)
            {
               unsigned int IDUrl;
               static Link redirectedlink;

               CurrentUrl->SetLocation(((HtHTTP_Response *)CurrentResponse)->GetLocation().get());
               CurrentUrl->SetHTTPContentType(((HtHTTP_Response *)CurrentResponse)->GetContentType().get());

               _Url RedirectedUrl (CurrentUrl->GetLocation(),
                  *CurrentUrl);

               AddUrl(RedirectedUrl.get().get(), IDUrl);

               if (debug>0)
                  cout << "   > Redirection: Adding "
                  << RedirectedUrl.get()
                  << " (" << IDUrl << ")" << endl;
                  
               // Let's store the redirection as a special link
               
               redirectedlink.Reset();

               // Set the source Url ID
               redirectedlink.SetIDUrlSrc(CurrentUrl->GetID());            

               // Set the dest Url ID
               redirectedlink.SetIDUrlDest(IDUrl);
            
               // Set the tag position
               redirectedlink.SetTagPosition(0);

               // Set the attribute position
               redirectedlink.SetAttrPosition(0);
               
               // Set the type
               redirectedlink.SetLinkType("Redirection");

               // Write the link object
               if (!GetDB()->Insert(redirectedlink))
               {
                  // Insert failed
                  if (debug>0)
                     cout << "Link insert Failed: " << redirectedlink << endl;

      	          return Scheduler_DBError;
               }

            }
            
            CurrentUrl->SetConnStatus("OK");
            // We don't wanna show the last modified value in this case
            CurrentUrl->HideLastModified();
            break;
            
         case Transport::Document_not_authorized:
            if(debug>0)
               cout << "   > Document not authorized" << endl;
            CurrentUrl->SetConnStatus("OK");
            // We don't wanna show the last modified value in this case
            CurrentUrl->HideLastModified();
            break;
            
         case Transport::Document_connection_down:
            if(debug>0)
               cout << "   > Connection down" << endl;
            CurrentUrl->SetConnStatus("ConnectionDown");
            break;
            
         case Transport::Document_no_connection:
            if(debug>0)
               cout << "   > No connection" << endl;
            CurrentUrl->SetConnStatus("NoConnection");
            break;
            
         case Transport::Document_no_header:
            if(debug>0)
               cout << "   > No header" << endl;
            CurrentUrl->SetConnStatus("NoHeader");
            break;
            
         case Transport::Document_no_host:
            if(debug>0)
               cout << "   > No host" << endl;
            CurrentUrl->SetConnStatus("NoHost");
            break;
            
         case Transport::Document_no_port:
            if(debug>0)
               cout << "   > No port" << endl;
            CurrentUrl->SetConnStatus("NoPort");
            break;
            
         case Transport::Document_not_local:
            if(debug>0)
               cout << "   > Not local" << endl;
            break;
            
         case Transport::Document_not_recognized_service:
            // Transport service not recognized
            if(debug>0)
               cout << "   > Service not valid" << endl;
            CurrentUrl->SetConnStatus("ServiceNotValid");
            break;
            
         case Transport::Document_server_error:
            if(debug>0)
               cout << "   > Server Error" << endl;
            CurrentUrl->SetConnStatus("ServerError");
            break;
            
         case Transport::Document_other_error:
            // General error (memory)
            return Scheduler_MemoryError;
            break;
      }

      // Store info
      if(debug>1)
         cout << "    >> Stored: " << CurrentSchedule << endl;
      
      if (!DB->Insert(*CurrentUrl))
         return Scheduler_DBError;
      
      // Update it on Schedule table
      switch(CurrentSchedule.GetStatus())
      {
         case SchedulerEntry::Url_CheckIfExists :
            CurrentSchedule.SetStatus(SchedulerEntry::Url_Checked);
            break;
         case SchedulerEntry::Url_MaxUrlsCount :
            CurrentSchedule.SetStatus(SchedulerEntry::Url_MaxUrlsCount);
            break;
         case SchedulerEntry::Url_ToBeRetrieved :
         default:
            CurrentSchedule.SetStatus(SchedulerEntry::Url_Retrieved);
            break;
      }
      
      if (DB->UpdateStatus(CurrentSchedule) == -1)
         return Scheduler_DBError;

   }

   // Create the indexes for the Link table
   if (DB->CreateLinkTableIndexes() == -1)
      return Scheduler_DBError;

   // Calculate Urls Size Add
   if (CalculateUrlSizeAdd() != Scheduler_OK)
      return Scheduler_DBError;

   // Set the link results
   if (DB->SetLinkResults() == -1)
      return Scheduler_DBError;

   // Set the HTML anchors results
   if (SetHTMLAnchorsResults() != Scheduler_OK)
      return Scheduler_DBError;

   // Deserialize the servers stored in memory.
   // Let's write them into the Database

   if (!deserialized)
      DeserializeServers();

   // Set the SQL Big Table Option
   if (Config->Boolean("optimize_db"))
   {
      if (!DB->Optimize())
      {
         // Only a warning
         cout << "Optimization failed. Try to set 'optimize_db' option to false"
            << endl;
      }
   }   

   // Set finish time
   SetFinishTime();

   // Set general info
   runinfo.TCPConnections = HtHTTP::GetTotOpen();
   runinfo.ServerChanges = HtHTTP::GetTotServerChanges();
   runinfo.HTTPSeconds= HtHTTP::GetTotSeconds();
   runinfo.HTTPRequests= HtHTTP::GetTotRequests();
   runinfo.HTTPBytes= HtHTTP::GetTotBytes();
   runinfo.HTTPBytes= HtHTTP::GetTotBytes();

   // Updates accessibility checks info
   if (!Config->Boolean("accessibility_checks"))
      runinfo.AccessibilityChecks = 0;
   
   // Store general info into the'htCheck' table
   if (! DB->Insert(runinfo))
      return Scheduler_DBError;

   // Result may be 0 or -1
   // If -1 a db error has occured
      
   if (Result == -1)   // A database error occured
      return Scheduler_DBError;

   // Free the memory for the HTTP object
   if (HTTPConnect)
      delete HTTPConnect;

   if (CurrentUrl)
      delete CurrentUrl;

   // Stopped the process
   if (stop) return Scheduler_Interrupted;
   else return Scheduler_OK;
    
}


Transport::DocStatus Scheduler::Retrieve (const SchedulerEntry &s, _Url &url)
{
   // Prepares the transport object for 'url' and performs the request.
   //    s     schedule entry: drives the proxy choice and the HTTP method
   //    url   URL to be retrieved
   // Returns the status given by the transport request, or
   // Document_not_recognized_service / Document_other_error when no
   // request could be made.

   // No transport is selected until the service has been recognized
   TransportConnect = 0;

   // Decide up front whether the proxy has to be used for this entry
   bool viaProxy = UseProxy(s);

   // Only services beginning with "http" are handled
   if (mystrncasecmp (url.service(), "http", 4) != 0)
   {
      if (debug>0)
         cout << '"' << url.service()
            << "\" not a recognized transport service. Ignoring.\n";

      return Transport::Document_not_recognized_service;
   }

   if (debug>4)
      cout << "Retrieving " << url.get() << " - via HTTP" << endl;

   // The HTTP object is created on first use and then kept
   if (!HTTPConnect)
   {
      if (debug>5)
         cout << "Creating a new object for HTTP Connections" << endl;

      HTTPConnect = new HtHTTPBasic();

      if (!HTTPConnect)
         return Transport::Document_other_error;

      // Properties set only once, when the object is created

      // Cookies can be switched off by configuration
      if (Config->Boolean("disable_cookies"))
         HTTPConnect->DisableCookies();

      // Credentials for the authentication
      if (Credentials.length())
         HTTPConnect->SetCredentials(Credentials.c_str());

      // Accept language directive
      if (AcceptLanguage.length())
         HTTPConnect->SetAcceptLanguage(AcceptLanguage.c_str());
   }

   if (debug>6)
      cout << "Setting the URL to be retrieved" << endl;

   HTTPConnect->SetRequestURL(url);

   // Persistent connections need both the configuration option and the
   // consent of the URL's server (asked only if the option is enabled)
   const bool keepAlive = Config->Boolean("persistent_connections")
      && url.GetServer()->IsPersistentConnectionAllowed();

   if (!keepAlive)
      HTTPConnect->DisablePersistentConnection();
   else
   {
      HTTPConnect->AllowPersistentConnection();

      if (Config->Boolean("head_before_get"))
         HTTPConnect->EnableHeadBeforeGet();
      else
         HTTPConnect->DisableHeadBeforeGet();
   }

   // The whole document (GET) is requested only for entries marked
   // "ToBeRetrieved"; every other status gets a HEAD
   HTTPConnect->SetRequestMethod(
      s.GetStatus() == SchedulerEntry::Url_ToBeRetrieved ?
         HtHTTP::Method_GET : HtHTTP::Method_HEAD);

   // Look for the referer of the current schedule entry
   if (CurrentSchedule.GetIDReferer())
   {
      static HtmysqlQueryResult refererRows;

      Referer.Reset();
      Referer.SetIDSchedule(CurrentSchedule.GetIDReferer());

      if (debug > 3)
         cout << "    > Looking for the referer (ID: "
            << Referer.GetIDSchedule() << ")" << endl;

      // Exactly one record must match, and it must be readable
      if (DB->Search (Referer, refererRows) == 1
         && DB->GetNextElement(Referer, refererRows))
      {
         HTTPConnect->SetRefererURL(Referer.GetScheduleUrl().c_str());
         if (debug > 2)
            cout << "    > Found the referer ("
               << Referer.GetScheduleUrl() << ")" << endl;
      }
   }

   // From here on the generic transport pointer is the HTTP object
   TransportConnect = HTTPConnect;

   // Without a transport object there is nothing to request
   if (!TransportConnect)
      return Transport::Document_other_error;

   if (debug>4)
      cout << "Set the connection" << endl;

   // Connect to the proxy, or straight to the host of the URL
   if (viaProxy && HTTPConnect==TransportConnect)
   {
      if (debug>3)
         cout << "Set the proxy to "
            << Proxy->host() << ":" << Proxy->port() << endl;

      HTTPConnect->SetProxy(viaProxy); // flag for the HTTP proxy
      TransportConnect->SetConnection(Proxy);

      // Credentials for the proxy authentication
      if (ProxyCredentials.length())
      {
         if (debug>3)
            cout << "Set the authorization for the proxy" << endl;

         HTTPConnect->SetProxyCredentials(ProxyCredentials.c_str());
      }
   }
   else
      TransportConnect->SetConnection(url);

   // Connection timeout
   if (debug>4)
      cout << "Set the connection timeout to "
         << Config->Value("timeout") << endl;

   TransportConnect->SetTimeOut(Config->Value("timeout"));

   // Biggest document size to be retrieved
   if (debug>4)
      cout << "Set the max document size to "
         << Config->Value("max_doc_size") << endl;

   TransportConnect->SetRequestMaxDocumentSize(Config->Value("max_doc_size"));

   // Number of tcp retries
   if (debug>4)
      cout << "Set the number of retries to "
         << Config->Value("tcp_max_retries") << endl;

   TransportConnect->SetRetry(Config->Value("tcp_max_retries"));

   // Time to wait after a tcp failure
   if (debug>4)
      cout << "Set the time to wait after a tcp failure to "
         << Config->Value("tcp_wait_time") << endl;

   TransportConnect->SetWaitTime(Config->Value("tcp_wait_time"));

   // Modification time <tobedone>

   if (debug>4)
   {
      cout << "Make the request";

      if (viaProxy)
         cout << " via proxy (" << Proxy->host() << ":" << Proxy->port() << ")";

      cout << endl;
   }

   // Make the request
   return TransportConnect->Request();
}


int Scheduler::ShouldWeRetry(Transport::DocStatus DocumentStatus)
{
   // Returns 1 for the two connection failures (connection down and
   // no connection), 0 for every other document status.
   switch (DocumentStatus)
   {
      case Transport::Document_connection_down:
      case Transport::Document_no_connection:
         return 1;

      default:
         return 0;
   }
}


int Scheduler::GetNext()
{
   // Fetches the next schedule entry: "ToBeRetrieved" entries first and,
   // once they are over, "CheckIfExists" entries if 'check_external' is
   // enabled. Returns the value of GetNext(status), or 0 when there is
   // nothing left to do.

   // Kept across calls: 1 while "ToBeRetrieved" entries are served,
   // 0 from the first time none is left
   static int phase = 1;

   if (phase == 1)
   {
      const int pending = GetNext("ToBeRetrieved");

      // Anything but 0 (an entry, or an error) goes back to the caller
      if (pending)
         return pending;

      // No more Urls to be retrieved
      ScheduleTmp.Free();
      phase = 0;
   }

   if (!Config->Boolean("check_external"))
      return 0;

   return GetNext("CheckIfExists");
}


int Scheduler::GetNext(const std::string &StrStatus)
{
   // Loads into CurrentSchedule the next entry of the Schedule table
   // having the given status.
   //    StrStatus   value of the Status field to look for
   // Returns 1 when an entry has been loaded, 0 when none is available,
   // -1 when a database query failed.

   int NumRecords = 0;
   const std::string SQLCommonStatement = "Select IDUrl, IDServer, Url, Status, IDReferer, HopCount from Schedule";

   if (!ScheduleTmp.Empty() && ScheduleTmp.Type() == Htmysql::Htmysql_Stored)
   {
      // We got a previous stored query

      // The server is named only if one is set: dereferencing a null
      // CurrentServer just for a debug message must not happen
      if (debug>2 && CurrentServer)
         cout << "Using previous list of Urls of server "
            << CurrentServer->host() << ":" << CurrentServer->port() << endl;

      if (DB->GetNextElement(CurrentSchedule, ScheduleTmp)) // found an item
      {
         if (debug>0)
            cout << "- ";

         return 1;
      }

   }

   // We haven't got a previous queue

   // First time or "queue" is empty

   // Is this a request method different than ToBeRetrieved
   // (the test used to be inverted with respect to this intent)

   if (StrStatus != "ToBeRetrieved")
   {
      // These Urls won't increase the list of Urls to be retrieved
      // So we query the DB only once. Example: 'CheckIfExists'

      if (debug>2)
         cout << "Creating a new list of Urls of type: " << StrStatus << endl;

      std::string SQLStatement = SQLCommonStatement + " where Status='" + StrStatus
         + '\'' + " ORDER by IDServer, HopCount ASC";

      // Executing Select query (stored query, default)
      NumRecords = DB->Query (SQLStatement, &ScheduleTmp);

      if (NumRecords == -1)
         return -1;   // An error occured

      if (NumRecords > 0)
      {
         // We found at least one record

         if (debug>0)
            cout << "+ " << NumRecords << " Urls." << endl;

         if (DB->GetNextElement(CurrentSchedule, ScheduleTmp) )
         {
            if (debug>0)
               cout << "- ";
            return 1;
         }
      }
   }
   else
   {
      // Urls to be retrieved: when the current server allows persistent
      // connections, queue all of its Urls so they are served in a row
      if (CurrentServer && CurrentServer->IsPersistentConnectionAllowed())
      {

         if (debug>2)
            cout << "Creating a new list of Urls for "
               << CurrentServer->host() << ":" << CurrentServer->port()
               << " - persistent connections - type: " << StrStatus << endl;

         std::ostringstream SQLStatement;

         SQLStatement << SQLCommonStatement << " where Status='" << StrStatus << '\''
            << " AND IDServer = " << CurrentServer->GetID() << " ORDER BY HopCount ASC";

         // Executing Select query (stored query, default)

         NumRecords = DB->Query (SQLStatement.str(), &ScheduleTmp);

         if (NumRecords == -1)
            return -1;   // An error occured

         if (debug>0)
            cout << "+ " << NumRecords << " Urls for "
               << CurrentServer->host() << ":" << CurrentServer->port()
               << endl;

         if (NumRecords > 0)
         {
            // We found at least one record

            if (DB->GetNextElement(CurrentSchedule, ScheduleTmp))
            {
               if (debug>0)
                  cout << "- ";
               return 1;
            }

         }

      }
   }

   // We take the first records, as is with a temporary query

   std::string SQLStatement = SQLCommonStatement + " where Status='" + StrStatus
      + '\'' + " ORDER BY HopCount ASC";

   if (debug>2)
      cout << "Getting next Url - type: " << StrStatus<< endl;

   if (DB->Query (SQLStatement, &ScheduleTmp, Htmysql::Htmysql_Temporary) == -1)
      return -1; // an error occured

   NumRecords=DB->GetNextElement(CurrentSchedule, ScheduleTmp);

   // Logical AND: the former bitwise '&' only worked as long as the
   // value of NumRecords was exactly 0 or 1
   if (NumRecords && debug>0)
      cout << "+ ";

   ScheduleTmp.Free();

   return NumRecords; // Can be 0 or 1

}


///////
   //    Write the summary of the anchors not found on the given stream
   //    Returns: Scheduler_DBError if the database call gives -1,
   //    Scheduler_OK otherwise
///////

Scheduler::Scheduler_Codes Scheduler::ShowAnchorNotFound(ostream &output)
{
   // The database object does the job; -1 is its error value
   const bool failed = (DB->AnchorsNotFound(output) == -1);

   return failed ? Scheduler_DBError : Scheduler_OK;
}


///////
   //    Write the summary of the broken links on the given stream
   //    Returns: Scheduler_DBError if the database call gives -1,
   //    Scheduler_OK otherwise
///////

Scheduler::Scheduler_Codes Scheduler::ShowBrokenLinks(ostream &output)
{
   Scheduler_Codes outcome = Scheduler_OK;

   // -1 means that a database error occured
   if (DB->ShowBrokenLinks(output) == -1)
      outcome = Scheduler_DBError;

   return outcome;
}


///////
   //    Write the summary of the retrieved status codes on the given stream
   //    Returns: Scheduler_DBError if the database call gives -1,
   //    Scheduler_OK otherwise
///////

Scheduler::Scheduler_Codes Scheduler::ShowStatusCode(ostream &output)
{
   // Success path first: anything but -1 from the database is fine
   if (DB->ShowStatusCode(output) != -1)
      return Scheduler_OK;

   return Scheduler_DBError;
}


Scheduler::Scheduler_Codes Scheduler::ShowContentTypesPerServer(ostream &output)
{
   // Asks the database object for the content types per server report,
   // written on 'output'; its -1 result is mapped to Scheduler_DBError
   const int dbResult = DB->ShowContentTypesPerServer(output);

   return (dbResult == -1) ? Scheduler_DBError : Scheduler_OK;
}


///////
   //    Compute the size to be added to URLs (links of the 'Direct' type)
   //    A query is executed and the SizeAdd field of the URL table updated.
   //    The bytes to add to a URL (an HTML document for now) depend on the
   //    attributes it uses to link other URLs. For instance, an image is
   //    usually called with <IMG src="URL A">: this counts as a direct
   //    link, so the size of URL A goes into the SizeAdd field of the
   //    calling URL. It is counted only once, no matter how many times
   //    the document calls it, since the user is supposed to have a cache
   //    system on his computer.
   //    Summing the size of a URL and its SizeAdd field gives an
   //    approximate URL weight.
   //    Returns: Scheduler_DBError if the database call gives -1,
   //    Scheduler_OK otherwise
///////

Scheduler::Scheduler_Codes Scheduler::CalculateUrlSizeAdd(ostream &output)
{
   // The computation itself is delegated to the database object
   const bool failed = (DB->CalculateUrlSizeAdd(output) == -1);

   if (failed)
      return Scheduler_DBError;   // A database error occured

   return Scheduler_OK;
}


///////
   //    Set the results of the HTML anchors check
   //    Returns: Scheduler_DBError if the database call gives -1,
   //    Scheduler_OK otherwise
///////

Scheduler::Scheduler_Codes Scheduler::SetHTMLAnchorsResults(ostream &output)
{
   // Progress message, shown from the first debug level
   if (debug>0)
      output << endl << "Setting HTML Anchors results" << endl;

   // The database object creates the table with all the anchors
   const int dbResult = DB->AnchorsTable(output);

   return (dbResult == -1) ? Scheduler_DBError : Scheduler_OK;
}


///////
   //    Tell whether the URL of a schedule entry must go through the proxy
   //    Returns: false if no proxy is set or the URL matches the
   //    ExcludeProxy patterns, true otherwise
///////

bool Scheduler::UseProxy(const SchedulerEntry &s)
{
   // Without a proxy there is nothing to decide
   if (!Proxy)
      return false;

   // A result of 0 means that no exclude pattern matched the URL
   // (always the case when the exclude pattern is empty): use the proxy
   return ExcludeProxy.match(s.GetScheduleUrl().c_str(), 0, 0) == 0;
}

///////
   //   Set the user agent string used by the HTTP requests
///////

void Scheduler::SetUserAgent(const std::string &ua)
{
   // The value is handed to HtHTTP through its class-level setter
   const char *const agent = ua.c_str();

   HtHTTP::SetRequestUserAgent(agent);
}
