Sophie

Sophie

distrib > Mandriva > 10.0-com > i586 > by-pkgid > 21280410b6ea906d791d7a12afae2579 > files > 1822

libace5-doc-5.4-2mdk.i586.rpm

/* -*- C++ -*- */
// URL_Visitor.h,v 1.10 2003/11/09 04:12:09 dhinton Exp

// ============================================================================
//
// = LIBRARY
//    examples/Web_Crawler
//
// = FILENAME
//    URL_Visitor.h
//
// = AUTHOR
//    Douglas C.Schmidt <schmidt@cs.wustl.edu>
//    Kirthika Parameswaran <kirthika@cs.wustl.edu>
// ============================================================================

#ifndef _URL_VISITOR_H
#define _URL_VISITOR_H
#include /**/ "ace/pre.h"

#include "ace/Strategies_T.h"

#if !defined (ACE_LACKS_PRAGMA_ONCE)
#pragma once
#endif /* ACE_LACKS_PRAGMA_ONCE */


#include "HTTP_URL.h"
#include "Iterators.h"
#include "ace/Hash_Map_Manager_T.h"
#include "ace/Caching_Strategies_T.h"
#include "ace/Cached_Connect_Strategy_T.h"
#include "Options.h"
#include "ace/Pair_T.h"

// Forward declarations.
// <URL_Validation_Visitor> is defined further down in this header, but
// classes declared before it take and store references to it, so the
// name has to be introduced here first.
class URL_Validation_Visitor;

class URL_Processing_Strategy
{
  // = TITLE
  //   Abstract base class for the URL processing strategy.
  //
  // = DESCRIPTION
  //   Holds references to the URL being processed and to the iterator
  //   for that URL.  Concrete strategies supply <execute>.
public:
  URL_Processing_Strategy (URL &,
                           URL_Iterator &);
  // Constructor.

  virtual ~URL_Processing_Strategy (void) {}
  // Virtual destructor, so that deleting a concrete strategy through
  // a pointer to this base class is well defined.

  virtual int execute (void) = 0;
  // Perform the strategy.

  virtual int destroy (void);
  // Close down the resources.

protected:
  URL &url_;
  // A reference to the URL "context" that we're processing.

  URL_Iterator &iterator_;
  // Iterator for the URL that we're processing.
};

class HTTP_Header_Processing_Strategy
  : public URL_Processing_Strategy
{
  // = TITLE
  //   Processing strategy for an HTTP header.
  //
  // = DESCRIPTION
  //   Concrete <URL_Processing_Strategy> whose <execute> hook handles
  //   an HTTP header.
public:
  HTTP_Header_Processing_Strategy (URL &url, URL_Iterator &iterator);
  // Constructor, taking the URL and its iterator by reference.

  virtual int execute (void);
  // Run the HTTP header processing strategy.
};

class HTML_Body_Validation_Strategy
  : public URL_Processing_Strategy
{
  // = TITLE
  //   Processing strategy for the body of an HTML file.
  //
  // = DESCRIPTION
  //   Walks the body of an HTML file and recursively visits the
  //   links embedded in it.
public:
  HTML_Body_Validation_Strategy (URL &url,
                                 URL_Iterator &iterator,
                                 URL_Validation_Visitor &visitor_context);
  // Constructor, taking the URL, its iterator and the visitor that
  // provides the context of the visit.

  virtual int execute (void);
  // Run the strategy on an HTML file: iterate over the file and
  // recursively visit its embedded links so they are processed, too.

private:
  URL_Validation_Visitor &visitor_context_;
  // The context of the visit.
};

class URL_Download_Strategy
  : public URL_Processing_Strategy
{
  // = TITLE
  //   Strategy that downloads a URL.
  //
  // = DESCRIPTION
  //   Concrete <URL_Processing_Strategy> that downloads the contents
  //   of a URL into a temporary file.
public:
  URL_Download_Strategy (URL &url, URL_Iterator &iterator);
  // Constructor, taking the URL and its iterator by reference.

  virtual int execute (void);
  // Run the strategy: download a URL to a temporary file.
};

class URL_Visitation_Strategy_Factory
{
  // = TITLE
  //   Abstract Factory for the URL visitation strategy.
  //
  // = DESCRIPTION
  //   Declares the factory methods that produce the iterators and
  //   processing strategies for the header and the body of a URL.
  //   Every factory method and <destroy> is pure virtual, so concrete
  //   factories must implement all of them.
public:
  URL_Visitation_Strategy_Factory (URL *);
  // Constructor, taking a pointer to the URL.

  virtual ~URL_Visitation_Strategy_Factory (void) {}
  // Virtual destructor, so that deleting a concrete factory through a
  // pointer to this base class is well defined.

  // = Factory Methods.
  virtual URL_Iterator *make_header_iterator (void) = 0;
  // Factory Method that makes the header iterator.

  virtual URL_Iterator *make_body_iterator (void) = 0;
  // Factory Method that makes the body iterator.

  virtual URL_Processing_Strategy *make_header_strategy (URL_Iterator &) = 0;
  // Factory Method that makes the header processing strategy.

  virtual URL_Processing_Strategy *make_body_strategy (URL_Iterator &) = 0;
  // Factory Method that makes the body processing strategy.

  virtual int destroy (void) = 0;
  // Close down the resources.

protected:
  URL *url_;
  // Stash the URL so we don't have to pass it around.
};

class URL_Download_Visitation_Strategy_Factory
  : public URL_Visitation_Strategy_Factory
{
  // = TITLE
  //   Concrete Factory for the URL download visitation strategy.
  //
  // = DESCRIPTION
  //   Implements every factory method declared by
  //   <URL_Visitation_Strategy_Factory>, plus <destroy>.
public:
  URL_Download_Visitation_Strategy_Factory (URL *url);
  // Constructor, taking a pointer to the URL.

  // = Factory Methods.
  virtual URL_Iterator *make_header_iterator (void);
  // Makes the header iterator.

  virtual URL_Iterator *make_body_iterator (void);
  // Makes the body iterator.

  virtual URL_Processing_Strategy *make_header_strategy (URL_Iterator &iterator);
  // Makes the strategy that processes the header.

  virtual URL_Processing_Strategy *make_body_strategy (URL_Iterator &iterator);
  // Makes the strategy that processes the body.

  virtual int destroy (void);
  // Release the resources.
};

class URL_Validation_Visitation_Strategy_Factory
  : public URL_Visitation_Strategy_Factory
{
  // = TITLE
  //   Concrete Factory for the URL validation visitation strategy.
  //
  // = DESCRIPTION
  //   Implements every factory method declared by
  //   <URL_Visitation_Strategy_Factory>, plus <destroy>, and keeps a
  //   reference to the validation visitor.
public:
  URL_Validation_Visitation_Strategy_Factory (URL *url,
                                              URL_Validation_Visitor &visitor_context);
  // Constructor, taking a pointer to the URL and the visitor context.

  // = Factory Methods.
  virtual URL_Iterator *make_header_iterator (void);
  // Makes an <HTTP_Header_Iterator>.

  virtual URL_Iterator *make_body_iterator (void);
  // Makes an <HTML_Body_Iterator>.

  virtual URL_Processing_Strategy *make_header_strategy (URL_Iterator &iterator);
  // Makes the strategy that processes the header.

  virtual URL_Processing_Strategy *make_body_strategy (URL_Iterator &iterator);
  // Makes the strategy that processes the body.

  virtual int destroy (void);
  // Release the resources.

private:
  URL_Validation_Visitor &visitor_context_;
  // The visitor's context.
};

class URL_Visitor
{
  // = TITLE
  //   Base class for the URL Visitor.
  //
  // = DESCRIPTION
  //   This class plays the "visitor" role in the Visitor pattern.
  //   It is abstract: <visit>, <destroy> and
  //   <make_visitation_strategy_factory> are all pure virtual.
public:
  virtual ~URL_Visitor (void) {}
  // Virtual destructor, so that deleting a concrete visitor through a
  // pointer to this base class is well defined.

  virtual int visit (HTTP_URL &http_url) = 0;
  // Visit an <HTTP_URL>.

  // @@
  // virtual int visit (FTP_URL &http_url) = 0;

  virtual int destroy (void) = 0;
  // Cleanup the resources.

protected:
  virtual URL_Visitation_Strategy_Factory *make_visitation_strategy_factory (URL &) = 0;
  // Make the appropriate <URL_Visitation_Strategy_Factory>.
};

// = Typedefs that assemble the cached-connection machinery.
//
// Each typedef below feeds the ones after it, ending in
// <CACHED_CONNECT_STRATEGY> and <STRATEGY_CONNECTOR>, which are the
// types of the connector members of <URL_Validation_Visitor>.

// Attribute type stored next to each cached handler and passed to the
// caching utility/strategy templates.
typedef int ATTRIBUTES;

// Service handler type for client connections: SOCK stream, null synch.
typedef ACE_Svc_Handler <ACE_SOCK_STREAM, ACE_NULL_SYNCH>
        Client_Svc_Handler;

// Value type of the connection map: a handler pointer paired with its
// <ATTRIBUTES>.
typedef ACE_Pair<Client_Svc_Handler *, ATTRIBUTES>
        CACHED_HANDLER;

// Key type of the connection map, wrapping an <ACE_INET_Addr>.
typedef ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr>
        ACE_ADDR;

// Hash functor and equality functor for the map keys.
typedef ACE_Hash<ACE_ADDR> H_KEY;
typedef ACE_Equal_To<ACE_ADDR> C_KEYS;

// The connection map itself and its forward/reverse iterators.  All
// three use <ACE_Null_Mutex> as the lock type.
typedef ACE_Hash_Map_Manager_Ex<ACE_ADDR, CACHED_HANDLER, H_KEY, C_KEYS, ACE_Null_Mutex>
        HASH_MAP;
typedef ACE_Hash_Map_Iterator_Ex<ACE_ADDR, CACHED_HANDLER, H_KEY, C_KEYS, ACE_Null_Mutex>
        HASH_MAP_ITERATOR;
typedef ACE_Hash_Map_Reverse_Iterator_Ex<ACE_ADDR, CACHED_HANDLER, H_KEY, C_KEYS, ACE_Null_Mutex>
        HASH_MAP_REVERSE_ITERATOR;

// Cleanup strategy and caching utility, both parameterized on the
// connection map types above.
typedef ACE_Recyclable_Handler_Cleanup_Strategy<ACE_ADDR, CACHED_HANDLER, HASH_MAP>
        CLEANUP_STRATEGY;
typedef ACE_Recyclable_Handler_Caching_Utility<ACE_ADDR, CACHED_HANDLER, HASH_MAP, HASH_MAP_ITERATOR, ATTRIBUTES>
        CACHING_UTILITY;

// LRU caching strategy built on the caching utility.
typedef ACE_LRU_Caching_Strategy<ATTRIBUTES, CACHING_UTILITY>
        LRU_CACHING_STRATEGY;

// The conditional below is commented out, so <CACHING_STRATEGY> is
// always the concrete LRU strategy; the adapter-based alternative in
// the disabled #else branch is never compiled.
//#if defined (ACE_HAS_BROKEN_EXTENDED_TEMPLATES)

typedef LRU_CACHING_STRATEGY
        CACHING_STRATEGY;

//#else

//typedef ACE_Caching_Strategy_Adapter<ATTRIBUTES, CACHING_UTILITY, LRU_CACHING_STRATEGY>
//      LRU_CACHING_STRATEGY_ADAPTER;
//typedef ACE_Caching_Strategy<ATTRIBUTES, CACHING_UTILITY>
//      CACHING_STRATEGY;

//#endif /* ACE_HAS_BROKEN_EXTENDED_TEMPLATES */


// Connector type for <Client_Svc_Handler> over a SOCK connector.
typedef ACE_Strategy_Connector<Client_Svc_Handler, ACE_SOCK_CONNECTOR>
        STRATEGY_CONNECTOR;

// No-op creation strategy for <Client_Svc_Handler>.
typedef ACE_NOOP_Creation_Strategy<Client_Svc_Handler>
        NULL_CREATION_STRATEGY;

// No-op concurrency (activation) strategy for <Client_Svc_Handler>.
typedef ACE_NOOP_Concurrency_Strategy<Client_Svc_Handler>
        NULL_ACTIVATION_STRATEGY;

// Connect strategy parameterized on <CACHING_STRATEGY>, with a null
// mutex as its lock type.
typedef ACE_Cached_Connect_Strategy_Ex<Client_Svc_Handler, ACE_SOCK_CONNECTOR, CACHING_STRATEGY, ATTRIBUTES, ACE_SYNCH_NULL_MUTEX>
        CACHED_CONNECT_STRATEGY;

class URL_Validation_Visitor : public URL_Visitor
{
  // = TITLE
  //   Subclass that defines the URL validation visitor.
  //
  // = DESCRIPTION
  //   This class checks to make sure that the <HTTP_URL> is valid.
  //   If the <HTTP_URL> is an <HTML> file, it can also be used to
  //   recursively check that all embedded links in this file are
  //   valid.
public:
  // Map from a URL address to its status; uses a null mutex as the
  // lock type.
  typedef ACE_Hash_Map_Manager <ACE_URL_Addr, URL_Status, ACE_Null_Mutex>
          URL_CACHE;

  virtual int visit (HTTP_URL &http_url);
  // Visit an <HTTP_URL> to make sure that it's valid.  If the content
  // type of the <HTTP_URL> is "text/html" and the <recursion> option
  // is enabled then <visit> recursively checks each link embedded in
  // the HTML page.

  // @@
  // virtual int visit (FTP_URL &http_url);

  URL_Validation_Visitor (void);
  // Default constructor.
  // NOTE(review): presumably sets up the connector members declared
  // below -- confirm against the definition in the .cpp file.

  virtual int destroy (void);
  // Cleanup the resources.

  URL_CACHE &url_cache (void);
  // Returns a reference to the URL cache.


  // NOTE(review): the block below is commented out.  It declares
  // class-scope typedefs, some with the same names as the file-scope
  // typedefs used by the members of this class
  // (<NULL_CREATION_STRATEGY>, <NULL_ACTIVATION_STRATEGY>,
  // <CACHED_CONNECT_STRATEGY>).  It looks like an older variant kept
  // for reference -- confirm before removing.
  /*


  typedef ACE_Svc_Handler<ACE_SOCK_STREAM, ACE_NULL_SYNCH>
          Svc_Handler;
  typedef ACE_Strategy_Connector<Svc_Handler, ACE_SOCK_CONNECTOR>
          STRAT_CONNECTOR;
  typedef ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr>
          REFCOUNTED_HASH_RECYCLABLE_ADDRESS;
  typedef ACE_NOOP_Creation_Strategy<Svc_Handler>
          NULL_CREATION_STRATEGY;
  typedef ACE_NOOP_Concurrency_Strategy<Svc_Handler>
          NULL_ACTIVATION_STRATEGY;

  typedef ACE_Hash_Map_Manager_Ex<REFCOUNTED_HASH_RECYCLABLE_ADDRESS,\
                                  ACE_Pair<Svc_Handler *, int>,\
                                  ACE_Hash<REFCOUNTED_HASH_RECYCLABLE_ADDRESS>, \
                                  ACE_Equal_To<REFCOUNTED_HASH_RECYCLABLE_ADDRESS>,\
                                  ACE_Null_Mutex>
          CONNECTION_HASH_MAP;
  typedef ACE_Hash_Map_Iterator_Ex<REFCOUNTED_HASH_RECYCLABLE_ADDRESS,\
                                  ACE_Pair<Svc_Handler *, int>,\
                                  ACE_Hash<REFCOUNTED_HASH_RECYCLABLE_ADDRESS>, \
                                  ACE_Equal_To<REFCOUNTED_HASH_RECYCLABLE_ADDRESS>,\
                                  ACE_Null_Mutex>
          CONNECTION_HASH_MAP_ITERATOR;
  typedef ACE_Hash_Map_Reverse_Iterator_Ex<REFCOUNTED_HASH_RECYCLABLE_ADDRESS,\
                                  ACE_Pair<Svc_Handler *, int>,\
                                  ACE_Hash<REFCOUNTED_HASH_RECYCLABLE_ADDRESS>, \
                                  ACE_Equal_To<REFCOUNTED_HASH_RECYCLABLE_ADDRESS>,\
                                  ACE_Null_Mutex>
          CONNECTION_HASH_MAP_REVERSE_ITERATOR;
  typedef ACE_Pair_Caching_Utility <REFCOUNTED_HASH_RECYCLABLE_ADDRESS, \
                                            ACE_Pair<Svc_Handler *, int>, \
                                            CONNECTION_HASH_MAP, CONNECTION_HASH_MAP_ITERATOR, int >
          CACHING_STRATEGY_UTILITY;
  typedef ACE_LRU_Caching_Strategy<REFCOUNTED_HASH_RECYCLABLE_ADDRESS,\
                                   ACE_Pair<Svc_Handler *, int>,\
                                   CONNECTION_HASH_MAP, int,\
                                   CACHING_STRATEGY_UTILITY >
          LRU;
  typedef ACE_Cached_Connect_Strategy_Ex<Svc_Handler,ACE_SOCK_CONNECTOR, LRU,int, ACE_SYNCH_NULL_MUTEX>
          CACHED_CONNECT_STRATEGY;
  */
protected:
  virtual ~URL_Validation_Visitor (void);
  // The destructor is protected, so code outside this class hierarchy
  // cannot destroy a visitor directly; <destroy> is the public
  // cleanup entry point.

  virtual URL_Visitation_Strategy_Factory *make_visitation_strategy_factory (URL &);
  // Factory Method that makes a
  // <URL_Validation_Visitation_Strategy_Factory>.

  URL_CACHE url_cache_;
  // Cache the status of URLs we've already validated.

  int in_cache (const ACE_URL_Addr &url_addr);
  // Check to see if the reply status of this <url_addr> is in the
  // cache.  Returns 1 if so, 0 if not.

  NULL_CREATION_STRATEGY creation_strategy_;
  // No-op creation strategy instance.

  NULL_ACTIVATION_STRATEGY activation_strategy_;
  // No-op concurrency (activation) strategy instance.

  // Configure the Strategy Connector with a strategy that caches
  // connection.
  CACHED_CONNECT_STRATEGY *caching_connect_strategy_;

  STRATEGY_CONNECTOR *strat_connector_;
  // Pointer to the strategy connector.
  // NOTE(review): this and <caching_connect_strategy_> are raw
  // pointers and the class leaves the compiler-generated copy
  // operations enabled; who allocates and frees them is not visible
  // in this header -- confirm in the .cpp file.

  CACHING_STRATEGY caching_strategy_;
  // Caching strategy instance (<CACHING_STRATEGY> is the LRU strategy).
  // NOTE(review): presumably handed to <caching_connect_strategy_>
  // when it is created -- confirm in the .cpp file.
};


class URL_Download_Visitor
  : public URL_Visitor
{
  // = TITLE
  //   Subclass that defines the URL download visitor.
  //
  // = DESCRIPTION
  //   Concrete <URL_Visitor> whose factory method produces a
  //   <URL_Download_Visitation_Strategy_Factory>.
  //   NOTE(review): the previous description was copied from the
  //   validation visitor; presumably this class downloads the visited
  //   URL rather than validating it -- confirm against the .cpp file.
public:
  virtual int visit (HTTP_URL &http_url);
  // Visit an <HTTP_URL>.

  // @@
  // virtual int visit (FTP_URL &http_url);

  virtual int destroy (void);
  // Release the resources.

protected:
  virtual URL_Visitation_Strategy_Factory *make_visitation_strategy_factory (URL &url);
  // Factory Method that makes a
  // <URL_Download_Visitation_Strategy_Factory>.
};

template <class T>
class Auto_Destroyer
{
  // = TITLE
  //   Simple class that ensures the <destroy> method is called on our
  //   <URL_*> objects when they go out of scope.
  //
  // = DESCRIPTION
  //   This class is similar to an auto_ptr<> and should be used to
  //   simplify blocks of code that must create/destroy pointers to
  //   various <URL_*> related strategies and iterators.  <T> only has
  //   to provide a <destroy> method.  A guard is the sole owner of the
  //   pointer it holds, so guards cannot be copied.
public:
  Auto_Destroyer (T *t): t_ (t) {}
  // Take ownership of <t>, which may be 0.

  T *operator-> (void) { return this->t_; }
  // Access the members of the held object.

  T *operator *(void) { return this->t_; }
  // Returns the held pointer itself (not a reference), so that
  // callers can compare it against 0.

  void operator= (T *t)
  {
    // Re-assigning the pointer that is already held must be a no-op:
    // destroying it here would leave <t_> dangling and the destructor
    // would call <destroy> on it a second time.
    if (this->t_ == t)
      return;

    if (this->t_ != 0)
      this->t_->destroy ();
    this->t_ = t;
  }
  // Destroy the currently held object (if any) and take ownership of
  // <t> instead.

  ~Auto_Destroyer (void)
  {
    if (this->t_ != 0)
      this->t_->destroy ();
  }
  // Call <destroy> on the held object, if there is one.

private:
  Auto_Destroyer (const Auto_Destroyer<T> &);
  void operator= (const Auto_Destroyer<T> &);
  // Declared but never defined: copying a guard would make two guards
  // call <destroy> on the same object.

  T *t_;
  // The object we own; 0 if none.
};

#include /**/ "ace/post.h"
#endif /* _URL_VISITOR_H */