Sophie

Sophie

distrib > * > 2010.0 > * > by-pkgid > e7ce49c3b1c0711bfa288ad4094ed286 > files > 63

libomniorb-devel-4.1.0-5mdv2009.1.i586.rpm

// -*- Mode: C++; -*-
//                            Package   : omniORB2
// codeSets.h                 Created on: 18/10/2000
//                            Author    : Duncan Grisby (dpg1)
//
//    Copyright (C) 2000 AT&T Laboratories, Cambridge
//
//    This file is part of the omniORB library
//
//    The omniORB library is free software; you can redistribute it and/or
//    modify it under the terms of the GNU Library General Public
//    License as published by the Free Software Foundation; either
//    version 2 of the License, or (at your option) any later version.
//
//    This library is distributed in the hope that it will be useful,
//    but WITHOUT ANY WARRANTY; without even the implied warranty of
//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
//    Library General Public License for more details.
//
//    You should have received a copy of the GNU Library General Public
//    License along with this library; if not, write to the Free
//    Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
//    02111-1307, USA
//
//
// Description:
//	*** PROPRIETARY INTERFACE ***
//

/*
  $Log: codeSets.h,v $
  Revision 1.1.4.2  2005/12/08 14:22:31  dgrisby
  Better string marshalling performance; other minor optimisations.

  Revision 1.1.4.1  2003/03/23 21:04:17  dgrisby
  Start of omniORB 4.1.x development branch.

  Revision 1.1.2.9  2003/03/03 12:32:32  dgrisby
  EBCDIC code sets. Thanks Coleman Corrigan.

  Revision 1.1.2.8  2003/01/14 11:48:15  dgrisby
  Remove warnings from gcc -Wshadow. Thanks Pablo Mejia.

  Revision 1.1.2.7  2002/12/19 13:56:58  dgrisby
  New Windows 1251 code set. (Thanks Vasily Tchekalkin).

  Revision 1.1.2.6  2001/06/13 20:06:18  sll
  Minor fix to make the ORB compile with MSVC++.

  Revision 1.1.2.5  2000/11/22 14:37:58  dpg1
  Code set marshalling functions now take a string length argument.

  Revision 1.1.2.4  2000/11/16 12:33:43  dpg1
  Minor fixes to permit use of UShort as WChar.

  Revision 1.1.2.3  2000/11/15 17:15:42  sll
  Added definition for the TAG_CODE_SETS component.

  Revision 1.1.2.2  2000/11/09 12:23:04  dpg1
  Update to compile on Solaris

  Revision 1.1.2.1  2000/10/27 15:42:03  dpg1
  Initial code set conversion support. Not yet enabled or fully tested.

*/

#ifndef __CODESETS_H__
#define __CODESETS_H__


//#include <CONV_FRAME.hh>

class cdrStream;

OMNI_NAMESPACE_BEGIN(omni)

// Code set description types used by the conversion framework.
// NOTE(review): presumably a hand-written stand-in for the generated
// CONV_FRAME.hh, whose #include is commented out in this file -- confirm.
class CONV_FRAME {
public:
  // Numeric identifier of a code set.
  typedef _CORBA_ULong CodeSetId;

  // Unbounded sequence of code set identifiers.
  typedef _CORBA_Unbounded_Sequence_w_FixSizeElement<CodeSetId, 4, 4>
          CodeSetIdList;

  // Code sets on offer for one kind of character data: a single
  // native code set plus a list of conversion code sets.
  struct CodeSetComponent {
    CodeSetId     native_code_set;
    CodeSetIdList conversion_code_sets;

    // Stream operators; definitions are not in this header.
    // NOTE(review): the const operator>>= presumably marshals this
    // object into s and operator<<= unmarshals from s -- confirm.
    void operator>>=(cdrStream& s) const;
    void operator<<=(cdrStream& s);
  };

  // Pair of code set components, one for char data and one for
  // wchar data.
  struct CodeSetComponentInfo {
    CodeSetComponent ForCharData;
    CodeSetComponent ForWcharData;

    // Stream operators; same shape as those of CodeSetComponent.
    void operator>>=(cdrStream& s) const;
    void operator<<=(cdrStream& s);
  };
};


// Code set conversion framework
// =============================
//
// For any transmission involving char or wchar data, there are two
// code sets involved -- the `native' code set, which is the code set
// the application is using, and the `transmission' code set, which is
// the code set negotiated with the other end, used to send the data.
// At any given time, there are four code sets in force, since char
// and wchar code sets are separate. These are termed NCS-C, TCS-C,
// NCS-W, and TCS-W, for native char, transmission char, native wchar,
// transmission wchar respectively.
//
// These four code sets are represented by four objects, derived from
// the NCS_C, TCS_C, NCS_W, and TCS_W classes. To marshal or unmarshal
// char data (i.e. char and string), for example, the ORB calls
// methods of the NCS_C object, passing a pointer to the TCS_C object
// to be used. The ORB will not normally call methods of the TCS_C
// object directly.
//
// For each supported code set, there is a single object registered
// with the system. These objects are usually statically allocated.
// Since all interactions involving a particular code set use the same
// code set object, all objects must be stateless.
//
// By default, the NCS object communicates with the TCS object via
// UTF-16 Unicode, so all NCS objects can translate to and from all
// TCS objects. For efficiency, either object is permitted to bypass
// the UTF-16 step, and use some more efficient translation. NCS
// implementations must always call the TCS's fast...() function
// before resorting to UTF-16, in case the TCS can deal with the NCS
// data directly.
//
// In all functions involving string lengths, the length does not
// include the terminating null.

class omniCodeSet {
public:

  // Common code set id constants.
  // These are declarations only: none has an in-class initialiser, so
  // the actual values are defined outside this header.
  static _core_attr const CONV_FRAME::CodeSetId ID_8859_1;  // ISO 8859
  static _core_attr const CONV_FRAME::CodeSetId ID_8859_2;
  static _core_attr const CONV_FRAME::CodeSetId ID_8859_3;
  static _core_attr const CONV_FRAME::CodeSetId ID_8859_4;
  static _core_attr const CONV_FRAME::CodeSetId ID_8859_5;
  static _core_attr const CONV_FRAME::CodeSetId ID_8859_6;
  static _core_attr const CONV_FRAME::CodeSetId ID_8859_7;
  static _core_attr const CONV_FRAME::CodeSetId ID_8859_8;
  static _core_attr const CONV_FRAME::CodeSetId ID_8859_9;
  static _core_attr const CONV_FRAME::CodeSetId ID_8859_10;
  static _core_attr const CONV_FRAME::CodeSetId ID_UCS_2;   // Unicode
  static _core_attr const CONV_FRAME::CodeSetId ID_UCS_4;
  static _core_attr const CONV_FRAME::CodeSetId ID_UTF_8;
  static _core_attr const CONV_FRAME::CodeSetId ID_UTF_16;  // Unicode with
                                                            //  surrogates
  static _core_attr const CONV_FRAME::CodeSetId ID_CP1251;  // windows-1251
  static _core_attr const CONV_FRAME::CodeSetId ID_037;     // IBM Ebcdic
  static _core_attr const CONV_FRAME::CodeSetId ID_500;
  static _core_attr const CONV_FRAME::CodeSetId ID_EDF_4;


  // Forward declarations of the four code set roles; the full class
  // definitions follow later in omniCodeSet.
  class NCS_C; // Native code set for char and string
  class NCS_W; // Native code set for wide char and wide string
  class TCS_C; // Transmission code set for char and string
  class TCS_W; // Transmission code set for wide char and wide string

  static NCS_C* getNCS_C(CONV_FRAME::CodeSetId id);
  static NCS_W* getNCS_W(CONV_FRAME::CodeSetId id);
  static TCS_C* getTCS_C(CONV_FRAME::CodeSetId id, GIOP::Version v);
  static TCS_W* getTCS_W(CONV_FRAME::CodeSetId id, GIOP::Version v);
  // Static functions to get the code set object for a particular DCE
  // code set id. Return null if no suitable code set object exists.
  // The transmission code set lookups additionally take a GIOP
  // version v.

  static NCS_C* getNCS_C(const char* name);
  static NCS_W* getNCS_W(const char* name);
  static TCS_C* getTCS_C(const char* name, GIOP::Version v);
  static TCS_W* getTCS_W(const char* name, GIOP::Version v);
  // Static functions to get the code set object for a particular
  // name. Return null if no suitable code set object exists.
  // As with the id based lookups, the transmission code set variants
  // also take a GIOP version v.

  static void registerNCS_C(NCS_C* cs);
  static void registerNCS_W(NCS_W* cs);
  static void registerTCS_C(TCS_C* cs);
  static void registerTCS_W(TCS_W* cs);
  // Static functions to register new code set objects.
  // NO concurrency control. It is expected that these functions will
  // be called from static initialisers.

  static void logCodeSets();
  // Print out the supported code sets

#if (SIZEOF_WCHAR == 2)
  // SIZEOF_WCHAR is 2: _CORBA_WChar itself serves as the UniChar type.
  typedef _CORBA_WChar  UniChar; // Unicode code point (UTF-16)
#else
  // Any other SIZEOF_WCHAR: UniChar is _CORBA_UShort instead, so it is
  // a different type from _CORBA_WChar in this configuration.
  typedef _CORBA_UShort UniChar;
#endif

  // Special code set kinds. Every code set object carries one of
  // these values (held by Base and returned by Base::kind()).
  enum Kind {
    CS_Other, // Don't know anything special about this code set
    CS_8bit,  // Simple 8 bit set with one-to-one Unicode mapping
              //  Class must be NCS_C_8bit or TCS_C_8bit
    CS_16bit  // Simple 16 bit set with one-to-one Unicode mapping
              //  (not Unicode itself, though)
              //  Class must be NCS_W_16bit or TCS_W_16bit
  };


  // Common base of every code set object. Holds the identifying
  // details that all native and transmission code sets share.
  class Base {
  public:

    // DCE code set identifier.
    CONV_FRAME::CodeSetId id() const
    {
      return pd_id;
    }

    // Descriptive name string.
    const char* name() const
    {
      return pd_name;
    }

    // Code set kind.
    Kind kind() const
    {
      return pd_kind;
    }

  protected:
    // Construction and destruction are restricted to derived classes.
    // The name pointer is stored as given (the string is not copied)
    // and the list link starts out null.
    Base(CONV_FRAME::CodeSetId id_, const char* name_, Kind kind_)
      : pd_id(id_),
        pd_name(name_),
        pd_kind(kind_),
        pd_next(0)
    {
    }

    virtual ~Base()
    {
    }

  private:
    // omniCodeSet is granted access to the private members below.
    friend class omniCodeSet;

    CONV_FRAME::CodeSetId pd_id;
    const char*           pd_name;
    Kind                  pd_kind;
    Base*                 pd_next; // Linked list
  };


  // Native code set for char and string data. Every marshalling
  // operation is handed the transmission code set (tcs) to use.
  class NCS_C : public Base {
  public:

    NCS_C(CONV_FRAME::CodeSetId id, const char* name, Kind kind);

    // Marshal the single character c.
    virtual void marshalChar(cdrStream&  stream,
                             TCS_C*      tcs,
                             _CORBA_Char c) = 0;

    // Marshal the string s. len is the length of the string, or zero
    // when the length is not known (or the string really is empty).
    virtual void marshalString(cdrStream&   stream,
                               TCS_C*       tcs,
                               _CORBA_ULong bound,
                               _CORBA_ULong len,
                               const char*  s) = 0;

    // Unmarshal a single character.
    virtual _CORBA_Char unmarshalChar(cdrStream& stream,
                                      TCS_C*     tcs) = 0;

    // Unmarshal a string into s.
    // NOTE(review): the return value is presumably the string
    // length -- confirm against the implementations.
    virtual _CORBA_ULong unmarshalString(cdrStream&   stream,
                                         TCS_C*       tcs,
                                         _CORBA_ULong bound,
                                         char*&       s) = 0;

    virtual ~NCS_C()
    {
    }
  };

  // Transmission code set for char and string data. Offers a Unicode
  // (UniChar) based interface that every implementation must provide,
  // plus optional fast paths that work on native data directly.
  class TCS_C : public Base {
  public:

    TCS_C(CONV_FRAME::CodeSetId id,
          const char*           name,
          Kind                  kind,
          GIOP::Version         giopVersion);

    // GIOP version held by this code set object.
    GIOP::Version giopVersion() const
    {
      return pd_giopVersion;
    }

    //
    // Unicode based marshalling
    //

    virtual void marshalChar(cdrStream& stream, UniChar uc) = 0;

    // len is the length of the unicode string in us. It may only be
    // zero if the string is of zero length.
    virtual void marshalString(cdrStream&     stream,
                               _CORBA_ULong   bound,
                               _CORBA_ULong   len,
                               const UniChar* us) = 0;

    virtual UniChar unmarshalChar(cdrStream& stream) = 0;

    virtual _CORBA_ULong unmarshalString(cdrStream&   stream,
                                         _CORBA_ULong bound,
                                         UniChar*&    us) = 0;

    //
    // Fast marshalling functions. Each returns false if no fast case
    // is possible and the Unicode functions should be used instead.
    //

    virtual _CORBA_Boolean fastMarshalChar(cdrStream&  stream,
                                           NCS_C*      ncs,
                                           _CORBA_Char c);

    // len may be zero if length is not known.
    virtual _CORBA_Boolean fastMarshalString(cdrStream&   stream,
                                             NCS_C*       ncs,
                                             _CORBA_ULong bound,
                                             _CORBA_ULong len,
                                             const char*  s);

    virtual _CORBA_Boolean fastUnmarshalChar(cdrStream&   stream,
                                             NCS_C*       ncs,
                                             _CORBA_Char& c);

    virtual _CORBA_Boolean fastUnmarshalString(cdrStream&    stream,
                                               NCS_C*        ncs,
                                               _CORBA_ULong  bound,
                                               _CORBA_ULong& length,
                                               char*&        s);

    virtual ~TCS_C()
    {
    }

  private:
    GIOP::Version pd_giopVersion;
  };

  // Native code set for wide char and wide string data. Every
  // marshalling operation is handed the transmission code set (tcs)
  // to use.
  class NCS_W : public Base {
  public:

    NCS_W(CONV_FRAME::CodeSetId id, const char* name, Kind kind);

    // Marshal the single wide character c.
    virtual void marshalWChar(cdrStream&   stream,
                              TCS_W*       tcs,
                              _CORBA_WChar c) = 0;

    // Marshal the wide string s.
    virtual void marshalWString(cdrStream&          stream,
                                TCS_W*              tcs,
                                _CORBA_ULong        bound,
                                _CORBA_ULong        len,
                                const _CORBA_WChar* s) = 0;

    // Unmarshal a single wide character.
    virtual _CORBA_WChar unmarshalWChar(cdrStream& stream,
                                        TCS_W*     tcs) = 0;

    // Unmarshal a wide string into s.
    // NOTE(review): the return value is presumably the string
    // length -- confirm against the implementations.
    virtual _CORBA_ULong unmarshalWString(cdrStream&     stream,
                                          TCS_W*         tcs,
                                          _CORBA_ULong   bound,
                                          _CORBA_WChar*& s) = 0;

    virtual ~NCS_W()
    {
    }
  };

  // Transmission code set for wide char and wide string data. Offers
  // a Unicode (UniChar) based interface that every implementation
  // must provide, plus optional fast paths that work on native wide
  // data directly.
  class TCS_W : public Base {
  public:

    TCS_W(CONV_FRAME::CodeSetId id,
          const char*           name,
          Kind                  kind,
          GIOP::Version         giopVersion);

    // GIOP version held by this code set object.
    GIOP::Version giopVersion() const
    {
      return pd_giopVersion;
    }

    //
    // Unicode based marshalling
    //

    virtual void marshalWChar(cdrStream& stream, UniChar uc) = 0;

    virtual void marshalWString(cdrStream&     stream,
                                _CORBA_ULong   bound,
                                _CORBA_ULong   len,
                                const UniChar* us) = 0;

    virtual UniChar unmarshalWChar(cdrStream& stream) = 0;

    virtual _CORBA_ULong unmarshalWString(cdrStream&   stream,
                                          _CORBA_ULong bound,
                                          UniChar*&    us) = 0;

    //
    // Fast marshalling functions. Each returns false if no fast case
    // is possible and Unicode functions should be used instead.
    //

    virtual _CORBA_Boolean fastMarshalWChar(cdrStream&   stream,
                                            NCS_W*       ncs,
                                            _CORBA_WChar c);

    virtual _CORBA_Boolean fastMarshalWString(cdrStream&          stream,
                                              NCS_W*              ncs,
                                              _CORBA_ULong        bound,
                                              _CORBA_ULong        len,
                                              const _CORBA_WChar* s);

    virtual _CORBA_Boolean fastUnmarshalWChar(cdrStream&    stream,
                                              NCS_W*        ncs,
                                              _CORBA_WChar& c);

    virtual _CORBA_Boolean fastUnmarshalWString(cdrStream&     stream,
                                                NCS_W*         ncs,
                                                _CORBA_ULong   bound,
                                                _CORBA_ULong&  length,
                                                _CORBA_WChar*& s);

    virtual ~TCS_W()
    {
    }

  private:
    GIOP::Version pd_giopVersion;
  };



  //
  // Classes for code sets we know something special about
  //

  //
  // Single byte code sets.
  //

  // Single byte code sets contain two mapping tables, to and from
  // UCS-2. The table to UCS-2 is a simple 256 element array of UCS-2
  // values. The table from UCS-2 is a two-level table. The high octet
  // of the UCS-2 value is looked-up in the top-level table; the low
  // octet is looked-up in the table found in the first look-up. The
  // majority of second-level table entries are usually the same empty
  // table.
  //
  // In both tables, an entry with value zero in any place except
  // index zero means there is no mapping for the character in
  // question, and a DATA_CONVERSION exception should be thrown.

  // Native code set for a simple 8 bit code set (kind CS_8bit). The
  // translation to and from Unicode is driven by two tables supplied
  // at construction time.
  class NCS_C_8bit : public NCS_C {
  public:

    // Inherited virtual functions
    virtual void marshalChar(cdrStream& stream, TCS_C* tcs,
			     _CORBA_Char c);

    virtual void marshalString(cdrStream& stream, TCS_C* tcs,
			       _CORBA_ULong bound, _CORBA_ULong len,
			       const char* s);

    virtual _CORBA_Char unmarshalChar(cdrStream& stream, TCS_C* tcs);

    virtual _CORBA_ULong unmarshalString(cdrStream& stream,
					 TCS_C* tcs,
					 _CORBA_ULong bound, char*& s);

    // Translation tables, returned exactly as given to the constructor:
    // toU() is the table mapping to Unicode, fromU() the table of
    // tables mapping from Unicode.
    inline const UniChar*      toU()   const { return pd_toU; }
    inline const _CORBA_Char** fromU() const { return pd_fromU; }

    // id_ and name_ are passed on to NCS_C together with CS_8bit. The
    // table pointers toU_ and fromU_ are stored as given; the tables
    // themselves are not copied.
    NCS_C_8bit(CONV_FRAME::CodeSetId id_,
	       const char*           name_,
	       const UniChar*        toU_,
	       const _CORBA_Char**   fromU_)

      : NCS_C(id_, name_, CS_8bit),
	pd_toU(toU_),
	pd_fromU(fromU_)
    { }

    // No trailing ';' after the body: it was redundant, inconsistent
    // with the other destructors in this file, and is reported by
    // -Wextra-semi.
    virtual ~NCS_C_8bit() {}

  private:
    const UniChar*      pd_toU;
    const _CORBA_Char** pd_fromU;
  };

  // Transmission code set for a simple 8 bit code set (kind CS_8bit).
  // The translation to and from Unicode is driven by two tables
  // supplied at construction time.
  class TCS_C_8bit : public TCS_C {
  public:

    // id_, name_ and giopVersion_ are passed on to TCS_C together with
    // CS_8bit. The table pointers are stored as given; the tables
    // themselves are not copied.
    TCS_C_8bit(CONV_FRAME::CodeSetId id_,
               const char*           name_,
               GIOP::Version         giopVersion_,
               const UniChar*        toU_,
               const _CORBA_Char**   fromU_)
      : TCS_C(id_, name_, CS_8bit, giopVersion_),
        pd_toU(toU_),
        pd_fromU(fromU_)
    {
    }

    //
    // Inherited virtual functions: Unicode based marshalling
    //

    virtual void marshalChar(cdrStream& stream, UniChar uc);

    virtual void marshalString(cdrStream&     stream,
                               _CORBA_ULong   bound,
                               _CORBA_ULong   len,
                               const UniChar* us);

    virtual UniChar unmarshalChar(cdrStream& stream);

    virtual _CORBA_ULong unmarshalString(cdrStream&   stream,
                                         _CORBA_ULong bound,
                                         UniChar*&    us);

    //
    // Inherited virtual functions: fast marshalling
    //

    virtual _CORBA_Boolean fastMarshalChar(cdrStream&  stream,
                                           NCS_C*      ncs,
                                           _CORBA_Char c);

    virtual _CORBA_Boolean fastMarshalString(cdrStream&   stream,
                                             NCS_C*       ncs,
                                             _CORBA_ULong bound,
                                             _CORBA_ULong len,
                                             const char*  s);

    virtual _CORBA_Boolean fastUnmarshalChar(cdrStream&   stream,
                                             NCS_C*       ncs,
                                             _CORBA_Char& c);

    virtual _CORBA_Boolean fastUnmarshalString(cdrStream&    stream,
                                               NCS_C*        ncs,
                                               _CORBA_ULong  bound,
                                               _CORBA_ULong& length,
                                               char*&        s);

    // Translation tables, returned exactly as given to the constructor.
    const UniChar* toU() const
    {
      return pd_toU;
    }

    const _CORBA_Char** fromU() const
    {
      return pd_fromU;
    }

    virtual ~TCS_C_8bit()
    {
    }

  private:
    const UniChar*      pd_toU;
    const _CORBA_Char** pd_fromU;
  };

  static _core_attr const _CORBA_Char empty8BitTable[];
  // Empty 8 bit table to be used as a second-level table in
  // Unicode -> 8 bit mappings. Declared here without a size; the
  // array itself is defined outside this header.


  //
  // Two byte code sets.
  //
  // If sizeof(wchar_t) == 4, the native code set makes sure each wide
  // character uses only the low 16 bits.
  //

  // Two byte code sets contain two two-level tables, one mapping to
  // UCS-2, the other mapping from it.

  // Native code set for a simple 16 bit code set (kind CS_16bit). The
  // translation to and from Unicode is driven by two tables of tables
  // supplied at construction time.
  class NCS_W_16bit : public NCS_W {
  public:

    // id_ and name_ are passed on to NCS_W together with CS_16bit.
    // The table pointers are stored as given; the tables themselves
    // are not copied.
    NCS_W_16bit(CONV_FRAME::CodeSetId id_,
                const char*           name_,
                const UniChar**       toU_,
                const _CORBA_UShort** fromU_)
      : NCS_W(id_, name_, CS_16bit),
        pd_toU(toU_),
        pd_fromU(fromU_)
    {
    }

    //
    // Inherited virtual functions
    //

    virtual void marshalWChar(cdrStream&   stream,
                              TCS_W*       tcs,
                              _CORBA_WChar c);

    virtual void marshalWString(cdrStream&          stream,
                                TCS_W*              tcs,
                                _CORBA_ULong        bound,
                                _CORBA_ULong        len,
                                const _CORBA_WChar* s);

    virtual _CORBA_WChar unmarshalWChar(cdrStream& stream, TCS_W* tcs);

    virtual _CORBA_ULong unmarshalWString(cdrStream&     stream,
                                          TCS_W*         tcs,
                                          _CORBA_ULong   bound,
                                          _CORBA_WChar*& s);

    // Translation tables, returned exactly as given to the constructor.
    const UniChar** toU() const
    {
      return pd_toU;
    }

    const _CORBA_UShort** fromU() const
    {
      return pd_fromU;
    }

    virtual ~NCS_W_16bit()
    {
    }

  private:
    const UniChar**       pd_toU;
    const _CORBA_UShort** pd_fromU;
  };

  // Transmission code set for a simple 16 bit code set (kind
  // CS_16bit). The translation to and from Unicode is driven by two
  // tables of tables supplied at construction time.
  class TCS_W_16bit : public TCS_W {
  public:

    // id_, name_ and giopVersion_ are passed on to TCS_W together with
    // CS_16bit. The table pointers are stored as given; the tables
    // themselves are not copied.
    TCS_W_16bit(CONV_FRAME::CodeSetId id_,
                const char*           name_,
                GIOP::Version         giopVersion_,
                const UniChar**       toU_,
                const _CORBA_UShort** fromU_)
      : TCS_W(id_, name_, CS_16bit, giopVersion_),
        pd_toU(toU_),
        pd_fromU(fromU_)
    {
    }

    //
    // Unicode based marshalling
    //

    virtual void marshalWChar(cdrStream& stream, UniChar uc);

    virtual void marshalWString(cdrStream&     stream,
                                _CORBA_ULong   bound,
                                _CORBA_ULong   len,
                                const UniChar* us);

    virtual UniChar unmarshalWChar(cdrStream& stream);

    virtual _CORBA_ULong unmarshalWString(cdrStream&   stream,
                                          _CORBA_ULong bound,
                                          UniChar*&    us);

    //
    // Fast marshalling functions. Each returns false if no fast case
    // is possible and Unicode functions should be used instead.
    //

    virtual _CORBA_Boolean fastMarshalWChar(cdrStream&   stream,
                                            NCS_W*       ncs,
                                            _CORBA_WChar c);

    virtual _CORBA_Boolean fastMarshalWString(cdrStream&          stream,
                                              NCS_W*              ncs,
                                              _CORBA_ULong        bound,
                                              _CORBA_ULong        len,
                                              const _CORBA_WChar* s);

    virtual _CORBA_Boolean fastUnmarshalWChar(cdrStream&    stream,
                                              NCS_W*        ncs,
                                              _CORBA_WChar& c);

    virtual _CORBA_Boolean fastUnmarshalWString(cdrStream&     stream,
                                                NCS_W*         ncs,
                                                _CORBA_ULong   bound,
                                                _CORBA_ULong&  length,
                                                _CORBA_WChar*& s);

    // Translation tables, returned exactly as given to the constructor.
    const UniChar** toU() const
    {
      return pd_toU;
    }

    const _CORBA_UShort** fromU() const
    {
      return pd_fromU;
    }

    virtual ~TCS_W_16bit()
    {
    }

  private:
    const UniChar**       pd_toU;
    const _CORBA_UShort** pd_fromU;
  };

  static _core_attr const _CORBA_UShort empty16BitTable[];
  // Empty 16 bit table to be used as a second-level table in 16 bit
  // mappings. Declared here without a size; the array itself is
  // defined outside this header.

};

OMNI_NAMESPACE_END(omni)

#endif // __CODESETS_H__