Sophie

Sophie

distrib > Mandriva > 2007.0 > i586 > media > contrib-release > by-pkgid > 4c9f17ec5da473f7fb52041bb9197c5a > files > 71

kaffe-devel-1.1.8-0.20060723.1mdv2007.0.i586.rpm

Using Various Charsets on Kaffe
===============================

Author: Ito Kazumitsu <kaz@maczuka.gcd.org>

Supported charsets
------------------

  Kaffe's default charset provider is gnu.java.nio.charset.Provider.
  To get a list of supported charsets, call the method
  java.nio.charset.Charset.availableCharsets() or see the source
  file libraries/javalib/gnu/java/nio/charset/Provider.java.

  You can optionally use charsets supported by libiconv, which are
  provided by gnu.java.nio.charset.iconv.IconvProvider.  In order to
  enable gnu.java.nio.charset.iconv.IconvProvider, you have to make
  a configuration file named java.nio.charset.spi.CharsetProvider
  in the resource directory META-INF/services.  The configuration
  file should contain a line which reads

       gnu.java.nio.charset.iconv.IconvProvider

How about Japanese charsets?
----------------------------

  EUC-JP and ISO-2022-JP are supported by libiconv.

  Shift_JIS is supported by libiconv, but you will be annoyed to find
  that '\\' (backslash) is converted to the yen sign and '~' (tilde)
  to the overline.

  Windows-31J aka MS932, which is commonly used in Japan, is not supported
  by libiconv.

  In order to handle these charsets properly,  you should either

    (1) Modify libiconv.

  or

    (2) Use a wrapper class of gnu.java.nio.charset.iconv package.

  If you choose to modify libiconv, you have to write or find a patch yourself.

  If you are to use a wrapper class, compile the source file below
  and put the name of the wrapper class IconvJapaneseCharsetProvider
  in the configuration file.  In this case, you do not have to put
  gnu.java.nio.charset.iconv.IconvProvider in the configuration file
  because IconvJapaneseCharsetProvider loads it internally.

  If you use not only Japanese charsets but also other charsets
  supported by libiconv, list both providers in the configuration file
  and put IconvJapaneseCharsetProvider before
  gnu.java.nio.charset.iconv.IconvProvider; otherwise IconvProvider
  answers for the Japanese charsets first and the wrapper is never used.

------------------------------------------------------------------------
import java.nio.BufferOverflowException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.*;
import java.nio.charset.spi.CharsetProvider;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Locale;
import java.util.Set;
import gnu.java.nio.charset.iconv.IconvProvider;

public final class IconvJapaneseCharsetProvider extends CharsetProvider
{

  /**
    * List of supported charsets.
    * Key: canonical name in lower case characters
    * Value: CharsetInfo
    */
  private static HashMap charsets;

  /**
    * Alias -> canonical name mapping.
    * Key: alias in lower case characters
    * Value: key of the corresponding entry in charsets
    */
  private static HashMap alias2cname;

  static
  {
      charsets = new HashMap();
      charsets.put("euc-jp", new CharsetInfo(
          "EUC-JP", "EUC-JP",
              new String[] {"EUC_JP"}));
      charsets.put("iso-2022-jp", new CharsetInfo(
          "ISO-2022-JP", "ISO-2022-JP",
              new String[] {}));
      charsets.put("shift_jis", new CharsetInfo(
          "Shift_JIS", "Shift_JIS",
              null, new Shift_JISCharset()));
      charsets.put("windows-31j",new CharsetInfo(
          "Windows-31J", "CP932",
              null, new MS932Charset()));
      alias2cname = new HashMap();

      // Index every alias with the same normalization that charsetForName
      // applies to its argument, so the two can never disagree.
      for (Iterator it = charsets.keySet().iterator(); it.hasNext();)
        {
          String key = (String)(it.next());
          CharsetInfo info = (CharsetInfo)(charsets.get(key));
          String[] als = info.aliases;
          for (int i = 0; i < als.length; i++)
            {
              alias2cname.put(normalize(als[i]), key);
            }
        }
  }

  /**
    * Lower-cases a charset name for use as a lookup key.
    * A fixed locale is used on purpose: with the default locale the result
    * depends on the user's settings (a Turkish locale turns 'I' into a
    * dotless i), and names such as "Shift_JIS" would no longer match the
    * keys registered above.
    */
  private static String normalize(String name)
  {
      return name.toLowerCase(Locale.ENGLISH);
  }

  public IconvJapaneseCharsetProvider ()
  {
  }

  /**
    * Returns an iterator over the charsets this provider can currently
    * supply.  Charsets for which charsetForName yields null are left out,
    * so the iterator never produces a null element.
    */
  public Iterator charsets ()
  {
      HashMap map = new HashMap();
      for (Iterator i = charsets.keySet().iterator (); i.hasNext (); )
        {
          String name = (String)(i.next ());
          Charset cs = charsetForName(name);
          if (cs != null)
            {
              map.put(name, cs);
            }
        }
      return Collections.unmodifiableCollection (map.values ())
                      .iterator ();
  }

  /**
    * Looks up a charset by canonical name or alias, ignoring case.
    *
    * @param charsetName the name or alias to look up
    * @return the charset, or null if the name is not one of the charsets
    *         registered in this class or if the iconv provider returns no
    *         charset for it
    */
  public Charset charsetForName (String charsetName)
  {
      if (charsetName == null)
        {
          return null;
        }
      String keyName = normalize(charsetName);
      String cName = (String)(alias2cname.get(keyName));
      if (cName != null)
        {
          keyName = cName;
        }
      CharsetInfo info = (CharsetInfo)(charsets.get(keyName));
      if (info == null)
        {
          return null;
        }
      Charset iconvCharset = IconvProvider.provider().
          charsetForName(info.iconvName);
      if (iconvCharset == null)
        {
          // Without an underlying charset the wrapper could only fail
          // later in newDecoder()/newEncoder(); report "unsupported" now.
          return null;
        }

      if (info.wrapper != null)
        {
          // The wrapper instance is shared; every lookup re-binds it to
          // the iconv charset just obtained.
          info.wrapper.setIconvCharset(iconvCharset);
          return info.wrapper;
        }
      else
        {
          return iconvCharset;
        }
  }

  /**
    * Describes one supported charset: its canonical name, the name under
    * which it is requested from the iconv provider, its aliases and an
    * optional wrapper charset.
    */
  private static class CharsetInfo
  {
        public String canonicalName;
        public String iconvName;
        public String[] aliases;
        public IconvCharsetWrapper wrapper;

        /** Creates an entry without a wrapper. */
        public CharsetInfo(String canonicalName, String iconvName,
            String[] aliases)
          {
            this.wrapper = null;
            this.aliases = aliases;
            this.iconvName = iconvName;
            this.canonicalName = canonicalName;
          }

        /**
          * Creates an entry with a wrapper.  When aliases is null, the
          * alias list is taken from the wrapper itself.
          */
        public CharsetInfo(String canonicalName, String iconvName,
            String[] aliases, IconvCharsetWrapper wrapper)
          {
            this.canonicalName = canonicalName;
            this.iconvName = iconvName;
            this.wrapper = wrapper;
            this.aliases = (aliases != null) ? aliases : aliasesOf(wrapper);
          }

        /** Copies the alias set of the given wrapper into a String array. */
        private static String[] aliasesOf(IconvCharsetWrapper wrapper)
          {
            Set names = wrapper.aliases();
            return (String[]) names.toArray(new String[names.size()]);
          }
  }

  /**
    * Base class of the charsets that wrap an iconv-provided charset.
    * The wrapped charset is not known at construction time; it is supplied
    * afterwards through setIconvCharset.
    */
  private abstract static class IconvCharsetWrapper extends Charset
  {
      /** The wrapped charset; null until setIconvCharset has been called. */
      protected Charset iconvCharset;

      protected IconvCharsetWrapper(String name, String[] aliases)
        {
          super(name, aliases);
        }

      /** Sets the charset whose decoders and encoders are to be wrapped. */
      public void setIconvCharset(Charset iconvCharset)
        {
          this.iconvCharset = iconvCharset;
        }

      /**
        * Tells whether this charset contains the given one.  Only the
        * relation required by the Charset contract is claimed: a charset
        * contains itself.  For every other charset (and for null) the
        * answer is false.
        */
      public boolean contains(Charset cs)
        {
          return this.equals(cs);
        }

  }

  /**
    * Shift_JIS charset (aliases SJIS and Shift-JIS) whose decoders and
    * encoders wrap those of the iconv charset set on this wrapper.
    */
  public static class Shift_JISCharset extends IconvCharsetWrapper
  {

      public Shift_JISCharset()
        {
          super("Shift_JIS", new String[] {"SJIS", "Shift-JIS"});
        }

      /** Wraps a fresh decoder of the iconv charset. */
      public CharsetDecoder newDecoder()
        {
          CharsetDecoder delegate = iconvCharset.newDecoder();
          return new Shift_JISDecoder(this, delegate);
        }

      /** Wraps a fresh encoder of the iconv charset. */
      public CharsetEncoder newEncoder()
        {
          CharsetEncoder delegate = iconvCharset.newEncoder();
          return new Shift_JISEncoder(this, delegate);
        }

  }

  /**
    * Decoder that delegates to another decoder and then rewrites
    * U+00A5 (yen sign) as a backslash and U+203E (overline) as a tilde
    * in the characters that the delegate produced.
    */
  public static class Shift_JISDecoder extends CharsetDecoder
  {
      private CharsetDecoder iconvDecoder;

      /**
        * @param cs the charset this decoder is reported to belong to
        * @param iconvDecoder the decoder doing the actual conversion; its
        *        chars-per-byte figures are reused for this decoder
        */
      public Shift_JISDecoder(Charset cs, CharsetDecoder iconvDecoder)
        {
          super(cs, iconvDecoder.averageCharsPerByte(),
            iconvDecoder.maxCharsPerByte());
          this.iconvDecoder = iconvDecoder;
        }

      protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out)
        {
          int p0 = out.position();
          CoderResult result = iconvDecoder.decode(in, out, false);

          // Fix up everything written by this call, even when the delegate
          // stopped on an error: the characters decoded before the bad
          // input are already in 'out' and no later call revisits them.
          int p = out.position();
          for (int i = p0; i < p; i++)
            {
              char c = out.get(i);
              if (c == (char)0x00a5)
                out.put(i, '\\');
              else if (c == (char)0x203e)
                out.put(i, '~');
            }
          return result;
        }

  }

  public static class Shift_JISEncoder extends CharsetEncoder
  {
      private CharsetEncoder iconvEncoder;
      public Shift_JISEncoder(Charset cs, CharsetEncoder iconvEncoder)
        {
          super(cs, iconvEncoder.averageBytesPerChar(),
            iconvEncoder.maxBytesPerChar());
          this.iconvEncoder = iconvEncoder;
        }

      protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out)
        {
          CoderResult result = iconvEncoder.encode(in, out, false);
          if (result.isError())
            {
              int l = result.length();
              int p = in.position();
              for (int i = 0; i < l; i++)
                {
                  char c = in.get(p);
                  if (c == '\\' || c == '~')
                    {
                      try
                        {
                          out.put((byte)c);
                          p++;
                          in.position(p);
                        }
                      catch (BufferOverflowException _)
                        {
                          return CoderResult.OVERFLOW;
                        }
                    }
                  else
                    {
                      return CoderResult.unmappableForLength(1);
                    }
                }
              return this.encodeLoop(in, out);
            }
         else
           {
              return result;
           }
        }
  }

  // Windows-31J
  /**
    * Windows-31J charset (alias MS932) whose decoders and encoders wrap
    * those of the iconv charset set on this wrapper.
    */
  public static class MS932Charset extends IconvCharsetWrapper
  {

      public MS932Charset()
        {
          super("Windows-31J", new String[] {"MS932"});
        }

      /** Wraps a fresh decoder of the iconv charset. */
      public CharsetDecoder newDecoder()
        {
          CharsetDecoder delegate = iconvCharset.newDecoder();
          return new MS932Decoder(this, delegate);
        }

      /** Wraps a fresh encoder of the iconv charset. */
      public CharsetEncoder newEncoder()
        {
          CharsetEncoder delegate = iconvCharset.newEncoder();
          return new MS932Encoder(this, delegate);
        }

  }

  /**
    * Decoder that delegates to another decoder and then replaces six
    * characters in the delegate's output with their fullwidth or
    * look-alike counterparts (see decodeLoop for the table).
    */
  public static class MS932Decoder extends CharsetDecoder
  {
      private CharsetDecoder iconvDecoder;

      /**
        * @param cs the charset this decoder is reported to belong to
        * @param iconvDecoder the decoder doing the actual conversion; its
        *        chars-per-byte figures are reused for this decoder
        */
      public MS932Decoder(Charset cs, CharsetDecoder iconvDecoder)
        {
          super(cs, iconvDecoder.averageCharsPerByte(),
            iconvDecoder.maxCharsPerByte());
          this.iconvDecoder = iconvDecoder;
        }

      protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out)
        {
          int p0 = out.position();
          CoderResult result = iconvDecoder.decode(in, out, false);

          // Fix up everything written by this call, even when the delegate
          // stopped on an error: the characters decoded before the bad
          // input are already in 'out' and no later call revisits them.
          int p = out.position();
          for (int i = p0; i < p; i++)
            {
              switch (out.get(i))
                {
                case 0x301c:                    // WAVE DASH
                  out.put(i, (char)0xff5e);     //   -> FULLWIDTH TILDE
                  break;
                case 0x2016:                    // DOUBLE VERTICAL LINE
                  out.put(i, (char)0x2225);     //   -> PARALLEL TO
                  break;
                case 0x2212:                    // MINUS SIGN
                  out.put(i, (char)0xff0d);     //   -> FULLWIDTH HYPHEN-MINUS
                  break;
                case 0x00a2:                    // CENT SIGN
                  out.put(i, (char)0xffe0);     //   -> FULLWIDTH CENT SIGN
                  break;
                case 0x00a3:                    // POUND SIGN
                  out.put(i, (char)0xffe1);     //   -> FULLWIDTH POUND SIGN
                  break;
                case 0x00ac:                    // NOT SIGN
                  out.put(i, (char)0xffe2);     //   -> FULLWIDTH NOT SIGN
                  break;
                default:
                  break;
                }
            }
          return result;
        }

  }

  /**
    * Encoder that substitutes a handful of characters (see toIconvChar)
    * in a copy of the input and hands that copy to another encoder.
    */
  public static class MS932Encoder extends CharsetEncoder
  {
      private CharsetEncoder iconvEncoder;

      /**
        * @param cs the charset this encoder is reported to belong to
        * @param iconvEncoder the encoder doing the actual conversion; its
        *        bytes-per-char figures are reused for this encoder
        */
      public MS932Encoder(Charset cs, CharsetEncoder iconvEncoder)
        {
          super(cs, iconvEncoder.averageBytesPerChar(),
            iconvEncoder.maxBytesPerChar());
          this.iconvEncoder = iconvEncoder;
        }

      protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out)
        {
          int start = in.position();
          int count = in.remaining();

          // The input buffer itself is left untouched; the substituted
          // characters go into a scratch copy of the remaining input.
          CharBuffer tmp = CharBuffer.allocate(count);
          for (int j = 0; j < count; j++)
            {
              tmp.put(j, toIconvChar(in.get(start + j)));
            }

          CoderResult result = iconvEncoder.encode(tmp, out, false);
          // The substitution is one char for one char, so the number of
          // chars consumed from the copy is the number consumed from 'in'.
          in.position(start + tmp.position());
          return result;
        }

      /**
        * Returns the character to pass to the wrapped encoder in place of
        * the given one.  Three characters are turned into U+FFFD; the
        * fullwidth forms and PARALLEL TO are turned into the characters
        * noted below; every other character is returned unchanged.
        */
      private static char toIconvChar(char c)
        {
          switch (c)
            {
            case 0x2212: return (char)0xfffd;   // MINUS SIGN
            case 0x301c: return (char)0xfffd;   // WAVE DASH
            case 0x2225: return (char)0x2016;   // -> DOUBLE VERTICAL LINE
            case 0xff0d: return (char)0x2212;   // -> MINUS SIGN
            case 0xff5e: return (char)0x301c;   // -> WAVE DASH
            case 0xffe0: return (char)0x00a2;   // -> CENT SIGN
            case 0xffe1: return (char)0x00a3;   // -> POUND SIGN
            case 0xffe2: return (char)0x00ac;   // -> NOT SIGN
            default:     return c;
            }
        }
  }

}