Module LibXML::XML::Encoding

  1. ext/libxml/libxml.c

The encoding class exposes the encodings that libxml supports via constants.

LibXML converts all data sources to UTF-8 internally before processing them. By default, LibXML determines a data source’s encoding using the algorithm described on its website.

However, you may override a data source’s encoding by using the encoding constants defined in this module.

Example 1:

io = File.open('some_file', 'rb')
parser = XML::Parser.io(io, :encoding => XML::Encoding::ISO_8859_1)
doc = parser.parse

Example 2:

parser = XML::HTMLParser.file("some_file", :encoding => XML::Encoding::ISO_8859_1)
doc = parser.parse

Example 3:

document = XML::Document.new
document.encoding = XML::Encoding::ISO_8859_1
document << XML::Node.new

Methods

public class

  1. s_to_encoding
  2. encoding_to_s

Constants

ERROR = -1   No char encoding detected.
NONE = 0   No char encoding detected.
UTF_8 = 1   UTF-8
UTF_16LE = 2   UTF-16 little endian.
UTF_16BE = 3   UTF-16 big endian.
UCS_4LE = 4   UCS-4 little endian.
UCS_4BE = 5   UCS-4 big endian.
EBCDIC = 6   EBCDIC uh!
UCS_4_2143 = 7   UCS-4 unusual ordering.
UCS_4_3412 = 8   UCS-4 unusual ordering.
UCS_2 = 9   UCS-2.
ISO_8859_1 = 10   ISO-8859-1 ISO Latin 1.
ISO_8859_2 = 11   ISO-8859-2 ISO Latin 2.
ISO_8859_3 = 12   ISO-8859-3.
ISO_8859_4 = 13   ISO-8859-4.
ISO_8859_5 = 14   ISO-8859-5.
ISO_8859_6 = 15   ISO-8859-6.
ISO_8859_7 = 16   ISO-8859-7.
ISO_8859_8 = 17   ISO-8859-8.
ISO_8859_9 = 18   ISO-8859-9.
ISO_2022_JP = 19   ISO-2022-JP.
SHIFT_JIS = 20   Shift_JIS.
EUC_JP = 21   EUC-JP.
ASCII = 22   pure ASCII.

Public class methods

XML::Encoding.s_to_encoding("UTF-8") → XML::Encoding::UTF_8

Converts an encoding string to an encoding constant defined on the XML::Encoding class.

[show source]
/*
 * Converts an encoding name held in a Ruby String into the corresponding
 * libxml encoding constant.
 *
 * klass    - receiver of the singleton method (unused).
 * encoding - Ruby String naming the encoding, or nil.
 *
 * Returns nil when encoding is nil; otherwise the xmlCharEncoding value
 * returned by xmlParseCharEncoding(), boxed as a Ruby Integer.
 * Raises TypeError if encoding is neither nil nor convertible to a String,
 * and ArgumentError if the String contains an embedded NUL byte.
 */
static VALUE rxml_encoding_from_s(VALUE klass, VALUE encoding)
{
  xmlCharEncoding xencoding;

  if (NIL_P(encoding))
    return Qnil;

  /* xmlParseCharEncoding expects a NUL-terminated C string; StringValueCStr
   * guarantees that, whereas StringValuePtr does not. */
  xencoding = xmlParseCharEncoding(StringValueCStr(encoding));

  /* Box the C enum as a Ruby Integer. The earlier NUM2INT converted in the
   * wrong direction (VALUE -> int) and handed callers a raw int as a VALUE. */
  return INT2NUM(xencoding);
}
XML::Encoding.encoding_to_s(XML::Encoding::UTF_8) → "UTF-8"

Converts an encoding constant defined on the XML::Encoding class to its text representation.

[show source]
/*
 * Maps an encoding constant (a Ruby Integer) to its textual name.
 *
 * klass    - receiver of the singleton method (unused).
 * encoding - Ruby Integer holding an xmlCharEncoding value.
 *
 * Returns nil when xmlGetCharEncodingName() yields NULL for the value;
 * otherwise a Ruby String built from the name by rxml_str_new2().
 */
static VALUE rxml_encoding_to_s(VALUE klass, VALUE encoding)
{
  const char* name = xmlGetCharEncodingName(NUM2INT(encoding));

  /* The name is passed twice on purpose, matching the existing call;
   * NOTE(review): the second argument presumably selects the encoding tag
   * of the resulting Ruby string - confirm against rxml_str_new2. */
  return name ? rxml_str_new2(name, name) : Qnil;
}