The encoding class exposes the encodings that libxml supports via constants.
LibXML converts all data sources to UTF8 internally before processing them. By default, LibXML determines a data source’s encoding using the algorithm described on its website.
However, you may override a data source’s encoding by using the encoding constants defined in this module.
Example 1:
io = File.open('some_file', 'rb')
parser = XML::Parser.io(io, :encoding => XML::Encoding::ISO_8859_1)
doc = parser.parse
Example 2:
parser = XML::HTMLParser.file("some_file", :encoding => XML::Encoding::ISO_8859_1)
doc = parser.parse
Example 3:
document = XML::Document.new document.encoding = XML::Encoding::ISO_8859_1 doc << XML::Node.new
Constants
| ERROR | = | -1 | No char encoding detected. | |
| NONE | = | 0 | No char encoding detected. | |
| UTF_8 | = | 1 | UTF-8 | |
| UTF_16LE | = | 2 | UTF-16 little endian. | |
| UTF_16BE | = | 3 | UTF-16 big endian. | |
| UCS_4LE | = | 4 | UCS-4 little endian. | |
| UCS_4BE | = | 5 | UCS-4 big endian. | |
| EBCDIC | = | 6 | EBCDIC uh! | |
| UCS_4_2143 | = | 7 | UCS-4 unusual ordering. | |
| UCS_4_3412 | = | 8 | UCS-4 unusual ordering. | |
| UCS_2 | = | 9 | UCS-2. | |
| ISO_8859_1 | = | 10 | ISO-8859-1 ISO Latin 1. | |
| ISO_8859_2 | = | 11 | ISO-8859-2 ISO Latin 2. | |
| ISO_8859_3 | = | 12 | ISO-8859-3. | |
| ISO_8859_4 | = | 13 | ISO-8859-4. | |
| ISO_8859_5 | = | 14 | ISO-8859-5. | |
| ISO_8859_6 | = | 15 | ISO-8859-6. | |
| ISO_8859_7 | = | 16 | ISO-8859-7. | |
| ISO_8859_8 | = | 17 | ISO-8859-8. | |
| ISO_8859_9 | = | 18 | ISO-8859-9. | |
| ISO_2022_JP | = | 19 | ISO-2022-JP. | |
| SHIFT_JIS | = | 20 | Shift_JIS. | |
| EUC_JP | = | 21 | EUC-JP. | |
| ASCII | = | 22 | pure ASCII. |
Public class methods
Converts an encoding string to an encoding constant defined on the XML::Encoding class.
static VALUE rxml_encoding_from_s(VALUE klass, VALUE encoding)
{
xmlCharEncoding xencoding;
if (encoding == Qnil)
return Qnil;
xencoding = xmlParseCharEncoding(StringValuePtr(encoding));
return NUM2INT(xencoding);
}
Converts an encoding constant defined on the XML::Encoding class to its text representation.
static VALUE rxml_encoding_to_s(VALUE klass, VALUE encoding)
{
const char* xencoding = xmlGetCharEncodingName(NUM2INT(encoding));
if (!xencoding)
return Qnil;
else
return rxml_str_new2(xencoding, xencoding);
}