The XML::Document class provides a tree based API for working with xml documents. You may directly create a document and manipulate it, or create a document from a data source by using an XML::Parser object.
To read a document from a file:
doc = XML::Document.file('my_file')
To use a parser to read a document:
parser = XML::Parser.file('my_file')
doc = parser.parse
To create a document from scratch:
doc = XML::Document.new()
doc.root = XML::Node.new('root_node')
doc.root << XML::Node.new('elem1')
doc.save(filename, :indent => true, :encoding => 'UTF-8')
To write a document to a file:
doc = XML::Document.new()
doc.root = XML::Node.new('root_node')
root = doc.root
root << elem1 = XML::Node.new('elem1')
elem1['attr1'] = 'val1'
elem1['attr2'] = 'val2'
root << elem2 = XML::Node.new('elem2')
elem2['attr1'] = 'val1'
elem2['attr2'] = 'val2'
root << elem3 = XML::Node.new('elem3')
elem3 << elem4 = XML::Node.new('elem4')
elem3 << elem5 = XML::Node.new('elem5')
elem5 << elem6 = XML::Node.new('elem6')
elem6 << 'Content for element 6'
elem3['attr'] = 'baz'
doc.save(filename, :indent => true, :encoding => 'UTF-8')
Methods
public class
- document
-
file
:options -
io
:options
:base_uri="http://libxml.org") - initialize
-
string
:options
:base_uri="http://libxml.org")
public instance
Public class methods
Creates a new document based on the specified document.
Parameters:
document - A preparsed document.
# File lib/libxml/document.rb, line 12 12: def self.document(value) 13: Parser.document(value).parse 14: end
XML::Document.file(path, :encoding => XML::Encoding::UTF_8,
:options => XML::Parser::Options::NOENT) → XML::Document
Creates a new document from the specified file or uri.
You may provide an optional hash table to control how the parsing is performed. Valid options are:
encoding - The document encoding, defaults to nil. Valid values
are the encoding constants defined on XML::Encoding.
options - Parser options. Valid values are the constants defined on
XML::Parser::Options. Multiple options can be combined
by using Bitwise OR (|).
# File lib/libxml/document.rb, line 31 31: def self.file(value, options = {}) 32: Parser.file(value, options).parse 33: end
XML::Document.io(io, :encoding => XML::Encoding::UTF_8,
:options => XML::Parser::Options::NOENT,
:base_uri => "http://libxml.org") → XML::Document
Creates a new document from the specified io object.
Parameters:
io - io object that contains the xml to parse
base_uri - The base url for the parsed document.
encoding - The document encoding, defaults to nil. Valid values
are the encoding constants defined on XML::Encoding.
options - Parser options. Valid values are the constants defined on
XML::Parser::Options. Multiple options can be combined
by using Bitwise OR (|).
# File lib/libxml/document.rb, line 52 52: def self.io(value, options = {}) 53: Parser.io(value, options).parse 54: end
Initializes a new XML::Document, optionally specifying the XML version.
/*
 * Initializes a new XML::Document, optionally taking the XML version string.
 *
 * argc/argv - Ruby call arguments: either none, or one String version.
 * self      - the receiver; its data pointer is set to the new xmlDoc.
 *
 * With no argument the version is "1.0". More than one argument raises
 * ArgumentError, and a non-String version is rejected by Check_Type.
 * Returns self.
 */
static VALUE rxml_document_initialize(int argc, VALUE *argv, VALUE self)
{
  VALUE version;
  xmlDocPtr doc;

  if (argc == 0)
  {
    version = rb_str_new2("1.0");
  }
  else if (argc == 1)
  {
    rb_scan_args(argc, argv, "01", &version);
  }
  else
  {
    rb_raise(rb_eArgError, "wrong number of arguments (need 0 or 1)");
  }

  Check_Type(version, T_STRING);

  doc = xmlNewDoc((xmlChar*) StringValuePtr(version));

  /* Link the two objects in both directions: the xmlDoc remembers its Ruby
   * wrapper and the Ruby wrapper holds the xmlDoc. */
  doc->_private = (void*) self;
  DATA_PTR(self) = doc;

  return self;
}
XML::Document.string(string, :encoding => XML::Encoding::UTF_8,
:options => XML::Parser::Options::NOENT,
:base_uri => "http://libxml.org") → XML::Document
Creates a new document from the specified string.
You may provide an optional hash table to control how the parsing is performed. Valid options are:
base_uri - The base url for the parsed document.
encoding - The document encoding, defaults to nil. Valid values
are the encoding constants defined on XML::Encoding.
options - Parser options. Valid values are the constants defined on
XML::Parser::Options. Multiple options can be combined
by using Bitwise OR (|).
# File lib/libxml/document.rb, line 73 73: def self.string(value, options = {}) 74: Parser.string(value, options).parse 75: end
Public instance methods
Get this document’s child node.
/*
 * Returns the document's `children` node wrapped by rxml_node_wrap, or nil
 * when the document has no children.
 */
static VALUE rxml_document_child_get(VALUE self)
{
  xmlDocPtr doc;

  Data_Get_Struct(self, xmlDoc, doc);

  if (doc->children != NULL)
  {
    return rxml_node_wrap(doc->children);
  }

  return Qnil;
}
Determine whether this document has a child node.
/*
 * Returns true when the document's `children` pointer is set, false
 * otherwise.
 */
static VALUE rxml_document_child_q(VALUE self)
{
  xmlDocPtr doc;

  Data_Get_Struct(self, xmlDoc, doc);

  return (doc->children != NULL) ? Qtrue : Qfalse;
}
Obtain this document’s compression mode identifier.
/*
 * Returns the document's compression mode as reported by
 * xmlGetDocCompressMode, or nil when that call yields -1.
 *
 * Without HAVE_ZLIB_H the function only emits a warning and returns false.
 */
static VALUE rxml_document_compression_get(VALUE self)
{
#ifdef HAVE_ZLIB_H
  xmlDocPtr doc;
  int mode;

  Data_Get_Struct(self, xmlDoc, doc);
  mode = xmlGetDocCompressMode(doc);

  return (mode == -1) ? Qnil : INT2NUM(mode);
#else
  rb_warn("libxml not compiled with zlib support");
  return Qfalse;
#endif
}
Set this document’s compression mode.
/*
 * Sets the document's compression mode.
 *
 * num - a Fixnum (enforced by Check_Type) handed to xmlSetDocCompressMode.
 *
 * Returns the mode read back through xmlGetDocCompressMode, or nil when the
 * wrapped document pointer is NULL or the read-back value is -1.
 * Without HAVE_ZLIB_H the function only emits a warning and returns false.
 */
static VALUE rxml_document_compression_set(VALUE self, VALUE num)
{
#ifdef HAVE_ZLIB_H
  xmlDocPtr doc;
  int mode;

  Check_Type(num, T_FIXNUM);
  Data_Get_Struct(self, xmlDoc, doc);

  if (doc == NULL)
  {
    return Qnil;
  }

  xmlSetDocCompressMode(doc, NUM2INT(num));

  /* Report what the library actually stored, not what was requested. */
  mode = xmlGetDocCompressMode(doc);
  return (mode == -1) ? Qnil : INT2NUM(mode);
#else
  rb_warn("libxml compiled without zlib support");
  return Qfalse;
#endif
}
Determine whether this document is compressed.
/*
 * Returns true when the document's `compression` field is anything other
 * than -1, false otherwise.
 *
 * Without HAVE_ZLIB_H the function only emits a warning and returns false.
 */
static VALUE rxml_document_compression_q(VALUE self)
{
#ifdef HAVE_ZLIB_H
  xmlDocPtr doc;

  Data_Get_Struct(self, xmlDoc, doc);

  return (doc->compression == -1) ? Qfalse : Qtrue;
#else
  rb_warn("libxml compiled without zlib support");
  return Qfalse;
#endif
}
Returns a new XML::XPath::Context for the document.
Namespaces is an optional array of XML::NS objects.
# File lib/libxml/document.rb, line 83 83: def context(nslist = nil) 84: context = XPath::Context.new(self) 85: context.node = self.root 86: context.register_namespaces_from_node(self.root) 87: context.register_namespaces(nslist) if nslist 88: context 89: end
Print libxml debugging information to stdout. Requires that libxml was compiled with debugging enabled.
/*
 * Dumps libxml debugging information for this document.
 *
 * When LIBXML_DEBUG_ENABLED is defined, calls xmlDebugDumpDocument with a
 * NULL output stream and returns true. Otherwise emits a warning and
 * returns false.
 */
static VALUE rxml_document_debug(VALUE self)
{
#ifdef LIBXML_DEBUG_ENABLED
  xmlDocPtr xdoc;

  Data_Get_Struct(self, xmlDoc, xdoc);
  xmlDebugDumpDocument(NULL, xdoc);
  return Qtrue;
#else
  /* The terminating semicolon was missing here, which broke the build
   * whenever libxml lacked debugging support. */
  rb_warn("libxml was compiled without debugging support.");
  return Qfalse;
#endif
}
Obtain the encoding specified by this document.
/*
 * Returns the document's encoding as an integer: the document's `encoding`
 * name is passed through xmlParseCharEncoding and the result is converted
 * with INT2NUM.
 */
static VALUE rxml_document_encoding_get(VALUE self)
{
  xmlDocPtr doc;

  Data_Get_Struct(self, xmlDoc, doc);

  return INT2NUM(xmlParseCharEncoding((const char*) doc->encoding));
}
Set the encoding for this document.
/*
 * Sets the document's encoding.
 *
 * encoding - an integer encoding identifier; it is converted to a name with
 *            xmlGetCharEncodingName.
 *
 * Any encoding string already stored on the document is released before a
 * copy of the new name is stored. Returns self.
 */
static VALUE rxml_document_encoding_set(VALUE self, VALUE encoding)
{
  xmlDocPtr doc;
  const char *name = xmlGetCharEncodingName((xmlCharEncoding) NUM2INT(encoding));

  Data_Get_Struct(self, xmlDoc, doc);

  if (doc->encoding != NULL)
  {
    xmlFree((xmlChar *) doc->encoding);
  }

  /* The document keeps its own duplicate of the name. */
  doc->encoding = xmlStrdup((xmlChar *) name);

  return self;
}
Return the nodes matching the specified xpath expression, optionally using the specified namespace. For more information about working with namespaces, please refer to the XML::XPath documentation.
Parameters:
- xpath - The xpath expression as a string
- namespaces - An optional list of namespaces (see XML::XPath for information).
- Returns - XML::XPath::Object
document.find('/foo', 'xlink:http://www.w3.org/1999/xlink')
IMPORTANT - The returned XML::Node::Set must be freed before its associated document. In a running Ruby program this will happen automatically via Ruby’s mark and sweep garbage collector. However, if the program exits, Ruby does not guarantee the order in which objects are freed (see blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-core/17700). As a result, the associated document may be freed before the node list, which will cause a segmentation fault. To avoid this, use the following (non-ruby like) coding style:
nodes = doc.find('/header')
nodes.each do |node|
... do stuff ...
end
# nodes = nil # GC.start
# File lib/libxml/document.rb, line 118 118: def find(xpath, nslist = nil) 119: self.context(nslist).find(xpath) 120: end
Return the first node matching the specified xpath expression. For more information, please refer to the documentation for XML::Document#find.
# File lib/libxml/document.rb, line 125 125: def find_first(xpath, nslist = nil) 126: find(xpath, nslist).first 127: end
Creates a copy of the node that can be inserted into the current document.
/*
 * Copies a node so that the copy can be inserted into this document.
 *
 * node - the wrapped xmlNode to copy.
 *
 * The copy is made with xmlDocCopyNode(node, doc, 1). If that returns NULL,
 * rxml_raise is called with xmlLastError. Returns the copy wrapped by
 * rxml_node_wrap.
 */
static VALUE rxml_document_import(VALUE self, VALUE node)
{
  xmlDocPtr doc;
  xmlNodePtr source;
  xmlNodePtr copy;

  Data_Get_Struct(self, xmlDoc, doc);
  Data_Get_Struct(node, xmlNode, source);

  copy = xmlDocCopyNode(source, doc, 1);
  if (copy == NULL)
  {
    rxml_raise(&xmlLastError);
  }

  return rxml_node_wrap(copy);
}
Obtain the last node.
/*
 * Returns the document's `last` node wrapped by rxml_node_wrap, or nil when
 * that pointer is NULL.
 */
static VALUE rxml_document_last_get(VALUE self)
{
  xmlDocPtr doc;

  Data_Get_Struct(self, xmlDoc, doc);

  if (doc->last != NULL)
  {
    return rxml_node_wrap(doc->last);
  }

  return Qnil;
}
Determine whether there is a last node.
/*
 * Returns true when the document's `last` pointer is set, false otherwise.
 */
static VALUE rxml_document_last_q(VALUE self)
{
  xmlDocPtr doc;

  Data_Get_Struct(self, xmlDoc, doc);

  return (doc->last != NULL) ? Qtrue : Qfalse;
}
Obtain the next node.
/*
 * Returns the document's `next` node wrapped by rxml_node_wrap, or nil when
 * that pointer is NULL.
 */
static VALUE rxml_document_next_get(VALUE self)
{
  xmlDocPtr doc;

  Data_Get_Struct(self, xmlDoc, doc);

  if (doc->next != NULL)
  {
    return rxml_node_wrap(doc->next);
  }

  return Qnil;
}
Determine whether there is a next node.
/*
 * Returns true when the document's `next` pointer is set, false otherwise.
 */
static VALUE rxml_document_next_q(VALUE self)
{
  xmlDocPtr doc;

  Data_Get_Struct(self, xmlDoc, doc);

  return (doc->next != NULL) ? Qtrue : Qfalse;
}
Obtain this node’s type identifier.
/*
 * Returns the `type` field of the wrapped structure as an integer.
 *
 * NOTE(review): the receiver is read through an xmlNode pointer rather than
 * an xmlDoc pointer; this presumably relies on both structs sharing their
 * leading layout - confirm against the libxml2 headers.
 */
static VALUE rxml_document_node_type(VALUE self)
{
  xmlNodePtr as_node;
  int type_id;

  Data_Get_Struct(self, xmlNode, as_node);
  type_id = as_node->type;

  return INT2NUM(type_id);
}
Returns this node’s type name
# File lib/libxml/document.rb, line 130 130: def node_type_name 131: case node_type 132: when XML::Node::DOCUMENT_NODE 133: 'document_xml' 134: when XML::Node::DOCB_DOCUMENT_NODE 135: 'document_docbook' 136: when XML::Node::HTML_DOCUMENT_NODE 137: 'document_html' 138: else 139: raise(UnknownType, "Unknown node type: %n", node.node_type); 140: end 141: end
Call this routine to speed up XPath computation on static documents. This stamps all the element nodes with the document order.
/*
 * Calls xmlXPathOrderDocElems on the document and returns its result as a
 * Fixnum.
 */
static VALUE rxml_document_order_elements(VALUE self)
{
  xmlDocPtr doc;
  long stamped;

  Data_Get_Struct(self, xmlDoc, doc);
  stamped = xmlXPathOrderDocElems(doc);

  return LONG2FIX(stamped);
}
Obtain the parent node.
/*
 * Returns the document's `parent` node wrapped by rxml_node_wrap, or nil
 * when that pointer is NULL.
 */
static VALUE rxml_document_parent_get(VALUE self)
{
  xmlDocPtr doc;

  Data_Get_Struct(self, xmlDoc, doc);

  if (doc->parent != NULL)
  {
    return rxml_node_wrap(doc->parent);
  }

  return Qnil;
}
Determine whether there is a parent node.
/*
 * Returns true when the document's `parent` pointer is set, false otherwise.
 */
static VALUE rxml_document_parent_q(VALUE self)
{
  xmlDocPtr doc;

  Data_Get_Struct(self, xmlDoc, doc);

  return (doc->parent != NULL) ? Qtrue : Qfalse;
}
Obtain the previous node.
/*
 * Returns the document's `prev` node wrapped by rxml_node_wrap, or nil when
 * that pointer is NULL.
 */
static VALUE rxml_document_prev_get(VALUE self)
{
  xmlDocPtr doc;

  Data_Get_Struct(self, xmlDoc, doc);

  if (doc->prev != NULL)
  {
    return rxml_node_wrap(doc->prev);
  }

  return Qnil;
}
Determine whether there is a previous node.
/*
 * Returns true when the document's `prev` pointer is set, false otherwise.
 */
static VALUE rxml_document_prev_q(VALUE self)
{
  xmlDocPtr doc;

  Data_Get_Struct(self, xmlDoc, doc);

  return (doc->prev != NULL) ? Qtrue : Qfalse;
}
Obtain the root node.
/*
 * Returns the element reported by xmlDocGetRootElement wrapped by
 * rxml_node_wrap, or nil when the document has no root element.
 */
static VALUE rxml_document_root_get(VALUE self)
{
  xmlDocPtr doc;
  xmlNodePtr root_element;

  Data_Get_Struct(self, xmlDoc, doc);
  root_element = xmlDocGetRootElement(doc);

  return (root_element != NULL) ? rxml_node_wrap(root_element) : Qnil;
}
Set the root node.
/*
 * Sets the document's root element.
 *
 * node - must be a kind of XML::Node (cXMLNode); anything else raises
 *        TypeError.
 *
 * Returns the node that was passed in.
 */
static VALUE rxml_document_root_set(VALUE self, VALUE node)
{
  xmlDocPtr xdoc;
  xmlNodePtr xnode;

  if (rb_obj_is_kind_of(node, cXMLNode) == Qfalse)
    rb_raise(rb_eTypeError, "must pass an XML::Node type object");

  Data_Get_Struct(self, xmlDoc, xdoc);
  Data_Get_Struct(node, xmlNode, xnode);

  /* The value returned by xmlDocSetRootElement was only ever stored in an
   * unused local, so it is now discarded explicitly. */
  (void) xmlDocSetRootElement(xdoc, xnode);

  return node;
}
document.save(filename, :indent => true, :encoding => 'UTF-8') → int
Saves a document to a file. You may provide an optional hash table to control how the string is generated. Valid options are:
:indent - Specifies if the string should be indented. The default value is true. Note that indentation is only added if both :indent is true and XML.indent_tree_output is true. If :indent is set to false, then both indentation and line feeds are removed from the result.
:encoding - Specifies the output encoding of the string. It defaults to the original encoding of the document (see encoding). To override the original encoding, use one of the XML::Encoding encoding constants.
/*
 * Saves the document to a file.
 *
 * argv[0] - filename (String, required).
 * argv[1] - optional Hash of options:
 *             :indent   - formatting is on by default; it is turned off only
 *                         when the value is exactly false.
 *             :encoding - integer encoding identifier, resolved with
 *                         xmlGetCharEncodingName; when absent, the
 *                         document's own encoding is used.
 *
 * Raises ArgumentError when :encoding does not resolve to a name, and calls
 * rxml_raise with xmlLastError when xmlSaveFormatFileEnc returns -1.
 * Returns the value returned by xmlSaveFormatFileEnc as an integer.
 */
static VALUE rxml_document_save(int argc, VALUE *argv, VALUE self)
{
  VALUE options = Qnil;
  VALUE filename = Qnil;
  xmlDocPtr xdoc;
  int indent = 1;
  const char *xfilename;
  const char *xencoding;
  int length;

  rb_scan_args(argc, argv, "11", &filename, &options);

  Check_Type(filename, T_STRING);
  xfilename = StringValuePtr(filename);

  Data_Get_Struct(self, xmlDoc, xdoc);

  /* xdoc->encoding is a const xmlChar *; cast explicitly instead of relying
   * on an implicit conversion between differently-signed char pointers. */
  xencoding = (const char*) xdoc->encoding;

  if (!NIL_P(options))
  {
    VALUE rencoding, rindent;

    Check_Type(options, T_HASH);
    rencoding = rb_hash_aref(options, ID2SYM(rb_intern("encoding")));
    rindent = rb_hash_aref(options, ID2SYM(rb_intern("indent")));

    if (rindent == Qfalse)
      indent = 0;

    if (rencoding != Qnil)
    {
      xencoding = xmlGetCharEncodingName((xmlCharEncoding) NUM2INT(rencoding));
      if (!xencoding)
        rb_raise(rb_eArgError, "Unknown encoding value: %d", NUM2INT(rencoding));
    }
  }

  length = xmlSaveFormatFileEnc(xfilename, xdoc, xencoding, indent);

  if (length == -1)
    rxml_raise(&xmlLastError);

  return (INT2NUM(length));
}
Determine whether this is a standalone document.
static VALUE rxml_document_standalone_q(VALUE self)
{
xmlDocPtr xdoc;
Data_Get_Struct(self, xmlDoc, xdoc);
if (xdoc->standalone)
return (Qtrue);
else
return (Qfalse);
}
document.to_s(:indent => true, :encoding => 'UTF-8') → "string"
Converts a document, and all of its children, to a string representation. You may provide an optional hash table to control how the string is generated. Valid options are:
:indent - Specifies if the string should be indented. The default value is true. Note that indentation is only added if both :indent is true and XML.indent_tree_output is true. If :indent is set to false, then both indentation and line feeds are removed from the result.
:encoding - Specifies the output encoding of the string. It defaults to XML::Encoding::UTF_8. To change it, use one of the XML::Encoding encoding constants.
/*
 * Serializes the document to a Ruby string.
 *
 * argv[0] - optional Hash of options:
 *             :indent   - formatting is on by default; it is turned off only
 *                         when the value is exactly false.
 *             :encoding - integer encoding identifier, resolved with
 *                         xmlGetCharEncodingName; "UTF-8" when absent.
 *
 * Raises ArgumentError when :encoding does not resolve to a name.
 * Returns a new string built from the buffer produced by
 * xmlDocDumpFormatMemoryEnc; that buffer is freed before returning.
 */
static VALUE rxml_document_to_s(int argc, VALUE *argv, VALUE self)
{
  VALUE opts = Qnil;
  VALUE text;
  xmlDocPtr doc;
  xmlChar *dump;
  const char *enc_name = "UTF-8";
  int dump_len;
  int format = 1;

  rb_scan_args(argc, argv, "01", &opts);

  if (!NIL_P(opts))
  {
    VALUE enc_opt;
    VALUE indent_opt;

    Check_Type(opts, T_HASH);
    enc_opt = rb_hash_aref(opts, ID2SYM(rb_intern("encoding")));
    indent_opt = rb_hash_aref(opts, ID2SYM(rb_intern("indent")));

    if (indent_opt == Qfalse)
    {
      format = 0;
    }

    if (enc_opt != Qnil)
    {
      enc_name = xmlGetCharEncodingName((xmlCharEncoding)NUM2INT(enc_opt));
      if (enc_name == NULL)
      {
        rb_raise(rb_eArgError, "Unknown encoding value: %d", NUM2INT(enc_opt));
      }
    }
  }

  Data_Get_Struct(self, xmlDoc, doc);
  xmlDocDumpFormatMemoryEnc(doc, &dump, &dump_len, enc_name, format);

  /* Copy into a Ruby string first, then release the libxml buffer. */
  text = rb_str_new((const char*) dump, dump_len);
  xmlFree(dump);

  return text;
}
Obtain this document’s source URL, if any.
/*
 * Returns the document's `URL` field as a new Ruby string, or nil when the
 * field is NULL.
 */
static VALUE rxml_document_url_get(VALUE self)
{
  xmlDocPtr doc;

  Data_Get_Struct(self, xmlDoc, doc);

  if (doc->URL != NULL)
  {
    return rb_str_new2((const char*) doc->URL);
  }

  return Qnil;
}
Validate this document against the specified XML::DTD.
/*
 * Validates the document against a DTD.
 *
 * dtd - the wrapped xmlDtd to validate against.
 *
 * Returns true when xmlValidateDtd reports success. Otherwise rxml_raise is
 * called with xmlLastError (with a false return after it as a fallback).
 */
static VALUE rxml_document_validate_dtd(VALUE self, VALUE dtd)
{
  VALUE error = Qnil;
  /* Zero the whole context. Previously only a handful of fields were
   * assigned on this stack object, leaving every other member with an
   * indeterminate value when handed to xmlValidateDtd. */
  xmlValidCtxt ctxt = {0};
  xmlDocPtr xdoc;
  xmlDtdPtr xdtd;

  Data_Get_Struct(self, xmlDoc, xdoc);
  Data_Get_Struct(dtd, xmlDtd, xdtd);

  ctxt.userData = &error;
  ctxt.error = (xmlValidityErrorFunc) LibXML_validity_error;
  ctxt.warning = (xmlValidityWarningFunc) LibXML_validity_warning;

  /* nodeNr, nodeTab, vstateNr and vstateTab are already 0/NULL from the
   * zero-initialization above. */

  if (xmlValidateDtd(&ctxt, xdoc, xdtd))
  {
    return (Qtrue);
  }
  else
  {
    rxml_raise(&xmlLastError);
    return Qfalse;
  }
}
Validate this document against the specified XML::RelaxNG.
If a block is provided, it is used as an error handler for validation errors. The block is called with two arguments: the message and a flag indicating whether the message is an error (true) or a warning (false).
/*
 * Validates the document against a RelaxNG schema.
 *
 * relaxng - the wrapped xmlRelaxNG to validate against.
 *
 * A validation context is created for the schema, the LibXML_validity_error
 * and LibXML_validity_warning callbacks are installed with NULL user data,
 * and the context is freed once validation has run.
 *
 * Returns true when xmlRelaxNGValidateDoc returns 0. Otherwise rxml_raise is
 * called with xmlLastError (with a false return after it as a fallback).
 */
static VALUE rxml_document_validate_relaxng(VALUE self, VALUE relaxng)
{
  xmlDocPtr doc;
  xmlRelaxNGPtr grammar;
  xmlRelaxNGValidCtxtPtr valid_ctxt;
  int status;

  Data_Get_Struct(self, xmlDoc, doc);
  Data_Get_Struct(relaxng, xmlRelaxNG, grammar);

  valid_ctxt = xmlRelaxNGNewValidCtxt(grammar);
  xmlRelaxNGSetValidErrors(valid_ctxt,
      (xmlRelaxNGValidityErrorFunc) LibXML_validity_error,
      (xmlRelaxNGValidityWarningFunc) LibXML_validity_warning, NULL);

  status = xmlRelaxNGValidateDoc(valid_ctxt, doc);
  xmlRelaxNGFreeValidCtxt(valid_ctxt);

  if (!status)
  {
    return Qtrue;
  }

  rxml_raise(&xmlLastError);
  return Qfalse;
}
Validate this document against the specified XML::Schema.
If a block is provided, it is used as an error handler for validation errors. The block is called with two arguments: the message and a flag indicating whether the message is an error (true) or a warning (false).
/*
 * Validates the document against an XML Schema.
 *
 * schema - the wrapped xmlSchema to validate against.
 *
 * A validation context is created for the schema, the LibXML_validity_error
 * and LibXML_validity_warning callbacks are installed with NULL user data,
 * and the context is freed once validation has run.
 *
 * Returns true when xmlSchemaValidateDoc returns 0. Otherwise rxml_raise is
 * called with xmlLastError (with a false return after it as a fallback).
 */
static VALUE rxml_document_validate_schema(VALUE self, VALUE schema)
{
  xmlDocPtr doc;
  xmlSchemaPtr compiled;
  xmlSchemaValidCtxtPtr valid_ctxt;
  int status;

  Data_Get_Struct(self, xmlDoc, doc);
  Data_Get_Struct(schema, xmlSchema, compiled);

  valid_ctxt = xmlSchemaNewValidCtxt(compiled);
  xmlSchemaSetValidErrors(valid_ctxt,
      (xmlSchemaValidityErrorFunc) LibXML_validity_error,
      (xmlSchemaValidityWarningFunc) LibXML_validity_warning, NULL);

  status = xmlSchemaValidateDoc(valid_ctxt, doc);
  xmlSchemaFreeValidCtxt(valid_ctxt);

  if (!status)
  {
    return Qtrue;
  }

  rxml_raise(&xmlLastError);
  return Qfalse;
}
Obtain the XML version specified by this document.
/*
 * Returns the document's `version` field as a new Ruby string, or nil when
 * the field is NULL.
 */
static VALUE rxml_document_version_get(VALUE self)
{
  xmlDocPtr doc;

  Data_Get_Struct(self, xmlDoc, doc);

  if (doc->version != NULL)
  {
    return rb_str_new2((const char*) doc->version);
  }

  return Qnil;
}
Determine whether this is an XHTML document.
/*
 * Returns true when the document has an internal subset and xmlIsXHTML
 * returns a positive value for that subset's SystemID and ExternalID;
 * false otherwise.
 */
static VALUE rxml_document_xhtml_q(VALUE self)
{
  xmlDocPtr doc;
  xmlDtdPtr subset;

  Data_Get_Struct(self, xmlDoc, doc);
  subset = xmlGetIntSubset(doc);

  /* Without an internal subset there are no identifiers to inspect. */
  if (subset == NULL)
  {
    return Qfalse;
  }

  return (xmlIsXHTML(subset->SystemID, subset->ExternalID) > 0) ? Qtrue : Qfalse;
}
Process xinclude directives in this document.
/*
 * Processes XInclude directives in the document.
 *
 * With LIBXML_XINCLUDE_ENABLED, returns the non-negative result of
 * xmlXIncludeProcess as an integer; a negative result leads to rxml_raise
 * with xmlLastError (with a nil return after it as a fallback).
 * Without it, the function only emits a warning and returns false.
 */
static VALUE rxml_document_xinclude(VALUE self)
{
#ifdef LIBXML_XINCLUDE_ENABLED
  xmlDocPtr doc;
  int processed;

  Data_Get_Struct(self, xmlDoc, doc);
  processed = xmlXIncludeProcess(doc);

  if (processed < 0)
  {
    rxml_raise(&xmlLastError);
    return Qnil;
  }

  return INT2NUM(processed);
#else
  rb_warn(
      "libxml was compiled without XInclude support.  Please recompile libxml and ruby-libxml");
  return Qfalse;
#endif
}