Class LibXML::XML::Document

  1. ext/libxml/libxml.c
  2. lib/libxml/document.rb
  3. show all
Parent: Object

The XML::Document class provides a tree based API for working with xml documents. You may directly create a document and manipulate it, or create a document from a data source by using an XML::Parser object.

To read a document from a file:

doc = XML::Document.file('my_file')

To use a parser to read a document:

parser = XML::Parser.file('my_file')
doc = parser.parse

To create a document from scratch:

doc = XML::Document.new()
doc.root = XML::Node.new('root_node')
doc.root << XML::Node.new('elem1')
doc.save(filename, :indent => true, :encoding => XML::Encoding::UTF_8)

To write a document to a file:

doc = XML::Document.new()
doc.root = XML::Node.new('root_node')
root = doc.root

root << elem1 = XML::Node.new('elem1')
elem1['attr1'] = 'val1'
elem1['attr2'] = 'val2'

root << elem2 = XML::Node.new('elem2')
elem2['attr1'] = 'val1'
elem2['attr2'] = 'val2'

root << elem3 = XML::Node.new('elem3')
elem3 << elem4 = XML::Node.new('elem4')
elem3 << elem5 = XML::Node.new('elem5')

elem5 << elem6 = XML::Node.new('elem6')
elem6 << 'Content for element 6'

elem3['attr'] = 'baz'

doc.save(filename, :indent => true, :encoding => XML::Encoding::UTF_8)

Public class methods

XML::Document.document(document) → XML::Document

Creates a new document based on the specified document.

Parameters:

document - A preparsed document.
[show source]
    # File lib/libxml/document.rb, line 12
12:       def self.document(value)        # value is handed unchanged to Parser.document
13:         Parser.document(value).parse  # build a parser for it and return the result of its parse call
14:       end
XML::Document.file(path) → XML::Document
XML::Document.file(path, :encoding => XML::Encoding::UTF_8,
:options => XML::Parser::Options::NOENT) → XML::Document

Creates a new document from the specified file or uri.

You may provide an optional hash table to control how the parsing is performed. Valid options are:

encoding - The document encoding, defaults to nil. Valid values
           are the encoding constants defined on XML::Encoding.
options - Parser options.  Valid values are the constants defined on
          XML::Parser::Options.  Multiple options can be combined
          by using Bitwise OR (|).
[show source]
    # File lib/libxml/document.rb, line 31
31:       def self.file(value, options = {})    # options defaults to an empty hash
32:         Parser.file(value, options).parse   # both arguments are forwarded to Parser.file; returns its parse result
33:       end
XML::Document.io(io) → XML::Document
XML::Document.io(io, :encoding => XML::Encoding::UTF_8,
:options => XML::Parser::Options::NOENT,
:base_uri => "http://libxml.org") → XML::Document

Creates a new document from the specified io object.

Parameters:

io - io object that contains the xml to parse
base_uri - The base url for the parsed document.
encoding - The document encoding, defaults to nil. Valid values
           are the encoding constants defined on XML::Encoding.
options - Parser options.  Valid values are the constants defined on
          XML::Parser::Options.  Multiple options can be combined
          by using Bitwise OR (|).
[show source]
    # File lib/libxml/document.rb, line 52
52:       def self.io(value, options = {})    # options defaults to an empty hash
53:         Parser.io(value, options).parse   # both arguments are forwarded to Parser.io; returns its parse result
54:       end
XML::Document.initialize(xml_version = "1.0") → document

Initializes a new XML::Document, optionally specifying the XML version.

[show source]
/*
 * Initializes a new XML::Document, optionally specifying the XML version.
 *
 * argc/argv - zero or one Ruby argument; the single argument is the XML
 *             version and must be a String.  With no argument "1.0" is used.
 * self      - the Ruby object whose data pointer receives the new document.
 *
 * Raises ArgumentError for more than one argument and NoMemoryError when
 * xmlNewDoc fails.  Check_Type rejects a non-String version.
 * Returns self.
 */
static VALUE rxml_document_initialize(int argc, VALUE *argv, VALUE self)
{
  xmlDocPtr xdoc;
  VALUE xmlver = Qnil;

  switch (argc)
  {
  case 0:
    xmlver = rb_str_new2("1.0");
    break;
  case 1:
    rb_scan_args(argc, argv, "01", &xmlver);
    break;
  default:
    rb_raise(rb_eArgError, "wrong number of arguments (need 0 or 1)");
  }

  Check_Type(xmlver, T_STRING);
  xdoc = xmlNewDoc((xmlChar*) StringValuePtr(xmlver));

  /* xmlNewDoc returns NULL when it cannot allocate the document; the
   * assignments below would otherwise dereference a NULL pointer. */
  if (xdoc == NULL)
    rb_raise(rb_eNoMemError, "could not allocate a new XML document");

  /* Link the libxml document and the Ruby object to each other. */
  xdoc->_private = (void*) self;
  DATA_PTR(self) = xdoc;

  return self;
}
XML::Document.string(string)
XML::Document.string(string, :encoding => XML::Encoding::UTF_8,
:options => XML::Parser::Options::NOENT,
:base_uri => "http://libxml.org") → XML::Document

Creates a new document from the specified string.

You may provide an optional hash table to control how the parsing is performed. Valid options are:

base_uri - The base url for the parsed document.
encoding - The document encoding, defaults to nil. Valid values
           are the encoding constants defined on XML::Encoding.
options - Parser options.  Valid values are the constants defined on
          XML::Parser::Options.  Multiple options can be combined
          by using Bitwise OR (|).
[show source]
    # File lib/libxml/document.rb, line 73
73:       def self.string(value, options = {})    # options defaults to an empty hash
74:         Parser.string(value, options).parse   # both arguments are forwarded to Parser.string; returns its parse result
75:       end

Public instance methods

document.child → node

Get this document’s child node.

[show source]
/*
 * Returns the node held in the document's `children` field, wrapped with
 * rxml_node_wrap, or nil when that field is NULL.
 */
static VALUE rxml_document_child_get(VALUE self)
{
  xmlDocPtr doc_ptr;

  Data_Get_Struct(self, xmlDoc, doc_ptr);

  return (doc_ptr->children != NULL) ? rxml_node_wrap(doc_ptr->children) : Qnil;
}
document.child? → (true|false)

Determine whether this document has a child node.

[show source]
/*
 * Returns true when the document's `children` field is non-NULL,
 * false otherwise.
 */
static VALUE rxml_document_child_q(VALUE self)
{
  xmlDocPtr doc_ptr;

  Data_Get_Struct(self, xmlDoc, doc_ptr);

  return (doc_ptr->children != NULL) ? Qtrue : Qfalse;
}
document.compression → num

Obtain this document’s compression mode identifier.

[show source]
/*
 * Returns the value of xmlGetDocCompressMode for this document as a Ruby
 * number, or nil when that call reports -1.
 *
 * Without HAVE_ZLIB_H the function only emits a warning and returns false.
 */
static VALUE rxml_document_compression_get(VALUE self)
{
#ifdef HAVE_ZLIB_H
  xmlDocPtr doc_ptr;
  int mode;

  Data_Get_Struct(self, xmlDoc, doc_ptr);
  mode = xmlGetDocCompressMode(doc_ptr);

  return (mode == -1) ? Qnil : INT2NUM(mode);
#else
  rb_warn("libxml not compiled with zlib support");
  return (Qfalse);
#endif
}
document.compression = num

Set this document’s compression mode.

[show source]
/*
 * Sets the document's compression mode from `num` (must be a Fixnum) and
 * returns the mode read back through xmlGetDocCompressMode, or nil when the
 * read-back value is -1 or the document pointer is NULL.
 *
 * Without HAVE_ZLIB_H the function only emits a warning and returns false.
 */
static VALUE rxml_document_compression_set(VALUE self, VALUE num)
{
#ifdef HAVE_ZLIB_H
  xmlDocPtr doc_ptr;
  int mode;

  Check_Type(num, T_FIXNUM);
  Data_Get_Struct(self, xmlDoc, doc_ptr);

  /* Nothing to configure without an underlying document. */
  if (doc_ptr == NULL)
    return Qnil;

  xmlSetDocCompressMode(doc_ptr, NUM2INT(num));

  /* Report the mode libxml actually stored, not the requested one. */
  mode = xmlGetDocCompressMode(doc_ptr);
  return (mode == -1) ? Qnil : INT2NUM(mode);
#else
  rb_warn("libxml compiled without zlib support");
  return (Qfalse);
#endif
}
document.compression? → (true|false)

Determine whether this document is compressed.

[show source]
/*
 * Returns true when the document's `compression` field is anything other
 * than -1, false otherwise.
 *
 * Without HAVE_ZLIB_H the function only emits a warning and returns false.
 */
static VALUE rxml_document_compression_q(VALUE self)
{
#ifdef HAVE_ZLIB_H
  xmlDocPtr doc_ptr;

  Data_Get_Struct(self, xmlDoc, doc_ptr);

  return (doc_ptr->compression == -1) ? Qfalse : Qtrue;
#else
  rb_warn("libxml compiled without zlib support");
  return (Qfalse);
#endif
}
document.context(namespaces=nil) → XPath::Context

Returns a new XML::XPath::Context for the document.

Namespaces is an optional array of XML::NS objects

[show source]
    # File lib/libxml/document.rb, line 83
83:       def context(nslist = nil)
84:         context = XPath::Context.new(self)                # new XPath context created from this document
85:         context.node = self.root                          # the context's node is set to the document root
86:         context.register_namespaces_from_node(self.root)  # register namespaces using the root node
87:         context.register_namespaces(nslist) if nslist     # extra namespaces only when nslist was supplied
88:         context                                           # the configured context is the return value
89:       end
document.debug → true|false

Print libxml debugging information to stdout. Requires that libxml was compiled with debugging enabled.

[show source]
/*
 * Dumps libxml debugging information for this document by calling
 * xmlDebugDumpDocument with a NULL output argument, then returns true.
 *
 * When libxml was built without LIBXML_DEBUG_ENABLED the function emits a
 * warning and returns false instead.
 */
static VALUE rxml_document_debug(VALUE self)
{
#ifdef LIBXML_DEBUG_ENABLED
  xmlDocPtr xdoc;
  Data_Get_Struct(self, xmlDoc, xdoc);
  xmlDebugDumpDocument(NULL, xdoc);
  return Qtrue;
#else
  /* The terminating semicolon was missing here, which broke compilation of
   * this branch whenever debugging support was unavailable. */
  rb_warn("libxml was compiled without debugging support.");
  return Qfalse;
#endif
}
document.encoding → XML::Encoding::UTF_8

Obtain the encoding specified by this document.

[show source]
/*
 * Returns, as a Ruby number, the result of passing the document's
 * `encoding` string to xmlParseCharEncoding.
 */
static VALUE rxml_document_encoding_get(VALUE self)
{
  xmlDocPtr doc_ptr;

  Data_Get_Struct(self, xmlDoc, doc_ptr);

  return INT2NUM(xmlParseCharEncoding((const char*) doc_ptr->encoding));
}
document.encoding = XML::Encoding::UTF_8

Set the encoding for this document.

[show source]
/*
 * Replaces the document's `encoding` string with a copy of the name that
 * xmlGetCharEncodingName reports for the numeric `encoding` argument.
 * Any previous encoding string is released first.  Returns self.
 */
static VALUE rxml_document_encoding_set(VALUE self, VALUE encoding)
{
  xmlDocPtr doc_ptr;
  const char *enc_name;

  /* Convert the argument before touching the document, as the original did. */
  enc_name = xmlGetCharEncodingName((xmlCharEncoding) NUM2INT(encoding));

  Data_Get_Struct(self, xmlDoc, doc_ptr);

  if (doc_ptr->encoding != NULL)
  {
    xmlFree((xmlChar *) doc_ptr->encoding);
  }
  doc_ptr->encoding = xmlStrdup((xmlChar *) enc_name);

  return self;
}
find (xpath, nslist = nil)

Return the nodes matching the specified xpath expression, optionally using the specified namespace. For more information about working with namespaces, please refer to the XML::XPath documentation.

Parameters:

  • xpath - The xpath expression as a string
  • namespaces - An optional list of namespaces (see XML::XPath for information).
  • Returns - XML::XPath::Object
document.find('/foo', 'xlink:http://www.w3.org/1999/xlink')

IMPORTANT - The returned XML::Node::Set must be freed before its associated document. In a running Ruby program this will happen automatically via Ruby’s mark and sweep garbage collector. However, if the program exits, Ruby does not guarantee the order in which objects are freed (see blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-core/17700). As a result, the associated document may be freed before the node list, which will cause a segmentation fault. To avoid this, use the following (non-ruby like) coding style:

nodes = doc.find('/header')
nodes.each do |node|
  ... do stuff ...
end

nodes = nil
GC.start

[show source]
     # File lib/libxml/document.rb, line 118
118:       def find(xpath, nslist = nil)
119:         self.context(nslist).find(xpath)   # build a context (passing nslist along) and run the query on it
120:       end
find_first (xpath, nslist = nil)

Return the first node matching the specified xpath expression. For more information, please refer to the documentation for XML::Document#find.

[show source]
     # File lib/libxml/document.rb, line 125
125:       def find_first(xpath, nslist = nil)
126:         find(xpath, nslist).first   # delegate to find and return the first element of its result
127:       end
document.import(node) → XML::Node

Creates a copy of the node that can be inserted into the current document.

[show source]
/*
 * Copies `node` for use in this document via xmlDocCopyNode (third argument
 * 1) and returns the wrapped copy.  When the copy is NULL, xmlLastError is
 * handed to rxml_raise.
 */
static VALUE rxml_document_import(VALUE self, VALUE node)
{
  xmlDocPtr target_doc;
  xmlNodePtr source_node;
  xmlNodePtr copy;

  Data_Get_Struct(self, xmlDoc, target_doc);
  Data_Get_Struct(node, xmlNode, source_node);

  copy = xmlDocCopyNode(source_node, target_doc, 1);
  if (!copy)
  {
    rxml_raise(&xmlLastError);
  }

  return rxml_node_wrap(copy);
}
document.last → node

Obtain the last node.

[show source]
/*
 * Returns the node held in the document's `last` field, wrapped with
 * rxml_node_wrap, or nil when that field is NULL.
 */
static VALUE rxml_document_last_get(VALUE self)
{
  xmlDocPtr doc_ptr;

  Data_Get_Struct(self, xmlDoc, doc_ptr);

  return (doc_ptr->last != NULL) ? rxml_node_wrap(doc_ptr->last) : Qnil;
}
document.last? → (true|false)

Determine whether there is a last node.

[show source]
/*
 * Returns true when the document's `last` field is non-NULL, false otherwise.
 */
static VALUE rxml_document_last_q(VALUE self)
{
  xmlDocPtr doc_ptr;

  Data_Get_Struct(self, xmlDoc, doc_ptr);

  return (doc_ptr->last != NULL) ? Qtrue : Qfalse;
}
document.next → node

Obtain the next node.

[show source]
/*
 * Returns the node held in the document's `next` field, wrapped with
 * rxml_node_wrap, or nil when that field is NULL.
 */
static VALUE rxml_document_next_get(VALUE self)
{
  xmlDocPtr doc_ptr;

  Data_Get_Struct(self, xmlDoc, doc_ptr);

  return (doc_ptr->next != NULL) ? rxml_node_wrap(doc_ptr->next) : Qnil;
}
document.next? → (true|false)

Determine whether there is a next node.

[show source]
/*
 * Returns true when the document's `next` field is non-NULL, false otherwise.
 */
static VALUE rxml_document_next_q(VALUE self)
{
  xmlDocPtr doc_ptr;

  Data_Get_Struct(self, xmlDoc, doc_ptr);

  return (doc_ptr->next != NULL) ? Qtrue : Qfalse;
}
document.node_type → num

Obtain this node’s type identifier.

[show source]
/*
 * Returns the document's `type` field as a Ruby number.
 *
 * self wraps an xmlDoc, so the data pointer is read as an xmlDoc (as every
 * other document method here does) rather than being reinterpreted as an
 * xmlNode.
 */
static VALUE rxml_document_node_type(VALUE self)
{
  xmlDocPtr xdoc;
  Data_Get_Struct(self, xmlDoc, xdoc);
  return (INT2NUM(xdoc->type));
}
node_type_name ()

Returns this node’s type name

[show source]
     # File lib/libxml/document.rb, line 130
130:       def node_type_name
131:         case node_type
132:           when XML::Node::DOCUMENT_NODE
133:             'document_xml'
134:           when XML::Node::DOCB_DOCUMENT_NODE
135:             'document_docbook'
136:           when XML::Node::HTML_DOCUMENT_NODE
137:             'document_html'
138:           else
139:             raise(UnknownType, "Unknown node type: %n", node.node_type);
140:         end
141:       end
document.order_elements!

Call this routine to speed up XPath computation on static documents. This stamps all the element nodes with the document order.

[show source]
/*
 * Calls xmlXPathOrderDocElems on this document and returns its result as a
 * Ruby Fixnum.
 */
static VALUE rxml_document_order_elements(VALUE self)
{
  xmlDocPtr doc_ptr;
  long result;

  Data_Get_Struct(self, xmlDoc, doc_ptr);
  result = xmlXPathOrderDocElems(doc_ptr);

  return LONG2FIX(result);
}
document.parent → node

Obtain the parent node.

[show source]
/*
 * Returns the node held in the document's `parent` field, wrapped with
 * rxml_node_wrap, or nil when that field is NULL.
 */
static VALUE rxml_document_parent_get(VALUE self)
{
  xmlDocPtr doc_ptr;

  Data_Get_Struct(self, xmlDoc, doc_ptr);

  return (doc_ptr->parent != NULL) ? rxml_node_wrap(doc_ptr->parent) : Qnil;
}
document.parent? → (true|false)

Determine whether there is a parent node.

[show source]
/*
 * Returns true when the document's `parent` field is non-NULL,
 * false otherwise.
 */
static VALUE rxml_document_parent_q(VALUE self)
{
  xmlDocPtr doc_ptr;

  Data_Get_Struct(self, xmlDoc, doc_ptr);

  return (doc_ptr->parent != NULL) ? Qtrue : Qfalse;
}
document.prev → node

Obtain the previous node.

[show source]
/*
 * Returns the node held in the document's `prev` field, wrapped with
 * rxml_node_wrap, or nil when that field is NULL.
 */
static VALUE rxml_document_prev_get(VALUE self)
{
  xmlDocPtr doc_ptr;

  Data_Get_Struct(self, xmlDoc, doc_ptr);

  return (doc_ptr->prev != NULL) ? rxml_node_wrap(doc_ptr->prev) : Qnil;
}
document.prev? → (true|false)

Determine whether there is a previous node.

[show source]
/*
 * Returns true when the document's `prev` field is non-NULL, false otherwise.
 */
static VALUE rxml_document_prev_q(VALUE self)
{
  xmlDocPtr doc_ptr;

  Data_Get_Struct(self, xmlDoc, doc_ptr);

  return (doc_ptr->prev != NULL) ? Qtrue : Qfalse;
}
document.root → node

Obtain the root node.

[show source]
/*
 * Returns the element reported by xmlDocGetRootElement, wrapped with
 * rxml_node_wrap, or nil when there is none.
 */
static VALUE rxml_document_root_get(VALUE self)
{
  xmlDocPtr doc_ptr;
  xmlNodePtr root_elem;

  Data_Get_Struct(self, xmlDoc, doc_ptr);
  root_elem = xmlDocGetRootElement(doc_ptr);

  return (root_elem != NULL) ? rxml_node_wrap(root_elem) : Qnil;
}
document.root = node

Set the root node.

[show source]
/*
 * Makes `node` the document's root element via xmlDocSetRootElement.
 *
 * node - must be a kind of XML::Node (cXMLNode); otherwise TypeError is raised.
 *
 * Returns the node that was passed in.
 */
static VALUE rxml_document_root_set(VALUE self, VALUE node)
{
  xmlDocPtr xdoc;
  xmlNodePtr xnode;

  if (rb_obj_is_kind_of(node, cXMLNode) == Qfalse)
    rb_raise(rb_eTypeError, "must pass an XML::Node type object");

  Data_Get_Struct(self, xmlDoc, xdoc);
  Data_Get_Struct(node, xmlNode, xnode);

  /* The return value of xmlDocSetRootElement was previously stored in a
   * local that was never read, so it is no longer kept. */
  xmlDocSetRootElement(xdoc, xnode);
  return node;
}
document.save(filename) → int
document.save(filename, :indent => true, :encoding => XML::Encoding::UTF_8) → int

Saves a document to a file. You may provide an optional hash table to control how the string is generated. Valid options are:

:indent - Specifies if the string should be indented. The default value is true. Note that indentation is only added if both :indent is true and XML.indent_tree_output is true. If :indent is set to false, then both indentation and line feeds are removed from the result.

:encoding - Specifies the output encoding of the string. It defaults to the original encoding of the document (see encoding). To override the original encoding, use one of the XML::Encoding encoding constants.

[show source]
/*
 * Writes the document to a file with xmlSaveFormatFileEnc.
 *
 * argv[0] - the file name; must be a String.
 * argv[1] - optional Hash of options:
 *             :indent   - indentation is switched off only when this is
 *                         exactly false; otherwise it stays on.
 *             :encoding - numeric encoding value, converted with
 *                         xmlGetCharEncodingName.  When absent, the
 *                         document's own encoding string is used.
 *
 * Raises ArgumentError when the :encoding value has no known name, and hands
 * xmlLastError to rxml_raise when the save call returns -1.
 * Returns the value reported by xmlSaveFormatFileEnc as a Ruby number.
 */
static VALUE rxml_document_save(int argc, VALUE *argv, VALUE self)
{
  VALUE options = Qnil;
  VALUE filename = Qnil;
  xmlDocPtr xdoc;
  int indent = 1;
  const char *xfilename;
  const char *xencoding;
  int length;

  rb_scan_args(argc, argv, "11", &filename, &options);

  Check_Type(filename, T_STRING);
  xfilename = StringValuePtr(filename);

  Data_Get_Struct(self, xmlDoc, xdoc);
  /* xdoc->encoding is a const xmlChar*; cast explicitly, as
   * rxml_document_encoding_get does, instead of relying on an implicit
   * conversion between differently-signed character pointers. */
  xencoding = (const char*) xdoc->encoding;

  if (!NIL_P(options))
  {
    VALUE rencoding, rindent;
    Check_Type(options, T_HASH);
    rencoding = rb_hash_aref(options, ID2SYM(rb_intern("encoding")));
    rindent = rb_hash_aref(options, ID2SYM(rb_intern("indent")));

    if (rindent == Qfalse)
      indent = 0;

    if (rencoding != Qnil)
    {
      xencoding = xmlGetCharEncodingName((xmlCharEncoding)NUM2INT(rencoding));
      if (!xencoding)
        rb_raise(rb_eArgError, "Unknown encoding value: %d", NUM2INT(rencoding));
    }
  }

  length = xmlSaveFormatFileEnc(xfilename, xdoc, xencoding, indent);

  if (length == -1)
    rxml_raise(&xmlLastError);

  return (INT2NUM(length));
}
document.standalone? → (true|false)

Determine whether this is a standalone document.

[show source]
/*
 * Returns true when the document's `standalone` field is non-zero,
 * false otherwise.
 */
static VALUE rxml_document_standalone_q(VALUE self)
{
  xmlDocPtr doc_ptr;

  Data_Get_Struct(self, xmlDoc, doc_ptr);

  return doc_ptr->standalone ? Qtrue : Qfalse;
}
document.to_s → "string"
document.to_s(:indent => true, :encoding => XML::Encoding::UTF_8) → "string"

Converts a document, and all of its children, to a string representation. You may provide an optional hash table to control how the string is generated. Valid options are:

:indent - Specifies if the string should be indented. The default value is true. Note that indentation is only added if both :indent is true and XML.indent_tree_output is true. If :indent is set to false, then both indentation and line feeds are removed from the result.

:encoding - Specifies the output encoding of the string. It defaults to XML::Encoding::UTF_8. To change it, use one of the XML::Encoding encoding constants.

[show source]
/*
 * Serialises the document into a Ruby string with xmlDocDumpFormatMemoryEnc.
 *
 * argv[0] - optional Hash of options:
 *             :indent   - indentation is switched off only when this is
 *                         exactly false.
 *             :encoding - numeric encoding value, converted with
 *                         xmlGetCharEncodingName; "UTF-8" is used when absent.
 *
 * Raises ArgumentError when the :encoding value has no known name.
 */
static VALUE rxml_document_to_s(int argc, VALUE *argv, VALUE self)
{
  VALUE opts = Qnil;
  VALUE str;
  xmlDocPtr doc_ptr;
  xmlChar *dump;
  const char *enc_name = "UTF-8";
  int dump_len;
  int do_indent = 1;

  rb_scan_args(argc, argv, "01", &opts);

  if (!NIL_P(opts))
  {
    VALUE enc_opt;
    VALUE indent_opt;

    Check_Type(opts, T_HASH);
    enc_opt = rb_hash_aref(opts, ID2SYM(rb_intern("encoding")));
    indent_opt = rb_hash_aref(opts, ID2SYM(rb_intern("indent")));

    /* Only an explicit false disables indentation. */
    if (indent_opt == Qfalse)
    {
      do_indent = 0;
    }

    if (!NIL_P(enc_opt))
    {
      enc_name = xmlGetCharEncodingName((xmlCharEncoding) NUM2INT(enc_opt));
      if (enc_name == NULL)
      {
        rb_raise(rb_eArgError, "Unknown encoding value: %d", NUM2INT(enc_opt));
      }
    }
  }

  Data_Get_Struct(self, xmlDoc, doc_ptr);
  xmlDocDumpFormatMemoryEnc(doc_ptr, &dump, &dump_len, enc_name, do_indent);

  /* Copy into a Ruby string, then release the libxml buffer. */
  str = rb_str_new((const char*) dump, dump_len);
  xmlFree(dump);

  return str;
}
document.url → "url"

Obtain this document’s source URL, if any.

[show source]
/*
 * Returns the document's `URL` field as a new Ruby string, or nil when the
 * field is NULL.
 */
static VALUE rxml_document_url_get(VALUE self)
{
  xmlDocPtr doc_ptr;

  Data_Get_Struct(self, xmlDoc, doc_ptr);

  return (doc_ptr->URL != NULL) ? rb_str_new2((const char*) doc_ptr->URL) : Qnil;
}
document.validate(dtd) → (true|false)

Validate this document against the specified XML::DTD.

[show source]
/*
 * Validates this document against `dtd` with xmlValidateDtd.
 *
 * Returns true when xmlValidateDtd reports success; otherwise xmlLastError
 * is handed to rxml_raise.
 */
static VALUE rxml_document_validate_dtd(VALUE self, VALUE dtd)
{
  VALUE error = Qnil;
  /* Zero the whole context: it lives on the stack and previously only a
   * handful of its fields were assigned, leaving the rest indeterminate
   * when passed to xmlValidateDtd. */
  xmlValidCtxt ctxt = { 0 };
  xmlDocPtr xdoc;
  xmlDtdPtr xdtd;

  Data_Get_Struct(self, xmlDoc, xdoc);
  Data_Get_Struct(dtd, xmlDtd, xdtd);

  ctxt.userData = &error;
  ctxt.error = (xmlValidityErrorFunc) LibXML_validity_error;
  ctxt.warning = (xmlValidityWarningFunc) LibXML_validity_warning;

  if (xmlValidateDtd(&ctxt, xdoc, xdtd))
  {
    return (Qtrue);
  }
  else
  {
    rxml_raise(&xmlLastError);
    return Qfalse;
  }
}
document.validate_relaxng(relaxng) → (true|false)

Validate this document against the specified XML::RelaxNG.

If a block is provided it is used as an error handler for validation errors. The block is called with two arguments: the message and a flag indicating whether the message is an error (true) or a warning (false).

[show source]
/*
 * Validates this document against `relaxng` with xmlRelaxNGValidateDoc.
 *
 * A validation context is created from the schema, given the
 * LibXML_validity_error / LibXML_validity_warning callbacks (with NULL user
 * data), and freed again before the result is examined.
 *
 * Returns true when the validation call returns 0; otherwise xmlLastError is
 * handed to rxml_raise.
 */
static VALUE rxml_document_validate_relaxng(VALUE self, VALUE relaxng)
{
  xmlDocPtr doc_ptr;
  xmlRelaxNGPtr rng;
  xmlRelaxNGValidCtxtPtr ctx;
  int status;

  Data_Get_Struct(self, xmlDoc, doc_ptr);
  Data_Get_Struct(relaxng, xmlRelaxNG, rng);

  ctx = xmlRelaxNGNewValidCtxt(rng);
  xmlRelaxNGSetValidErrors(ctx,
      (xmlRelaxNGValidityErrorFunc) LibXML_validity_error,
      (xmlRelaxNGValidityWarningFunc) LibXML_validity_warning, NULL);

  status = xmlRelaxNGValidateDoc(ctx, doc_ptr);
  xmlRelaxNGFreeValidCtxt(ctx);

  if (status == 0)
    return Qtrue;

  rxml_raise(&xmlLastError);
  return Qfalse;
}
document.validate_schema(schema) → (true|false)

Validate this document against the specified XML::Schema.

If a block is provided it is used as an error handler for validation errors. The block is called with two arguments: the message and a flag indicating whether the message is an error (true) or a warning (false).

[show source]
/*
 * Validates this document against `schema` with xmlSchemaValidateDoc.
 *
 * A validation context is created from the schema, given the
 * LibXML_validity_error / LibXML_validity_warning callbacks (with NULL user
 * data), and freed again before the result is examined.
 *
 * Returns true when the validation call returns 0; otherwise xmlLastError is
 * handed to rxml_raise.
 */
static VALUE rxml_document_validate_schema(VALUE self, VALUE schema)
{
  xmlDocPtr doc_ptr;
  xmlSchemaPtr xsd;
  xmlSchemaValidCtxtPtr ctx;
  int status;

  Data_Get_Struct(self, xmlDoc, doc_ptr);
  Data_Get_Struct(schema, xmlSchema, xsd);

  ctx = xmlSchemaNewValidCtxt(xsd);
  xmlSchemaSetValidErrors(ctx,
      (xmlSchemaValidityErrorFunc) LibXML_validity_error,
      (xmlSchemaValidityWarningFunc) LibXML_validity_warning, NULL);

  status = xmlSchemaValidateDoc(ctx, doc_ptr);
  xmlSchemaFreeValidCtxt(ctx);

  if (status == 0)
    return Qtrue;

  rxml_raise(&xmlLastError);
  return Qfalse;
}
document.version → "version"

Obtain the XML version specified by this document.

[show source]
/*
 * Returns the document's `version` field as a new Ruby string, or nil when
 * the field is NULL.
 */
static VALUE rxml_document_version_get(VALUE self)
{
  xmlDocPtr doc_ptr;

  Data_Get_Struct(self, xmlDoc, doc_ptr);

  return (doc_ptr->version != NULL) ? rb_str_new2((const char*) doc_ptr->version) : Qnil;
}
document.xhtml? → (true|false)

Determine whether this is an XHTML document.

[show source]
/*
 * Returns true when the document has an internal DTD subset whose SystemID
 * and ExternalID make xmlIsXHTML return a positive value; false otherwise.
 */
static VALUE rxml_document_xhtml_q(VALUE self)
{
  xmlDocPtr doc_ptr;
  xmlDtdPtr subset;

  Data_Get_Struct(self, xmlDoc, doc_ptr);
  subset = xmlGetIntSubset(doc_ptr);

  /* No internal subset means there is nothing to test. */
  if (subset == NULL)
    return Qfalse;

  return (xmlIsXHTML(subset->SystemID, subset->ExternalID) > 0) ? Qtrue : Qfalse;
}
document.xinclude → num

Process xinclude directives in this document.

[show source]
/*
 * Runs xmlXIncludeProcess on this document and returns its non-negative
 * result as a Ruby number.  A negative result hands xmlLastError to
 * rxml_raise.
 *
 * Without LIBXML_XINCLUDE_ENABLED the function only emits a warning and
 * returns false.
 */
static VALUE rxml_document_xinclude(VALUE self)
{
#ifdef LIBXML_XINCLUDE_ENABLED
  xmlDocPtr doc_ptr;
  int count;

  Data_Get_Struct(self, xmlDoc, doc_ptr);
  count = xmlXIncludeProcess(doc_ptr);

  if (count < 0)
  {
    rxml_raise(&xmlLastError);
    return Qnil;
  }

  return INT2NUM(count);
#else
  rb_warn(
      "libxml was compiled without XInclude support.  Please recompile libxml and ruby-libxml");
  return (Qfalse);
#endif
}