# XML Parsing
# Parsing with XML::Twig
#!/usr/bin/env perl
use strict;
use warnings 'all';
use XML::Twig;
my $twig = XML::Twig->parse( \*DATA );
#we can use the 'root' method to find the root of the XML.
my $root = $twig->root;
#first_child finds the first child element matching a value.
my $title = $root->first_child('title');
#text reads the text of the element.
my $title_text = $title->text;
print "Title is: ", $title_text, "\n";
#The above could be combined:
print $twig ->root->first_child_text('title'), "\n";
## You can use the 'children' method to iterate multiple items:
my $list = $twig->root->first_child('list');
#children can optionally take an element 'tag' - otherwise it just returns all of them.
foreach my $element ( $list->children ) {
#the 'att' method reads an attribute
print "Element with ID: ", $element->att('id') // 'none here', " is ", $element->text,
"\n";
}
#And if we need to do something more complicated, we an use 'xpath'.
#get_xpath or findnodes do the same thing:
#return a list of matches, or if you specify a second numeric argument, just that numbered match.
#xpath syntax is fairly extensive, but in this one - we search:
# anywhere in the tree: //
#nodes called 'item'
#with an id attribute [@id]
#and with that id attribute equal to "1000".
#by specifying '0' we say 'return just the first match'.
print "Item 1000 is: ", $twig->get_xpath( '//item[@id="1000"]', 0 )->text, "\n";
#this combines quite well with `map` to e.g. do the same thing on multiple items
print "All IDs:\n", join ( "\n", map { $_ -> att('id') } $twig -> get_xpath('//item'));
#note how this also finds the item under 'summary', because of //
__DATA__
<?xml version="1.0" encoding="utf-8"?>
<root>
<title>some sample xml</title>
<first key="value" key2="value2">
<second>Some text</second>
</first>
<third>
<fourth key3="value">Text here too</fourth>
</third>
<list>
<item id="1">Item1</item>
<item id="2">Item2</item>
<item id="3">Item3</item>
<item id="66">Item66</item>
<item id="88">Item88</item>
<item id="100">Item100</item>
<item id="1000">Item1000</item>
<notanitem>Not an item at all really.</notanitem>
</list>
<summary>
<item id="no_id">Test</item>
</summary>
</root>
# Consuming XML with XML::Rabbit
With XML::Rabbit
(opens new window) it is possible to consume XML files easily. You define in a declarative way and with an XPath syntax what you are looking for in the XML and XML::Rabbit
will return objects according to the given definition.
Definition:
package Bookstore;
use XML::Rabbit::Root;
has_xpath_object_list books => './book' => 'Bookstore::Book';
finalize_class();
package Bookstore::Book;
use XML::Rabbit;
has_xpath_value bookid => './@id';
has_xpath_value author => './author';
has_xpath_value title => './title';
has_xpath_value genre => './genre';
has_xpath_value price => './price';
has_xpath_value publish_date => './publish_date';
has_xpath_value description => './description';
has_xpath_object purchase_data => './purchase_data' => 'Bookstore::Purchase';
finalize_class();
package Bookstore::Purchase;
use XML::Rabbit;
has_xpath_value price => './price';
has_xpath_value date => './date';
finalize_class();
XML Consumption:
use strict;
use warnings;
use utf8;
package Library;
use feature qw(say);
use Carp;
use autodie;
say "Showing data information";
my $bookstore = Bookstore->new( file => './sample.xml' );
foreach my $book( @{$bookstore->books} ) {
say "ID: " . $book->bookid;
say "Title: " . $book->title;
say "Author: " . $book->author, "\n";
}
Notes:
Please be careful with the following:
foreach my $book( @{$bookstore->books} ) {
say "ID: " . $book->bookid;
say "Title: " . $book->title;
say "Author: " . $book->author;
try {
say "Purchase price: ". $book->purchase_data->price, "\n";
} catch {
say "No purchase price available\n";
}
}
sample.xml
<?xml version="1.0"?>
<catalog>
<book id="bk101">
<author>Gambardella, Matthew</author>
<title>XML Developer's Guide</title>
<genre>Computer</genre>
<price>44.95</price>
<publish_date>2000-10-01</publish_date>
<description>An in-depth look at creating applications
with XML.</description>
</book>
<book id="bk102">
<author>Ralls, Kim</author>
<title>Midnight Rain</title>
<genre>Fantasy</genre>
<price>5.95</price>
<publish_date>2000-12-16</publish_date>
<description>A former architect battles corporate zombies,
an evil sorceress, and her own childhood to become queen
of the world.</description>
</book>
<book id="bk103">
<author>Corets, Eva</author>
<title>Maeve Ascendant</title>
<genre>Fantasy</genre>
<price>5.95</price>
<publish_date>2000-11-17</publish_date>
<description>After the collapse of a nanotechnology
society in England, the young survivors lay the
foundation for a new society.</description>
</book>
<book id="bk104">
<author>Corets, Eva</author>
<title>Oberon's Legacy</title>
<genre>Fantasy</genre>
<price>5.95</price>
<publish_date>2001-03-10</publish_date>
<description>In post-apocalypse England, the mysterious
agent known only as Oberon helps to create a new life
for the inhabitants of London. Sequel to Maeve
Ascendant.</description>
<purchase_data>
<date>2001-12-21</date>
<price>20</price>
</purchase_data>
</book>
</catalog>
# Parsing with XML::LibXML
# This uses the 'sample.xml' given in the XML::Twig example.
# Module requirements (1.70 and above for use of load_xml)
use XML::LibXML '1.70';
# let's be a good perl dev
use strict;
use warnings 'all';
# Create the LibXML Document Object
my $xml = XML::LibXML->new();
# Where we are retrieving the XML from
my $file = 'sample.xml';
# Load the XML from the file
my $dom = XML::LibXML->load_xml(
location => $file
);
# get the docroot
my $root = $dom->getDocumentElement;
# if the document has children
if($root->hasChildNodes) {
# getElementsByLocalName returns a node list of all elements who's
# localname matches 'title', and we want the first occurrence
# (via get_node(1))
my $title = $root->getElementsByLocalName('title');
if(defined $title) {
# Get the first matched node out of the nodeList
my $node = $title->get_node(1);
# Get the text of the target node
my $title_text = $node->textContent;
print "The first node with name 'title' contains: $title_text\n";
}
# The above calls can be combined, but is possibly prone to errors
# (if the getElementsByLocalName() failed to match a node).
#
# my $title_text = $root->getElementsByLocalName('title')->get_node(1)->textContent;
}
# Using Xpath, get the price of the book with id 'bk104'
#
# Set our xpath
my $xpath = q!/catalog/book[@id='bk104']/price!;
# Does that xpath exist?
if($root->exists($xpath)) {
# Pull in the twig
my $match = $root->find($xpath);
if(defined $match) {
# Get the first matched node out of the nodeList
my $node = $match->get_node(1);
# pull in the text of that node
my $match_text = $node->textContent;
print "The price of the book with id bk104 is: $match_text\n";
}
}