XML Parsing
Parsing with XML::Twig
Section titled “Parsing with XML::Twig”#!/usr/bin/env perl
use strict;use warnings 'all';
use XML::Twig;
my $twig = XML::Twig->parse( \*DATA );
#we can use the 'root' method to find the root of the XML.my $root = $twig->root;
#first_child finds the first child element matching a value.my $title = $root->first_child('title');
#text reads the text of the element.my $title_text = $title->text;
print "Title is: ", $title_text, "\n";
#The above could be combined:print $twig ->root->first_child_text('title'), "\n";
## You can use the 'children' method to iterate multiple items:my $list = $twig->root->first_child('list');
#children can optionally take an element 'tag' - otherwise it just returns all of them.foreach my $element ( $list->children ) {
#the 'att' method reads an attribute print "Element with ID: ", $element->att('id') // 'none here', " is ", $element->text, "\n";}
#And if we need to do something more complicated, we an use 'xpath'.#get_xpath or findnodes do the same thing:#return a list of matches, or if you specify a second numeric argument, just that numbered match.
#xpath syntax is fairly extensive, but in this one - we search:# anywhere in the tree: //#nodes called 'item'#with an id attribute [@id]#and with that id attribute equal to "1000".#by specifying '0' we say 'return just the first match'.
print "Item 1000 is: ", $twig->get_xpath( '//item[@id="1000"]', 0 )->text, "\n";
#this combines quite well with `map` to e.g. do the same thing on multiple itemsprint "All IDs:\n", join ( "\n", map { $_ -> att('id') } $twig -> get_xpath('//item'));#note how this also finds the item under 'summary', because of //
__DATA__<?xml version="1.0" encoding="utf-8"?><root> <title>some sample xml</title> <first key="value" key2="value2"> <second>Some text</second> </first> <third> <fourth key3="value">Text here too</fourth> </third> <list> <item id="1">Item1</item> <item id="2">Item2</item> <item id="3">Item3</item> <item id="66">Item66</item> <item id="88">Item88</item> <item id="100">Item100</item> <item id="1000">Item1000</item> <notanitem>Not an item at all really.</notanitem> </list> <summary> <item id="no_id">Test</item> </summary></root>Consuming XML with XML::Rabbit
Section titled “Consuming XML with XML::Rabbit”With XML::Rabbit it is possible to consume XML files easily. You define in a declarative way and with an XPath syntax what you are looking for in the XML and XML::Rabbit will return objects according to the given definition.
Definition:
package Bookstore;use XML::Rabbit::Root;has_xpath_object_list books => './book' => 'Bookstore::Book';finalize_class();
package Bookstore::Book;use XML::Rabbit;has_xpath_value bookid => './@id';has_xpath_value author => './author';has_xpath_value title => './title';has_xpath_value genre => './genre';has_xpath_value price => './price';has_xpath_value publish_date => './publish_date';has_xpath_value description => './description';has_xpath_object purchase_data => './purchase_data' => 'Bookstore::Purchase';finalize_class();
package Bookstore::Purchase;use XML::Rabbit;has_xpath_value price => './price';has_xpath_value date => './date';finalize_class();XML Consumption:
use strict;use warnings;use utf8;
package Library;use feature qw(say);use Carp;use autodie;
say "Showing data information";my $bookstore = Bookstore->new( file => './sample.xml' );
foreach my $book( @{$bookstore->books} ) { say "ID: " . $book->bookid; say "Title: " . $book->title; say "Author: " . $book->author, "\n";}Notes:
Please be careful with the following:
foreach my $book( @{$bookstore->books} ) { say "ID: " . $book->bookid; say "Title: " . $book->title; say "Author: " . $book->author; try { say "Purchase price: ". $book->purchase_data->price, "\n"; } catch { say "No purchase price available\n"; }}sample.xml
<?xml version="1.0"?><catalog> <book id="bk101"> <author>Gambardella, Matthew</author> <title>XML Developer's Guide</title> <genre>Computer</genre> <price>44.95</price> <publish_date>2000-10-01</publish_date> <description>An in-depth look at creating applications with XML.</description> </book> <book id="bk102"> <author>Ralls, Kim</author> <title>Midnight Rain</title> <genre>Fantasy</genre> <price>5.95</price> <publish_date>2000-12-16</publish_date> <description>A former architect battles corporate zombies, an evil sorceress, and her own childhood to become queen of the world.</description> </book> <book id="bk103"> <author>Corets, Eva</author> <title>Maeve Ascendant</title> <genre>Fantasy</genre> <price>5.95</price> <publish_date>2000-11-17</publish_date> <description>After the collapse of a nanotechnology society in England, the young survivors lay the foundation for a new society.</description> </book> <book id="bk104"> <author>Corets, Eva</author> <title>Oberon's Legacy</title> <genre>Fantasy</genre> <price>5.95</price> <publish_date>2001-03-10</publish_date> <description>In post-apocalypse England, the mysterious agent known only as Oberon helps to create a new life for the inhabitants of London. Sequel to Maeve Ascendant.</description> <purchase_data> <date>2001-12-21</date> <price>20</price> </purchase_data> </book></catalog>Parsing with XML::LibXML
Section titled “Parsing with XML::LibXML”# This uses the 'sample.xml' given in the XML::Twig example.
# Module requirements (1.70 and above for use of load_xml)use XML::LibXML '1.70';
# let's be a good perl devuse strict;use warnings 'all';
# Create the LibXML Document Objectmy $xml = XML::LibXML->new();
# Where we are retrieving the XML frommy $file = 'sample.xml';
# Load the XML from the filemy $dom = XML::LibXML->load_xml( location => $file);
# get the docrootmy $root = $dom->getDocumentElement;
# if the document has childrenif($root->hasChildNodes) {
# getElementsByLocalName returns a node list of all elements who's # localname matches 'title', and we want the first occurrence # (via get_node(1)) my $title = $root->getElementsByLocalName('title');
if(defined $title) { # Get the first matched node out of the nodeList my $node = $title->get_node(1);
# Get the text of the target node my $title_text = $node->textContent;
print "The first node with name 'title' contains: $title_text\n"; }
# The above calls can be combined, but is possibly prone to errors # (if the getElementsByLocalName() failed to match a node). # # my $title_text = $root->getElementsByLocalName('title')->get_node(1)->textContent;}
# Using Xpath, get the price of the book with id 'bk104'#
# Set our xpathmy $xpath = q!/catalog/book[@id='bk104']/price!;
# Does that xpath exist?if($root->exists($xpath)) {
# Pull in the twig my $match = $root->find($xpath);
if(defined $match) { # Get the first matched node out of the nodeList my $node = $match->get_node(1);
# pull in the text of that node my $match_text = $node->textContent;
print "The price of the book with id bk104 is: $match_text\n"; }}