
/* $Id: xml_examp.q,v 1.4 2006/04/25 18:15:51 agraef Exp $
   xml_examp.q: Demonstration of the xml interface. */

import xml;

/* Load a sample xml file. */

def DOC = xml_load_file "sample.xml" 0;
def DOC2 = xml_load_file "recipes.xml" XML_DTDVALID;

/* Our own tree data structure. Each node is encoded with the node constructor
   which takes two arguments: INFO is the xml_node_info of the node, and
   CHILDREN is the list of its child nodes. */

type Node = const node INFO CHILDREN;

/* Translate an XMLDoc or XMLNode to the corresponding tree.
   Example: tree DOC. */

tree DOC:XMLDoc		= tree (xml_root DOC);
tree NODE:XMLNode	= node (xml_node_info NODE)
			  (map tree (xml_children NODE));

/* A variation which constructs a tidy tree, leaving away all blank nodes.
   Example: tidy DOC. */

tidy DOC:XMLDoc		= tidy (xml_root DOC);
tidy NODE:XMLNode	= node (xml_node_info NODE)
			  (map tidy
			   (filter (neg xml_is_blank_node) (xml_children NODE)));

/* Translate a tree back to an XMLDoc. Example: def T = tidy DOC; doc T. */

doc (node INFO CHILDREN)
			= do (add_node (xml_root DOC)) CHILDREN || DOC
			    where DOC:XMLDoc = xml_new_doc "1.0" () INFO;

add_node PARENT:XMLNode (node INFO CHILDREN)
			= do (add_node NODE) CHILDREN
			    where NODE:XMLNode = xml_add_last PARENT INFO;

/* Print a tree, or a list of tree nodes. The contents of each node is printed
   on a single line with proper indentation to indicate the nested structure.

   Example:

   ==> print (tidy DOC)
   story:
     CDATA '<greeting>Hello, world!</greeting>'
     COMMENT 'This is a sample document for testing the xml interface.'
     PI foobar: 'Process me!'
     storyinfo:
       author:
         TEXT 'John Fleck'
       datewritten:
         TEXT 'June 2, 2002'
       keyword:
         TEXT 'example keyword'
     body:
       headline:
         TEXT 'This is the headline'
       para:
         TEXT 'This is the body text.'
   ()

   You can also use this to print the DTD of a document. Example:

   ==> print (tree (xml_int_subset DOC2))

   DOCTYPE list: ()
     ELEMENT list: (recipe*)
     ELEMENT recipe: (author,recipe_name,meal,ingredients,directions)
     ELEMENT ingredients: (item+)
     ELEMENT meal: (#PCDATA|course)*
     ELEMENT item: (#PCDATA|sub_item)*
     COMMENT "These are the remaining elements of the recipe tag "
     ELEMENT recipe_name: (#PCDATA)
     ELEMENT author: (#PCDATA)
     ELEMENT directions: (#PCDATA)
     COMMENT "The remaining element of the meal tag "
     ELEMENT course: (#PCDATA)
     COMMENT "The remaining element of the item tag "
     ELEMENT sub_item: (#PCDATA)
   ()

   */

print X			= indent "" X;

indent SPACE NODES:List	= do (indent SPACE) NODES;

indent SPACE (node (element NAME NS ATTRS) CHILDREN)
			= printf "%s%s%s:%s\n" (SPACE,NAME,attrs ATTRS,attrs NS) ||
			  indent (SPACE++"  ") CHILDREN;

indent SPACE (node (attr KEY VAL) _)
			= printf "%sATTR %s: %s\n" (SPACE,KEY,str VAL);

indent SPACE (node (text CONTENT) _)
			= printf "%sTEXT %s\n" (SPACE,str CONTENT);

indent SPACE (node (cdata CONTENT) _)
			= printf "%sCDATA %s\n" (SPACE,str CONTENT);

indent SPACE (node (comment CONTENT) _)
			= printf "%sCOMMENT %s\n" (SPACE,str CONTENT);

indent SPACE (node (entity_ref NAME) _)
			= printf "%sREF %s\n" (SPACE,NAME);

indent SPACE (node (pi NAME CONTENT) _)
			= printf "%sPI %s: %s\n" (SPACE,NAME,str CONTENT);


indent SPACE (node (doctype NAME EXTID) CHILDREN)
			= printf "%sDOCTYPE %s: %s\n" (SPACE,NAME,str EXTID) ||
			  indent (SPACE++"  ") CHILDREN;

indent SPACE (node (undefined_element NAME) CHILDREN)
			= printf "%sELEMENT %s: UNDEFINED\n" (SPACE,NAME) ||
			  indent (SPACE++"  ") CHILDREN;

indent SPACE (node (empty_element NAME) CHILDREN)
			= printf "%sELEMENT %s: EMPTY\n" (SPACE,NAME) ||
			  indent (SPACE++"  ") CHILDREN;

indent SPACE (node (any_element NAME) CHILDREN)
			= printf "%sELEMENT %s: ANY\n" (SPACE,NAME) ||
			  indent (SPACE++"  ") CHILDREN;

indent SPACE (node (mixed_element NAME CONTENT) CHILDREN)
			= printf "%sELEMENT %s: %s\n"
			  (SPACE,NAME,content CONTENT) ||
			  indent (SPACE++"  ") CHILDREN;

indent SPACE (node (std_element NAME CONTENT) CHILDREN)
			= printf "%sELEMENT %s: %s\n"
			  (SPACE,NAME,content CONTENT) ||
			  indent (SPACE++"  ") CHILDREN;

indent SPACE (node (cdata_attr ELEM_NAME NAME DEFAULT) CHILDREN)
			= printf "%sATTR %s: %s CDATA %s\n"
			  (SPACE,ELEM_NAME,NAME,dflt DEFAULT) ||
			  indent (SPACE++"  ") CHILDREN;

indent SPACE (node (id_attr ELEM_NAME NAME DEFAULT) CHILDREN)
			= printf "%sATTR %s: %s ID %s\n"
			  (SPACE,ELEM_NAME,NAME,dflt DEFAULT) ||
			  indent (SPACE++"  ") CHILDREN;

indent SPACE (node (idref_attr ELEM_NAME NAME DEFAULT) CHILDREN)
			= printf "%sATTR %s: %s IDREF %s\n"
			  (SPACE,ELEM_NAME,NAME,dflt DEFAULT) ||
			  indent (SPACE++"  ") CHILDREN;

indent SPACE (node (idrefs_attr ELEM_NAME NAME DEFAULT) CHILDREN)
			= printf "%sATTR %s: %s IDREFS %s\n"
			  (SPACE,ELEM_NAME,NAME,dflt DEFAULT) ||
			  indent (SPACE++"  ") CHILDREN;

indent SPACE (node (entity_attr ELEM_NAME NAME DEFAULT) CHILDREN)
			= printf "%sATTR %s: %s ENTITY %s\n"
			  (SPACE,ELEM_NAME,NAME,dflt DEFAULT) ||
			  indent (SPACE++"  ") CHILDREN;

indent SPACE (node (entities_attr ELEM_NAME NAME DEFAULT) CHILDREN)
			= printf "%sATTR %s: %s ENTITIES %s\n"
			  (SPACE,ELEM_NAME,NAME,dflt DEFAULT) ||
			  indent (SPACE++"  ") CHILDREN;

indent SPACE (node (nmtoken_attr ELEM_NAME NAME DEFAULT) CHILDREN)
			= printf "%sATTR %s: %s NMTOKEN %s\n"
			  (SPACE,ELEM_NAME,NAME,dflt DEFAULT) ||
			  indent (SPACE++"  ") CHILDREN;

indent SPACE (node (nmtokens_attr ELEM_NAME NAME DEFAULT) CHILDREN)
			= printf "%sATTR %s: %s NMTOKENS %s\n"
			  (SPACE,ELEM_NAME,NAME,dflt DEFAULT) ||
			  indent (SPACE++"  ") CHILDREN;

indent SPACE (node (enum_attr ELEM_NAME NAME VALS DEFAULT) CHILDREN)
			= printf "%sATTR %s: %s %s %s\n"
			  (SPACE,ELEM_NAME,NAME,
			   "("++join "|" VALS++")",
			   dflt DEFAULT) ||
			  indent (SPACE++"  ") CHILDREN;

indent SPACE (node (notation_attr ELEM_NAME NAME VALS DEFAULT) CHILDREN)
			= printf "%sATTR %s: %s NOTATION %s %s\n"
			  (SPACE,ELEM_NAME,NAME,
			   "("++join "|" VALS++")",
			   dflt DEFAULT) ||
			  indent (SPACE++"  ") CHILDREN;

indent SPACE (node (int_entity NAME CONTENT) CHILDREN)
			= printf "%sENTITY %s: %s\n"
			  (SPACE,NAME,str CONTENT) ||
			  indent (SPACE++"  ") CHILDREN;

indent SPACE (node (int_param_entity NAME CONTENT) CHILDREN)
			= printf "%sENTITY %% %s: %s\n"
			  (SPACE,NAME,str CONTENT) ||
			  indent (SPACE++"  ") CHILDREN;

indent SPACE (node (ext_entity NAME EXTID CONTENT) CHILDREN)
			= printf "%sENTITY %s: %s %s\n"
			  (SPACE,NAME,str EXTID,str CONTENT) ||
			  indent (SPACE++"  ") CHILDREN;

indent SPACE (node (ext_param_entity NAME EXTID CONTENT) CHILDREN)
			= printf "%sENTITY %% %s: %s %s\n"
			  (SPACE,NAME,str EXTID,str CONTENT) ||
			  indent (SPACE++"  ") CHILDREN;


indent SPACE (node _ CHILDREN)
			= printf "%sOTHER\n" SPACE ||
			  indent (SPACE++"  ") CHILDREN;

attrs []		= "";
attrs ATTRS		= sprintf " (%s)" (join "," (map attr1 ATTRS));

attr1 ((),VAL)		= str VAL;
attr1 (KEY,VAL)		= sprintf "%s=%s" (KEY,str VAL);

/* This function creates a content declaration in legal XML syntax from an
   XMLElementContent value. */

content X		= wrap (contents X);

wrap S			= S if S!0="(";
			= "("++S++")" otherwise;

contents NAME:String	= NAME;
contents pcdata		= "#PCDATA";
contents (sequence Xs)	= "("++join "," (map contents Xs)++")";
contents (union Xs)	= "("++join "|" (map contents Xs)++")";
contents (opt X)	= contents X++"?";
contents (mult X)	= contents X++"*";
contents (plus X)	= contents X++"+";
contents _		= "???" otherwise;

/* Unparse an attribute default. */

dflt required		= "#REQUIRED";
dflt implied		= "#IMPLIED";
dflt (default VAL)	= str VAL;
dflt (fixed VAL)	= "#FIXED "++str VAL;
