/*	$NetBSD: prop_intern.c,v 1.2 2025/05/14 03:25:46 thorpej Exp $	*/

/*-
 * Copyright (c) 2006, 2007, 2025 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "prop_object_impl.h"
#include <prop/prop_object.h>

#if !defined(_KERNEL) && !defined(_STANDALONE)
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#endif /* !_KERNEL && !_STANDALONE */

/*
 * _prop_intern_skip_whitespace --
 *	Skip any span of whitespace.  Returns a pointer to the first
 *	character at or after `cp' for which _PROP_ISSPACE() is false.
 */
const char *
_prop_intern_skip_whitespace(const char *cp)
{
	for (; _PROP_ISSPACE(*cp); cp++) {
		continue;
	}
	return cp;
}

/*
 * _prop_intern_match --
 *	Compare two length-delimited character streams.  Returns true
 *	only when both have the same length and identical contents.
 */
bool
_prop_intern_match(const char *str1, size_t len1,
		   const char *str2, size_t len2)
{
	/* Streams of different length can never match. */
	if (len1 != len2) {
		return false;
	}
	return memcmp(str1, str2, len1) == 0;
}

/*
 * _prop_xml_intern_skip_comment --
 *	Scan forward from the context's current position for the "-->"
 *	that terminates an XML comment.  On success the context is left
 *	pointing just past the terminator and true is returned.  If the
 *	end of the buffer is reached first, false is returned and the
 *	context position is left untouched.
 */
static bool
_prop_xml_intern_skip_comment(struct _prop_object_internalize_context *ctx)
{
	const char *scan = ctx->poic_cp;

	while (!_PROP_EOF(*scan)) {
		if (scan[0] == '-' && scan[1] == '-' && scan[2] == '>') {
			ctx->poic_cp = &scan[3];
			return true;
		}
		scan++;
	}

	/* Ran out of buffer before the comment was closed. */
	return false;
}

/*
 * _prop_xml_intern_find_tag --
 *	Find the next tag in an XML stream.  Optionally compare the found
 *	tag to an expected tag name.  State of the context is undefined
 *	if this routine returns false.  Upon success, the context points
 *	to the first octet after the tag.
 *
 *	ctx	internalize context; scanning starts at ctx->poic_cp.
 *	tag	expected tag name, or NULL to accept any name.
 *	type	kind of tag the caller accepts (start, end, or either).
 *
 *	On success these context fields describe the tag that was found:
 *	poic_tag_start, poic_tag_type, poic_tagname / poic_tagname_len,
 *	poic_is_empty_element, and poic_tagattr / poic_tagattrval with
 *	their lengths (NULL / 0 when the tag carries no attribute).
 *	At most one attribute is parsed.  Comments in front of the tag
 *	are skipped, except when the caller asked for an end tag only.
 */
bool
_prop_xml_intern_find_tag(struct _prop_object_internalize_context *ctx,
    const char *tag, _prop_tag_type_t type)
{
	const char *cp;
	size_t taglen;

	taglen = tag != NULL ? strlen(tag) : 0;

	/* Re-entered after each comment that precedes the tag. */
 start_over:
	cp = ctx->poic_cp;

	/*
	 * Find the start of the tag.
	 */
	cp = _prop_intern_skip_whitespace(cp);
	if (*cp != '<') {
		return false;
	}

	ctx->poic_tag_start = cp++;
	if (_PROP_EOF(*cp)) {
		return false;
	}

	if (*cp == '!') {
		/* The only "<!" construct understood here is "<!--". */
		if (cp[1] != '-' || cp[2] != '-') {
			return false;
		}
		/*
		 * Comment block -- only allowed if we are allowed to
		 * return a start tag.
		 */
		if (type == _PROP_TAG_TYPE_END) {
			return false;
		}
		/* Step over "!--" and let the helper find the "-->". */
		ctx->poic_cp = cp + 3;
		if (_prop_xml_intern_skip_comment(ctx) == false) {
			return false;
		}
		goto start_over;
	}

	/* A leading '/' marks an end tag; reject what the caller excluded. */
	if (*cp == '/') {
		if (type != _PROP_TAG_TYPE_END &&
		    type != _PROP_TAG_TYPE_EITHER) {
			return false;
		}
		cp++;
		if (_PROP_EOF(*cp)) {
			return false;
		}
		ctx->poic_tag_type = _PROP_TAG_TYPE_END;
	} else {
		if (type != _PROP_TAG_TYPE_START &&
		    type != _PROP_TAG_TYPE_EITHER) {
			return false;
		}
		ctx->poic_tag_type = _PROP_TAG_TYPE_START;
	}

	ctx->poic_tagname = cp;

	/* The tag name runs up to whitespace, '/' or '>'. */
	while (!_PROP_ISSPACE(*cp) && *cp != '/' && *cp != '>') {
		if (_PROP_EOF(*cp)) {
			return false;
		}
		cp++;
	}

	ctx->poic_tagname_len = cp - ctx->poic_tagname;

	/* Make sure this is the tag we're looking for. */
	if (tag != NULL &&
	    (taglen != ctx->poic_tagname_len ||
	     memcmp(tag, ctx->poic_tagname, taglen) != 0)) {
		return false;
	}

	/* Check for empty tag. */
	if (*cp == '/') {
		if (ctx->poic_tag_type != _PROP_TAG_TYPE_START) {
			return false;		/* only valid on start tags */
		}
		ctx->poic_is_empty_element = true;
		cp++;
		if (_PROP_EOF(*cp) || *cp != '>') {
			return false;
		}
	} else {
		ctx->poic_is_empty_element = false;
	}

	/* Easy case of no arguments. */
	if (*cp == '>') {
		ctx->poic_tagattr = NULL;
		ctx->poic_tagattr_len = 0;
		ctx->poic_tagattrval = NULL;
		ctx->poic_tagattrval_len = 0;
		ctx->poic_cp = cp + 1;
		return true;
	}

	/*
	 * The name ended on whitespace, so an attribute is expected.
	 * Step over that whitespace character and any that follow it.
	 */
	_PROP_ASSERT(!_PROP_EOF(*cp));
	cp++;
	if (_PROP_EOF(*cp)) {
		return false;
	}

	cp = _prop_intern_skip_whitespace(cp);
	if (_PROP_EOF(*cp)) {
		return false;
	}

	ctx->poic_tagattr = cp;

	/* The attribute name runs up to whitespace or '='. */
	while (!_PROP_ISSPACE(*cp) && *cp != '=') {
		if (_PROP_EOF(*cp)) {
			return false;
		}
		cp++;
	}

	ctx->poic_tagattr_len = cp - ctx->poic_tagattr;

	/*
	 * NOTE(review): the character that ended the attribute name is
	 * skipped without checking that it is '='.  A single whitespace
	 * character directly followed by '"' is therefore accepted as
	 * well -- confirm whether that leniency is intended.
	 */
	cp++;
	if (*cp != '\"') {
		return false;
	}
	cp++;
	if (_PROP_EOF(*cp)) {
		return false;
	}

	/* The attribute value is everything up to the closing quote. */
	ctx->poic_tagattrval = cp;
	while (*cp != '\"') {
		if (_PROP_EOF(*cp)) {
			return false;
		}
		cp++;
	}
	ctx->poic_tagattrval_len = cp - ctx->poic_tagattrval;

	/*
	 * The closing quote must be followed immediately by '>'; a
	 * second attribute, trailing whitespace or "/>" is rejected.
	 */
	cp++;
	if (*cp != '>') {
		return false;
	}

	ctx->poic_cp = cp + 1;
	return true;
}

/*
 * INTERNALIZER --
 *	Build one table entry from a string-literal tag name and the
 *	routine that internalizes it.  The name length is computed at
 *	compile time and excludes the terminating NUL.
 */
#define	INTERNALIZER(t, f)			\
{	t,	sizeof(t) - 1,		f	}

/*
 * Table mapping XML tag names to the routine that internalizes the
 * corresponding object.  Both "true" and "false" share the boolean
 * internalizer.  The table ends with an entry whose poi_tag is NULL.
 */
static const struct _prop_object_internalizer {
	const char			*poi_tag;	/* tag name */
	size_t				poi_taglen;	/* length of poi_tag */
	prop_object_internalizer_t	poi_intern;	/* handler for the tag */
} _prop_object_internalizer_table[] = {
	INTERNALIZER("array", _prop_array_internalize),

	INTERNALIZER("true", _prop_bool_internalize),
	INTERNALIZER("false", _prop_bool_internalize),

	INTERNALIZER("data", _prop_data_internalize),

	INTERNALIZER("dict", _prop_dictionary_internalize),

	INTERNALIZER("integer", _prop_number_internalize),

	INTERNALIZER("string", _prop_string_internalize),

	{ 0, 0, NULL }		/* end-of-table marker */
};

#undef INTERNALIZER

/*
 * _prop_xml_intern_by_tag --
 *	Look up the tag currently recorded in the context in the
 *	internalizer table and run the matching internalizer.
 *
 *	Whenever an internalizer or a stacked continuation returns
 *	false, the lookup is repeated with whatever tag the context
 *	then holds.  Once the stack has been drained the outermost
 *	object is returned.  If the tag is not in the table, every
 *	stacked continuation is invoked with a NULL child object and
 *	NULL is returned.
 */
static prop_object_t
_prop_xml_intern_by_tag(struct _prop_object_internalize_context *ctx)
{
	const struct _prop_object_internalizer *entry;
	prop_object_internalizer_continue_t resume;
	struct _prop_stack stack;
	prop_object_t obj, parent_obj;
	void *data, *iter;

	_prop_stack_init(&stack);

 match_start:
	/* Walk the table until the tag matches or the sentinel is hit. */
	entry = _prop_object_internalizer_table;
	while (entry->poi_tag != NULL &&
	    !_prop_intern_match(ctx->poic_tagname, ctx->poic_tagname_len,
				entry->poi_tag, entry->poi_taglen)) {
		entry++;
	}

	if (entry->poi_tag == NULL) {
		/* Unknown tag: unwind everything that is still pending. */
		while (_prop_stack_pop(&stack, &obj, &iter, &data, NULL)) {
			resume = (prop_object_internalizer_continue_t)iter;
			(*resume)(&stack, &obj, ctx, data, NULL);
		}
		return NULL;
	}

	obj = NULL;
	if ((*entry->poi_intern)(&stack, &obj, ctx) == false) {
		goto match_start;
	}

	/* Hand the finished object to each pending continuation in turn. */
	parent_obj = obj;
	while (_prop_stack_pop(&stack, &parent_obj, &iter, &data, NULL)) {
		resume = (prop_object_internalizer_continue_t)iter;
		if ((*resume)(&stack, &parent_obj, ctx, data, obj) == false) {
			goto match_start;
		}
		obj = parent_obj;
	}

	return parent_obj;
}

/*
 * ADDCHAR --
 *	Append one decoded character to the output of a string decoder.
 *	Expects `target', `targsize' and `tarindex' to be in scope.
 *	When `target' is NULL nothing is stored but `tarindex' still
 *	advances, so the decoded length can be measured.  When `target'
 *	is non-NULL and `tarindex' has reached `targsize', the enclosing
 *	function returns false.
 */
#define	ADDCHAR(x)							\
	do {								\
		if (target) {						\
			if (tarindex >= targsize) {			\
				return false;				\
			}						\
			target[tarindex] = (x);				\
		}							\
		tarindex++;						\
	} while (/*CONSTCOND*/0)

/*
 * _prop_json_intern_decode_uesc_getu16 --
 *	Parse one "u-escape" (a backslash, 'u', and exactly four
 *	hexadecimal digits) located at src[idx].
 *
 *	On success the 16-bit value is stored in *valp and the index of
 *	the first character after the escape (idx + 6) is returned.
 *	On failure 0 is returned and *valp is left untouched.
 */
static unsigned int
_prop_json_intern_decode_uesc_getu16(const char *src, unsigned int idx,
    uint16_t *valp)
{
	const char *esc = &src[idx];
	uint16_t accum = 0;
	unsigned int pos;

	if (esc[0] != '\\' || esc[1] != 'u') {
		return 0;
	}

	for (pos = 2; pos < 6; pos++) {
		unsigned char ch = esc[pos];
		unsigned int digit;

		if (ch >= '0' && ch <= '9') {
			digit = ch - '0';
		} else if (ch >= 'a' && ch <= 'f') {
			digit = 10 + (ch - 'a');
		} else if (ch >= 'A' && ch <= 'F') {
			digit = 10 + (ch - 'A');
		} else {
			/* Not a hex digit (this includes the NUL). */
			return 0;
		}
		accum = (uint16_t)((accum << 4) | digit);
	}

	*valp = accum;
	return idx + pos;
}

/* UTF-16 surrogate ranges; private to the u-escape decoder below. */
#define	HS_FIRST	0xd800
#define	HS_LAST		0xdbff
#define	HS_SHIFT	10
#define	LS_FIRST	0xdc00
#define	LS_LAST		0xdfff

#define	HIGH_SURROGATE_P(x)	\
	((x) >= HS_FIRST && (x) <= HS_LAST)
#define	LOW_SURROGATE_P(x)	\
	((x) >= LS_FIRST && (x) <= LS_LAST)
#define	SURROGATE_P(x)		\
	(HIGH_SURROGATE_P(x) || LOW_SURROGATE_P(x))

/*
 * _prop_json_intern_decode_uesc --
 *	Decode a JSON UTF-16 "u-escape" ("\uXXXX") into UTF-8.
 *
 *	src	points at the backslash that starts the escape.
 *	c	receives the UTF-8 encoding; must have room for 4 bytes.
 *	cszp	receives the number of bytes stored in c.
 *
 *	A high surrogate must be followed directly by a second u-escape
 *	holding a low surrogate; the pair is combined into a single code
 *	point.  Returns the number of input characters consumed (6, or
 *	12 for a surrogate pair), or 0 if the escape is malformed, is a
 *	lone or misordered surrogate, or encodes U+0000.
 */
static int
_prop_json_intern_decode_uesc(const char *src, char *c, unsigned int *cszp)
{
	unsigned int idx = 0;
	uint32_t code;
	uint16_t code16[2] = { 0, 0 };

	idx = _prop_json_intern_decode_uesc_getu16(src, idx, &code16[0]);
	if (idx == 0) {
		return 0;
	}
	if (! SURROGATE_P(code16[0])) {
		/* Simple case: not a surrogate pair */
		code = code16[0];
	} else if (HIGH_SURROGATE_P(code16[0])) {
		idx = _prop_json_intern_decode_uesc_getu16(src, idx,
							   &code16[1]);
		if (idx == 0) {
			return 0;
		}
		/* Next code must be the low surrogate. */
		if (! LOW_SURROGATE_P(code16[1])) {
			return 0;
		}
		code = (((uint32_t)code16[0] - HS_FIRST) << HS_SHIFT) +
		        (          code16[1] - LS_FIRST)              +
		       0x10000;
	} else {
		/* Got the low surrogate first; this is an error. */
		return 0;
	}

	/*
	 * Ok, we have the code point.  Now convert it to UTF-8.
	 * First we'll just split into nybbles.
	 */
	uint8_t u = (code >> 20) & 0xf;
	uint8_t v = (code >> 16) & 0xf;
	uint8_t w = (code >> 12) & 0xf;
	uint8_t x = (code >>  8) & 0xf;
	uint8_t y = (code >>  4) & 0xf;
	uint8_t z = (code      ) & 0xf;

	/*
	 * ...and swizzle the nybbles accordingly.
	 *
	 * N.B. we explicitly disallow inserting a NUL into the string
	 * by way of a \uXXXX escape.
	 */
	if (code == 0) {
		/* Not allowed. */
		return 0;
	} else if (/*code >= 0x0000 &&*/ code <= 0x007f) {
		c[0] = (char)code;	/* == (y << 4) | z */
		*cszp = 1;
	} else if (/*code >= 0x0080 &&*/ code <= 0x07ff) {
		c[0] = 0xc0 | (x << 2) | (y >> 2);
		c[1] = 0x80 | ((y & 3) << 4) | z;
		*cszp = 2;
	} else if (/*code >= 0x0800 &&*/ code <= 0xffff) {
		c[0] = 0xe0 | w;
		c[1] = 0x80 | (x << 2) | (y >> 2);
		c[2] = 0x80 | ((y & 3) << 4) | z;
		*cszp = 3;
	} else if (/*code >= 0x010000 &&*/ code <= 0x10ffff) {
		c[0] = 0xf0 | ((u & 1) << 2) | (v >> 2);
		c[1] = 0x80 | ((v & 3) << 4) | w;
		c[2] = 0x80 | (x << 2) | (y >> 2);
		c[3] = 0x80 | ((y & 3) << 4) | z;
		*cszp = 4;
	} else {
		/* Invalid code. */
		return 0;
	}

	return idx;	/* advance input by this much */
}

#undef HS_FIRST
#undef HS_LAST
#undef HS_SHIFT
#undef LS_FIRST
#undef LS_LAST
#undef HIGH_SURROGATE_P
#undef LOW_SURROGATE_P
#undef SURROGATE_P

/*
 * _prop_json_intern_decode_string --
 *	Decode the JSON-encoded string that starts at ctx->poic_cp and
 *	runs up to the closing double quote.
 *
 *	target	  output buffer, or NULL to only measure the result.
 *	targsize  capacity of target.
 *	sizep	  if non-NULL, receives the decoded length.
 *	cpp	  if non-NULL, receives a pointer to the closing quote.
 *
 *	Returns false if the buffer ends before the closing quote, on an
 *	invalid escape sequence, or if target is too small.
 */
static bool
_prop_json_intern_decode_string(struct _prop_object_internalize_context *ctx,
    char *target, size_t targsize, size_t *sizep,
    const char **cpp)
{
	const char *src = ctx->poic_cp;
	size_t tarindex = 0;
	char c[4];
	unsigned int csz, i;
	int advance;

	while (!_PROP_EOF(*src) && *src != '"') {
		csz = 1;
		c[0] = *src;

		if (c[0] != '\\') {
			/* Ordinary character: copied through unchanged. */
			advance = 1;
		} else {
			/* Two-character escape unless it is a \u escape. */
			advance = 2;
			c[0] = src[1];

			switch (c[0]) {
			case '"':		/* quotation mark */
			case '\\':		/* reverse solidus */
			case '/':		/* solidus */
				/* These stand for themselves. */
				break;

			case 'b':		/* backspace */
				c[0] = 0x08;
				break;

			case 't':		/* tab */
				c[0] = 0x09;
				break;

			case 'n':		/* line feed */
				c[0] = 0x0a;
				break;

			case 'f':		/* form feed */
				c[0] = 0x0c;
				break;

			case 'r':		/* carriage return */
				c[0] = 0x0d;
				break;

			case 'u':
				/* May yield up to four UTF-8 bytes. */
				advance = _prop_json_intern_decode_uesc(
				    src, c, &csz);
				if (advance == 0) {
					return false;
				}
				break;

			default:
				/* invalid escape */
				return false;
			}
		}
		src += advance;

		for (i = 0; i < csz; i++) {
			ADDCHAR(c[i]);
		}
	}

	if (_PROP_EOF(*src)) {
		/* Ran out of buffer before the closing quote. */
		return false;
	}

	_PROP_ASSERT(*src == '"');
	if (sizep != NULL) {
		*sizep = tarindex;
	}
	if (cpp != NULL) {
		*cpp = src;
	}

	return true;
}

/*
 * _prop_xml_intern_decode_string --
 *	Decode the XML-encoded string that starts at ctx->poic_cp and
 *	runs up to the next '<'.  The predefined entities &amp;, &lt;,
 *	&gt;, &apos; and &quot; are translated; any other '&' sequence
 *	is an error.
 *
 *	target	  output buffer, or NULL to only measure the result.
 *	targsize  capacity of target.
 *	sizep	  if non-NULL, receives the decoded length.
 *	cpp	  if non-NULL, receives a pointer to the terminating '<'.
 *
 *	Returns false if the buffer ends before a '<' is seen, on an
 *	unknown entity, or if target is too small.  *sizep and *cpp are
 *	only written on success.
 */
static bool
_prop_xml_intern_decode_string(struct _prop_object_internalize_context *ctx,
    char *target, size_t targsize, size_t *sizep,
    const char **cpp)
{
	const char *src;
	size_t tarindex;
	char c;

	tarindex = 0;
	src = ctx->poic_cp;

	for (;;) {
		if (_PROP_EOF(*src)) {
			/*
			 * Ran out of buffer before the closing tag.  This
			 * must fail: the outputs have not been set, so
			 * reporting success would hand the caller
			 * indeterminate values.
			 */
			return false;
		}
		if (*src == '<') {
			break;
		}

		if ((c = *src) == '&') {
			/*
			 * Each comparison chain stops at the first
			 * mismatch, so it never reads past the NUL.
			 */
			if (src[1] == 'a' &&
			    src[2] == 'm' &&
			    src[3] == 'p' &&
			    src[4] == ';') {
				c = '&';
				src += 5;
			} else if (src[1] == 'l' &&
				   src[2] == 't' &&
				   src[3] == ';') {
				c = '<';
				src += 4;
			} else if (src[1] == 'g' &&
				   src[2] == 't' &&
				   src[3] == ';') {
				c = '>';
				src += 4;
			} else if (src[1] == 'a' &&
				   src[2] == 'p' &&
				   src[3] == 'o' &&
				   src[4] == 's' &&
				   src[5] == ';') {
				c = '\'';
				src += 6;
			} else if (src[1] == 'q' &&
				   src[2] == 'u' &&
				   src[3] == 'o' &&
				   src[4] == 't' &&
				   src[5] == ';') {
				c = '\"';
				src += 6;
			} else {
				/* Unknown or unterminated entity. */
				return false;
			}
		} else {
			src++;
		}
		ADDCHAR(c);
	}

	_PROP_ASSERT(*src == '<');
	if (sizep != NULL) {
		*sizep = tarindex;
	}
	if (cpp != NULL) {
		*cpp = src;
	}

	return true;
}

#undef ADDCHAR

/*
 * _prop_intern_decode_string --
 *	Decode an encoded string using the decoder that matches the
 *	context's format.  All arguments are passed through to the
 *	JSON or XML decoder unchanged.
 */
bool
_prop_intern_decode_string(struct _prop_object_internalize_context *ctx,
    char *target, size_t targsize, size_t *sizep,
    const char **cpp)
{
	_PROP_ASSERT(ctx->poic_format == PROP_FORMAT_XML ||
		     ctx->poic_format == PROP_FORMAT_JSON);

	if (ctx->poic_format == PROP_FORMAT_JSON) {
		return _prop_json_intern_decode_string(ctx, target, targsize,
		    sizep, cpp);
	}

	/* Every other format value is handled as PROP_FORMAT_XML. */
	return _prop_xml_intern_decode_string(ctx, target, targsize,
	    sizep, cpp);
}

/*
 * _prop_intern_context_alloc --
 *	Allocate an internalize context for `data' in format `fmt' and
 *	position it at the start of the content to parse.
 *
 *	For JSON a leading byte order mark, if any, is skipped.  For
 *	XML the "<?xml ...>" and "<!DOCTYPE plist ...>" preamble and any
 *	comments mixed in with it are skipped, leaving the context at
 *	the '<' of the first tag that is not part of the preamble.
 *
 *	Returns NULL if the allocation fails or if the XML preamble is
 *	malformed (something other than '<' where a tag is expected, or
 *	an unterminated preamble item or comment).
 */
static struct _prop_object_internalize_context *
_prop_intern_context_alloc(const char *data, prop_format_t fmt)
{
	struct _prop_object_internalize_context *ctx;

	ctx = _PROP_MALLOC(sizeof(*ctx), M_TEMP);
	if (ctx == NULL) {
		return NULL;
	}

	ctx->poic_format = fmt;
	ctx->poic_data = ctx->poic_cp = data;

	/*
	 * If we're digesting JSON, check for a byte order mark and
	 * skip it, if present.  We should never see one, but we're
	 * allowed to detect and ignore it.  (RFC 8259 section 8.1)
	 *
	 * JSON text is UTF-8, where U+FEFF is encoded as EF BB BF.
	 * The two-byte UTF-16 marks are still recognized as before.
	 * The comparisons stop at the first mismatch, so the NUL
	 * terminator is never read past.
	 */
	if (fmt == PROP_FORMAT_JSON) {
		const unsigned char *bom = (const unsigned char *)data;

		if (bom[0] == 0xef && bom[1] == 0xbb && bom[2] == 0xbf) {
			ctx->poic_cp = data + 3;
		} else if ((bom[0] == 0xff && bom[1] == 0xfe) ||
			   (bom[0] == 0xfe && bom[1] == 0xff)) {
			ctx->poic_cp = data + 2;
		}

		/* No additional processing work to do for JSON. */
		return ctx;
	}

	/*
	 * Skip any whitespace and XML preamble stuff that we don't
	 * know about / care about.
	 */
	for (;;) {
		data = _prop_intern_skip_whitespace(data);
		if (_PROP_EOF(*data) || *data != '<') {
			goto bad;
		}

		/* Compares the text that follows the leading '<'. */
#define	MATCH(str)	(strncmp(&data[1], str, strlen(str)) == 0)

		/*
		 * Skip over the XML preamble that Apple XML property
		 * lists usually include at the top of the file.
		 */
		if (MATCH("?xml ") ||
		    MATCH("!DOCTYPE plist")) {
			while (*data != '>' && !_PROP_EOF(*data)) {
				data++;
			}
			if (_PROP_EOF(*data)) {
				goto bad;
			}
			data++;	/* advance past the '>' */
			continue;
		}

		/*
		 * Skip comments that appear within the preamble.  MATCH()
		 * already starts after the '<', so the pattern must not
		 * repeat it.
		 */
		if (MATCH("!--")) {
			ctx->poic_cp = data + 4;	/* past "<!--" */
			if (_prop_xml_intern_skip_comment(ctx) == false) {
				goto bad;
			}
			data = ctx->poic_cp;
			continue;
		}

#undef MATCH

		/*
		 * We don't think we should skip it, so let's hope we can
		 * parse it.
		 */
		break;
	}

	ctx->poic_cp = data;
	return ctx;
 bad:
	_PROP_FREE(ctx, M_TEMP);
	return NULL;
}

/*
 * _prop_intern_context_free --
 *	Free an internalize context.  Only the context structure itself
 *	is released; the data buffer it points into is not touched.
 */
static void
_prop_intern_context_free(struct _prop_object_internalize_context *ctx)
{
	_PROP_FREE(ctx, M_TEMP);
}

/*
 * _prop_object_internalize_json --
 *	Internalize a property list from JSON data.
 *
 *	The first non-whitespace character selects the internalizer:
 *	'{' dictionary, '[' array, '"' string, the literals "true" and
 *	"false" boolean, and '+', '-' or a digit number.  Anything else
 *	is an error.  Whenever an internalizer or a stacked continuation
 *	returns false, value selection is repeated at the context's
 *	current position.
 *
 *	initial_tag is not used.  Returns the internalized object, or
 *	NULL on failure -- including when anything other than whitespace
 *	follows the top-level value.
 */
static prop_object_t
_prop_object_internalize_json(struct _prop_object_internalize_context *ctx,
    const struct _prop_object_type_tags *initial_tag __unused)
{
	prop_object_t obj, parent_obj;
	void *data, *iter;
	prop_object_internalizer_continue_t iter_func;
	struct _prop_stack stack;
	bool (*intern)(prop_stack_t, prop_object_t *,
		       struct _prop_object_internalize_context *);

	_prop_stack_init(&stack);

 match_start:
	/* Reset the per-value tag state before looking at the next value. */
	intern = NULL;
	ctx->poic_tagname = ctx->poic_tagattr = ctx->poic_tagattrval = NULL;
	ctx->poic_tagname_len = ctx->poic_tagattr_len =
	    ctx->poic_tagattrval_len = 0;
	ctx->poic_is_empty_element = false;
	ctx->poic_cp = _prop_intern_skip_whitespace(ctx->poic_cp);
	switch (ctx->poic_cp[0]) {
	case '{':
		ctx->poic_cp++;
		intern = _prop_dictionary_internalize;
		break;

	case '[':
		ctx->poic_cp++;
		intern = _prop_array_internalize;
		break;
	
	case '"':
		ctx->poic_cp++;
		/* XXX Slightly gross. */
		/* An empty string is consumed here and flagged as empty. */
		if (*ctx->poic_cp == '"') {
			ctx->poic_cp++;
			ctx->poic_is_empty_element = true;
		}
		intern = _prop_string_internalize;
		break;
	
	case 't':
		/* Stops at the first mismatch, so the NUL is never passed. */
		if (ctx->poic_cp[1] == 'r' &&
		    ctx->poic_cp[2] == 'u' &&
		    ctx->poic_cp[3] == 'e') {
			/* XXX Slightly gross. */
			/* Present the literal as an empty "true" tag. */
			ctx->poic_tagname = ctx->poic_cp;
			ctx->poic_tagname_len = 4;
			ctx->poic_is_empty_element = true;
			intern = _prop_bool_internalize;
			ctx->poic_cp += 4;
		}
		break;
	
	case 'f':
		if (ctx->poic_cp[1] == 'a' &&
		    ctx->poic_cp[2] == 'l' &&
		    ctx->poic_cp[3] == 's' &&
		    ctx->poic_cp[4] == 'e') {
			/* XXX Slightly gross. */
			/* Present the literal as an empty "false" tag. */
			ctx->poic_tagname = ctx->poic_cp;
			ctx->poic_tagname_len = 5;
			ctx->poic_is_empty_element = true;
			intern = _prop_bool_internalize;
			ctx->poic_cp += 5;
		}
		break;

	default:
		/* The number internalizer consumes the sign/digits itself. */
		if (ctx->poic_cp[0] == '+' ||
		    ctx->poic_cp[0] == '-' ||
		    (ctx->poic_cp[0] >= '0' && ctx->poic_cp[0] <= '9')) {
			intern = _prop_number_internalize;
		}
		break;
	}

	if (intern == NULL) {
		/*
		 * Unrecognized value: call every stacked continuation
		 * with a NULL child object, then fail.
		 */
		while (_prop_stack_pop(&stack, &obj, &iter, &data, NULL)) {
			iter_func = (prop_object_internalizer_continue_t)iter;
			(*iter_func)(&stack, &obj, ctx, data, NULL);
		}
		return NULL;
	}

	obj = NULL;
	if ((*intern)(&stack, &obj, ctx) == false) {
		goto match_start;
	}

	/* Hand the finished object to each pending continuation in turn. */
	parent_obj = obj;
	while (_prop_stack_pop(&stack, &parent_obj, &iter, &data, NULL)) {
		iter_func = (prop_object_internalizer_continue_t)iter;
		if ((*iter_func)(&stack, &parent_obj, ctx, data,
				 obj) == false) {
			goto match_start;
		}
		obj = parent_obj;
	}

	/* Ensure there's no trailing junk. */
	if (parent_obj != NULL) {
		ctx->poic_cp = _prop_intern_skip_whitespace(ctx->poic_cp);
		if (!_PROP_EOF(*ctx->poic_cp)) {
			prop_object_release(parent_obj);
			parent_obj = NULL;
		}
	}
	return parent_obj;
}

/*
 * _prop_object_internalize_xml --
 *	Internalize a property list from XML data.
 *
 *	The document must consist of a non-empty <plist> element that
 *	wraps a single main object.  If initial_tag is non-NULL, the
 *	main object's tag must match initial_tag->xml_tag; otherwise
 *	any tag is accepted.  Returns the object, or NULL on failure.
 */
static prop_object_t
_prop_object_internalize_xml(struct _prop_object_internalize_context *ctx,
    const struct _prop_object_type_tags *initial_tag)
{
	prop_object_t obj;

	/* We start with a <plist> tag, which must not be empty. */
	if (!_prop_xml_intern_find_tag(ctx, "plist", _PROP_TAG_TYPE_START) ||
	    ctx->poic_is_empty_element) {
		return NULL;
	}

	/*
	 * The only attribute tolerated on <plist> is "version", which
	 * Apple XML property lists often carry.  Its value is ignored.
	 */
	if (ctx->poic_tagattr != NULL &&
	    !_PROP_TAGATTR_MATCH(ctx, "version")) {
		return NULL;
	}

	/* Next comes the opening tag of the main object. */
	if (!_prop_xml_intern_find_tag(ctx,
	    initial_tag != NULL ? initial_tag->xml_tag : NULL,
	    _PROP_TAG_TYPE_START)) {
		return NULL;
	}

	obj = _prop_xml_intern_by_tag(ctx);
	if (obj == NULL) {
		return NULL;
	}

	/*
	 * The main object has been fully consumed, so the only thing
	 * left should be </plist>.  Drop the object if it is missing.
	 */
	if (!_prop_xml_intern_find_tag(ctx, "plist", _PROP_TAG_TYPE_END)) {
		prop_object_release(obj);
		return NULL;
	}

	return obj;
}

/*
 * _prop_object_internalize --
 *	Internalize a property list from a NUL-terminated data blob.
 *
 *	The format is chosen from the first non-whitespace character:
 *	'<' selects XML and anything else is assumed to be JSON.
 *	Returns NULL if the blob holds nothing but whitespace, if the
 *	context cannot be allocated, or if parsing fails.
 */
prop_object_t
_prop_object_internalize(const char *data,
    const struct _prop_object_type_tags *initial_tag)
{
	struct _prop_object_internalize_context *ctx;
	prop_object_t result;
	prop_format_t fmt;

	data = _prop_intern_skip_whitespace(data);
	if (_PROP_EOF(*data)) {
		return NULL;
	}

	/* An XML plist always starts with '<'. */
	if (*data == '<') {
		fmt = PROP_FORMAT_XML;
	} else {
		fmt = PROP_FORMAT_JSON;
	}

	ctx = _prop_intern_context_alloc(data, fmt);
	if (ctx == NULL) {
		return NULL;
	}

	if (fmt == PROP_FORMAT_JSON) {
		result = _prop_object_internalize_json(ctx, initial_tag);
	} else {
		result = _prop_object_internalize_xml(ctx, initial_tag);
	}

	_prop_intern_context_free(ctx);
	return result;
}

/*
 * prop_object_internalize --
 *	Exported entry point: internalize a property list from a
 *	NUL-terminated data blob.  No initial tag is supplied, so the
 *	type of the top-level object is not constrained.
 */
_PROP_EXPORT prop_object_t
prop_object_internalize(const char *data)
{
	return _prop_object_internalize(data, NULL);
}

#if !defined(_KERNEL) && !defined(_STANDALONE)
/* Describes a file mapped into memory for internalizing. */
struct _prop_intern_mapped_file {
	char *	pimf_data;	/* start of the mapping */
	size_t	pimf_mapsize;	/* size of the mapping, in bytes */
};

/*
 * _prop_intern_map_file --
 *	Map a file for the purpose of internalizing it.
 *
 *	The file is mapped read-only, rounded up to a whole number of
 *	pages.  When the file length is an exact multiple of the page
 *	size, one extra anonymous page is mapped directly after the
 *	file contents so that the buffer is NUL-terminated.
 *
 *	Returns a descriptor that must be released with
 *	_prop_intern_unmap_file(), or NULL on any failure.  No mapping
 *	or descriptor is left behind on failure.
 */
static struct _prop_intern_mapped_file *
_prop_intern_map_file(const char *fname)
{
	struct stat sb;
	struct _prop_intern_mapped_file *mf;
	size_t pgsize = (size_t)sysconf(_SC_PAGESIZE);
	size_t pgmask = pgsize - 1;
	size_t maplen;
	bool need_guard;
	int fd;

	mf = _PROP_MALLOC(sizeof(*mf), M_TEMP);
	if (mf == NULL) {
		return NULL;
	}

	fd = open(fname, O_RDONLY, 0400);
	if (fd == -1) {
		goto fail;
	}

	if (fstat(fd, &sb) == -1) {
		(void) close(fd);
		goto fail;
	}

	/* Round the file size up to a page boundary, guarding overflow. */
	mf->pimf_mapsize = ((size_t)sb.st_size + pgmask) & ~pgmask;
	if (mf->pimf_mapsize < (size_t)sb.st_size) {
		(void) close(fd);
		goto fail;
	}

	/*
	 * If the file length is an integral number of pages, then we
	 * need to map a guard page at the end in order to provide the
	 * necessary NUL-termination of the buffer.
	 */
	need_guard = (sb.st_size & pgmask) == 0;

	/* Total length of address space reserved by the first mmap(). */
	maplen = mf->pimf_mapsize;
	if (need_guard) {
		maplen += pgsize;
		if (maplen < mf->pimf_mapsize) {
			(void) close(fd);
			goto fail;
		}
	}

	mf->pimf_data = mmap(NULL, maplen,
			     PROT_READ, MAP_FILE|MAP_SHARED, fd, (off_t)0);
	(void) close(fd);
	if (mf->pimf_data == MAP_FAILED) {
		goto fail;
	}
#ifdef POSIX_MADV_SEQUENTIAL
	(void) posix_madvise(mf->pimf_data, mf->pimf_mapsize,
	    POSIX_MADV_SEQUENTIAL);
#endif

	if (need_guard) {
		/* Replace the trailing page with anonymous memory. */
		if (mmap(mf->pimf_data + mf->pimf_mapsize,
			 pgsize, PROT_READ,
			 MAP_ANON|MAP_PRIVATE|MAP_FIXED, -1,
			 (off_t)0) == MAP_FAILED) {
			/*
			 * Unmap everything the first mmap() reserved,
			 * including the page meant for the guard.
			 */
			(void) munmap(mf->pimf_data, maplen);
			goto fail;
		}
		mf->pimf_mapsize = maplen;
	}
	return mf;

 fail:
	_PROP_FREE(mf, M_TEMP);
	return NULL;
}

/*
 * _prop_intern_unmap_file --
 *	Unmap a file previously mapped for internalizing and free its
 *	descriptor.  `mf' must not be used after this call.
 */
static void
_prop_intern_unmap_file(struct _prop_intern_mapped_file *mf)
{
#ifdef POSIX_MADV_DONTNEED
	/* Advisory only; the result is deliberately ignored. */
	(void) posix_madvise(mf->pimf_data, mf->pimf_mapsize,
	    POSIX_MADV_DONTNEED);
#endif
	(void) munmap(mf->pimf_data, mf->pimf_mapsize);
	_PROP_FREE(mf, M_TEMP);
}

/*
 * _prop_object_internalize_from_file --
 *	Internalize a property list from a file.
 *
 *	The file is mapped, handed to _prop_object_internalize() along
 *	with initial_tag, and unmapped again before returning.  Returns
 *	NULL if the file cannot be mapped or cannot be internalized.
 */
prop_object_t
_prop_object_internalize_from_file(const char *fname,
    const struct _prop_object_type_tags *initial_tag)
{
	struct _prop_intern_mapped_file *mf;
	prop_object_t obj;

	mf = _prop_intern_map_file(fname);
	if (mf == NULL) {
		return NULL;
	}
	obj = _prop_object_internalize(mf->pimf_data, initial_tag);
	/* The mapping is released whether or not parsing succeeded. */
	_prop_intern_unmap_file(mf);

	return obj;
}

/*
 * prop_object_internalize_from_file --
 *	Exported entry point: internalize a property list from the
 *	named file.  No initial tag is supplied, so the type of the
 *	top-level object is not constrained.
 */
_PROP_EXPORT prop_object_t
prop_object_internalize_from_file(const char *fname)
{
	return _prop_object_internalize_from_file(fname, NULL);
}
#endif /* !_KERNEL && !_STANDALONE */
