
/* Copyright (C) 2002-2008 Free Software Foundation, Inc.
   Contributed by Andy Vaught

  This file is part of g95.

  G95 is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2, or (at your option)
  any later version.

  G95 is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with g95; see the file COPYING.  If not, write to
  the Free Software Foundation, 59 Temple Place - Suite 330,
  Boston, MA 02111-1307, USA.

  In addition to the permissions in the GNU General Public License, the
  Free Software Foundation gives you unlimited permission to link the
  compiled version of this file into combinations with other programs,
  and to distribute those combinations without any restriction coming
  from the use of this file.  (The General Public License restrictions
  do apply in other respects; for example, they cover modification of
  the file, and distribution when not linked into a combined executable.)
*/


/* format.c-- parse a FORMAT string into a binary format suitable for
 * interpretation during I/O statements */

#include <string.h>

#include "config.h"
#include "safe-ctype.h"
#include "runtime.h"


/* Number of format nodes that we can store statically before we have
 * to resort to dynamic allocation.  The root node is array[0]. */

#define FARRAY_SIZE 200

static fnode f_array[FARRAY_SIZE], *avail = f_array;

/* Local variables for checking format strings.  The saved_token is
 * used to back up by a single format token during the parsing process. */

static char *format_string, *t_string, *error;
static format_token saved_token;
static int value, format_string_len;

static fnode colon_node = { FMT_COLON };

/* Error messages */

static char posint_required[]    = "Positive width required in format",
    period_required[]    = "Period required in format",
    nonneg_required[]    = "Nonnegative width required in format",
    unexpected_element[] = "Unexpected element in format",
    unexpected_end[]     = "Unexpected end of format string",
    bad_string[]         = "Unterminated character constant in format",
    bad_hollerith[]  = "Hollerith constant extends past the end of the format",
    bad_fmt[]        = "Illegal descriptor in F mode";


/* unget_char()-- Back up one character position.  */

#define unget_char() { format_string--;  format_string_len++; }

/* next_char()-- Return the next character in the format string.
 * Returns -1 when the string is done.  If the literal flag is set,
 * spaces are significant, otherwise they are not. */

static int next_char(int literal) {
int c;

    do {
	if (format_string_len == 0)
	    return -1;

	format_string_len--;
	c = toupper(*format_string++);
    } while((c == ' ' || c == '\t') && !literal);

    return c;
}



/* get_fnode()-- Allocate a new format node, inserting it into the
 * current singly linked list.  These are initially allocated from the
 * static buffer. */

static fnode *get_fnode(fnode **head, fnode **tail, format_token t) {
fnode *f;

    if (avail - f_array >= FARRAY_SIZE)
	f = get_mem(sizeof(fnode));

    else {
	f = avail++;
	memset(f, '\0', sizeof(fnode));
    }

    if (head != NULL)
	if (*head == NULL)
	    *head = *tail = f;
	else {
	    (*tail)->next = f;
	    *tail = f;
	}

    f->format = t;
    f->repeat = -1;
    f->source = format_string;
    return f;
}



/* free_fnode()-- Recursive function to free the given fnode and
 * everything it points to.  We only have to actually free something
 * if it is outside of the static array. */

static void free_fnode(fnode *f) {
fnode *next;

    for(; f; f=next) {
	next = f->next;

	if (f->format == FMT_LPAREN)
	    free_fnode(f->u.child);

	if (f < f_array || f >= f_array + FARRAY_SIZE)
	    free_mem(f);
    }
}



/* free_fnodes()-- Free the current tree of fnodes.  We only have to
 * traverse the tree if some nodes were allocated dynamically. */

void free_fnodes(void) {

    if (ioparm->fnode_base < f_array ||
	ioparm->fnode_base >= f_array + FARRAY_SIZE)
	free_fnode(ioparm->fnode_base);

    else
	avail = ioparm->fnode_base;
}



/* format_lex()-- Simple lexical analyzer for getting the next token
 * in a FORMAT string.  We support a one-level token pushback in the
 * saved_token variable. */ 

static format_token format_lex(void) {
signed char c, delim;
format_token token;
int negative_flag;

    if (saved_token != FMT_NONE) {
	token = saved_token;
	saved_token = FMT_NONE;
	return token;
    }

    negative_flag = 0;
    c = next_char(0);

    switch(c) {
    case '-':
	negative_flag = 1;
	/* Fall Through */

    case '+':
	c = next_char(0);
	if (!isdigit(c)) {
	    token = FMT_UNKNOWN;
	    break;
	}

	value = c - '0';

	for(;;) {
	    c = next_char(0);
	    if (c == ' ')
		continue;

	    if (!isdigit(c))
		break;

	    value = 10*value + c - '0';
	}

	unget_char();

	if (negative_flag)
	    value = -value;

	token = FMT_SIGNED_INT;
	break;

    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
	value = c - '0';

	for(;;) {
	    c = next_char(0);
	    if (c == ' ')
		continue;

	    if (!isdigit(c))
		break;

	    value = 10*value + c - '0';
	}

	unget_char();
	token = (value == 0) ? FMT_ZERO : FMT_POSINT;
	break;

    case '.':
	token = FMT_PERIOD;
	break;

    case ',':
	token = FMT_COMMA;
	break;

    case ':':
	token = FMT_COLON;
	break;

    case '/':
	token = FMT_SLASH;
	break;

    case '$': case '\\':
	token = STD_F ? FMT_BAD : FMT_DOLLAR;
	break;

    case 'T':
	switch(next_char(0)) {
	case 'L':   token = FMT_TL;                 break;
	case 'R':   token = FMT_TR;                 break;
	default:    token = FMT_T;   unget_char();  break;
	}

	break;

    case '(':
	token = FMT_LPAREN;
	break;

    case ')':
	token = FMT_RPAREN;
	break;

    case 'X':
	token = STD_F ? FMT_BAD : FMT_X;
	break;

    case 'S':
	switch(next_char(0)) {
	case 'S':   token = FMT_SS;                 break;
	case 'P':   token = FMT_SP;                 break;
	default:    token = FMT_S;   unget_char();  break;
	}

	break;

    case 'B':
	switch(next_char(0)) {
	case 'N':  token = STD_F ? FMT_BAD : FMT_BN;                 break;
	case 'Z':  token = STD_F ? FMT_BAD : FMT_BZ;                 break;
	default:   token = STD_F ? FMT_BAD : FMT_B;   unget_char();  break;
	}

	break;

    case '\'': case '"':
	if (STD_F) {
	    token = FMT_BAD;
	    break;
	}

	delim = c;

	t_string = format_string;
	value = 0;       /* This is the length of the string */

	for(;;) {
	    c = next_char(1);
	    if (c == -1) {
		token = FMT_BADSTRING;
		error = bad_string;
		break;
	    }

	    if (c == delim) {
		c = next_char(1);

		if (c == -1) {
		    token = FMT_BADSTRING;
		    error = bad_string;
		    break;
		}

		if (c != delim) {
		    unget_char();
		    token = FMT_STRING;
		    break;
		}
	    }

	    value++;
	}

	break;

    case 'P':
	token = STD_F ? FMT_BAD : FMT_P;
	break;

    case 'I':
	token = FMT_I;
	break;

    case 'O':
	token = STD_F ? FMT_BAD : FMT_O;
	break;

    case 'Z':
	token = STD_F ? FMT_BAD : FMT_Z;
	break;

    case 'F':
	token = FMT_F;
	break;

    case 'E':
	switch(next_char(0)) {
	case 'N':  token = FMT_EN;                  break;
	case 'S':  token = FMT_ES;                  break;
	default:   token = STD_F ? FMT_BAD : FMT_E;    unget_char();  break;
	}

	break;

    case 'G':
	token = STD_F ? FMT_BAD : FMT_G;
	break;

    case 'H':
	token = STD_F ? FMT_BAD : FMT_H;
	break;

    case 'L':
	token = FMT_L;
	break;

    case 'A':
	token = FMT_A;
	break;

    case 'D':
	switch(next_char(0)) {
	case 'P':  token = FMT_DP;                break;
	case 'C':  token = FMT_DC;                break;
	default:   token = STD_F ? FMT_BAD : FMT_D;  unget_char();  break;
	    break;
	}

	break;

    case -1:
	token = FMT_END;
	break;

    default:
	token = FMT_UNKNOWN;
	break;
    }

    return token;
}



/* parse_format_list()-- Parse a format list.  Assumes that a left
 * paren has already been seen.  Returns a list representing the
 * parenthesis node which contains the rest of the list. */

static fnode *parse_format_list(void) {
fnode *head, *tail;
format_token t, u;
int repeat;

    head = tail = NULL;
    repeat = 0;

/* Get the next format item */

format_item:
    t = format_lex();
    switch(t) {
    case FMT_POSINT:
	repeat = value;

	t = format_lex();
	switch(t) {
	case FMT_LPAREN:
	    get_fnode(&head, &tail, FMT_LPAREN);
	    tail->repeat = repeat;
	    tail->u.child = parse_format_list();
	    if (error != NULL)
		goto finished;

	    goto between_desc;

	case FMT_SLASH:
	    get_fnode(&head, &tail, FMT_SLASH);
	    tail->repeat = repeat;
	    goto optional_comma;

	case FMT_X:
	    get_fnode(&head, &tail, FMT_X);
	    tail->repeat = 1;
	    tail->u.k = value;
	    goto between_desc;

	case FMT_P:
	    goto p_descriptor;

	default:
	    goto data_desc;
	}

    case FMT_LPAREN:
	get_fnode(&head, &tail, FMT_LPAREN);
	tail->repeat = 1;
	tail->u.child = parse_format_list();
	if (error != NULL)
	    goto finished;

	goto between_desc;

    case FMT_RPAREN:
	goto finished;

    case FMT_ZERO:  /* Signed integer or zero can only precede a P format */
    case FMT_SIGNED_INT:  
	t = format_lex();
	if (t != FMT_P) {
	    error = "Expected P edit descriptor in format";

	    goto finished;
	}

    p_descriptor:
	get_fnode(&head, &tail, FMT_P);
	tail->repeat = 1;
	tail->u.k = value;

	t = format_lex();
	if (t == FMT_E  || t == FMT_F || t == FMT_EN ||
	    t == FMT_ES || t == FMT_D || t == FMT_G) {
	    repeat = 1;
	    goto data_desc;
	}

	saved_token = t;
	goto optional_comma;

    case FMT_P:       /* P requires a prior number */
	error = "P descriptor requires leading scale factor";
	goto finished;

    case FMT_X:
	get_fnode(&head, &tail, FMT_X);
	tail->repeat = 1;
	tail->u.k = 1;
	goto between_desc;

    case FMT_STRING:
	get_fnode(&head, &tail, FMT_STRING);

	tail->u.string.p = t_string;
	tail->u.string.length = value;
	tail->repeat = 1;
	goto between_desc;

    case FMT_S:   case FMT_SS:  case FMT_SP:
    case FMT_DP:  case FMT_DC:
    case FMT_BN:  case FMT_BZ:
	get_fnode(&head, &tail, t);
	tail->repeat = 1;
	goto between_desc;

    case FMT_COLON:
	get_fnode(&head, &tail, FMT_COLON);
	tail->repeat = 1;
	goto optional_comma;

    case FMT_SLASH:
	get_fnode(&head, &tail, FMT_SLASH);
	tail->u.r = 1;
	tail->repeat = 1;
	goto optional_comma;

    case FMT_DOLLAR:
	get_fnode(&head, &tail, FMT_DOLLAR);
	tail->repeat = 1;
	goto between_desc;

    case FMT_T:   case FMT_TL:  case FMT_TR:
	if (format_lex() != FMT_POSINT) {
	    error = posint_required;
	    goto finished;
	}

	get_fnode(&head, &tail, t);
	tail->u.n = value;
	tail->repeat = 1;
	goto optional_comma;

    case FMT_I:   case FMT_B:   case FMT_O:   case FMT_Z:
    case FMT_E:   case FMT_EN:  case FMT_ES:  case FMT_D:
    case FMT_L:   case FMT_A:   case FMT_F:   case FMT_G:
	repeat = 1;
	goto data_desc;

    case FMT_H:
	if (repeat > format_string_len) {
	    error = bad_hollerith;
	    goto finished;
	}

	get_fnode(&head, &tail, FMT_STRING);

	tail->u.string.p = format_string;
	tail->u.string.length = repeat;
	tail->repeat = 1;

	format_string += repeat;
	format_string_len -= repeat;

	goto between_desc;

    case FMT_END:
	error = unexpected_end;
	goto finished;

    case FMT_BADSTRING:
	goto finished;

    case FMT_COMMA:
	goto format_item;

    case FMT_BAD:
	error = bad_fmt;
	goto finished;

    default:
	error = unexpected_element;
	goto finished;
    }

/* In this state, t must currently be a data descriptor.  Deal with
 * things that can/must follow the descriptor */

data_desc:
    switch(t) {
    case FMT_P:
	t = format_lex();
	if (t == FMT_POSINT) {
	    error = "Repeat count cannot follow P descriptor";
	    goto finished;
	}

	saved_token = t;
	get_fnode(&head, &tail, FMT_P);

	goto optional_comma;

    case FMT_L:
	t = format_lex();
	if (t != FMT_POSINT) {
	    error = posint_required;
	    goto finished;
	}

	get_fnode(&head, &tail, FMT_L);
	tail->u.n = value;
	tail->repeat = repeat;
	break;

    case FMT_A:
	t = format_lex();
	if (t != FMT_POSINT) {
	    saved_token = t;
	    value = -1;    /* Width not present */
	}

	get_fnode(&head, &tail, FMT_A);
	tail->repeat = repeat;
	tail->u.n = value;
	break;

    case FMT_D: case FMT_E: case FMT_F: case FMT_G: case FMT_EN: case FMT_ES:
	get_fnode(&head, &tail, t);
	tail->repeat = repeat;

	u = format_lex();
	if (t == FMT_F || ioparm->mode == WRITING) {
	    if (u != FMT_POSINT && u != FMT_ZERO) {
		error = nonneg_required;
		goto finished;
	    }

	} else {
	    if (u != FMT_POSINT) {
		error = posint_required;
		goto finished;
	    }
	}

	tail->u.real.w = value;

	t = format_lex();
	if (t != FMT_PERIOD) {
	    error = period_required;
	    goto finished;
	}

	t = format_lex();
	if (t != FMT_ZERO && t != FMT_POSINT) {
	    error = nonneg_required;
	    goto finished;
	}

	tail->u.real.d = value;

	if (t == FMT_D || t == FMT_F)
	    break;

	tail->u.real.e = -1;

/* Look for optional exponent */

	t = format_lex();
	if (t != FMT_E)
	    saved_token = t;

	else {
	    t = format_lex();
	    if (t != FMT_POSINT) {
		error = "Positive exponent width required in format";
		goto finished;
	    }

	    tail->u.real.e = value;
	}

	break;

    case FMT_H:
	if (repeat > format_string_len) {
	    error = bad_hollerith;
	    goto finished;
	}

	get_fnode(&head, &tail, FMT_STRING);

	tail->u.string.p = format_string;
	tail->u.string.length = repeat;
	tail->repeat = 1;

	format_string += repeat;
	format_string_len -= repeat;
	break;

    case FMT_I:  case FMT_B:  case FMT_O:  case FMT_Z:
	get_fnode(&head, &tail, t);
	tail->repeat = repeat;

	t = format_lex();

	if (ioparm->mode == READING) {
	    if (t != FMT_POSINT) {
		error = posint_required;
		goto finished;
	    }

	} else {
	    if (t != FMT_ZERO && t != FMT_POSINT) {
		error = nonneg_required;
		goto finished;
	    }
	}

	tail->u.integer.w = value;
	tail->u.integer.m = -1;

	t = format_lex();
	if (t != FMT_PERIOD)
	    saved_token = t;

	else {
	    t = format_lex();
	    if (t != FMT_ZERO && t != FMT_POSINT) {
		error = nonneg_required;
		goto finished;
	    }

	    tail->u.integer.m = value;
	}

	if (tail->u.integer.w != 0 && tail->u.integer.m > tail->u.integer.w) {
	    error = "Minimum digits exceeds field width";
	    goto finished;
	}

	break;

    case FMT_BAD:
	error = bad_fmt;
	goto finished;

    default:
	error = unexpected_element;
	goto finished;
    }

/* Between a descriptor and what comes next */

between_desc:
    t = format_lex();
    switch(t) {
    case FMT_COMMA:
	goto format_item;

    case FMT_RPAREN:
	goto finished;

    case FMT_SLASH:
	if (STD_F) {
	    error = "Missing comma";
	    goto finished;
	}

	get_fnode(&head, &tail, FMT_SLASH);
	tail->repeat = 1;
	goto optional_comma;

    case FMT_COLON:
	if (STD_F) {
	    error = "Missing comma";
	    goto finished;
	}

	get_fnode(&head, &tail, FMT_COLON);
	tail->repeat = 1;
	goto optional_comma;

    case FMT_END:
	error = unexpected_end;
	goto finished;

    default:
	saved_token = t;
	goto format_item;
    }

/* Optional comma is a weird between state where we've just finished
 * reading a colon, slash or P descriptor. */

optional_comma:
    t = format_lex();
    switch(t) {
    case FMT_COMMA:
	break;

    case FMT_RPAREN:
	goto finished;

    default:       /* Assume that we have another format item */
	if (STD_F) {
	    error = "Missing comma before edit descriptor";
	    goto finished;
	}

	saved_token = t;
	break;
    }

    goto format_item;

finished:
    return head;
}



/* format_error()-- Generate an error message for a format statement.
 * If the node that gives the location of the error is NULL, the error
 * is assumed to happen at parse time, and the current location of the
 * parser is shown.
 *
 * After freeing any dynamically allocated fnodes, generate a message
 * showing where the problem is.  We take extra care to print only the
 * relevant part of the format if it is longer than a standard 80
 * column display. */

void format_error(fnode *f, char *message) {
int width, i, j, offset;
char *p, buffer[300];

    if (f != NULL)
	format_string = f->source; 

    free_fnodes();

    st_sprintf(buffer, "%s\n", message);

    j = format_string - ioparm->format;

    offset = (j > 60) ? j - 40 : 0;

    j -= offset;
    width = ioparm->format_len - offset;

    if (width > 80)
	width = 80;

    /* Show the format */

    p = strchr(buffer, '\0');

    memcpy(p, ioparm->format + offset, width);

    p += width;
    *p++ = '\n';

    /* Show where the problem is */

    for(i=1; i<j; i++)
	*p++ = ' ';

    *p++ = '^';
    *p = '\0';

    generate_error(ERROR_FORMAT, buffer);
}



/* parse_format()-- Parse a format string.  */

void parse_format(void) {
fnode *f;

    format_string = ioparm->format;
    format_string_len = ioparm->format_len;

    saved_token = FMT_NONE;
    error = NULL;

/* Initialize variables used during traversal of the tree */

    ioparm->reversion_ok = 1;
    ioparm->reversion_flag = 0;
    ioparm->fnode_save = NULL;

/* Allocate the first format node as the root of the tree */

    f = ioparm->fnode_base = get_fnode(NULL, NULL, FMT_LPAREN);

    f->repeat = 1;
    f->current = NULL;
    f->count = 0;

    if (format_lex() == FMT_LPAREN)
	f->u.child = parse_format_list();

    else
	error = "Missing initial left parenthesis in format";

    if (error)
	format_error(NULL, error);
}



/* revert()-- Do reversion of the format.  Control reverts to the left
 * parenthesis that matches the rightmost right parenthesis.  From our
 * tree structure, we are looking for the rightmost parenthesis node
 * at the second level, the first level always being a single
 * parenthesis node.  If this node doesn't exit, we use the top
 * level. */

static void revert(void) {
fnode *f, *r;

    ioparm->reversion_flag = 1; 

    r = NULL;

    for(f=ioparm->fnode_base->u.child; f; f=f->next)
	if (f->format == FMT_LPAREN)
	    r = f;

  /* If r is NULL because no node was found, the whole tree will be used */

    ioparm->fnode_base->current = r;
    ioparm->fnode_base->count = 0;
}



/* next_format0()-- Get the next format node without worrying about
 * reversion.  Returns NULL when we hit the end of the list.
 * Parenthesis nodes are incremented after the list has been
 * exhausted, other nodes are incremented before they are returned. */

static fnode *next_format0(fnode *f) {
fnode *r;

    if (f == NULL)
	return NULL; 

    if (f->format != FMT_LPAREN) {
	f->count++;
	if (f->count <= f->repeat)
	    return f;

	f->count = 0;
	return NULL;
    }

    /* Deal with a parenthesis node */

    for(; f->count<f->repeat; f->count++) {
	if (f->current == NULL)
	    f->current = f->u.child;

	for(; f->current!=NULL; f->current=f->current->next) {
	    r = next_format0(f->current);
	    if (r != NULL)
		return r;
	}
    }

    f->count = 0;
    return NULL;
}



/* next_format()-- Return the next format node.  If the format list
 * ends up being exhausted, we do reversion.  Reversion is only
 * allowed if the we've seen a data descriptor since the
 * initialization or the last reversion.  We return NULL if the there
 * are no more data descriptors to return, which may or may not be an
 * error condition. */

fnode *next_format(void) {
format_token t;
fnode *f;

    if (ioparm->fnode_save != NULL) { /* Deal with a pushed-back format node */
	f = ioparm->fnode_save;
	ioparm->fnode_save = NULL;
	goto done;
    }

    f = next_format0(ioparm->fnode_base);

    if (f == NULL) {
	if (!ioparm->reversion_ok)
	    return NULL;

	ioparm->reversion_ok = 0;
	revert();

	f = next_format0(ioparm->fnode_base);
	if (f == NULL)
	    return NULL;

	/* Push the first reverted token and return a colon node in case
	 * there are no more data items. */

	ioparm->fnode_save = f;
	return &colon_node;
    }

    /* If this is a data edit descriptor, then reversion has become OK. */

done:
    t = f->format;

    if (!ioparm->reversion_ok && 
	(t == FMT_I || t == FMT_B || t == FMT_O  || t == FMT_Z  ||
	 t == FMT_F || t == FMT_E || t == FMT_EN || t == FMT_ES ||
	 t == FMT_G || t == FMT_L || t == FMT_A  || t == FMT_D))
	ioparm->reversion_ok = 1;

    return f;
}



/* unget_format()-- Push the given format back so that it will be
 * returned on the next call to next_format() without affecting
 * counts.  This is necessary when we've encountered a data
 * descriptor, but don't know what the data item is yet.  The format
 * node is pushed back, and we return control to the main program,
 * which calls the library back with the data item (or not). */

void unget_format(fnode *f) {

    ioparm->fnode_save = f;
}
