
/* clib.q: C/C++ extensions for the Q language
   $Id: clib.q,v 1.39 2008/02/22 13:03:35 agraef Exp $ */

/* This file is part of the Q programming system.

   The Q programming system is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option) any
   later version.

   The Q programming system is distributed in the hope that it will be
   useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */

/* NOTE: The POSIX system interface, which used to be part of this module, is
   now available as a separate 'system' module which, to reduce namespace
   pollution, is not part of the prelude anymore and thus has to be imported
   explicitly in your programs. The basic features which are so ubiquitous in
   Q programs that we retain them in clib (and thus in the prelude) are: C
   replacements for common standard library functions, additional string and
   GMP integer functions, extended file I/O (including C-style formatted I/O),
   byte strings, references, threads, filename globbing, regex and, last but
   not least, the exit function. You should be able to access these without
   further ado in any standard (i.e., "vanilla") Q installation. All other
   types and operations described in Section "Clib" of the manual are now
   implemented in the 'system' module. */

import stdlib, string, tuple;

/****************************************************************************/

/* Manifest constants. */

// Public constants exported by this module. They are only declared here;
// their values are bound by the `def' statements following this declaration.
public var const
  // data sizes (useful with byte strings); if a type is undefined then its
  // size value is set to zero
  SIZEOF_CHAR, SIZEOF_SHORT, SIZEOF_INT, SIZEOF_LONG, SIZEOF_LONG_LONG,
  SIZEOF_FLOAT, SIZEOF_DOUBLE,

  // trap action values
  SIG_IGN, SIG_DFL, SIG_TRP,

  // signal numbers
  SIGABRT, SIGALRM, SIGFPE, SIGHUP, SIGILL, SIGINT, SIGKILL, SIGPIPE,
  SIGQUIT, SIGSEGV, SIGTERM, SIGUSR1, SIGUSR2, SIGCHLD, SIGCONT, SIGSTOP,
  SIGTSTP, SIGTTIN, SIGTTOU, SIGBUS, SIGPOLL, SIGPROF, SIGSYS, SIGTRAP,
  SIGURG, SIGVTALRM, SIGXCPU, SIGXFSZ,

  // scheduling policies (setsched, getsched)
  SCHED_OTHER, SCHED_RR, SCHED_FIFO,

  // file buffering modes (setvbuf)
  IONBF, IOLBF, IOFBF,

  // file positioning modes (fseek)
  SEEK_SET, SEEK_CUR, SEEK_END;

// retrieve system parameters

// sys_vars is a private, externally implemented function without arguments.
// The tuple pattern below binds the components of its result to the data
// sizes, signal numbers, buffering modes and positioning modes, by position.
// NOTE(review): the order of the names presumably has to match the order in
// which the external module builds the tuple -- confirm before reordering or
// adding entries.
private extern sys_vars;
def (SIZEOF_CHAR, SIZEOF_SHORT, SIZEOF_INT, SIZEOF_LONG, SIZEOF_LONG_LONG,
     SIZEOF_FLOAT, SIZEOF_DOUBLE,

     SIGABRT, SIGALRM, SIGFPE, SIGHUP, SIGILL, SIGINT, SIGKILL, SIGPIPE,
     SIGQUIT, SIGSEGV, SIGTERM, SIGUSR1, SIGUSR2, SIGCHLD, SIGCONT, SIGSTOP,
     SIGTSTP, SIGTTIN, SIGTTOU, SIGBUS, SIGPOLL, SIGPROF, SIGSYS, SIGTRAP,
     SIGURG, SIGVTALRM, SIGXCPU, SIGXFSZ,

     IONBF, IOLBF, IOFBF,

     SEEK_SET, SEEK_CUR, SEEK_END) = sys_vars;

// The trap actions and scheduling policies are not taken from sys_vars; they
// are fixed codes defined right here.
// NOTE(review): the SCHED_* numbers look like module-level codes rather than
// the host's <sched.h> values (which differ between systems) -- confirm that
// the setsched/getsched implementation translates them.
def SIG_IGN = -1, SIG_DFL = 0, SIG_TRP = 1,
  SCHED_OTHER = 0, SCHED_RR = 1, SCHED_FIFO = 2;

/****************************************************************************/

/* This is provided as a replacement for the built-in 'quit' function, so that
   you can exit your program with a given exit code. */

// exit N: N is the exit code. Declared extern, i.e., the implementation is
// supplied by the external module rather than by equations in this file.
public extern exit N;

/****************************************************************************/

/* C implementations of common standard library functions. These are *much*
   faster than the originals in stdlib.q, string.q and tuple.q. */

// List and tuple operations. The stdlib:: and tuple:: qualifiers name the
// library modules whose functions are declared extern here.
public extern stdlib::append Xs Y, stdlib::cat Xs, stdlib::mklist X N,
  stdlib::nums N M, stdlib::numsby K N M, stdlib::reverse Xs,
  tuple::tuplecat Xs;

// String operations, qualified with the string module in the same manner.
public extern string::chars S, string::join DELIM Xs, string::split DELIM Xs,
  string::strcat Xs;

/* fast (albeit unstable) quicksort implementation using the qsort routine
   from the C library */

// "Unstable" means that elements which compare equal are not guaranteed to
// keep their relative order.
// NOTE(review): P is presumably the ordering predicate and Xs the list to be
// sorted -- confirm against the manual.
public extern sort P Xs;

/****************************************************************************/

/* Additional string functions. Some trivial stuff from the C library you
   might have been missing in Q. */

/* character predicates */

// Each predicate takes a single argument C, the character to classify. The
// names follow the corresponding C library routines.
public extern islower C, isupper C, isalpha C, isdigit C, isxdigit C,
  isalnum C, ispunct C, isspace C, isgraph C, isprint C, iscntrl C, isascii C;

/* convert a string to lower- or uppercase */

// Unlike the predicates above, these take a string argument S.
public extern tolower S, toupper S;

/****************************************************************************/

/* Additional integer functions from the GMP library. */

/* exact powers, integer parts of roots, powers/inverses mod K */

// NOTE(review): the roles of the arguments (base, exponent, root index) are
// not visible in this file; K is presumably the modulus, as suggested by
// "mod K" above -- confirm against the manual.
public extern pow M N, root M N, intsqrt M;
public extern powmod K M N, invmod K M;

/* number-theoretic functions: probabilistic prime test, gcd, lcm,
   factorization helper, and Jacobi symbol */

// In the order of the description above: isprime is the prime test,
// remove_factor the factorization helper and jacobi the Jacobi symbol.
public extern isprime N;
public extern gcd M N, lcm M N, remove_factor M N;
public extern jacobi M N;

/****************************************************************************/

/* Extended file functions. Provide an extended version of fopen which handles
   the `+' mode for r/w files, and various other stdio-related stuff from the
   C library. */

// fopen is declared with the global scope qualifier `::'.
// NOTE(review): presumably this makes the extended external version take the
// place of the built-in fopen -- confirm against the manual.
// fdopen takes a file descriptor FD instead of a name, freopen an existing
// file object F; fileno takes a file object F.
public extern ::fopen NAME MODE, fdopen FD MODE, freopen NAME MODE F;
public extern fileno F;

/* Set the buffering mode for a file. */

// MODE is one of the buffering mode constants IONBF, IOLBF and IOFBF.
public extern setvbuf F MODE;

/* Set the encoding of a file. CODESET must be a string denoting a valid
   encoding name for the iconv function. This affects all subsequent text
   read/write operations on the file. (Only for unicode-capable systems which
   have iconv installed.) */

public extern fconv F CODESET;

/* Get a temporary file name, or a temporary file opened in "w+b" mode which
   will be deleted automatically when it is closed. */

// Both take no arguments: tmpnam is the file name variant, tmpfile the
// variant returning an open file.
public extern tmpnam, tmpfile;

/* File positioning functions. */

// ftell and fseek are implemented externally; rewind is defined in Q below.
public extern ftell F;
public extern fseek F POS WHENCE;
public rewind F;

// Rewinding a file is a seek to position 0 in the SEEK_SET positioning mode.
rewind FP:File = fseek FP 0 SEEK_SET;

/* Alternative string input functions. */

// gets takes no arguments; fgets and fget take a file object F.
public extern gets, fgets F;
public extern fget F;

/* ungetc. */

// ungetc takes the character C only, fungetc additionally the file F.
public extern ungetc C, fungetc F C;

/* Some aliases for C aficionados. */

// Each declaration exports a function from the global scope (`::') under an
// additional name which follows the C stdio naming (`... as NAME').
public ::readc as getc, ::freadc F as fgetc;
public ::writes S as puts, ::fwrites F S as fputs;
public ::writec C as putc, ::fwritec F C as fputc;

/****************************************************************************/

/* C-style formatted I/O. These functions provide an interface to the C printf
   and scanf routines. Arguments to the printf routines and the results of the
   scanf routines are encoded as tuples, or as a single non-tuple value if
   only one item is read/written. */

// Output: ARGS carries the value(s) to be formatted according to FORMAT.
// NOTE(review): F is presumably the target file, and sprintf presumably
// yields the formatted text as a string -- confirm against the manual.
public extern printf FORMAT ARGS, fprintf F FORMAT ARGS, sprintf FORMAT ARGS;
// Input: the scanned item(s) are the result of the call.
// NOTE(review): F is presumably the source file and S the source string --
// confirm against the manual.
public extern scanf FORMAT, fscanf F FORMAT, sscanf S FORMAT;

/****************************************************************************/

/* Byte strings. The following type represents unstructured binary data
   implemented as C byte vectors. This data structure is used by the low-level
   I/O functions and other clib functions which operate on binary data. */

// The byte string type itself is declared extern, i.e., it is provided by
// the external module.
public extern type ByteStr;

// Type predicate for byte strings.
public isbytestr B;

// The type guard in the first equation picks out ByteStr values; any other
// argument is handled by the default equation.
isbytestr X:ByteStr = true;
isbytestr X = false otherwise;

/* Byte strings can be constructed from integers, floating point numbers,
   string values or lists of unsigned byte values. If X is not a list, an
   optional SIZE (as in bytestr (X,SIZE)) denotes the desired byte size of the
   object; otherwise a reasonable default size is chosen. If the specified
   size differs from the actual size of X, the result is zero-padded or
   truncated accordingly. Multiprecision integer values are encoded in the
   host byte order, with the least significant limb first. Negative integers
   are represented in 2's complement. Floating point values are encoded using
   double precision by default or if the byte count is sufficient (i.e., at
   least 8 on most systems), and using single precision otherwise. Strings are
   by default encoded in the system encoding. You can also specify the desired
   target encoding as (X,CODESET) (or (X,CODESET,SIZE) if you also need to
   specify a byte size). */

// The argument is either the value X itself or, as described above, a tuple
// of the form (X,SIZE), (X,CODESET) or (X,CODESET,SIZE).
public extern bytestr X;		// create a byte string

/* Like ordinary character strings, byte strings can be concatenated, size-
   measured, indexed, sliced and compared lexicographically. Moreover, a byte
   string can be converted back to a (multiprecision) integer, floating point
   number, string value, or a list of byte values. When converting a string
   you can specify the source encoding as in bstr (B,CODESET), otherwise the
   system encoding is assumed. */

// Primitive operations, implemented externally:
//   bcat Bs	concatenate a list of byte strings
//   bsize B	byte size of B
//   byte I B	Ith byte of B
//   bsub B I J	slice of B (bytes I..J)
//   bcmp B1 B2	compare B1 and B2
public extern bcat Bs, bsize B, byte I B, bsub B I J, bcmp B1 B2;

// Conversions, implemented externally:
//   bint B	convert to unsigned integer
//   bfloat B	convert to floating point number
//   bstr B	convert to string
public extern bint B, bfloat B, bstr B;

// Operations defined by the equations below:
//   null B	check for empty byte string
//   bytes B	convert to list
//   list B	ditto
public stdlib::null B;
public bytes B;
public ::list B;

// A byte string is empty iff its byte size is zero.
null V:ByteStr = (bsize V = 0);

// Collect the bytes at the indices 0 up to size-1, in that order.
bytes V:ByteStr = map (V!) [0..bsize V-1];

// list on byte strings is just another name for bytes.
list V:ByteStr = bytes V;

/* For convenience, the following common operators are overloaded. */

// Size and indexing map to bsize and byte (note the swapped argument order
// of byte).
#V:ByteStr = bsize V;
V:ByteStr!K:Int = byte K V;

// Concatenation and slicing map to bcat and bsub.
V:ByteStr++W:ByteStr = bcat [V,W];
sub V:ByteStr Lo Hi = bsub V Lo Hi;

// Each relational operator applies the same relation to the result of bcmp
// and zero.
(V:ByteStr = W:ByteStr) = (bcmp V W = 0);
(V:ByteStr < W:ByteStr) = (bcmp V W < 0);
(V:ByteStr > W:ByteStr) = (bcmp V W > 0);
(V:ByteStr <> W:ByteStr) = (bcmp V W <> 0);
(V:ByteStr <= W:ByteStr) = (bcmp V W <= 0);
(V:ByteStr >= W:ByteStr) = (bcmp V W >= 0);

/* As of Q 7.11, clib supports a number of additional operations which allow
   you to treat byte strings as mutable C vectors of signed/unsigned 8/16/32
   bit integers or single/double precision floating point numbers. The
   following functions provide read/write access to elements and slices of
   such C vectors. Note that the given index argument I is interpreted
   relative to the corresponding element type. Thus, e.g., get_int32 B I
   returns the Ith 32 bit integer rather than the integer at byte offset I.

   For the get_xxx functions, the index parameter may also be a pair (I,J) to
   return a slice of the given byte string instead of a single element
   (similar to sub/bsub, but interpreting indices relative to the element
   type). The put_xxx functions also accept a byte string instead of an
   element as input, and will then overwrite the corresponding slice of the
   target byte string with the given source. Similar to sub/bsub, these
   variations of get_xxx/put_xxx are "safe" in that they automatically adjust
   the given indices to fit within the bounds of the target byte string. */

/* NOTE: Integer arguments must fit into machine integers, otherwise these
   operations will fail. Integers passed for floating point arguments will be
   coerced to floating point values automatically. */

// Element/slice readers: B is the byte string, I the element index or, for
// a slice, a pair (I,J) of element indices.
public extern get_int8 B I, get_int16 B I, get_int32 B I;
public extern get_uint8 B I, get_uint16 B I, get_uint32 B I;
public extern get_float B I, get_double B I;

// Element/slice writers: X is the new element, or a byte string which
// overwrites the corresponding slice of B.
public extern put_int8 B I X, put_int16 B I X, put_int32 B I X;
public extern put_uint8 B I X, put_uint16 B I X, put_uint32 B I X;
public extern put_float B I X, put_double B I X;

/* Some convenience functions to convert between byte strings and lists of
   integer/floating point elements. */

// Byte string B -> list of elements of the indicated type.
public extern int8_list B, int16_list B, int32_list B;
public extern uint8_list B, uint16_list B, uint32_list B;
public extern float_list B, double_list B;

// List Xs of elements -> byte string holding elements of the indicated type.
public extern int8_vect Xs, int16_vect Xs, int32_vect Xs;
public extern uint8_vect Xs, uint16_vect Xs, uint32_vect Xs;
public extern float_vect Xs, double_vect Xs;

/****************************************************************************/

/* Expression references. These provide a kind of pointers to expression
   values. References can be used to create mutable data structures which can
   also be shared by different threads in a multithreaded program. */

// The reference type is declared extern, i.e., it is provided by the
// external module.
public extern type Ref;

// Type predicate for reference objects.
public isref REF;

// Only values passing the Ref type guard are references; everything else is
// handled by the default equation.
isref R:Ref = true;
isref R = false otherwise;

// Primitive operations, implemented externally:
//   ref X	initialize a reference object
//   put REF X	store a new value
//   get REF	retrieve the current value
public extern ref X;
public extern put REF X;
public extern get REF;

// Infix assignment operator.
// NOTE(review): `@ (=)' presumably gives the new operator the precedence of
// the (=) operator -- confirm against the manual.
public (:=) X Y @ (=);

// Assignment is merely an infix notation for 'put'.
R := V = put R V;

/* Sentinels are another, lazy kind of reference: a sentinel is evaluated
   when it is garbage-collected. There are no other access operations.
   Sentinels are a means to implement ordinary Q data structures which perform
   automatic cleanup in the same fashion as some built-in and external data
   types. */

// The sentinel type is declared extern, i.e., it is provided by the external
// module.
public extern type Sentinel;

// Type predicate for sentinel objects.
public issentinel S;

// Only values passing the Sentinel type guard are sentinels; everything else
// is handled by the default equation.
issentinel X:Sentinel = true;
issentinel X = false otherwise;

// Create a sentinel object. The function is declared as a special form.
// NOTE(review): presumably so that X is not evaluated when the sentinel is
// created -- confirm against the manual.
public extern special sentinel X;

/****************************************************************************/

/* Multithreading. These operations are in close correspondence with POSIX
   1003.1b. However, some operations are named differently, and semaphores
   provide the extra functionality to send data from one thread to another.
   Mutexes are also supported, mostly for the purpose of handling critical
   sections involving operations with side-effects (I/O etc.). Mutexes are
   *not* required to make conditions work since these have their own internal
   mutex handling. For more information on POSIX threads, please refer to the
   corresponding section in the UNIX manual. */

/* These functions will only work as advertised if the interpreter has been
   built with POSIX thread support (--with-pthread). It must also be noted
   that in the current implementation the interpreter effectively serializes
   multithreaded scripts on the reduction level and thus user-level threads
   cannot really take advantage of multi-processor machines. */

/* Thread creation and management. */

// The thread handle type is declared extern, i.e., it is provided by the
// external module.
public extern type Thread;

// Type predicate for thread objects.
public isthread THREAD;

// Only values passing the Thread type guard are thread handles; everything
// else is handled by the default equation.
isthread T:Thread = true;
isthread T = false otherwise;

// Externally implemented accessors:
//   thread_no THREAD	thread number
//   this_thread	handle of the current thread
public extern thread_no THREAD;
public extern this_thread;

// Thread handles are compared by comparing their thread numbers.
(A:Thread = B:Thread) = (thread_no A = thread_no B);
(A:Thread < B:Thread) = (thread_no A < thread_no B);
(A:Thread > B:Thread) = (thread_no A > thread_no B);
(A:Thread <> B:Thread) = (thread_no A <> thread_no B);
(A:Thread <= B:Thread) = (thread_no A <= thread_no B);
(A:Thread >= B:Thread) = (thread_no A >= thread_no B);

/* The main thread (private). Don't remove this definition, it is needed to
   prevent the main thread handle from being garbage collected! */

// Bound once to the value of this_thread when this definition is evaluated;
// the constant is private and not referenced by any equation in this file.
private var const MAIN_THREAD = this_thread;

// thread is declared as a special form.
// NOTE(review): presumably so that X is not evaluated by the caller but in
// the newly created thread -- confirm against the manual.
public extern special thread X;		// start a thread, return its handle
public extern return X;			// terminate thread, return X as result
public extern cancel THREAD;		// cancel THREAD
public extern result THREAD;		// wait for THREAD, return its result
public extern yield;			// allow context switch

public extern active THREAD;		// check if THREAD is active
public extern canceled THREAD;		// check if THREAD was canceled

/* Realtime scheduling. USE WITH CARE. */

// POL is one of the scheduling policy constants SCHED_OTHER, SCHED_RR and
// SCHED_FIFO. NOTE(review): PRIO is presumably the priority -- confirm.
public extern setsched THREAD POL PRIO;	// set scheduling parameters
public extern getsched THREAD;		// get scheduling parameters

/* Mutexes. USE WITH CARE. */

// The mutex type is declared extern, i.e., it is provided by the external
// module.
public extern type Mutex;

// Type predicate for mutex objects.
public ismutex MUTEX;

// Only values passing the Mutex type guard are mutexes; everything else is
// handled by the default equation.
ismutex M:Mutex = true;
ismutex M = false otherwise;

// Constructors, implemented externally:
//   mutex			standard, i.e., fast mutex object
//   errorchecking_mutex	error checking mutex object
//   recursive_mutex		recursive mutex object
public extern mutex, errorchecking_mutex, recursive_mutex;

// Operations, implemented externally:
//   lock MUTEX		lock MUTEX
//   unlock MUTEX	unlock MUTEX
//   try MUTEX		try MUTEX, (MUTEX,TIME) for timeout
public extern lock MUTEX, unlock MUTEX, try MUTEX;

/* Conditions. */

// The condition type is declared extern, i.e., it is provided by the
// external module.
public extern type Condition;

// Type predicate for condition objects.
public iscondition COND;

// Only values passing the Condition type guard are conditions; everything
// else is handled by the default equation.
iscondition C:Condition = true;
iscondition C = false otherwise;

// Constructor, implemented externally: a new condition object.
public extern condition;

// Operations, implemented externally:
//   signal COND	signal COND
//   broadcast COND	broadcast COND
//   await COND		wait for COND, or (COND,TIME)
public extern signal COND, broadcast COND, await COND;

/* Semaphores (semaphore queues, actually). */

// The semaphore type is declared extern, i.e., it is provided by the
// external module.
public extern type Semaphore;

// Type predicate for semaphore objects.
public issemaphore SEM;

// Only values passing the Semaphore type guard are semaphores; everything
// else is handled by the default equation.
issemaphore S:Semaphore = true;
issemaphore S = false otherwise;

// Constructors, implemented externally:
//   semaphore			semaphore object
//   bounded_semaphore MAX	bounded semaphore object
public extern semaphore, bounded_semaphore MAX;

// Queue operations, implemented externally:
//   post SEM X		enqueue a value
//   get SEM		dequeue a value
//   try SEM		try SEM, (SEM,TIME) for timeout
public extern post SEM X, get SEM, try SEM;

// Queue information, implemented externally:
//   get_size SEM	get the current queue size
//   get_bound SEM	get the max queue size (0 if none)
public extern get_size SEM, get_bound SEM;

// The size operator applied to a semaphore yields the current queue size.
#S:Semaphore = get_size S;

/****************************************************************************/

/* Filename globbing using the shell's wildcard syntax (*, ? etc.). */

// Both functions are implemented externally. Note that the pattern is the
// first argument of fnmatch and the string to be checked the second.
public extern fnmatch PATTERN S;	// check whether S matches PATTERN
public extern glob PATTERN;		// return the list of all filenames
					// matching PATTERN

/****************************************************************************/

/* Regular expression matching using "extended" (egrep-like) syntax as defined
   by POSIX 1003.2/D11.2. */

/* 1. Low-level interface. The following functions are directly implemented in
   C using the POSIX regex functions. The regmatch function searches for the
   first match, regnext for the next, and regdone terminates a global search
   still in progress. The OPTS string allows you to specify various options
   for the search. In particular, "g" denotes a global, "i" a
   case-insensitive, and "n" a "multi-line" search; see the documentation for
   further details. */

// regmatch takes the option string, the regular expression and the subject
// string, in that order. regnext and regdone take no arguments; they operate
// on the search started by regmatch.
public extern regmatch OPTS REGEX S, regnext, regdone;

/* 2. High-level interface. The regex function evaluates, for each match of
   the given regular expression in the given string, the special EXPR
   argument, and returns the collection of all results as a list. The OPTS
   argument has the same meaning as with the low-level functions. In
   particular, if the "g" option is omitted, then only the first match will be
   reported, if any. */

// regex is a special form: OPTS, REGEX and S are marked with `~', EXPR is
// not. NOTE(review): presumably `~' marks the arguments which are evaluated
// as usual, while EXPR is passed unevaluated and only evaluated where the
// equations below use it -- confirm against the manual.
public special regex ~OPTS ~REGEX ~S EXPR;

// Private helpers: regex_next collects the results for the matches found,
// check is a three-way conditional on the outcome of a match operation.
private special regex_next ~Xs EXPR, check ~P X Y;

// Start the search. If regmatch yields true, the results are collected by
// regex_next, starting with the result for the first match; regex_next
// builds its list back to front, hence the reverse. If regmatch yields
// false, the result is the empty list.
regex OPTS:String REGEX:String S:String EXPR
				= check (regmatch OPTS REGEX S)
				  (reverse (regex_next [EXPR] EXPR)) [];

// Xs holds the results gathered so far, most recent first. If regnext yields
// true, recurse with another EXPR result pushed onto Xs; if it yields false,
// Xs is the final (still reversed) list.
regex_next Xs EXPR		= check regnext
				  (regex_next [EXPR|Xs] EXPR) Xs;

// check P X Y: X if P is true, Y if P is false. If P is not a truth value,
// P itself is the result, so that anything else coming back from regmatch or
// regnext (presumably a `regerr MSG' term, for instance) is passed on to the
// caller of regex.
check P:Bool X Y		= X if P;
				= Y otherwise;
check P X Y			= P otherwise;

/* 3. Match state information. These functions are typically invoked after
   regmatch, regnext, or in the EXPR argument of regex, to return information
   about the current match. The match state is maintained on a hidden stack
   manipulated with the regmatch/regnext/regdone functions, hence multiple
   nested searches are possible. */

// regstart, regskip and regs take no arguments; reg, regpos and regend take
// one argument N.
// NOTE(review): N presumably selects a parenthesized subexpression of the
// current match -- confirm against the manual.
public extern regstart, regskip, reg N, regpos N, regend N, regs;

/* An expression of the form `regerr MSG', where MSG is the error message, is
   used to return abnormal error conditions such as bad regular expression
   syntax. You can redefine `regerr' as appropriate for your application. */

// Only the symbol is declared; no equations for regerr are given in the part
// of the file visible here, which is what leaves it open to redefinition by
// the application.
public regerr MSG;
