# ------------------------------------------------------------------------------------
# configuration files for the program arka, 23 03 2001
# ------------------------------------------------------------------------------------
# FIRST PART: global configuration file

file_history_size=5
command_history_size=20

# ------------------------------------------------------------------------------------
# SECOND PART: global program descriptions
# This part contains descriptions of the command line utilities arka is supposed
# to run. You can create your own descriptions and either put them in this file,
# or in a file in your HOME directory -- $HOME/.arka/programs
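# The format is not formally specified here. Judging by the entries below, a
# program header reads (executable)(menu title)(menu path)(short summary) and
# each option line reads (flag)(type)(default)(help text), optionally followed
# by (required).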

# rc program options file for create
# just a joke and example
# start(program)(gp_create)(create)(utilities/optional)(create a useful program written in C)
#	start(options)
#	(-a)()()(use pure ANSI C)
#	(-n)(int)(0)(maximal number of bugs. If 0, there is no limit)
#	(-h)()()(print a brief description of a program and exit)
#	(-b)(float)(1)(bugs / 100 lines of code)
#	(-t)()()(use telepathy to determine what program should be written)
#	()(hidden)(%input)(file with specification)(required)
#end(options)
#
#start(description)
#The "create" program takes as input a description of a program and creates
#a program written in C which does the specified task. If you have troubles
#writing a specification, you might want to use the -t option.
#
#Here is an example description:
#--example start here:
#Umm, I need, umm, a program for parsing rcfiles. You know what I mean.
#--end of the example input file
#end(description)
#
#end(program)

# gp_acc: encode sequences and compute their auto-cross-correlation (ACC).
# The last option, ()(invisible)(%input), presumably hands the contents of the
# input window to the program without showing a widget -- TODO confirm.
# NOTE(review): the title field still reads "corellation"; it is left as-is in
# case titles are used as lookup keys elsewhere.
start(program)(gp_acc)(Auto-cross-corellation)(GP/DNA-RNA/statistics)(Encoding and auto-cross-correlating sequences)
	start(options)
		(-e)()()(encode only, do not compute the ACC)
		(-l)(int)()(print a header with variable descriptions for sequence length equal to [value])
		(-p)(int)()(crossterm to position [value])
		(-q)()()(quiet, suppress error messages)
		()(invisible)(%input)
	end(options)

	start(description)
		gp_acc turns the sequence into a multi-dimensional, numerical representation
		which can be used in further statistical analyses.
	end(description)
end(program)


# gp_dimer: count frequencies of nucleotide pairs, optionally separated by a
# fixed distance (-l, default 0).
start(program)(gp_dimer)(Dimer frequencies)(GP/DNA-RNA/statistics)(count frequencies of nucleotide pairs)
	start(options)
		(-l)(int)(0)(distance between nucleotides)
		(-q)()()(quiet, suppress error messages)
		()(invisible)(%input)
	end(options)

	start(description)
		Calculate dimer /AA, AC, AG... TG, TT/ frequencies in a nucleotide
		sequence. If the first parameter /distance between nucleotides/ is greater
		than zero, it will calculate frequencies of patterns which have the form
		ANN..NNA, ANN..NNC, ANN..NNG, where NN..NN represents any nucleotide
		sequence of length determined by this parameter.
	end(description)
end(program)


# gp_pattern: count frequencies of nucleotide patterns of a fixed length
# (-l, default 3) in DNA/RNA sequences.
start(program)(gp_pattern)(Pattern frequencies)(GP/DNA-RNA/statistics)(count frequencies of patterns in DNA/RNA sequences)
	start(options)
		(-l)(int)(3)(length of the pattern)
		(-q)()()(quiet, suppress error messages)
		()(invisible)(%input) 
	end(options)

	start(description)
		Calculate frequencies of nucleotide patterns. The only parameter sets the
		pattern length used.
	end(description)
end(program)


# gp_scan: compute auto-cross-correlation profiles along a sequence.
# -z / -Z select the first and second variable descriptor by number.
start(program)(gp_scan)(ACC scan)(GP/DNA-RNA/statistics)(Compute auto-cross-correlation profiles)
	start(options)
		(-l)(int)()(set lag to value)
		(-w)(int)()(set window width)
		(-s)()()(print also the sequence)
		(-p)()()(print also the lagged parameters)
		(-z)(int)()(value is the number of the first variable descriptor)
		(-Z)(int)()(value is the number of the second variable descriptor)
		()(invisible)(%input)
	end(options)

	start(description)
		No description yet.
	end(description)
end(program)


# gp_tm: melting temperature (Tm) of DNA sequences. The description names the
# nearest neighbor method; -t switches to the 4*GC+2*AT rule.
# Units are given in the help texts: -M in mM, -c in M.
start(program)(gp_tm)(Melting temperature)(GP/DNA-RNA/properties)(Determine the Tm of a sequence)
	start(options)
		(-t)()()(use the 4*GC+2*AT thumb rule)
		(-s)()()(use symmetry correction for self complementary molecules)
		(-M)(float)()(set nucleic acid concentration to [value] mM)
		(-c)(float)()(set salt concentration to [value] M)
		(-N)()()(show sequence names)
		()(invisible)(%input) 
	end(options)

	start(description)
		Calculates the melting temperature of a DNA sequence using the nearest neighbor
		method.
	end(description)
end(program)


# gp_cusage: print the codon usage of protein-coding DNA / RNA sequences.
# -n and -u choose between the two output layouts named in their help texts.
start(program)(gp_cusage)(Codon usage)(GP/proteins)(Print out codon usage of sequence/s)
	start(options)
		(-c)(file)()(read the optional codon usage table)
		(-o)()()(print only the codon table)
		(-n)()()(output is a *N*icely formatted codon table)
		(-u)()()(output is an *U*gly list of codons)
		()(invisible)(%input)
	end(options)

	start(description)
		Print out codon usage of a DNA / RNA sequence encoding a protein.
	end(description)
end(program)


# gp_findorf: report open reading frames (ORFs) found in DNA / RNA sequences.
# -m defaults to 100; -M has no default (help text: unlimited).
# The -p entry below is disabled (commented out).
start(program)(gp_findorf)(Find ORFs)(GP/proteins)(Print ORFs found in a sequence)
	start(options)
		(-m)(int)(100)(set minimal ORF length to value)
		(-M)(int)()(set maximal ORF length to value [default - unlimited])
		(-o)(string)()(s: print the ORF protein sequences; n: show the ORF positions)
		(-c)(file)()(read the optional codon usage table)
#		(-p)(string)()(n: will print out a formated codon table & exit; u: will print out the codon table & exit)
		()(invisible)(%input) 
	end(options)

	start(description)
		Find ORFs in a DNA / RNA sequence
	end(description)
end(program)


# gp_matrix: promoter search with a score matrix (Hertz matrix).
# Help texts avoid parentheses inside a field and use /.../ instead, as the
# rest of this file does, so that a field cannot be closed prematurely.
start(program)(gp_matrix)(Promoter search)(GP/DNA-RNA)(Search for promoters using the Hertz matrix)
	start(options)
		(-M)()()(compute a new matrix)
		(-m)(file)()(take the matrix values from file m_file)
		(-G)(float)()(adjust matrix to GC contents of [value] %)
		(-g)()()(ignore gap penalties)
		(-t)()()(assume that the transcription start is experimentally defined)
		(-T)(float)()(set the threshold value /default -99/)
		(-X)(string)()(limit=value. Set gap limits. 'limit' can be 'min1', 'max1', 'min10', 'max10'.)
		(-D)(file)()(read GC distribution from a file)
		(-N)()()(sequence names will be shown)
		(-S)()()(do not show the sequences)
		()(invisible)(%input)
	end(options)

	start(description)
		Searches for promoters using a score matrix. For more details see Weiner et
		al., 2000, Nucleic Acids Res. 28:4488-4496
	end(description)
end(program)


# gp_primer: stem/loop and dimer structures of oligonucleotides (FASTA input).
# -i restricts the output to the strongest structures; -m caps the number of
# structures shown (0 = all).
start(program)(gp_primer)(Oligonucleotide properties)(GP/DNA-RNA/properties)(Calculate stem/loop and dimer structures of a primer)
	start(options)
		(-i)()()(print only the parameters for the single strongest structure)
		(-m)(int)()(set the maximal number of shown structures /0 for all/)
		()(invisible)(%input)
	end(options)
	start(description)
		Calculates the stem and loop structures as well as possible dimers for an
		oligonucleotide sequence in FASTA format. If the option -i is used, then for
		each sequence, you will get the kcal/mol for both the strongest dimer and
		the strongest stem & loop structure.
	end(description)
end(program)


# gp_seq2prot: translate DNA / RNA sequences into protein sequences.
# -c supplies an alternative codon table; without it the standard code is used
# (per the description below).
start(program)(gp_seq2prot)(DNA to protein)(GP/DNA-RNA)(Convert DNA sequence to protein sequence)
	start(options)
		(-c)(file)()(read the optional codon usage table)
		(-l)()()(be liberal about stop / start codons)
		(-p)(string)()(n: will print out a nicely formatted codon table & exit; u: will print out a simple codon table & exit)
		()(invisible)(%input)
	end(options)

	start(description)
		Converts a DNA / RNA sequence into a protein sequence using the specified
		genetic code or standard if -c option is left empty.
	end(description)
end(program)


# gp_deform: deformability scan along a DNA sequence; -w sets the averaging
# window size.
start(program)(gp_deform)(Deformability scan)(GP/DNA-RNA/properties)(deformability scan)
	start(options)

		(-w)(int)()(averaging window size)
		()(invisible)(%input)
	end(options)

	start(description)
		Performs a deformability scan of a DNA sequence using a set of physical
		DNA parameters.
	end(description)
end(program)


# gp_gc: GC contents of DNA / RNA sequences.
# The summary field uses "sequence/s" (as other entries in this file do) rather
# than "sequence(s)", so that no parenthesis appears inside a field.
start(program)(gp_gc)(GC contents)(GP/DNA-RNA/properties)(Determine the gc contents of sequence/s)
	start(options)
		(-t)()()(will calculate the total GC% of all read sequences)
		(-m)()()(will show only mean GC% of all read sequences)
		(-D)(int)()(for each sequence, a distribution of GC with window size equal to value is printed.)
		()(invisible)(%input)
	end(options)

	start(description)
		Calculates the GC contents of DNA / RNA sequences.
	end(description)
end(program)


# gp_mkmtx: build a nucleotide frequency matrix from a set of co-aligned
# sequences. -a prints raw counts, -g normalises by the expected frequency at
# a given GC%, -l disables the logarithmic scaling.
start(program)(gp_mkmtx)(Create a frequency matrix)(GP/DNA-RNA/statistics)(Compute a nucleotide frequency matrix)
	start(options)
		(-a)()()(Instead of frequencies, print just the numbers of occurrences)
		(-g)(float)()(divide each frequency by the expected frequency of the given nucleotide at the GC% [value])
		(-l)()()(do not apply a logarithmic scaling of the frequencies)
		()(invisible)(%input)
	end(options)

	start(description)
		Create a frequency matrix of oligonucleotides at a given position in a set
		of sequences. Sequences are assumed to be co-aligned.
	end(description)
end(program)


# gp_qs: search the input window for a query sequence.
# The query is a positional argument (empty flag field). It is typed "string"
# like the other value-carrying positional arguments in this file; an empty
# type is what valueless switches use. It is not marked (required) because -i
# reads the queries from standard input instead.
start(program)(gp_qs)(Quick sequence search)(GP)(Look for a sequence in the input window)
	start(options)
		(-i)()()(query sequences are read from standard input)
		(-n)()()(allow not exact binding)
		()(string)()(sequence to search for; HUSAR wildcards /N,M,R etc./ are allowed)
		()(invisible)(%input)
	end(options)

	start(description)
		Searches the input window for a specific sequence.
	end(description)
end(program)


# gp_shift: print shifted sequence positions.
# NOTE(review): -3 is declared as a plain switch yet carries (required), and no
# option for the shift amount is listed -- confirm against gp_shift's usage.
# The description section is empty.
start(program)(gp_shift)(Shift sequence positions)(GP/DNA-RNA)(Print shifted sequence positions)
	start(options)
		(-3)()()(shift relative to sequence 3' end)(required)
		()(invisible)(%input) 
	end(options)

	start(description)
	end(description)
end(program)


# gp_digest: restriction site analysis.
# The enzyme name is a required positional argument (empty flag field); -o
# selects the output type by a one-letter code listed in its help text.
# The description section is empty.
start(program)(gp_digest)(Restriction site analysis)(GP/DNA-RNA/properties)(Restriction site analysis)
	start(options)
		(-e)(file)()(use alternate enzyme file)
		(-o)(string)()(output type: a: ascii; p: position; s: sequences; f: fragments; l: lengths; n: number)
		()(string)()(enzyme name)(required)
		()(invisible)(%input) 
	end(options)

	start(description)
	end(description)
end(program)


# gp_getseq: cut a fragment out of every sequence in the input window.
# The two required positional integers are the fragment boundaries ("from..."
# and "...to"); the description calls them "start" and "end".
# The -r and -l entries are disabled (commented out).
start(program)(gp_getseq)(Cut out sequence fragments)(GP/DNA-RNA)(Get a sequence fragment)
	start(options)
#		(-r)()()(only reverse the sequence)
#		(-l)(file)()(take a list of sequences from file [name] (use '-' for standard input))
		(-n)(string)()(use alternate name for the sequence)
		()(int)()(from...)(required)
		()(int)()(...to)(required)
		()(invisible)(%input)
	end(options)

	start(description)
		Given positions "start" and "end", for each sequence in the input window,
		this program will print out the sequence which lies between these two
		positions. For example, if start is "10" and end is "20", the program will
		print out the nucleotides starting from the 10th nucleotide and ranging to
		the 20th nucleotide -- and this for each single sequence found in the input
		window.
	end(description)
end(program)


# gp_randseq: cut random fragments out of the sequence in the input window.
# -l sets the fragment length, -n the number of fragments generated.
start(program)(gp_randseq)(Random sequences)(GP/DNA-RNA)(Generate random sequences)
	start(options)
		(-l)(int)()(random sequence length)
		(-n)(int)()(number of generated sequences)
		()(invisible)(%input) 
	end(options)

	start(description)
		Program for producing random sequences. From the sequence found in the input
		window, gp_randseq will cut out random sequence fragments of the specified
		length.
	end(description)
end(program)


# gp_slen: print the length of each sequence; -m prints mean length and SE,
# -v prints version information and exits.
start(program)(gp_slen)(Sequence length)(GP/DNA-RNA/statistics)(Print out sequence length/s)
	start(options)
		(-m)()()(print mean length and SE)
		(-v)()()(print version information & exit)
		()(invisible)(%input) 
	end(options)

	start(description)
		Print sequence length.
	end(description)
end(program)


# ---------------------------------------------------------------------------
# Some GNU utilities


# wc: count characters, lines, words or the longest line length of the input.
# The %input entry carries a fourth field ("input") here, unlike most entries
# in this file; the description section is empty.
start(program)(wc)(words count)(Unix utilities/text)(count words / lines / characters in a file)
	start(options)
		(-c)()()(count characters / bytes)
		(-l)()()(count lines)
		(-L)()()(count the length of the longest line)
		(-w)()()(count the number of words)
		()(invisible)(%input)(input)
	end(options)
	start(description)
	end(description)
end(program)


# paste: merge corresponding lines of several files.
# The file list is a visible string field pre-filled with %input, which stands
# for the input window (see its help text), so further files can be appended.
start(program)(paste)(paste)(Unix utilities/text)(merge lines of files)
	start(options)
		(--serial)()()(serial: paste one file at a time instead of in parallel)
		(-d)(string)()(select delimiters other than TABs)
		()(string)(%input)(file list; use %input for the input window)
	end(options)
	start(description)
Write lines consisting of the sequentially corresponding lines from each FILE,
separated by TABs, to standard  output. With no FILE, or when FILE is -, read
standard input.

	end(description)
end(program)


# fold: wrap each input line to a given width (-w; 80 columns per the help
# text when not set).
start(program)(fold)(Wrap lines)(Unix utilities/text)(wrap each input line to fit in specified width)
	start(options)
		(-b)()()(Count bytes rather than columns)
		(-s)()()(Break at spaces)
		(-w)(int)()(Use <width> rather than 80 columns)
		()(invisible)(%input)
	end(options)
	start(description)
	Wrap input lines in each FILE (standard input by default),
	writing to standard output.
	end(description)
end(program)

# tsort: topological sort of the input; no user-visible options, only the
# invisible %input entry.
start(program)(tsort)(Topological sort)(Unix utilities/text)(Perform topological sort)
	start(options)
		()(invisible)(%input)
	end(options)
	start(description)
       Write  totally  ordered  list  consistent with the partial
       ordering in FILE.  With no FILE, or when FILE is  -,  read
       standard input.
	end(description)
end(program)


# nl: number the lines of the input.
# <style> and <format> placeholders in the help texts refer to the STYLE and
# FORMAT tables in the description below.
start(program)(nl)(Number lines)(Unix utilities/text)(Number lines of files)
	start(options)
		(-b)(string)()(Body numbering is <style>)
		(-d)(string)()(Use <CC> to separate logical pages)
		(-f)(string)()(Footer numbering is <style>)
		(-h)(string)()(Header numbering is <style>)
		(-i)(int)()(Line number increment on each line)
		(-l)(int)()(Group of <number> empty lines counted as one)
		(-n)(string)()(Insert line numbers according to <format>)
		(-p)()()(Do not reset line numbering at logical pages)
		(-s)(string)()(Add <string> after /possible/ line number)
		(-v)(int)()(First line number on each logical page)
		(-w)(int)()(Use <number> columns for line numbers)
		()(invisible)(%input)
	end(options)
	start(description)
       By  default,  selects  -v1  -i1 -l1 -sTAB -w6 -nrn -hn -bt
       -fn.  CC are two delimiter characters for separating
       logical  pages,  a missing second character implies :.
       Type \\ for \.  STYLE is one of:

       a      number all lines

       t      number only nonempty lines

       n      number no lines

       pREGEXP
              number only lines that contain a match for REGEXP

       FORMAT is one of:

       ln     left justified, no leading zeros

       rn     right justified, no leading zeros

       rz     right justified, leading zeros
	end(description)
end(program)

# find: shorthand front end for GNU find.
# The start directory is the first (positional) entry. The invisible -print
# entry with a single-space default is presumably always appended to the
# command line -- TODO confirm.
start(program)(find)(Find files)(Unix utilities)(Find files and directories)
	start(options)
		()(file)()(start searching in this directory)
		(-maxdepth)(int)()(descend at most <value> levels)
		(-mindepth)(int)()(do not apply any tests or actions at levels below <value>)
		(-name)(string)()(Base of the file name matches the <pattern>)
		(-newer)(file)()(file was modified more recently than <file>)
		(-exec)(string)()(execute command /remember to add a semicolon -- ';' at the end!/)
		(-print)(invisible)( )()
	end(options)
	start(description)
		A shorthand GUI for the GNU version of find program.
	end(description)
end(program)


# grep: print the lines of the input that match a pattern.
# The pattern itself is given through -e; -A/-B/-C take a line count.
start(program)(grep)(grep)(Unix utilities)(Print lines matching a pattern)
	start(options)
		(-A)(int)()(Print <num> lines of trailing context after matching lines)
		(-B)(int)()(Print <num> lines of leading context before matching lines)
		(-C)(int)()(Print <num> lines of output context)
		(-c)()()(Suppress normal output; instead print a count of matching lines)
		(-i)()()(Ignore case)
		(-v)()()(Invert the sense of matching to select non-matching lines) 
		(-w)()()(Select only those lines containing matches that form whole words)
		(-x)()()(Select only those matches that exactly match the whole line)
		(-e)(string)()(Pattern to search for)
		()(invisible)(%input)
	end(options)
	start(description)
		A basic GUI interface for the GNU grep program.
	end(description)
end(program)


# egrep: same option set as the grep entry, but runs egrep, which (per the
# description) uses extended regular expressions.
start(program)(egrep)(egrep)(Unix utilities)(Print lines matching a pattern)
	start(options)
		(-A)(int)()(Print <num> lines of trailing context after matching lines)
		(-B)(int)()(Print <num> lines of leading context before matching lines)
		(-C)(int)()(Print <num> lines of output context)
		(-c)()()(Suppress normal output; instead print a count of matching lines)
		(-i)()()(Ignore case)
		(-v)()()(Invert the sense of matching to select non-matching lines) 
		(-w)()()(Select only those lines containing matches that form whole words)
		(-x)()()(Select only those matches that exactly match the whole line)
		(-e)(string)()(Pattern to search for)
		()(invisible)(%input)
	end(options)
	start(description)
		A basic GUI interface for the GNU egrep program.
		In contrast to grep, egrep uses extended unix regular expressions.
	end(description)
end(program)



# -------------------------------------------------------------------------
# FASTA package programs
# -------------------------------------------------------------------------

# fasta: compare a protein or DNA query against a sequence library.
# Argument order at the end of the option list: free-form extra options, the
# query (%input), then the library file. The invisible -Q entry with a
# single-space default is presumably always passed -- TODO confirm.
# Lines starting with '#' inside the option list are disabled options.
start(program)(fasta)(fasta)(FASTA)(scan a protein or DNA sequence library for similar sequences)
	start(options)
		(-a)()()(Show the sequences in their entirety)
		(-A)()()(Force the use of the slower unlimited Smith-Waterman alignment)
		(-b)(int)()(Number of similarity scores to show)
#		(-c)(int)()()
		(-d)(int)()(Number of top alignments to be shown)
		(-E)(float)()(Expectation value threshold)
		(-f)(float)()(Penalty for the first residue in a gap)
		(-g)(float)()(Penalty for additional residues in a gap)
#		(-h)(float)()(Penalty for a +1 / -1 frameshift)
		(-H)()()(Do not display histogram for similarity scores)
		(-i)()()(Search with the reverse-complement of a sequence)
#		(-k)(float)()()
		(-l)(file)()(The name of the library menu file)
#		(-L)()()(Display more information about the library sequence in the alignment)
#		(-m)(int)()(Alternate display of matches and mismatches /see man page/)
#		(-n)()()(Force the query sequence to be treated as DNA sequence)
		(-Q)(invisible)( )
#		(-r)()()()
#		(-s)(file)()(Name of the alternate matrix file)
		(-w)(int)()(Output line length /default 60, maximal 200/)
		(-x)(string)()("offset1 offset2" start numbering with offset1 and offset2)
#		(-y)(int)()(Set the bandwidth used for optimisation)
		(-z)()()(Do not do statistical significance calculation)
#		(-3)()()(Translate only the three forward frames)
		()(string)()(Additional options you'd like to add)
		()(invisible)(%input)
		()(file)()(Query database file)

	end(options)
	start(description)
fasta  is used to compare a protein or DNA sequence to all
of the entries in a sequence library.  For example,  fasta
can  compare a protein sequence to all of the sequences in
the NBRF PIR protein sequence database.  fasta will
automatically decide whether the query sequence is DNA or
protein by reading the query sequence as protein  and
determining  whether  the `amino-acid composition' is more than
85% A+C+G+T.  fasta uses an improved version of the  rapid
sequence  comparison  algorithm  described  by  Lipman and
Pearson (Science, (1985) 227:1427) that  is  described  in
Pearson and Lipman, Proc. Natl. Acad. Sci. USA, (1988) 85:2444.
	end(description)
end(program)
#

# fastx: compare a DNA query against a protein sequence database.
# Argument order at the end of the option list: free-form extra options, the
# query (%input), then the database file. The invisible -Q entry with a
# single-space default is presumably always passed -- TODO confirm.
# Lines starting with '#' inside the option list are disabled options.
start(program)(fastx)(fastx)(FASTA)(compare a DNA sequence to a protein sequence database)
	start(options)
		(-a)()()(Show the sequences in their entirety)
		(-A)()()(Force the use of the slower unlimited Smith-Waterman alignment)
		(-b)(int)()(Number of similarity scores to show)
#		(-c)(int)()()
		(-d)(int)()(Number of top alignments to be shown)
		(-E)(float)()(Expectation value threshold)
		(-f)(float)()(Penalty for the first residue in a gap)
		(-g)(float)()(Penalty for additional residues in a gap)
#		(-h)(float)()(Penalty for a +1 / -1 frameshift)
		(-H)()()(Do not display histogram for similarity scores)
		(-i)()()(Search with the reverse-complement of a sequence)
#		(-k)(float)()()
		(-l)(file)()(The name of the library menu file)
#		(-L)()()(Display more information about the library sequence in the alignment)
#		(-m)(int)()(Alternate display of matches and mismatches /see man page/)
#		(-n)()()(Force the query sequence to be treated as DNA sequence)
		(-Q)(invisible)( )
#		(-r)()()()
#		(-s)(file)()(Name of the alternate matrix file)
		(-w)(int)()(Output line length /default 60, maximal 200/)
		(-x)(string)()("offset1 offset2" start numbering with offset1 and offset2)
#		(-y)(int)()(Set the bandwidth used for optimisation)
		(-z)()()(Do not do statistical significance calculation)
#		(-3)()()(Translate only the three forward frames)
		()(string)()(Additional options you'd like to add)
		()(invisible)(%input)
		()(file)()(Query database file)

	end(options)
	start(description)
fastx compares a DNA sequence to a protein sequence database, translating the
DNA sequence in three frames and allowing frameshifts in the alignment.
	end(description)
end(program)
#



# tfasta: compare a protein query against a DNA library translated on the fly.
# Unlike the fasta/fastx entries, -h and -3 are enabled here and -x is disabled.
# The invisible -Q entry with a single-space default is presumably always
# passed -- TODO confirm.
start(program)(tfasta)(tfasta)(FASTA)(compare a protein sequence to a DNA sequence library, translating the DNA sequence library `on-the-fly')
	start(options)
		(-a)()()(Show the sequences in their entirety)
		(-A)()()(Force the use of the slower unlimited Smith-Waterman alignment)
		(-b)(int)()(Number of similarity scores to show)
#		(-c)(int)()()
		(-d)(int)()(Number of top alignments to be shown)
		(-E)(float)()(Expectation value threshold)
		(-f)(float)()(Penalty for the first residue in a gap)
		(-g)(float)()(Penalty for additional residues in a gap)
		(-h)(float)()(Penalty for a +1 / -1 frameshift)
		(-H)()()(Do not display histogram for similarity scores)
		(-i)()()(Search with the reverse-complement of a sequence)
#		(-k)(float)()()
		(-l)(file)()(The name of the library menu file)
#		(-L)()()(Display more information about the library sequence in the alignment)
#		(-m)(int)()(Alternate display of matches and mismatches /see man page/)
#		(-n)()()(Force the query sequence to be treated as DNA sequence)
		(-Q)(invisible)( )
#		(-r)()()()
#		(-s)(file)()(Name of the alternate matrix file)
		(-w)(int)()(Output line length /default 60, maximal 200/)
#		(-x)(string)()("offset1 offset2" start numbering with offset1 and offset2)
#		(-y)(int)()(Set the bandwidth used for optimisation)
		(-z)()()(Do not do statistical significance calculation)
		(-3)()()(Translate only the three forward frames)
		()(string)()(Additional options you'd like to add)
		()(invisible)(%input)
		()(file)()(Query database file)

	end(options)
	start(description)
tfasta compares a protein sequence to a DNA sequence database, translating the
DNA sequence library in 6 frames `on-the-fly' (3 frames with the -3 option). The
search uses the standard BLOSUM50 scoring matrix, and uses a ktup=2 by default.
	end(description)
end(program)
#

# tfastx: like tfasta, but compares against forward and reverse three-frame
# translations and allows frameshifts (see the description below).
# The option list mirrors the tfasta entry. The invisible -Q entry with a
# single-space default is presumably always passed -- TODO confirm.
start(program)(tfastx)(tfastx)(FASTA)(compare a protein sequence to a DNA sequence library, translating the DNA sequence library `on-the-fly')
	start(options)
		(-a)()()(Show the sequences in their entirety)
		(-A)()()(Force the use of the slower unlimited Smith-Waterman alignment)
		(-b)(int)()(Number of similarity scores to show)
#		(-c)(int)()()
		(-d)(int)()(Number of top alignments to be shown)
		(-E)(float)()(Expectation value threshold)
		(-f)(float)()(Penalty for the first residue in a gap)
		(-g)(float)()(Penalty for additional residues in a gap)
		(-h)(float)()(Penalty for a +1 / -1 frameshift)
		(-H)()()(Do not display histogram for similarity scores)
		(-i)()()(Search with the reverse-complement of a sequence)
#		(-k)(float)()()
		(-l)(file)()(The name of the library menu file)
#		(-L)()()(Display more information about the library sequence in the alignment)
#		(-m)(int)()(Alternate display of matches and mismatches /see man page/)
#		(-n)()()(Force the query sequence to be treated as DNA sequence)
		(-Q)(invisible)( )
#		(-r)()()()
#		(-s)(file)()(Name of the alternate matrix file)
		(-w)(int)()(Output line length /default 60, maximal 200/)
#		(-x)(string)()("offset1 offset2" start numbering with offset1 and offset2)
#		(-y)(int)()(Set the bandwidth used for optimisation)
		(-z)()()(Do not do statistical significance calculation)
		(-3)()()(Translate only the three forward frames)
		()(string)()(Additional options you'd like to add)
		()(invisible)(%input)
		()(file)()(Query database file)

	end(options)
	start(description)
tfasta compares a protein sequence to a DNA sequence database, translating the
DNA sequence library in 6 frames `on-the-fly' (3 frames with the -3 option). The
search uses the standard BLOSUM50 scoring matrix, and uses a ktup=2 by default.
tfastx, like tfasta, compares a protein sequence to a DNA sequence library.
However, tfastx compares the protein sequence to the forward and reverse
three-frame translation of the DNA library sequence, allowing for frameshifts.
	end(description)
end(program)
#

# align: global alignment of two protein or DNA sequences.
# The first sequence comes from the input window (%input), the second from the
# file given in the last option.
# NOTE(review): the menu path is "FASTA/" with a trailing slash, whereas the
# fasta/fastx/tfasta/tfastx entries use "FASTA" -- confirm both end up in the
# same menu.
start(program)(align)(align)(FASTA/)(compute the global alignment of two protein or DNA sequences)
	start(options)
		(-f)(float)()(Penalty for the first residue in a gap /-12 by default/)
		(-g)(float)()(Penalty for additional residues in a gap /-2 by default/)
		(-m)(int)()(Alternate display of matches and mismatches in alignments)
		(-s)(file)()(The filename of an alternative scoring matrix file)
		(-w)(int)()(Output line length for sequence alignments /default 60/)
		()(invisible)(%input)()
		()(file)()(File containing the second sequence)
	end(options)
	start(description)
align produces an optimal  global  alignment  between  two
protein or DNA sequences.  align will automatically decide
whether the query sequence is DNA or  protein  by  reading
the  query sequence as protein and determining whether the
`amino-acid composition' is more than 85% A+C+G+T.   align
uses a modification of the algorithm described by E. Myers
and W. Miller in  "Optimal  Alignments  in  Linear  Space"
CABIOS  (1988) 4:11-17.
	end(description)
end(program)
#
#

# align0: global alignment like align, but without weighting end gaps (see the
# last paragraph of the description). Same option set as the align entry.
# NOTE(review): the menu path is "FASTA/" with a trailing slash, whereas the
# fasta/fastx/tfasta/tfastx entries use "FASTA" -- confirm both end up in the
# same menu.
start(program)(align0)(align0)(FASTA/)(compute the global alignment of two protein or DNA sequences without penalizing for end-gaps)
	start(options)
		(-f)(float)()(Penalty for the first residue in a gap /-12 by default/)
		(-g)(float)()(Penalty for additional residues in a gap /-2 by default/)
		(-m)(int)()(Alternate display of matches and mismatches in alignments)
		(-s)(file)()(The filename of an alternative scoring matrix file)
		(-w)(int)()(Output line length for sequence alignments /default 60/)
		()(invisible)(%input)()
		()(file)()(File containing the second sequence)
	end(options)
	start(description)
align produces an optimal  global  alignment  between  two
protein or DNA sequences.  align will automatically decide
whether the query sequence is DNA or  protein  by  reading
the  query sequence as protein and determining whether the
`amino-acid composition' is more than 85% A+C+G+T.   align
uses a modification of the algorithm described by E. Myers
and W. Miller in  "Optimal  Alignments  in  Linear  Space"
CABIOS  (1988) 4:11-17.

align0 uses the same algorithm, but does  not  weight  end
gaps.  Sometimes this can have surprising effects.
	end(description)
end(program)
#

# lalign: local alignments between two protein or DNA sequences.
# The two trailing entries supply the sequences, mirroring the align, align0
# and prdf entries: the first comes from the input window (%input), the second
# from a file. Without them the program would be started with no sequences.
# NOTE(review): confirm the argument order against lalign's usage.
start(program)(lalign)(lalign)(FASTA/)(compare two protein or DNA sequences for local similarity and show the local sequence alignments)
	start(options)
		(-f)(float)()(Penalty for the first residue in a gap /-14 by default/)
		(-g)(float)()(Penalty for additional residues in a gap /-4 by default/)
		(-m)(int)()(Alternate display of matches and mismatches in alignments)
		(-s)(file)()(The filename of an alternative scoring matrix file)
		(-w)(int)()(Output line length for sequence alignments /default 60/)
		()(invisible)(%input)()
		()(file)()(File containing the second sequence)
	end(options)
	start(description)
lalign  and plalign programs compare two sequences looking
for local sequence similarities.  lalign/plalign use  code
developed  by  X.  Huang  and  W. Miller (Adv. Appl. Math.
(1991) 12:337-357) for the "sim" program.   While  ssearch
reports only the best alignment between the query sequence
and the library sequence, lalign and plalign will report a
specified number of alignments (the default is 10) between
the two sequences.  lalign shows the actual local alignments
between  the two sequences and their scores, while plalign
produces a plot of the alignments that looks similar to  a
`dot-matrix'  homology  plot.   On Unix systems, plalign
generates postscript output.
	end(description)
end(program)
#

# prdf: estimate the significance of a protein similarity score by shuffling
# the second sequence (see the description below).
# The first sequence comes from the input window (%input), the second from the
# file given in the last option. The invisible -Q entry with a single-space
# default is presumably always passed -- TODO confirm.
# NOTE(review): -k is declared without a value type although its help text
# speaks of setting a threshold -- confirm whether it should take a number.
start(program)(prdf)(prdf)(FASTA/)(test a protein sequence similarity for significance)
	start(options)
		(-f)(float)()(Penalty for the first residue in a gap /-12 by default/)
		(-g)(float)()(Penalty for additional residues in a gap /-2 by default/)
		(-h)()()(Do not display histogram of similarity scores)
		(-k)()()(Sets the threshold for joining the initial regions for calculating the initn score)
		(-Q)(invisible)( )
		(-m)(int)()(Alternate display of matches and mismatches in alignments)
		(-s)(file)()(The filename of an alternative scoring matrix file)
		()(invisible)(%input)()
		()(file)()(File containing the second protein sequence)

	end(options)
	start(description)
prdf is used to evaluate the  significance  of  a  protein
sequence  similarity  score by comparing two sequences and
calculating initial and optimized similarity  scores,  and
then  repeatedly shuffling the second sequence, and
calculating the initial and optimized  scores.   Extreme  value
distributions  are then fit to each of the three
distributions of scores.  The  characteristic  parameters  of  the
extreme  value  distribution are then used to estimate the
probability that each of the  unshuffled  sequence  scores
would  be obtained by chance in one sequence, or in a
number of sequences equal to the number  of  shuffles.   This
program is derived from rdf2, which was described by
Pearson and Lipman,  PNAS  (1988)  85:2444-2448,  and  Pearson
(Meth. Enz.  183:63-98).  Use of the extreme value
distribution for  estimating  the  probabilities  of  similarity
scores  was  described  by Altschul and Karlin, PNAS (1990)
87:2264-2268.  The 'z-values' calculated by rdf2  are  not
as informative as the P-values and expectations calculated
by prdf.
	end(description)
end(program)
#

#start(program)(randseq)(randseq)(FASTA/)(randomly shuffle a protein or DNA sequence)
#	start(options)
#		(-w)(int)()(use local window for shuffle with window size <value>)
#		()(invisible)(%input)()
#
#	end(options)
#	start(description)
#randseq  produces  a  shuffled  amino acid or DNA sequence
#with  the  same  length  and  composition  as  the   input
#sequence.   randseq  uses  the same shuffling algorithm as
#prss and prdf.  Shuffled sequences can be used to evaluate
#the  accuracy  of  the  statistical  estimates produced by
#BLAST, FASTA, or SSEARCH.  Searches done with  a  randomly
#shuffled  sequence should have an expectation value around
#1.0 (BLASTP p-value = 0.63).
#	end(description)
#end(program)
#

#
#start(program)()()(FASTA/)()
#	start(options)
#		()()()()
#	end(options)
#	start(description)
#	end(description)
#end(program)
#

# -------------------------------------------------------------------------
# WU-BLAST programs
# -------------------------------------------------------------------------


# blastn, complete option list (menu path "WU-BLAST/full options").
# Option lines appear to follow (flag)(type)(default)(description); an empty
# type seems to mean a plain on/off switch -- TODO confirm against the arka parser.
# The two trailing entries with an empty flag are the database file argument and
# the invisible %input placeholder.
# -gapdecayrate, -olf and -golf are typed float because they take fractional values.
start(program)(blastn)(blastn)(WU-BLAST/full options)(find sequence similarities)
	start(options)
		(-sump)()()(Karlin-Altschul "Sum" statistics, the default)
		(-poissonp)()()(use Poisson statistics to evaluate multiple HSPs)
		(-compat1.4)()()(revert to BLAST version 1.4 behavior /with bug fixes/)
		(-compat1.3)()()(revert to BLAST version 1.3 behavior /with bug fixes/)
		(-top)()()(search only the top strand of the query)
		(-bottom)()()(search only the bottom strand of the query)
		(-filter)(string)()(method, e.g., methods of "seg", "xnu", or "seg+xnu")
		(-echofilter)()()(display the filtered query sequence)
		(-stats)()()(collect word-hit statistics /consumes cpu time/)
		(-ctxfactor)(int)()(base statistics on this no. of independent contexts/frames)
		(-postsw)()()(perform full Smith-Waterman before output)
		(-nogap)()()(turn off gapped alignment method, report only ungapped HSPs)
		(-gapall)()()(run gapped alignment procedure on all HSPs /slower/)
		(-gapw)(int)()(band width /full/ for gapped alignment procedure)
		(-gapS2)(int)()(cutoff score for saving gapped alignments)
		(-gapdecayrate)(float)()(gap decay rate)
		(-olf)(float)()(max. fractional length of overlap for HSP consistency)
		(-golf)(float)()(max. fractional length overlap for GSP consistency)
		(-span2)()()(span 2 /the default/)
		(-span1)()()(span 1)
		(-span)()()(span)
		(-prune)()()(prune)
		(-edegrade)(int)()(only show HSP combos within this factor of the best combo)
		(-consistency)()()(turn off HSP consistency rules for statistics)
		(-hspsepqmax)(int)()(max. separation allowed between HSPs along query)
		(-hspsepsmax)(int)()(max. separation allowed between HSPs along subject)
		(-gapsepqmax)(int)()(max. separation allowed between GSPs along query)
		(-gapsepsmax)(int)()(max. separation allowed between GSPs along subject)
		(-matrix)(file)()(specify a scoring matrix file)
		(-altscore)(string)()(format: qc,sc,score; qc or sc may be "any", score may be "min", "max", or "na")
		(-hspmax)(int)()(max. no. of HSPs per db seq, default 1000)
		(-qoffset)(int)()(adjust query coordinate numbering by this amount)
		(-dbrecmin)(int)()(starting database record number to search)
		(-dbrecmax)(int)()(ending database record number to search)
		(-gi)()()(display gi identifiers, when available)
		(-noseqs)()()(do not display sequence alignments -- abbreviated output)
		(-qtype)()()(exit non-zero if query seems to be of wrong type)
		(-qres)()()(exit non-zero if query contains an invalid residue code)
		(-sort_by_pvalue)()()(sort by pvalue)
		(-sort_by_count)()()(sort by count)
		(-sort_by_highscore)()()(sort by highscore)
		(-sort_by_totalscore)()()(sort by total score)
		(-mmio)()()(do not use memory-mapped I/O)
		(-warnings)()()(suppress warning messages)
		(-errors)()()(suppress non-fatal error messages)
#	-progress #  report progress of search at least this often (seconds)
#	-asn1        produce ASN.1 "print-value" output (currently broken)
#	-asn1bin     produce binary-encoded ASN.1 output (currently broken)
		()(file)()(sequence database /mandatory/)
		()(invisible)(%input)()
	end(options)
	start(description)

Reference:  Gish, Warren (1994-1997).  unpublished.
Altschul, Stephen F., Warren Gish, Webb Miller, Eugene W. Myers, and David J.
Lipman (1990).  Basic local alignment search tool.  J. Mol. Biol. 215:403-10.

Notice:  this program and its default parameter settings are optimized to find
nearly identical sequences rapidly.  To identify weak similarities encoded in
nucleic acid, use WU-BLASTX, TBLASTN or TBLASTX.

	end(description)
end(program)


# blastp, complete option list (menu path "WU-BLAST/full options").
# Option lines appear to follow (flag)(type)(default)(description) -- TODO confirm
# against the arka parser. -postsw is deliberately left commented out here.
# The two trailing entries with an empty flag are the database file argument and
# the invisible %input placeholder.
# -gapdecayrate, -olf and -golf are typed float because they take fractional values.
start(program)(blastp)(blastp)(WU-BLAST/full options)(find protein sequence similarities)
	start(options)
		(-sump)()()(Karlin-Altschul "Sum" statistics, the default)
		(-poissonp)()()(use Poisson statistics to evaluate multiple HSPs)
		(-compat1.4)()()(revert to BLAST version 1.4 behavior /with bug fixes/)
		(-compat1.3)()()(revert to BLAST version 1.3 behavior /with bug fixes/)
		(-filter)(string)()(method, e.g., methods of "seg", "xnu", or "seg+xnu")
		(-echofilter)()()(display the filtered query sequence)
		(-stats)()()(collect word-hit statistics /consumes cpu time/)
		(-ctxfactor)(int)()(base statistics on this no. of independent contexts/frames)
#		(-postsw)()()(perform full Smith-Waterman before output)
		(-nogap)()()(turn off gapped alignment method, report only ungapped HSPs)
		(-gapall)()()(run gapped alignment procedure on all HSPs /slower/)
		(-gapw)(int)()(band width /full/ for gapped alignment procedure)
		(-gapS2)(int)()(cutoff score for saving gapped alignments)
		(-gapdecayrate)(float)()(gap decay rate)
		(-olf)(float)()(max. fractional length of overlap for HSP consistency)
		(-golf)(float)()(max. fractional length overlap for GSP consistency)
		(-span2)()()(span 2 /the default/)
		(-span1)()()(span 1)
		(-span)()()(span)
		(-prune)()()(prune)
		(-edegrade)(int)()(only show HSP combos within this factor of the best combo)
		(-consistency)()()(turn off HSP consistency rules for statistics)
		(-hspsepqmax)(int)()(max. separation allowed between HSPs along query)
		(-hspsepsmax)(int)()(max. separation allowed between HSPs along subject)
		(-gapsepqmax)(int)()(max. separation allowed between GSPs along query)
		(-gapsepsmax)(int)()(max. separation allowed between GSPs along subject)
		(-matrix)(file)()(specify a scoring matrix file)
		(-altscore)(string)()(format: qc,sc,score; qc or sc may be "any", score may be "min", "max", or "na")
		(-hspmax)(int)()(max. no. of HSPs per db seq, default 1000)
		(-qoffset)(int)()(adjust query coordinate numbering by this amount)
		(-dbrecmin)(int)()(starting database record number to search)
		(-dbrecmax)(int)()(ending database record number to search)
		(-gi)()()(display gi identifiers, when available)
		(-noseqs)()()(do not display sequence alignments -- abbreviated output)
		(-nwstart)(int)()(start generating neighborhood words here in query)
		(-nwlen)(int)()(generate neighborhood words over this distance in query)
		(-qtype)()()(exit non-zero if query seems to be of wrong type)
		(-qres)()()(exit non-zero if query contains an invalid residue code)
		(-sort_by_pvalue)()()(sort by pvalue)
		(-sort_by_count)()()(sort by count)
		(-sort_by_highscore)()()(sort by highscore)
		(-sort_by_totalscore)()()(sort by total score)
		(-mmio)()()(do not use memory-mapped I/O)
		(-warnings)()()(suppress warning messages)
		(-errors)()()(suppress non-fatal error messages)
#	-progress #  report progress of search at least this often (seconds)
#	-asn1        produce ASN.1 "print-value" output (currently broken)
#	-asn1bin     produce binary-encoded ASN.1 output (currently broken)
		()(file)()(sequence database /mandatory/)
		()(invisible)(%input)()
	end(options)
	start(description)

Reference:  Gish, Warren (1994-1997).  unpublished.
Altschul, Stephen F., Warren Gish, Webb Miller, Eugene W. Myers, and David J.
Lipman (1990).  Basic local alignment search tool.  J. Mol. Biol. 215:403-10.
	end(description)
end(program)


# blastx, complete option list (menu path "WU-BLAST/full options").
# Option lines appear to follow (flag)(type)(default)(description) -- TODO confirm
# against the arka parser. -postsw is deliberately left commented out here.
# The two trailing entries with an empty flag are the database file argument and
# the invisible %input placeholder.
# -gapdecayrate, -olf and -golf are typed float because they take fractional values.
start(program)(blastx)(blastx)(WU-BLAST/full options)(find protein sequence similarities)
	start(options)
		(-sump)()()(Karlin-Altschul "Sum" statistics, the default)
		(-poissonp)()()(use Poisson statistics to evaluate multiple HSPs)
		(-compat1.4)()()(revert to BLAST version 1.4 behavior /with bug fixes/)
		(-compat1.3)()()(revert to BLAST version 1.3 behavior /with bug fixes/)
		(-filter)(string)()(method, e.g., methods of "seg", "xnu", or "seg+xnu")
		(-echofilter)()()(display the filtered query sequence)
		(-stats)()()(collect word-hit statistics /consumes cpu time/)
		(-ctxfactor)(int)()(base statistics on this no. of independent contexts/frames)
#		(-postsw)()()(perform full Smith-Waterman before output)
		(-nogap)()()(turn off gapped alignment method, report only ungapped HSPs)
		(-gapall)()()(run gapped alignment procedure on all HSPs /slower/)
		(-gapw)(int)()(band width /full/ for gapped alignment procedure)
		(-gapS2)(int)()(cutoff score for saving gapped alignments)
		(-gapdecayrate)(float)()(gap decay rate)
		(-olf)(float)()(max. fractional length of overlap for HSP consistency)
		(-golf)(float)()(max. fractional length overlap for GSP consistency)
		(-span2)()()(span 2 /the default/)
		(-span1)()()(span 1)
		(-span)()()(span)
		(-prune)()()(prune)
		(-edegrade)(int)()(only show HSP combos within this factor of the best combo)
		(-consistency)()()(turn off HSP consistency rules for statistics)
		(-hspsepqmax)(int)()(max. separation allowed between HSPs along query)
		(-hspsepsmax)(int)()(max. separation allowed between HSPs along subject)
		(-gapsepqmax)(int)()(max. separation allowed between GSPs along query)
		(-gapsepsmax)(int)()(max. separation allowed between GSPs along subject)
		(-matrix)(file)()(specify a scoring matrix file)
		(-altscore)(string)()(format: qc,sc,score; qc or sc may be "any", score may be "min", "max", or "na")
		(-hspmax)(int)()(max. no. of HSPs per db seq, default 1000)
		(-qoffset)(int)()(adjust query coordinate numbering by this amount)
		(-dbrecmin)(int)()(starting database record number to search)
		(-dbrecmax)(int)()(ending database record number to search)
		(-gi)()()(display gi identifiers, when available)
		(-noseqs)()()(do not display sequence alignments -- abbreviated output)
		(-nwstart)(int)()(start generating neighborhood words here in query)
		(-nwlen)(int)()(generate neighborhood words over this distance in query)
		(-qtype)()()(exit non-zero if query seems to be of wrong type)
		(-qres)()()(exit non-zero if query contains an invalid residue code)
		(-sort_by_pvalue)()()(sort by pvalue)
		(-sort_by_count)()()(sort by count)
		(-sort_by_highscore)()()(sort by highscore)
		(-sort_by_totalscore)()()(sort by total score)
		(-mmio)()()(do not use memory-mapped I/O)
		(-warnings)()()(suppress warning messages)
		(-errors)()()(suppress non-fatal error messages)
		(-top)()()(search only the top strand of the query)
		(-bottom)()()(search only the bottom strand of the query)
#	-progress #  report progress of search at least this often (seconds)
#	-asn1        produce ASN.1 "print-value" output (currently broken)
#	-asn1bin     produce binary-encoded ASN.1 output (currently broken)
		()(file)()(sequence database /mandatory/)
		()(invisible)(%input)()
	end(options)
	start(description)

Reference:  Gish, Warren (1994-1997).  unpublished.
Altschul, Stephen F., Warren Gish, Webb Miller, Eugene W. Myers, and David J.
Lipman (1990).  Basic local alignment search tool.  J. Mol. Biol. 215:403-10.
Gish, Warren and David J. States (1993).  Identification of protein coding
regions by database similarity search.  Nat. Genet. 3:266-72.

Notice:  statistical significance is estimated under the assumption that the
equivalent of one entire reading frame in the query sequence codes for protein
and that significant alignments will involve only coding reading frames.
	end(description)
end(program)


# tblastn, complete option list (menu path "WU-BLAST/full options").
# Option lines appear to follow (flag)(type)(default)(description) -- TODO confirm
# against the arka parser. -postsw is deliberately left commented out here.
# The two trailing entries with an empty flag are the database file argument and
# the invisible %input placeholder.
# -gapdecayrate, -olf and -golf are typed float because they take fractional values.
# NOTE(review): the short description and the Notice text below are identical to
# the blastn entry -- confirm they are the intended wording for tblastn.
start(program)(tblastn)(tblastn)(WU-BLAST/full options)(find sequence similarities)
	start(options)
		(-sump)()()(Karlin-Altschul "Sum" statistics, the default)
		(-poissonp)()()(use Poisson statistics to evaluate multiple HSPs)
		(-compat1.4)()()(revert to BLAST version 1.4 behavior /with bug fixes/)
		(-compat1.3)()()(revert to BLAST version 1.3 behavior /with bug fixes/)
		(-dbtop)()()(search only the top strand of the database)
		(-dbbottom)()()(search only the bottom strand of the database)
		(-dbcode)(int)()(specify a genetic code for the database)
		(-filter)(string)()(method, e.g., methods of "seg", "xnu", or "seg+xnu")
		(-echofilter)()()(display the filtered query sequence)
		(-stats)()()(collect word-hit statistics /consumes cpu time/)
		(-ctxfactor)(int)()(base statistics on this no. of independent contexts/frames)
#		(-postsw)()()(perform full Smith-Waterman before output)
		(-nogap)()()(turn off gapped alignment method, report only ungapped HSPs)
		(-gapall)()()(run gapped alignment procedure on all HSPs /slower/)
		(-gapw)(int)()(band width /full/ for gapped alignment procedure)
		(-gapS2)(int)()(cutoff score for saving gapped alignments)
		(-gapdecayrate)(float)()(gap decay rate)
		(-olf)(float)()(max. fractional length of overlap for HSP consistency)
		(-golf)(float)()(max. fractional length overlap for GSP consistency)
		(-span2)()()(span 2 /the default/)
		(-span1)()()(span 1)
		(-span)()()(span)
		(-prune)()()(prune)
		(-edegrade)(int)()(only show HSP combos within this factor of the best combo)
		(-consistency)()()(turn off HSP consistency rules for statistics)
		(-hspsepqmax)(int)()(max. separation allowed between HSPs along query)
		(-hspsepsmax)(int)()(max. separation allowed between HSPs along subject)
		(-gapsepqmax)(int)()(max. separation allowed between GSPs along query)
		(-gapsepsmax)(int)()(max. separation allowed between GSPs along subject)
		(-matrix)(file)()(specify a scoring matrix file)
		(-altscore)(string)()(format: qc,sc,score; qc or sc may be "any", score may be "min", "max", or "na")
		(-hspmax)(int)()(max. no. of HSPs per db seq, default 1000)
		(-qoffset)(int)()(adjust query coordinate numbering by this amount)
		(-dbrecmin)(int)()(starting database record number to search)
		(-dbrecmax)(int)()(ending database record number to search)
		(-gi)()()(display gi identifiers, when available)
		(-noseqs)()()(do not display sequence alignments -- abbreviated output)
		(-qtype)()()(exit non-zero if query seems to be of wrong type)
		(-qres)()()(exit non-zero if query contains an invalid residue code)
		(-sort_by_pvalue)()()(sort by pvalue)
		(-sort_by_count)()()(sort by count)
		(-sort_by_highscore)()()(sort by highscore)
		(-sort_by_totalscore)()()(sort by total score)
		(-mmio)()()(do not use memory-mapped I/O)
		(-warnings)()()(suppress warning messages)
		(-errors)()()(suppress non-fatal error messages)
#	-progress #  report progress of search at least this often (seconds)
#	-asn1        produce ASN.1 "print-value" output (currently broken)
#	-asn1bin     produce binary-encoded ASN.1 output (currently broken)
		()(file)()(sequence database /mandatory/)
		()(invisible)(%input)()
	end(options)
	start(description)

Reference:  Gish, Warren (1994-1997).  unpublished.
Altschul, Stephen F., Warren Gish, Webb Miller, Eugene W. Myers, and David J.
Lipman (1990).  Basic local alignment search tool.  J. Mol. Biol. 215:403-10.

Notice:  this program and its default parameter settings are optimized to find
nearly identical sequences rapidly.  To identify weak similarities encoded in
nucleic acid, use WU-BLASTX, TBLASTN or TBLASTX.

	end(description)
end(program)


# tblastx, complete option list (menu path "WU-BLAST/full options").
# Option lines appear to follow (flag)(type)(default)(description) -- TODO confirm
# against the arka parser. -postsw is deliberately left commented out here.
# The two trailing entries with an empty flag are the database file argument and
# the invisible %input placeholder.
# -gapdecayrate, -olf and -golf are typed float because they take fractional values.
# The description lists the Altschul et al. (1990) reference once; the verbatim
# duplicate that used to follow it was removed.
start(program)(tblastx)(tblastx)(WU-BLAST/full options)(find protein sequence similarities)
	start(options)
		(-sump)()()(Karlin-Altschul "Sum" statistics, the default)
		(-poissonp)()()(use Poisson statistics to evaluate multiple HSPs)
		(-compat1.4)()()(revert to BLAST version 1.4 behavior /with bug fixes/)
		(-compat1.3)()()(revert to BLAST version 1.3 behavior /with bug fixes/)
		(-filter)(string)()(method, e.g., methods of "seg", "xnu", or "seg+xnu")
		(-echofilter)()()(display the filtered query sequence)
		(-stats)()()(collect word-hit statistics /consumes cpu time/)
		(-ctxfactor)(int)()(base statistics on this no. of independent contexts/frames)
#		(-postsw)()()(perform full Smith-Waterman before output)
		(-nogap)()()(turn off gapped alignment method, report only ungapped HSPs)
		(-gapall)()()(run gapped alignment procedure on all HSPs /slower/)
		(-gapw)(int)()(band width /full/ for gapped alignment procedure)
		(-gapS2)(int)()(cutoff score for saving gapped alignments)
		(-gapdecayrate)(float)()(gap decay rate)
		(-olf)(float)()(max. fractional length of overlap for HSP consistency)
		(-golf)(float)()(max. fractional length overlap for GSP consistency)
		(-span2)()()(span 2 /the default/)
		(-span1)()()(span 1)
		(-span)()()(span)
		(-prune)()()(prune)
		(-dbtop)()()(search only the top strand of the database)
		(-dbbottom)()()(search only the bottom strand of the database)
		(-dbcode)(int)()(specify a genetic code for the database)
		(-edegrade)(int)()(only show HSP combos within this factor of the best combo)
		(-consistency)()()(turn off HSP consistency rules for statistics)
		(-hspsepqmax)(int)()(max. separation allowed between HSPs along query)
		(-hspsepsmax)(int)()(max. separation allowed between HSPs along subject)
		(-gapsepqmax)(int)()(max. separation allowed between GSPs along query)
		(-gapsepsmax)(int)()(max. separation allowed between GSPs along subject)
		(-matrix)(file)()(specify a scoring matrix file)
		(-altscore)(string)()(format: qc,sc,score; qc or sc may be "any", score may be "min", "max", or "na")
		(-hspmax)(int)()(max. no. of HSPs per db seq, default 1000)
		(-qoffset)(int)()(adjust query coordinate numbering by this amount)
		(-dbrecmin)(int)()(starting database record number to search)
		(-dbrecmax)(int)()(ending database record number to search)
		(-gi)()()(display gi identifiers, when available)
		(-noseqs)()()(do not display sequence alignments -- abbreviated output)
		(-qtype)()()(exit non-zero if query seems to be of wrong type)
		(-qres)()()(exit non-zero if query contains an invalid residue code)
		(-sort_by_pvalue)()()(sort by pvalue)
		(-sort_by_count)()()(sort by count)
		(-sort_by_highscore)()()(sort by highscore)
		(-sort_by_totalscore)()()(sort by total score)
		(-mmio)()()(do not use memory-mapped I/O)
		(-warnings)()()(suppress warning messages)
		(-errors)()()(suppress non-fatal error messages)
		(-top)()()(search only the top strand of the query)
		(-bottom)()()(search only the bottom strand of the query)
#	-progress #  report progress of search at least this often (seconds)
#	-asn1        produce ASN.1 "print-value" output (currently broken)
#	-asn1bin     produce binary-encoded ASN.1 output (currently broken)
		()(file)()(sequence database /mandatory/)
		()(invisible)(%input)()
	end(options)
	start(description)

Reference:  Gish, Warren (1994-1997).  unpublished.
Altschul, Stephen F., Warren Gish, Webb Miller, Eugene W. Myers, and David J.
Lipman (1990).  Basic local alignment search tool.  J. Mol. Biol. 215:403-10.

Notice:  statistical significance is estimated under the assumption that the
equivalent of one entire reading frame of the query sequence and one entire
reading frame of the database code for protein and that significant
alignments will only involve coding reading frames.
	end(description)
end(program)


# WU-BLAST programs, short options 

# blastn, reduced option list (menu path "WU-BLAST/short options").
# Every option listed here also appears in the blastn "full options" entry.
# Option lines appear to follow (flag)(type)(default)(description) -- TODO confirm
# against the arka parser. The two trailing entries with an empty flag are the
# database file argument and the invisible %input placeholder.
start(program)(blastn)(blastn)(WU-BLAST/short options)(find sequence similarities)
	start(options)
		(-top)()()(search only the top strand of the query)
		(-bottom)()()(search only the bottom strand of the query)
		(-nogap)()()(turn off gapped alignment method, report only ungapped HSPs)
		(-qoffset)(int)()(adjust query coordinate numbering by this amount)
		(-dbrecmin)(int)()(starting database record number to search)
		(-dbrecmax)(int)()(ending database record number to search)
		(-noseqs)()()(do not display sequence alignments -- abbreviated output)
		(-sort_by_pvalue)()()(sort by pvalue)
		(-sort_by_count)()()(sort by count)
		(-sort_by_highscore)()()(sort by highscore)
		(-sort_by_totalscore)()()(sort by total score)
		(-warnings)()()(suppress warning messages)
		(-errors)()()(suppress non-fatal error messages)
		()(file)()(sequence database /mandatory/)
		()(invisible)(%input)()
	end(options)
	start(description)

Reference:  Gish, Warren (1994-1997).  unpublished.
Altschul, Stephen F., Warren Gish, Webb Miller, Eugene W. Myers, and David J.
Lipman (1990).  Basic local alignment search tool.  J. Mol. Biol. 215:403-10.

Notice:  this program and its default parameter settings are optimized to find
nearly identical sequences rapidly.  To identify weak similarities encoded in
nucleic acid, use WU-BLASTX, TBLASTN or TBLASTX.

	end(description)
end(program)


# blastp, reduced option list (menu path "WU-BLAST/short options").
# Every option listed here also appears in the blastp "full options" entry.
# Option lines appear to follow (flag)(type)(default)(description) -- TODO confirm
# against the arka parser. The two trailing entries with an empty flag are the
# database file argument and the invisible %input placeholder.
start(program)(blastp)(blastp)(WU-BLAST/short options)(find protein sequence similarities)
	start(options)
		(-nogap)()()(turn off gapped alignment method, report only ungapped HSPs)
		(-qoffset)(int)()(adjust query coordinate numbering by this amount)
		(-dbrecmin)(int)()(starting database record number to search)
		(-dbrecmax)(int)()(ending database record number to search)
		(-noseqs)()()(do not display sequence alignments -- abbreviated output)
		(-sort_by_pvalue)()()(sort by pvalue)
		(-sort_by_count)()()(sort by count)
		(-sort_by_highscore)()()(sort by highscore)
		(-sort_by_totalscore)()()(sort by total score)
		(-warnings)()()(suppress warning messages)
		(-errors)()()(suppress non-fatal error messages)
		()(file)()(sequence database /mandatory/)
		()(invisible)(%input)()
	end(options)
	start(description)

Reference:  Gish, Warren (1994-1997).  unpublished.
Altschul, Stephen F., Warren Gish, Webb Miller, Eugene W. Myers, and David J.
Lipman (1990).  Basic local alignment search tool.  J. Mol. Biol. 215:403-10.
	end(description)
end(program)


# blastx, reduced option list (menu path "WU-BLAST/short options").
# Every option listed here also appears in the blastx "full options" entry.
# Option lines appear to follow (flag)(type)(default)(description) -- TODO confirm
# against the arka parser. The two trailing entries with an empty flag are the
# database file argument and the invisible %input placeholder.
start(program)(blastx)(blastx)(WU-BLAST/short options)(find protein sequence similarities)
	start(options)
		(-nogap)()()(turn off gapped alignment method, report only ungapped HSPs)
		(-dbrecmin)(int)()(starting database record number to search)
		(-dbrecmax)(int)()(ending database record number to search)
		(-noseqs)()()(do not display sequence alignments -- abbreviated output)
		(-sort_by_pvalue)()()(sort by pvalue)
		(-sort_by_count)()()(sort by count)
		(-sort_by_highscore)()()(sort by highscore)
		(-sort_by_totalscore)()()(sort by total score)
		(-warnings)()()(suppress warning messages)
		(-errors)()()(suppress non-fatal error messages)
		(-top)()()(search only the top strand of the query)
		(-bottom)()()(search only the bottom strand of the query)
		()(file)()(sequence database /mandatory/)
		()(invisible)(%input)()
	end(options)
	start(description)

Reference:  Gish, Warren (1994-1997).  unpublished.
Altschul, Stephen F., Warren Gish, Webb Miller, Eugene W. Myers, and David J.
Lipman (1990).  Basic local alignment search tool.  J. Mol. Biol. 215:403-10.
Gish, Warren and David J. States (1993).  Identification of protein coding
regions by database similarity search.  Nat. Genet. 3:266-72.

Notice:  statistical significance is estimated under the assumption that the
equivalent of one entire reading frame in the query sequence codes for protein
and that significant alignments will involve only coding reading frames.
	end(description)
end(program)


# tblastn, reduced option list (menu path "WU-BLAST/short options").
# Every option listed here also appears in the tblastn "full options" entry.
# Option lines appear to follow (flag)(type)(default)(description) -- TODO confirm
# against the arka parser. The two trailing entries with an empty flag are the
# database file argument and the invisible %input placeholder.
# NOTE(review): the short description and the Notice text below are identical to
# the blastn entry -- confirm they are the intended wording for tblastn.
start(program)(tblastn)(tblastn)(WU-BLAST/short options)(find sequence similarities)
	start(options)
		(-dbtop)()()(search only the top strand of the database)
		(-dbbottom)()()(search only the bottom strand of the database)
		(-nogap)()()(turn off gapped alignment method, report only ungapped HSPs)
		(-dbrecmin)(int)()(starting database record number to search)
		(-dbrecmax)(int)()(ending database record number to search)
		(-noseqs)()()(do not display sequence alignments -- abbreviated output)
		(-sort_by_pvalue)()()(sort by pvalue)
		(-sort_by_count)()()(sort by count)
		(-sort_by_highscore)()()(sort by highscore)
		(-sort_by_totalscore)()()(sort by total score)
		(-warnings)()()(suppress warning messages)
		(-errors)()()(suppress non-fatal error messages)
		()(file)()(sequence database /mandatory/)
		()(invisible)(%input)()
	end(options)
	start(description)

Reference:  Gish, Warren (1994-1997).  unpublished.
Altschul, Stephen F., Warren Gish, Webb Miller, Eugene W. Myers, and David J.
Lipman (1990).  Basic local alignment search tool.  J. Mol. Biol. 215:403-10.

Notice:  this program and its default parameter settings are optimized to find
nearly identical sequences rapidly.  To identify weak similarities encoded in
nucleic acid, use WU-BLASTX, TBLASTN or TBLASTX.

	end(description)
end(program)


# tblastx, reduced option list (menu path "WU-BLAST/short options").
# Every option listed here also appears in the tblastx "full options" entry.
# Option lines appear to follow (flag)(type)(default)(description) -- TODO confirm
# against the arka parser. The two trailing entries with an empty flag are the
# database file argument and the invisible %input placeholder.
# The description lists the Altschul et al. (1990) reference once; the verbatim
# duplicate that used to follow it was removed.
start(program)(tblastx)(tblastx)(WU-BLAST/short options)(find protein sequence similarities)
	start(options)
		(-nogap)()()(turn off gapped alignment method, report only ungapped HSPs)
		(-dbtop)()()(search only the top strand of the database)
		(-dbbottom)()()(search only the bottom strand of the database)
		(-dbcode)(int)()(specify a genetic code for the database)
		(-dbrecmin)(int)()(starting database record number to search)
		(-dbrecmax)(int)()(ending database record number to search)
		(-gi)()()(display gi identifiers, when available)
		(-noseqs)()()(do not display sequence alignments -- abbreviated output)
		(-sort_by_pvalue)()()(sort by pvalue)
		(-sort_by_count)()()(sort by count)
		(-sort_by_highscore)()()(sort by highscore)
		(-sort_by_totalscore)()()(sort by total score)
		(-warnings)()()(suppress warning messages)
		(-errors)()()(suppress non-fatal error messages)
		(-top)()()(search only the top strand of the query)
		(-bottom)()()(search only the bottom strand of the query)
		()(file)()(sequence database /mandatory/)
		()(invisible)(%input)()
	end(options)
	start(description)

Reference:  Gish, Warren (1994-1997).  unpublished.
Altschul, Stephen F., Warren Gish, Webb Miller, Eugene W. Myers, and David J.
Lipman (1990).  Basic local alignment search tool.  J. Mol. Biol. 215:403-10.

Notice:  statistical significance is estimated under the assumption that the
equivalent of one entire reading frame of the query sequence and one entire
reading frame of the database code for protein and that significant
alignments will only involve coding reading frames.
	end(description)
end(program)


# other WU-BLAST utilities

# setdb (menu path "WU-BLAST/databases"): per its short description, produces a
# protein database for BLAST from a FASTA file.
# Option lines appear to follow (flag)(type)(default)(description) -- TODO confirm
# against the arka parser. The final entry with an empty flag is the input
# sequence file. The long description is intentionally empty.
start(program)(setdb)(setdb)(WU-BLAST/databases)(produce a protein database for BLAST from a file in FASTA format)
	start(options)
		(-t)(string)()(descriptive title for the database)
		(-l)()()(do not lock database files during processing)
		(-b)()()(lock database files but do not block if already locked)
		(-p)(int)()(periodicity /in seconds/ to warn of file lock contention)
		()(file)()(file containing sequences for the database)
	end(options)
	start(description)
	end(description)
end(program)


# pressdb (menu path "WU-BLAST/databases"): per its short description, produces a
# nucleotide sequence database for BLAST from a FASTA file.
# Option lines appear to follow (flag)(type)(default)(description) -- TODO confirm
# against the arka parser. -o and -a both name the output database (create/truncate
# versus append). The final entry with an empty flag is the input sequence file.
# The long description is intentionally empty.
start(program)(pressdb)(pressdb)(WU-BLAST/databases)(produce a nt. sequence database for BLAST from a file in FASTA format)
	start(options)
		(-t)(string)()(descriptive title for the database)
		(-o)(file)()(create database named fname, truncating it if it already exists)
		(-a)(file)()(append to database named fname, creating it if necessary)
		(-c)(int)()(8-mer clean limit /not used by WU-BLASTN 1.4/)
		(-s)(string)()(seed for the random nucleotide generator)
		(-l)()()(do not lock database files during processing)
		(-b)()()(lock database files but do not block if already locked)
		(-p)(int)()(periodicity /in seconds/ to warn of file lock contention)
		()(file)()(file containing sequences for the database)
	end(options)
	start(description)
	end(description)
end(program)


# gb2fasta (menu path "WU-BLAST/conversions"): per its short description, converts
# a GenBank-format file into FASTA format.
# Option lines appear to follow (flag)(type)(default)(description) -- TODO confirm
# against the arka parser. The invisible %input entry carries an explicit empty
# description field so it has the same four fields as every other %input entry
# in this file.
start(program)(gb2fasta)(gb2fasta)(WU-BLAST/conversions)(convert a GenBank-format file into FASTA format)
	start(options)
		(-g)()()(omit NCBI gi identifiers)
		(-s)()()(use simple sequence identifiers LOCUS only)
		(-l)(string)()(label)
		()(invisible)(%input)()
	end(options)
	start(description)
	end(description)
end(program)


# gt2fasta (menu path "WU-BLAST/conversions").
# NOTE(review): the short description is identical to the gb2fasta entry's --
# confirm it is the intended wording for gt2fasta.
# Option lines appear to follow (flag)(type)(default)(description) -- TODO confirm
# against the arka parser. The invisible %input entry carries an explicit empty
# description field so it has the same four fields as every other %input entry
# in this file.
start(program)(gt2fasta)(gt2fasta)(WU-BLAST/conversions)(convert a GenBank-format file into FASTA format)
	start(options)
		(-g)()()(omit NCBI gi identifiers)
		(-s)()()(use simple sequence identifiers LOCUS only)
		(-l)(string)()(label)
		()(invisible)(%input)()
	end(options)
	start(description)
	end(description)
end(program)


# pir2fasta (menu path "WU-BLAST/conversions"): per its short description, converts
# a file in PIR format into FASTA format.
# Option lines appear to follow (flag)(type)(default)(description) -- TODO confirm
# against the arka parser. The invisible %input entry carries an explicit empty
# description field so it has the same four fields as every other %input entry
# in this file.
start(program)(pir2fasta)(pir2fasta)(WU-BLAST/conversions)(convert a file in PIR format into FASTA format)
	start(options)
		(-s)()()(use simple sequence identifiers /ENTRY only/)
		()(invisible)(%input)()
	end(options)
	start(description)
	end(description)
end(program)


# sp2fasta (menu path "WU-BLAST/conversions"): per its short description, converts
# files in SWISS-PROT or EMBL format into FASTA format.
# Option lines appear to follow (flag)(type)(default)(description) -- TODO confirm
# against the arka parser. The invisible %input entry carries an explicit empty
# description field so it has the same four fields as every other %input entry
# in this file.
start(program)(sp2fasta)(sp2fasta)(WU-BLAST/conversions)(convert files in SWISS-PROT or EMBL format into FASTA format)
	start(options)
		(-g)()()(omit NCBI gi identifiers)
		(-s)()()(use simple sequence identifiers /ID line only/)
		(-l)(string)()(label)
		()(invisible)(%input)()
	end(options)
	start(description)
	end(description)
end(program)



# pam (menu path "WU-BLAST", no sub-category): per its short description,
# generates a PAM matrix.
# Option lines appear to follow (flag)(type)(default)(description) -- TODO confirm
# against the arka parser. Only flag options are declared; there is no file or
# %input entry. The long description is intentionally empty.
start(program)(pam)(pam)(WU-BLAST)(Generate a PAM matrix)
	start(options)
		(-s)(float)()(optional scale for the log-odds matrix in the range 0. < scale <= 1000)
		(-x)(int)()(the substitution value for X with any other letter)
		(-p)(int)()( no. digits of precision to report /0-4/)
	end(options)
	start(description)
	end(description)
end(program)


