#!/usr/bin/gawk -f
#@ This program came from: ftp://ftp.armory.com/pub/scripts/proctree
#@ Look there for the latest version.
#@ If you don't find it, look through http://www.armory.com/~ftp/
#
# @(#) Show process tree ( 1.7 2003-04-25 )
# 1996-05-29 john h. dubois iii (john@armory.com)
# 1996-08-27 Make field list case insensitive
# 1996-12-01 Print tree in a nice format. Changed -w to -C; added new -was.
# 1997-04-15 Let command names be given as args, not just with -n
# 1997-07-16 1.4 Added AO options. Made "both" be default; removed b option.
# 1997-07-19 1.4.1 Added header & HI options.
# 1998-03-25 1.5 Added Ve options. Changed interpretation of non-option names.
# 1998-12-09 1.6 Added Tp options.
# 2003-04-25 1.7 Use new getPS().

# Main program: parse options, collect ps data, mark the processes that are
# to be shown, build the tree data and hand it to DrawTrees().
# NOTE(review): the usage/help strings and the option string passed to Opts()
# look as if their "<argument>" placeholders were stripped at some point
# (e.g. "-i: The number of ..." names no argument).  They are reproduced
# exactly as found; restore them from a pristine copy if one is available.
BEGIN {
    SUBSEP = ","    # To make debugging printout clearer
    Name = "proctree"
    Usage = "Usage:\n"\
        Name " [-hHrsacANVp] [-w] [-i] [-C] [-t]\n"\
        "[-T] "\
        "[-n] [-u] [-[eP]] [-I]\n"\
        " [-d] [-O] [process-ID|name ...]"
    rcFile = ".proctree"
    ARGC = Opts(Name,Usage,"i>CsadT;p",0,
        "~/" rcFile ":$HOME/" rcFile,
        "INDENT,COLUMNS,FIELDS,EXTRAFIELDS,USERS,TTYS,NAMES,CHILDREN,PARENTS,"\
        "WIDTH,SPACES,ASCII,ANCESTORS,OFFSPRING,MAXEQUINDENT,NOHEADER,VERBOSE",
        0,"N",0,"","I,w;P,e")
    nIndent = 2     # Default indent is 2 spaces
    # Put USER first because unlike PID it is left-adjusted, so the indenting
    # is consistent.
    F1 = "USER,PID,TTY"
    F2 = "ARGS"
    FieldList = F1 "," F2
    if ("h" in Options) {
        printf \
        "%s: Print a process tree.\n"\
        "%s\n"\
        "%s prints a tree of processes executing on the system, arranged according\n"\
        "to parent-child relationships. Each process is printed, followed by an\n"\
        "indented list of its children, then a further indented list of that\n"\
        "process' children, etc. If any process IDs are given, only those processes\n"\
        "and their ancestors and children are displayed. Any argument that is not a\n"\
        "non-negative integer is taken to be a process name to be searched for. The\n"\
        "comparison to the process name is done as for the argument to the -n option\n"\
        "except that process names given as non-option arguments are not taken to be\n"\
        "regular expressions. If both names and PIDs are given, a process must\n"\
        "satisfy both types of selectors in order to be printed.\n"\
        "Options:\n"\
        "Some of the following options can also be set by assigning values to\n"\
        "variables in a configuration file named %s, which is searched for in the\n"\
        "invoking user's home directory and in the directory specified by the\n"\
        "environment variable UHOME, if it is set (if both files exist, values set\n"\
        "in the former take precedence). Variables are assigned to with the\n"\
        "syntax: varname=value or in the case of flags, by simply putting the\n"\
        "indicated variable name in the file without a value. Variable names are\n"\
        "given in parentheses in the option descriptions.\n"\
        "-h: Print this help.\n"\
        "-P: For each process, print the given fields. The possible\n"\
        " fields are: USER (user ID), PPID (parent process ID), C (CPU scheduling\n"\
        " value), STIME (start time), TTY (TTY nam), TIME (CPU time used), _CMD\n"\
        " (first element of arg vector), _CMDT (like _CMD, but only the final\n"\
        " pathname component), ARGS (command + arguments), and COMM (name of\n"\
        " command being executed). The default is: %s\n"\
        " (FIELDS)\n"\
        "-e: Like -P, except that any fields named are added to those\n"\
        " displayed by default, immediately before the ARGS field.\n"\
        "-N: Do not read the configuration file.\n"\
        "-V: Verbose. Tell which selection criteria were not satisfied by any\n"\
        " processes.\n"\
        "The following options control the manner in which the tree is drawn:\n"\
        "-i: The number of character positions to indent when showing the\n"\
        " children of a process. The minimum is 1. The default is %d. (INDENT)\n"\
        "-I: Attempt to equalize the total indentation of each line\n"\
        " so that the ps columns are aligned. is the maximum\n"\
        " number of indent positions to equalize to. If -1 is given, all lines\n"\
        " will be indented to the same distance that the furthest indented line\n"\
        " is. Otherwise, if the deepest indent is less than or equal to the\n"\
        " value given, the number of indents used will be the same as the\n"\
        " deepest indent. If the deepest indent is larger than the value given,\n"\
        " those lines indented further will not line up. (MAXEQUINDENT)\n"\
        "-H: Do not print a header listing the names of the ps fields (NOHEADER)\n"\
        "-C: The screen width to use. Output is truncated to \n"\
        " columns. The default is to use one fewer than the width of the user's\n"\
        " terminal. If -C0 is given, the output is not truncated. (COLUMNS)\n"\
        "-w: The tree is drawn in an alternate format, with one of the\n"\
        " children of each process printed on the same line to save space\n"\
        " (reduce the number of lines printed). The data to be printed for any\n"\
        " process that has children to be printed is truncated or padded to\n"\
        " characters. must be at least 2. (WIDTH)\n"\
        "-a: Normally, the tree is drawn using box-drawing character appropriate to\n"\
        " the type of terminal the program is invoked from. If the terminal\n"\
        " does not have box-drawing characters available or -a is given, the\n"\
        " tree is drawn using ASCII characters. (ASCII)\n"\
        "-T: Use the box-drawing characters for instead of those for\n"\
        " the terminal given by the TERM environment variable.\n"\
        "-p: An internal definition of the PC character set is used to draw the tree.\n"\
        " This can be useful for sending to a printer. It may also be useful on\n"\
        " a display that understands the PC character set but has a terminfo\n"\
        " description that uses escape sequences to display the box-drawing\n"\
        " characters; avoiding the use of escape sequences lets the output be\n"\
        " operated on by pagers and editors.\n"\
        "-s: Draw the tree using nothing but spaces for indentation. (SPACES)\n"\
        "The following options restrict the processes that are displayed to be the\n"\
        "processes they select and their ancestors and children. In order to be\n"\
        "selected, a process must match all selection criteria, including any PID or\n"\
        "process name selectors given as non-option arguments:\n"\
        "-u: Select processes owned by any of the users in the\n"\
        " comma-separated list. (USERS)\n"\
        "-t: Select processes whose controlling TTY is one of those in\n"\
        " the comma-separated list. (TTYS)\n"\
        "-n: Select processes whose name (basename of argv[0]) matches the\n"\
        " egrep(C)-style pattern , which is implicitely anchored at the\n"\
        " start and end. If COMM is given with -P, the executable name (trailing\n"\
        " pathname component of the file being executed) is used instead. (NAMES)\n"\
        "The following options limit which processes among the ancestors and\n"\
        "children of processes selected by process ID or the -u, -t, and -n options\n"\
        "should be displayed (by default, all children and ancestors are displayed):\n"\
        "-d: Instead of displaying all ancestors of processes,\n"\
        " display only those that are within generations of a\n"\
        " selected process. Example: -d2 displays only selected processes, their\n"\
        " parents, and their grandparents (and if -A has not been given, their\n"\
        " children). (ANCESTORS)\n"\
        "-O: Like -d, but controls how many generations of\n"\
        " offspring are displayed. (OFFSPRING)\n"\
        "-c: Equivalent to -d0; print only the selected processes and their\n"\
        " children. (CHILDREN)\n"\
        "-A: Equivalent to -O0; print only the selected processes and their\n"\
        " ancestors. (PARENTS)\n"\
        "-cA causes only explicitly selected processes to be displayed.\n",
        Name,Usage,Name,rcFile,FieldList,nIndent
        exit 0
    }

    ## Option/argument processing
    if ("x" in Options)
        Debug = Options["x"]
    Verbose = "V" in Options
    if ("d" in Options)     # Number of ancestors to print
        Ancestors = Options["d"]
    else
        Ancestors = -1
    if ("O" in Options)     # Number of generations of offspring to print
        Offspring = Options["O"]
    else
        Offspring = -1
    if ("c" in Options)
        Ancestors = 0
    if ("A" in Options)
        Offspring = 0
    if ("P" in Options)
        FieldList = toupper(Options["P"])
    if ("e" in Options)
        FieldList = F1 "," toupper(Options["e"]) "," F2
    MakeSet(GetFields,FieldList,",")
    GetFields["PPID"]       # always need this
    delete GetFields["PID"]
    if (SelectUsers = ("u" in Options)) {
        n = MakeSet(Users,Options["u"],",")
        if (Debug)
            printf "%d users in user list.\n",n > "/dev/stderr"
        GetFields["USER"]
    }
    if (SelectTTYs = ("t" in Options)) {
        MakeSet(TTYs0,Options["t"],",")
        for (tty in TTYs0)
            TTYs[canonTTY(tty)]
        GetFields["TTY"]
    }
    if (SelectPat = ("n" in Options))
        NamePat = Options["n"]
    if (ARGC > 1) {
        if (Debug)
            printf "Got %d PID(s)/name(s)\n",ARGC-1 > "/dev/stderr"
        # All-digit arguments are PIDs; anything else is a literal name.
        for (i = 1; i < ARGC; i++) {
            arg = ARGV[i]
            if (arg ~ /^[0-9]+$/) {
                SelectPIDs = 1
                givenPIDs[arg]
            }
            else {
                Names[arg]
                SelectNames = 1
            }
        }
    }
    if (SelectNames || SelectPat) {
        NamePat = "^(" NamePat ")$"
        cmdField = "_CMD"
        GetFields[cmdField]
    }

    ## Gather ps data
    if ((e = getPS(PIDs,Procs,set2list(GetFields,","),Children,Debug > 5)) < 0) {
        # Parenthesized so that ">" can only be read as the redirection.
        print ((e == -2) ? "Bad field name given." : "ps failed.") > "/dev/stderr"
        exit 1
    }
    delete PIDs["ps"]

    ## Process data
    # Make PIDs[] be a process -> process-parent map,
    # and deal with children with no parent found
    for (pid in PIDs) {
        PIDs[pid] = parent = Procs[pid,"PPID"]
        if (Debug > 2)
            printf "Parent of %d is %d\n",pid,parent > "/dev/stderr"
        # If parent is not in PIDs, assume it is due to ps snapshot failure
        # and make the process be a child of init.  init itself is left
        # alone: when its own parent (PID 0) is not reported by ps it must
        # not be recorded as a child of itself.
        if (!(parent in PIDs) && (pid + 0) != 1) {
            PIDs[pid] = 1
            Children[1] = Children[1] "," pid
        }
    }

    # If not all processes are to be displayed, mark those that should be.
    if (SelectUsers || SelectTTYs || SelectNames || SelectPIDs || SelectPat) {
        if (Debug)
            print "Marking selected processes..." > "/dev/stderr"
        for (pid in PIDs) {
            # A process is selected only if it passes all five checks;
            # checks that are not enabled count as passed.
            pName = basename(Procs[pid,cmdField])
            if ((CheckIn(SelectUsers,Procs[pid,"USER"],Users,"users") + \
                 CheckIn(SelectTTYs,canonTTY(Procs[pid,"TTY"]),TTYs,"TTYs") + \
                 CheckIn(SelectNames,pName,Names,"process names") + \
                 CheckIn(SelectPIDs,pid,givenPIDs,"PIDs") + \
                 CheckPat(SelectPat,pName,NamePat)) == 5) {
                SelectedPIDs[pid]
                if (Debug > 1)
                    printf " +%d",pid > "/dev/stderr"
            }
            else if (Debug > 2)
                printf " -%d",pid > "/dev/stderr"
        }
        if (Ancestors)
            markParents(SelectedPIDs,DisplayPPIDs,PIDs,Ancestors)
        # Mark decendants of selected PIDs in DisplayCPIDs[]
        if (Offspring)
            for (pid in SelectedPIDs)
                doChildren(pid,DisplayCPIDs,Children,Offspring)
        # Children & parents have been marked in separate arrays, so that
        # marking processes in one direction doesn't prevent marking in the
        # other when there is a common intermediate process. Now merge marked
        # processes with explicitly selected processes.
        Union(DisplayCPIDs,DisplayPPIDs,SelectedPIDs)
    }
    else {
        # Mark all processes.
        SelectedPIDs[1]
        doChildren(1,SelectedPIDs,Children,-1)
    }
    if (Debug > 1)
        print "" > "/dev/stderr"
    if (Verbose) {
        reportUnfound(SelectUsers,Users,"owned by these users")
        reportUnfound(SelectTTYs,TTYs,"attached to these TTYs")
        reportUnfound(SelectPIDs,givenPIDs,"with these PIDs")
        reportUnfound(SelectNames,Names,"with these names")
        m[NamePat] = gotMatch
        reportUnfound(SelectPat,m,"matching this pattern")
    }
    split(FieldList,Fields,",")
    preOrderTraverse(1,Children,1,"",0)
    if ("C" in Options) {
        Cols = Options["C"]
        # -C0 means "do not truncate", which HeadTailInit() spells -1.
        HeadTailInit(-1,Cols ? Cols : -1,0,0)
    }
    else
        HeadTailInit(-1)
    if ("i" in Options)
        nIndent = Options["i"]
    if ("I" in Options)
        equIndent = Options["I"]
    if (!("H" in Options))
        treeData["HEADER"] = makePSline(-1,Procs,Fields)
    if (Debug)
        print "" > "/dev/stderr"
    if ("T" in Options)
        term = Options["T"]
    if ("p" in Options)
        term = ":437"
    if ("a" in Options)
        delete ENVIRON["TERM"]
    DrawTrees(treeData,nIndent,("w" in Options) ? Options["w"] : 0,emptyArr,
        "s" in Options,term,1,COLUMNS ? (COLUMNS-1) : 0,0,0,equIndent)
    if (Debug)
        print "" > "/dev/stderr"
}

# reportUnfound(): Report the selection criteria that matched no process.
# Input variables:
# report: if false, nothing is done.
# set[]: maps each criterion to the number of processes that satisfied it.
# descrip: text appended to "No processes found " in the heading.
# Every element of set[] with a count below 1 is listed on one output line.
function reportUnfound(report,set,descrip,   didheader,element) {
    if (!report)
        return
    for (element in set)
        if (set[element] < 1) {
            if (!didheader) {
                printf "No processes found %s:",descrip
                didheader = 1
            }
            # The element is data, never a format string.
            printf " %s",element
        }
    if (didheader)
        print ""
}

# CheckPat(): Test a process name against the name pattern.
# Returns 1 if docheck is false or name matches pattern, else 0.
# Globals: gotMatch is set to 1 when a match occurs; Debug.
function CheckPat(docheck,name,pattern) {
    if (!docheck)
        return 1
    if (Debug > 3)
        printf "\nChecking pattern \"%s\" for match to process name %s ",
            pattern,name > "/dev/stderr"
    if (name ~ pattern) {
        gotMatch = 1
        return 1
    }
    else
        return 0
}

# CheckIn(): Test whether element is a member of set[].
# Returns 1 if docheck is false or element is in set[], else 0.
# On a hit the element's value in set[] is incremented, which is what
# reportUnfound() later uses to find criteria that never matched.
# name is used only in the debugging message.
# Globals: Debug
function CheckIn(docheck,element,set,name) {
    if (!docheck)
        return 1
    if (Debug > 3)
        printf "\nChecking %s for element %s ",name,element > "/dev/stderr"
    if (element in set) {
        set[element]++
        return 1
    }
    else
        return 0
}

# markParents(): Mark the ancestors of every selected process.
# Input variables:
# SelectedPIDs[]: the set of selected PIDs.
# Parents[]: maps each PID to its parent PID.
# Ancestors: the number of generations to mark, or -1 for no limit.
# Output variables:
# DisplayPPIDs[]: each selected process and its marked ancestors are added.
# Globals: Debug
function markParents(SelectedPIDs,DisplayPPIDs,Parents,Ancestors,
pid,numAnc,p,p2) {
    for (pid in SelectedPIDs) {
        # Mark every proc up the chain as good by storing its pid
        # in DisplayPPIDs[], until we hit init or an already-marked
        # proc.
        numAnc = 0
        for (p = pid; p >= 1; p = Parents[p]) {
            # If only marking ancestors for a limited distance up
            # chain, must continue up chain even if this process
            # is marked already because it may be marked due to
            # one of its decendants, in which case the chain won't
            # already be marked far enough.
            if ((p in DisplayPPIDs) && (Ancestors == -1) ||
            (Ancestors != -1) && (numAnc > Ancestors))
                break
            numAnc++
            # Only meaningful when a limit was actually given.
            if (Debug > 1 && Ancestors != -1 && numAnc > Ancestors)
                printf "\nHit ancestor limit\n" > "/dev/stderr"
            DisplayPPIDs[p]
            if (!(p in Parents)) {
                printf "No parent for process %s?\n", p > "/dev/stderr"
                # Show the chain that led to the orphan.
                for (p2 = pid; p2 != p; p2 = Parents[p2])
                    printf "%d <- ",p2 > "/dev/stderr"
                printf "%s\n",p > "/dev/stderr"
                break
            }
        }
    }
}

# Mark decendants of pid in DisplayCPIDs[]
# numChildren is the number of generations to descend, or -1 for no limit.
# Children[pid] is a comma-separated list of the children of pid.
# Globals: Debug
function doChildren(pid,DisplayCPIDs,Children,numChildren,   Elem,child) {
    if (pid in Children) {      # If this process has children, mark them
        if (numChildren != -1)
            numChildren -= 1
        MakeSet(Elem,Children[pid],",")
        for (child in Elem)
            # If not marked yet...
            if (!(child in DisplayCPIDs)) {
                DisplayCPIDs[child]
                if (numChildren) {
                    if (Debug)
                        printf " c%d",child > "/dev/stderr"
                    doChildren(child,DisplayCPIDs,Children,numChildren)
                }
            }
    }
}

# preOrderVisit: Build a tree data structure for use by DrawTrees().
# We will be visited in pre-order.
# Our mission: to identify marked subtrees within the process tree as a whole,
# and make a DrawTrees() style tree out of each one.
# This is done by completely processing each tree when its topmost node is
# found, and marking each processed node so that it will not be further
# processed when preOrderTraverse() calls us with the tree's subnodes.
# Input variables:
# Node: PID to be checked for whether it is the top of a marked tree OR
# PID to be checked for whether it is part of a marked tree (depending
# on the value of Data).
# Index: Index to store this process' data under in treeData[].
# Data is true if we are processing marked tree subnodes, false if we are
# searching for the top of a marked tree.
# Globals:
# ProcessedNodes[]: Maintained as the set of processed nodes.
# TopIndex is maintained as the number of the current tree being built.
# Procs[], Fields[]: For use in building a ps output line.
# treeData[]: DrawTrees()-style data structure to build.
# Children[]: Children of each process.
# Debug: If Debug is > 1, debugging info will be printed.
# Return value:
# True if the children of this node should be processed.
function preOrderVisit(NodeID,Node,Ind,Data,   ret,psLine) {
    if (!Data) {
        # Hunting for the root of a marked tree.  A marked node that has not
        # been consumed by an earlier tree starts a new one, which is built
        # in full right away.
        if ((Node in SelectedPIDs) && !(Node in ProcessedNodes)) {
            if (Debug > 1)
                printf "Starting new tree rooted at PID %d\n",
                    Node > "/dev/stderr"
            preOrderTraverse(Node,Children,1,++TopIndex,1)
        }
        # Keep descending regardless, to find trees further down.
        return 1
    }

    # Building a tree: remember the node so it never starts a tree of its
    # own, and store its ps line if it is marked.
    ProcessedNodes[Node]
    ret = (Node in SelectedPIDs)
    if (ret) {
        if (Debug > 1)
            printf "Visiting marked node %d; index: %s\n",
                NodeID,Ind > "/dev/stderr"
        psLine = makePSline(Node,Procs,Fields)
        gsub("^[ \t]+|[ \t]+$","",psLine)   # discard leading/trailing ws
        treeData[Ind] = psLine
    }
    return ret
}

### Begin-lib file

# Assorted filename/file test/etc. functions.

# Returns path with everything up to and including the last "/" removed.
function basename(path) {
    sub(".*/","",path)
    return path
}

# Searches the directories in Dirs[] for readable file File
# Dirs should have numeric indices starting with 1.
# Returns the path of the first file from which a line could be read, or a
# null string if there is none.
function SearchPath(File,Dirs,   i,garbage,Path) {
    for (i = 1; i in Dirs; i++) {
        Path = Dirs[i] "/" File
        if ((getline garbage < Path) == 1)
            break               # a line could be read: this is the one
        close(Path)
    }
    close(Path)
    return (i in Dirs) ? Path : ""
}

# Returns 0 if FileName cannot be read, else nonzero.
function Readable(FileName,   foo,Ret) {
    Ret = (getline foo < FileName)
    close(FileName)
    if (Ret == -1)
        return 0
    return 1
}

# Returns 0 if FileName can be read but is empty,
# 1 if FileName can be read and is not empty,
# -1 if FileName cannot be read.
function TryFile(FileName,   foo,Ret) {
    # getline yields 1 for a line, 0 at end of file and -1 on error, which
    # is exactly the documented result.
    Ret = getline foo < FileName
    close(FileName)
    return Ret
}

# Finds files that match Pattern and were last modified Days days
# ago (a number optionally preceded by "+" or "-" as in find(C))
# A list of the files is put in List
# The number of matching files found is returned
# Dir and Pattern are passed to the shell unquoted (Pattern is meant to be
# expanded by it), so they must not come from untrusted input.
function FilesByDate(Dir,Pattern,Days,List,   num,FindCmd,FileName) {
    num = 0
    # "&&" rather than ";": if Dir cannot be entered, find must not be run
    # against whatever the current directory happens to be.
    FindCmd = "cd " Dir " && find " Pattern " -mtime " Days " -print 2>&1"
    while ((FindCmd | getline FileName) == 1)
        # stderr is merged into the output; drop find's stat complaints
        if (FileName !~ "find: stat.. failed") {
            List[FileName]
            num++
        }
    close(FindCmd)
    return num
}

# Returns the current working directory as printed by pwd.
function getcwd(   cmd,cwd) {
    cmd = "pwd"
    cmd | getline cwd
    close(cmd)
    return cwd
}
### End-lib file
### Begin-lib array
#
# The current version of this library can be found by searching
# http://www.armory.com/~ftp/
#
# Assorted array functions
#
# @(#) array 1.5 2004-10-13
# 1990-1997 john h. dubois iii (john@armory.com)
# 1997-01-26 Added Order parameter to Assign()
# 1997-08-27 Fixed Assign() to work properly with >1char assign-op
# 1999-12-24 Added NoValOK to Assign()
# 2002-01-26 Added getSlice()
# 2002-01-29 1.3 Added VSep parm to Assign()
# 2002-09-28 1.4 Added qsplit(), and Quote parm to Assign()
# 2004-10-13 1.5 Added nv2arr(), parms2arr()

# InitArr(): Initialize an array with values.
# This function takes a list of indices and a list of values and stores each
# value in an array under the index that has a corresponding position.
# Input variables:
# Ind is the list of indices.
# Vals is the list of values.
# Sep is the pattern that separates the indices in Ind and the values in Vals.
# Output variables:
# The array Arr[] is populated with the results.
# Global variables: none.
# Return value: none.
# Example:
#    InitArr(Arr, "foo,bar,baz", "val1,val2,val3", ",")
#    Result: Arr["foo"] = "val1", Arr["bar"] = "val2", Arr["baz"] = "val3"
# (Whitespace around a value is kept unless sep matches it.)
function InitArr(Arr,Ind,Vals,sep,   pos,keys,vals) {
    split(Vals,vals,sep)
    split(Ind,keys,sep)
    # Pair the two lists up by position.
    for (pos in keys)
        Arr[keys[pos]] = vals[pos]
}

# CopyArr(): Copy an array.
# Input variables:
# From[] is the array to copy.
# Output variables:
# To[] receives every element of From[].  Elements of To[] with the same
# index are overwritten; all other elements of To[] are left untouched.
function CopyArr(From,To,   key) {
    for (key in From)
        To[key] = From[key]
}

# SubtractArr(): Subtract the values of the elements of one array from the
# values having the same indexes in another array.
# Input variables:
# Minuend[] contains the values that will be subtracted from.  Only the
# elements whose indexes also appear in Subtrahend[] are referenced.
# Subtrahend[] contains the values that will be subtracted.
# Output variables:
# Each difference is stored back into Minuend[] under the same index.
function SubtractArr(Minuend,Subtrahend,   key) {
    for (key in Subtrahend)
        Minuend[key] = Minuend[key] - Subtrahend[key]
}

# Invert(): Invert an array: create a new array with indexes equal to the
# values of a source array and values for those indexes equal to the
# corresponding indexes of the source array.
# If multiple input elements have the same value, only one of the indexes of
# those values will appear in the output.
# Input variables:
# src[] is the source array.
# Output variables:
# The results are stored in dest[].
# Return value:
# The number of elements stored in dest.
function Invert(src,dest,   i,count) {
    count = 0       # so that an empty src[] yields 0, not a null string
    for (i in src)
        if (!(src[i] in dest)) {
            dest[src[i]] = i
            count++
        }
    return count
}

# PackArr(): Compact an array that has integer indexes so that the indexes are
# contiguous integers.
# Input variables:
# Arr[] is the array to compact.
# Num gives the number of elements in Arr[] to compact.
# Start is the index to begin compacting at.  Compacting proceeds by searching
# upwards from this value.  If Start is not passed, it defaults to 0.
# Start is expected to be an integer.
# Output variables:
# Arr[] is returned with integer indexes [Start..Start+Num-1], containing the
# values of the elements of Arr[] that had integer indexes >= Start, in their
# original order.
# Return value:
# 1 on success.  0 if Num is negative or is larger than the number of
# elements of Arr[] that have an integer index >= Start; in that case Arr[]
# is not modified.
function PackArr(Arr,Num,Start,   NewInd,OldInd,Ind,Avail) {
    if (Num < 0)
        return 0
    Start += 0
    # Count the elements the upward search can reach.  Without this check a
    # too-large Num would make the search loop below run forever.
    Avail = 0
    for (Ind in Arr)
        if ((Ind ~ /^-?[0-9]+$/) && (((Ind + 0) "") == Ind) &&
        ((Ind + 0) >= Start))
            Avail++
    if (Num > Avail)
        return 0
    NewInd = OldInd = Start
    for (; Num; Num--) {
        # Find the next occupied index
        while (!(OldInd in Arr))
            OldInd++
        if (NewInd != OldInd) {
            Arr[NewInd] = Arr[OldInd]
            delete Arr[OldInd]
        }
        OldInd++
        NewInd++
    }
    return 1
}

# getSlice(): Get a slice of an array.
# Given a multidimensional array, get an array that has one less dimension by
# taking a slice through the array where one of the dimensions is set to a
# fixed value. In the case of a 2-dimensional array, this would return a row;
# in the case of a 3-dimensional array it would return a plane, etc.
# Input variables:
# inArray[] is the source array.
# indNum selects the dimension that will have a fixed value. Dimensions are
# numbered from 1 starting at the left. E.g. in the index [a,b,c], a is
# dimension 1, b is dimension 2, and c is dimension 3.
# ind gives the value of the fixed dimension.
# sep is the string that separates indexes in inArray[] and outArray[] to
# create a multidimensional array. Use SUBSEP to get the default
# separator.
# Output variables:
# outArray[] is the destination array. The dimension indexes in it occur in
# the same order that they did in inArray[], with the fixed dimension removed.
function getSlice(inArray,ind,indNum,sep,outArray,   i,elem,num,j,outInd) {
    split("",outArray)
    for (i in inArray) {
        num = split(i,elem,sep)
        if (elem[indNum] == ind) {
            # Rebuild the index without the fixed dimension
            outInd = ""
            for (j = 1; j <= num; j++)
                if (j != indNum) {
                    if (outInd != "")
                        outInd = outInd sep
                    outInd = outInd elem[j]
                }
            outArray[outInd] = inArray[i]
        }
    }
}

# Split a string into elements with quoting.
# This is similar to split(), except that inside of a quoted section, quotes
# are not acted on. Quote characters are not included in the returned values.
#
# Input variables:
# s is the string to split.
# sep is the pattern to split on.  As with split(), a single space means
# "split on runs of whitespace, ignoring leading and trailing whitespace".
# quote is the quoting character.
#
# Output variables:
# The elements are returned in elem[1-n].
# If info[] is passed, information regarding the split is returned in it.
# The current defined elements are:
# info["unterm"]: True if the string ended in the middle of a quoted section.
#
# Return value:
# The number of strings stored in elem.
function qsplit(s, elem, sep, quote, info,
len, i, c, inquote, e, j, curelem, quoted, unquoted) {
    # Scan s. Each time a quoted section begins, split the part leads up to it
    # and store it in elem. When the quoted section ends, attach that to the
    # last element produced by the previous split.
    split("", info)
    split("", elem)
    curelem = 1
    if (sep == " ") {
        gsub(/^[ \t\n]+|[ \t\n]+$/,"",s)
        sep = "[ \t\n]+"
    }
    # Add 1 for end-of-input-string processing
    if ((len = length(s)+1) == 1)
        return 0
    for (i = 1; i <= len; i++) {
        c = substr(s, i, 1)
        # The position just past the end is treated like a quote character
        # so that whatever is pending gets flushed.
        if (c == quote || i == len) {
            if (inquote) {      # End of a quoted section
                elem[curelem] = elem[curelem] quoted
                quoted = ""
            }
            else {              # Start of a quoted section
                if (split(unquoted, e, sep)) {
                    # The first piece continues the current element; each
                    # further piece starts a new one.
                    curelem--
                    for (j = 1; j in e; j++) {
                        curelem++
                        elem[curelem] = elem[curelem] e[j]
                    }
                }
                unquoted = ""
            }
            inquote = !inquote
        }
        else if (inquote)
            quoted = quoted c
        else
            unquoted = unquoted c
    }
    # inquote is toggled at the end
    info["unterm"] = !inquote
    return curelem
}

# Assign: make an array from a list of assignments.
# An index with the name of each variable in the list is created in the array.
# Its value is set to the value given for it.
# Input variables:
# Elements is a string containing the list of variable-value pairs.
# Sep is the string that separates the pairs in the list.
# AssignOp is the string that separates variables from values.
# The first instance of AssignOp is used; if more than one occurs, the 2nd and
# later instances become part of the value.
# If NoValOK is true, elements produced by splitting on Sep that have no
# instance of AssignOp are stored with a null value, using the entire element
# as the variable name.
# If VSep is non-null, variables are split on this pattern, and each name that
# results is assigned the given value. For example, if AssignOp is "=" and
# VSep is ",", the string foo,bar=baz will result in
# Arr["foo"] = Arr["bar"] = "baz"
# If Quote is non-null, it is a quoting character. Instances of Sep that occur
# in quoted sections are taken literally.
# Output variables:
# Arr is the array. It is *not* cleared of its previous contents.
# Order[1..n] are set to the variable names in the order they are given.
# It is cleared of its previous contents.
# Return value:
# The number of elements stored in the array, or -1 on error.
# The only possible error is an element with no AssignOp and NoValOK not true.
# Example:
#    Assign(Arr,"foo=blot bar=blat baz=blit"," ","=")
function Assign(Arr, Elements, Sep, AssignOp, Order, NoValOK, VSep, Quote,
Num, Assignments, Assignment, i, Ind, Var, aLen, Value, Vars, nvar, varNum,
varName, v) {
    split("",Order)
    if (Quote == "")
        Num = split(Elements,Assignments,Sep)
    else
        Num = qsplit(Elements,Assignments,Sep,Quote)
    aLen = length(AssignOp)
    varNum = 0
    if (VSep == "")
        # AssignOp is guaranteed not to appear in varnames, so this ensures
        # that each var is treated as a single name
        VSep = AssignOp
    for (i = 1; i <= Num; i++) {
        Assignment = Assignments[i]
        if (Ind = index(Assignment,AssignOp)) {
            Var = substr(Assignment,1,Ind - 1)
            Value = substr(Assignment,Ind + aLen)
        }
        else if (NoValOK) {
            Var = Assignment    # Make var name consist of entire value
            Value = ""
        }
        else
            return -1
        nvar = split(Var,Vars,VSep)
        # Walk the names left to right so that Order[] lists them in the
        # order they were given.
        for (v = 1; v <= nvar; v++) {
            varName = Vars[v]
            Arr[varName] = Value
            Order[++varNum] = varName
        }
    }
    return varNum
}

# parms2arr(): Convert a set of parameters into an ordered set of elements in
# an array.
# Input variables:
# p are the parameter strings.
# If elideEmpty is true, empty parameters are elided.
# Output variables:
# The parameters are stored in arr[], as follows:
# For each parameter, arr[parameter-number] is set to the parameter.
# parameter-number starts with 1 and is incremented for each parameter.
# If elideEmpty is false, an empty parameter keeps its number but no element
# is stored for it.
# Return value: The number of parameters stored.
# NOTE(review): when elideEmpty is false the value returned is the highest
# parameter number (8), not a count of stored elements - confirm intent.
function parms2arr(elideEmpty, arr, p1, p2, p3, p4, p5, p6, p7, p8,
src, dest) {
    split("", arr)
    # Load the parameters into arr[] by position
    arr[1] = p1
    arr[2] = p2
    arr[3] = p3
    arr[4] = p4
    arr[5] = p5
    arr[6] = p6
    arr[7] = p7
    arr[8] = p8
    dest = 1
    for (src = 1; src in arr; src++) {
        if (arr[src] != "") {
            # Move the parameter down into the next free slot
            if (src != dest) {
                arr[dest] = arr[src]
                delete arr[src]
            }
            dest++
        }
        else {
            delete arr[src]
            if (!elideEmpty)
                dest++
        }
    }
    return dest-1
}

# nv2arr(): Convert a set of name=value parameters into an ordered set of
# elements in an array.
# Input variables:
# p are the parameter strings. They have the format name=value.
# The names should be unique.
# Null parameter strings are always elided. If elideEmpty is true,
# parameters with empty values are also elided.
# Output variables:
# The parameters are stored in arr[], as follows:
# For each non-empty parameter, arr[name] is set to its value.
# The parameter names are stored indexed by arr["order",parameter-number],
# where parameter-number starts with 1 and is incremented for each
# non-empty parameter.
# Example: If called with nv2arr(arr, 0, "foo=bar", "", "biz=baz"),
# arr will contain:
# arr["foo"] = "bar"
# arr["biz"] = "baz"
# arr["order",1] = "foo"
# arr["order",2] = "biz"
# Return value: The number of parameters stored.
function nv2arr(arr, elideEmpty, p1, p2, p3, p4, p5, p6, p7, p8,
parms, i, parm, pos, varname, value, num) {
    num = 0         # so that "nothing stored" is reported as 0, not ""
    # Null parameter strings are dropped here
    parms2arr(1, parms, p1, p2, p3, p4, p5, p6, p7, p8)
    for (i = 1; i in parms; i++) {
        parm = parms[i]
        pos = index(parm, "=")
        value = substr(parm,pos+1)
        if (elideEmpty && value == "")
            continue
        varname = substr(parm,1,pos-1)
        arr[varname] = value
        arr["order", ++num] = varname
    }
    return num
}
### End-lib array
### Begin-lib HeadTail
#
# The current version of this library can be found by searching
# http://www.armory.com/~ftp/
#
# @(#) HeadTail 1.3 2001-05-21
# 1995-04-18 john h. dubois iii (john@armory.com)
# 1995-04-28 Added tail routines.
# 1996-05-09 Added all args to HeadTailInit()
# 2001-05-21 Added file arg to ColPrint().
# Make ColPrint() split input into lines before processing it.

# These functions are used to print only a selected number of the first or last
# lines of output. They are typically used in a program which has options that
# allow the invoking user to specify that only the first or last screen's
# worth of output is desired, or some specific number of lines from the start
# or end of output.
# The functions can also truncate each line so that it does
# not exceed a certain length, typically so that they do not wrap past the end
# of the screen.
#
# Usage:
# Call HeadTailInit() first to select the number of lines and columns to print.
# Then instead of using print/printf directly within the program, use
# HeadPrint() or TailPrint(). If you require formatted printing in place of
# printf, you will need to do e.g. HeadPrint(sprintf(format,arg,..)); there are
# no direct printf replacements.
# If you use HeadPrint(), for sake of efficiency you can stop producing output
# when HeadPrint() returns 0.
# If you use TailPrint(), you must call TailFlush() after all output has been
# generated.
#
# Requires libraries: tab
#
# todo: Clean up.

# HeadTailInit(): Initialize bounded printing.
# Input variables:
# Lines and Cols set the number of lines and columns to print.
# If -1 is passed for either, printing is not bounded in that dimension.
# If null or 0 is passed for either, its value is set according to the
# dimensions of the current terminal. The environment is searched for the
# values of LINES and COLUMNS; if either of these is not set, the "tput"
# utility is used.
# tput will return the current value stored in the line discipline if set
# and otherwise will return the value given for the terminal in the terminfo
# database. If tput in turn cannot give a value for the terminal, the default
# values of 24 lines and 80 columns are used.
# LineGap and ColGap determine how much "grace space" to give. The actual
# number of lines and columns printed is limited to the number of lines and
# columns as described above minus the line or column gap. By default (if
# not passed, or if a negative number is passed for them), these are set to
# 1, as is appropriate for screen-bound printing.
# If these functions are
# being used to restrict output to a specific number of lines or columns not
# necessarily derived from the screen dimensions, 0 should be passed for
# LineGap and ColGap.
# Current implementation stores these values in the global variables
# LINES, COLUMNS, LINEGAP, and COLGAP.  A value of 0 in LINES or COLUMNS
# means that the dimension is unbounded.
function HeadTailInit(Lines,Cols,LineGap,ColGap,   Cmd) {
    # tput will use values in environment, but we want to avoid running
    # it if possible.
    if (Cols > 0)
        COLUMNS = Cols
    else if (!Cols) {
        if ("COLUMNS" in ENVIRON)
            COLUMNS = ENVIRON["COLUMNS"]
        else {
            # Clear first so that a failed tput is detected even if an
            # earlier call left a value behind.
            COLUMNS = ""
            Cmd = "exec tput cols 2>/dev/null"
            Cmd | getline COLUMNS
            close(Cmd)
            if (COLUMNS == "")
                COLUMNS = 80
        }
    }
    else
        # Unbounded was asked for; do not keep a width from a previous call.
        COLUMNS = 0
    if (Lines > 0)
        LINES = Lines
    else if (!Lines) {
        if ("LINES" in ENVIRON)
            LINES = ENVIRON["LINES"]
        else {
            LINES = ""
            Cmd = "exec tput lines 2>/dev/null"
            Cmd | getline LINES
            close(Cmd)
            if (LINES == "")
                LINES = 24
        }
    }
    else
        LINES = 0
    LINEGAP = (LineGap != "" && LineGap >= 0) ? LineGap : 1
    COLGAP = (ColGap != "" && ColGap >= 0) ? ColGap : 1
}

# TailPrint(): Store a bounded number of lines from the end of output for later
# printing.
# If LINES is >0, the last LINES-LINEGAP lines are kept in a circular buffer.
# When TailFlush() is called, they are printed.
# For efficiency, all processing other than saving a bounded number of lines
# is deferred until TailFlush() is called, since it will only have to act on
# the final lines of output.
# If lines are passed with embedded newlines, more than the configured number
# of lines will be saved; the excess will be discarded by TailFlush().
# If LINES = 0, lines are printed immediately instead of being stored.
# Global vars: uses LINES & COLUMNS; sets and uses TailPtr;
# saves lines in TailLines[] from 1..LINES-LINEGAP
function TailPrint(Line) {
    if (!LINES)
        print Line
    else {
        # Advance the write pointer, wrapping at the end of the buffer
        if (++TailPtr > (LINES-LINEGAP))
            TailPtr = 1
        TailLines[TailPtr] = Line
    }
}

# TailFlush(): Print the final lines of output as stored by TailPrint().
# Embedded newlines cause an input line to be split into multiple output lines;
# trailing newlines are stripped.
# If COLUMNS > 0, each line is processed to expand tabs to spaces and is then
# truncated to COLUMNS-COLGAP characters.
# Each buffer entry is deleted from TailLines[] as it is consumed, so an entry
# can never be emitted twice when the read pointer wraps around.
# A stored empty string is printed as one empty line.
# Input variables: None.
# Global vars: uses LINES, LINEGAP, COLUMNS, COLGAP; uses and modifies TailPtr
# and TailLines[].
function TailFlush(   NumPrinted,Lines,Line,i,Buffer,PrintLines,Num) {
    if (!LINES)
        return
    NumPrinted = 0
    PrintLines = LINES-LINEGAP
    # Since lines may contain multiple lines, we must create a buffer to be
    # printed by reading line buffer backwards.
    # Stop when we have copied enough lines, or if we wrap around to the end
    # and find that the entire line buffer was not used.
    while (NumPrinted < PrintLines && (TailPtr in TailLines)) {
        # Split line into individual lines, then process them last to first
        Num = split(TailLines[TailPtr],Lines,"\n")
        # Consume the entry.  Without this, an entry that contributes no
        # output lets the pointer wrap onto entries that were already copied.
        delete TailLines[TailPtr]
        if (Num == 0) {
            # split() yields no pieces for an empty string, but the caller
            # did ask for an (empty) line to be printed.
            Buffer = "\n" Buffer
            NumPrinted++
        }
        for (i = Num; i >= 1; i--) {
            Line = Lines[i]
            if (i == Num && Line == "")   # discard trailing newline
                continue
            # Put this line at the front of the print buffer
            if (COLUMNS)
                Buffer = substr(TabEx(Line),1,COLUMNS - COLGAP) "\n" Buffer
            else
                Buffer = Line "\n" Buffer
            if (++NumPrinted == PrintLines)
                break
        }
        if (!--TailPtr)   # Wrap pointer if necessary
            TailPtr = PrintLines
    }
    printf "%s",Buffer
}

# HeadPrint(): Print the initial lines of output.
# Line should not include newlines.
# If COLUMNS > 0, Line is truncated to COLUMNS-COLGAP characters before
# it is printed.
# Return value:
# If more lines may be printed, 1 is returned.
# If LINES > 0, 0 is returned when LINES-LINEGAP lines have been printed.
# Global vars: uses LINES, COLUMNS, LINEGAP, COLGAP; sets & uses LinesPrinted.
function HeadPrint(Line) {
    # Check first, in case some calls of this function do not check the
    # return value, and in case LINES is 1.
    if (LINES && LinesPrinted >= (LINES-LINEGAP))
        return 0
    if (COLUMNS)
        print substr(Line,1,COLUMNS - COLGAP)
    else
        print Line
    if (LINES && ++LinesPrinted >= (LINES-LINEGAP))
        return 0
    return 1
}

# ColPrint(): Column-bounded printing.
# Really, HeadPrint() should have the functionality present here, and this
# should be made a front end for HeadPrint().
# Line may contain embedded newlines; each piece is written to file (default
# /dev/stdout), truncated to COLUMNS-COLGAP characters when COLUMNS is
# non-zero.  Always returns 1.
function ColPrint(Line,file,   count,idx,Pieces,text) {
    if (file == "")
        file = "/dev/stdout"
    count = split(Line,Pieces,"\n")
    for (idx = 1; idx <= count; idx++) {
        text = COLUMNS ? substr(Pieces[idx],1,COLUMNS - COLGAP) : Pieces[idx]
        print text > file
    }
    return 1
}
### End-lib HeadTail
### Begin-lib set
# @(#) set 1.3 1997-01-26
# john h. dubois iii (john@armory.com)
# 1996-05-23 added return values
# 1996-05-25 added set2list()
# 1997-01-26 Added AOnly(), Exclusive()

# Sets are arrays whose indexes are the members; the values are not used.

# Every element present in both A and B is added to Inter unless it is
# already there.
# Return value: the number of new elements added to Inter
function Intersection(A,B,Inter,   member,added) {
    added = 0
    for (member in A) {
        if (!(member in B))
            continue
        if (member in Inter)
            continue
        Inter[member]
        added++
    }
    return added
}

# Any element that is in A or B but not both and which is not already in
# Excl is added to Excl.
# Return value: the number of new elements added to Excl
function Exclusive(A,B,Excl,   added) {
    added = AOnly(A,B,Excl)
    added += AOnly(B,A,Excl)
    return added
}

# Any element that is in A and not in B or aOnly is added to aOnly.
# Return value: the number of new elements added to aOnly.
function AOnly(A,B,aOnly,   member,added) {
    added = 0
    for (member in A) {
        if ((member in B) || (member in aOnly))
            continue
        aOnly[member]
        added++
    }
    return added
}

# Every element of A and of B that is not already in Both is added to it.
# Return value: the number of new elements added to Both
function Union(A,B,Both,   added) {
    added = CopySet(A,Both)
    added += CopySet(B,Both)
    return added
}

# Deletes any elements that are in both Minuend and Subtrahend from Minuend.
# Return value: the number of elements deleted.
function SubtractSet(Minuend,Subtrahend,   member,removed) {
    removed = 0
    for (member in Subtrahend) {
        if (!(member in Minuend))
            continue
        delete Minuend[member]
        removed++
    }
    return removed
}

# Every element of From that is not already in To is added to To.
# Return value: the number of new elements added to To
function CopySet(From,To,   member,added) {
    added = 0
    for (member in From) {
        if (member in To)
            continue
        To[member]
        added++
    }
    return added
}

# Returns 1 if Set is empty, 0 if not.
function IsEmpty(Set,   member,empty) {
    empty = 1
    for (member in Set) {
        # One member is enough to know the answer
        empty = 0
        break
    }
    return empty
}

# MakeSet: make a set from a list.
# An index with the name of each element of the list is created in the given
# array.
# Input variables:
# Elements is a string containing the list of elements.
# Sep is the character that separates the elements of the list.
# Output variables:
# Set is the array.
# Return value: the number of new elements added to the set.
function MakeSet(Set,Elements,Sep,   i,Num,Names,nFound,ind) {
    nFound = 0
    Num = split(Elements,Names,Sep)
    for (i = 1; i <= Num; i++) {
        ind = Names[i]
        if (!(ind in Set)) {
            Set[ind]
            nFound++
        }
    }
    return nFound
}

# Returns the number of elements in set Set.
# The count starts at an explicit 0 so that an empty set yields the number 0
# rather than an uninitialized value (which prints as an empty string).
function NumElem(Set,   elem,Num) {
    Num = 0
    for (elem in Set)
        Num++
    return Num
}

# Remove all elements from Set
# (splitting an empty string into Set clears it; the second parameter is an
# unused local).
function DeleteAll(Set,   i) {
    split("",Set,",")
}

# Returns a list of all of the elements in Set[], with each pair of elements
# separated by Sep.  Sep may be of any length, including empty.
# The order of the elements is whatever order awk iterates over Set[] in.
function set2list(Set,Sep,   list,elem) {
    for (elem in Set)
        list = list Sep elem
    # Skip the leading separator, however long it is
    return substr(list,length(Sep) + 1)
}
### End-lib set
### Begin-lib canonTTY

# Strip a leading "/dev/" from tty, if present, and return the result.
function nodevTTY(tty) {
    sub("^/dev/","",tty)
    return tty
}

# Convert /dev/ttyxx and xx to the canonical form ttyxx
# Note that any name beginning with /dev/ just has that prefix removed; "tty"
# is only prepended to names that have neither prefix.
function canonTTY(tty) {
    if (substr(tty,1,5) == "/dev/")
        tty = substr(tty,6)
    else if (substr(tty,1,3) != "tty")
        tty = "tty" tty
    return tty
}

# Return the short form of a TTY name: a leading "/dev/" is removed if
# present; otherwise a leading "tty" is removed if present.
function shortTTY(tty) {
    # Strip leading "tty" only if name did not begin with /dev
    if (substr(tty,1,5) == "/dev/")
        tty = substr(tty,6)
    else if (substr(tty,1,3) == "tty")
        tty = substr(tty,4)
    return tty
}

# names["lower"] and names["upper"] are made the canonical non-modem-control
# and modem-control versions of the TTY name respectively.
# These are the name as passed but with the first alpha char after "tty"
# (if any) converted to lowercase and uppercase respectively.
# If the TTY name is an absolute path and is not of the form /dev/tty*, the
# case conversion is not done.
# Any leading /dev/ is stripped.  If the name does not contain any directory
# component and does not begin with "tty", it is prefixed with "tty".
# Return value: 1 if the name was an absolute path outside /dev/tty* (both
# names are then set to the short name unchanged), 0 otherwise.
# The character whose case is changed is the one located by the regular
# expression below, i.e. the last alphabetic character in the short name.
# NOTE(review): PasteStr() is defined elsewhere; it is presumably
# PasteStr(string,position,replacement) -- confirm against its definition.
function bothTTYnames(tty,names,   sTTY,letter,isAbs,prefix) {
    isAbs = (substr(tty,1,1) == "/")
    sTTY = shortTTY(tty)
    if (isAbs && substr(tty,1,8) != "/dev/tty") {
        names["upper"] = sTTY
        names["lower"] = sTTY
        return 1
    }
    # Sets RSTART to the position of the letter to be case-converted
    match(sTTY,"[a-zA-Z][^a-zA-Z]*$")
    letter = substr(sTTY,RSTART,1)
    if (isAbs)
        prefix = ""
    else
        prefix = "tty"
    names["upper"] = prefix PasteStr(sTTY,RSTART,toupper(letter))
    names["lower"] = prefix PasteStr(sTTY,RSTART,tolower(letter))
    return 0
}
### End-lib canonTTY
### Begin-lib qsort
#
# The current version of this library can be found by searching
# http://www.armory.com/~ftp/
#
# @(#) qsort 1.2 2001-05-21
# 1990 john h. dubois iii (john@armory.com)
# 2001-05-21 1.2 Handle 0-1 element cases explicitly in qsortSegment to avoid
# referencing nonexistent elements.

# qsortArbIndByValue(): Sort an array according to the values of its elements.
# Input variables:
# Arr[] is an array of values with arbitrary indices.
# Output variables:
# k[] is returned with numeric indices 1..n.  The values assigned to these
# indices are the indices of Arr[], ordered so that if Arr[] is stepped
# through in the order Arr[k[1]] .. Arr[k[n]], it will be stepped through in
# order of the values of its elements.  k[] is not cleared first.
# Return value: The number of elements in the arrays (n).
function qsortArbIndByValue(Arr,k,   key,count) {
    count = 0
    # Collect the indices of Arr[] in whatever order awk yields them ...
    for (key in Arr) {
        count++
        k[count] = key
    }
    # ... then reorder them by the values they refer to.
    qsortSegment(Arr,k,1,count)
    return count
}

# qsortSegment(): Sort a segment of an array.
# Input variables:
# Arr[] contains data with arbitrary indices.
# k[] has indices 1..nelem, with the indices of Arr[] as values.
# Output variables:
# k[] is modified by this function.  The elements of Arr[] that are pointed to
# by k[start..end] are sorted, with the values of elements of k[] swapped
# so that when this function returns, Arr[k[start..end]] will be in order.
# Return value: None.
function qsortSegment(Arr,k,start,end,   lo,hi,pivot,swap,second,first) {
    # Zero or one element: already in order
    if ((end - start) < 1)
        return
    # Exactly two elements: a single comparison settles it
    if ((end - start) == 1) {
        first = k[start]
        second = k[end]
        if (Arr[first] > Arr[second]) {
            k[start] = second
            k[end] = first
        }
        return
    }
    # Adding 0 makes sure the bounds are compared as numbers
    lo = start+0
    hi = end+0
    pivot = Arr[k[int((lo + hi) / 2)]]
    # Partition: everything <= pivot ends up left of everything > pivot
    while (lo < hi) {
        while (Arr[k[lo]] < pivot)
            lo++
        while (Arr[k[hi]] > pivot)
            hi--
        if (lo < hi) {
            swap = k[lo]
            k[lo] = k[hi]
            k[hi] = swap
            lo++
            hi--
        }
    }
    # If the scans met on one element, assign it to one side or the other
    if (lo == hi) {
        if (Arr[k[lo]] < pivot)
            lo++
        else
            hi--
    }
    if (start < hi)
        qsortSegment(Arr,k,start,hi)
    if (lo < end)
        qsortSegment(Arr,k,lo,end)
}

# qsortByArbIndex(): Sort an array according to the values of its indices.
# Input variables:
# Arr[] is an array of values with arbitrary indices.
# Numeric indicates whether the indices of Arr[] should be compared as numbers.
# Output variables:
# k[] is returned with numeric indices 1..n.  The values in k[] are the indices
# of Arr[], ordered so that if Arr[] is stepped through in the order
# Arr[k[1]] .. Arr[k[n]], it will be stepped through in order of the values
# of its indices.  If Numeric is true, indices will be compared as numbers.
# Numeric indices do not have to be contiguous.
# Return value: The number of elements in the arrays (n).
function qsortByArbIndex(Arr,k,Numeric,   key,count) {
    count = 0
    for (key in Arr) {
        count++
        if (Numeric)
            # Indexes do not preserve numeric type, so it must be forced
            k[count] = key+0
        else
            k[count] = key
    }
    qsortNumIndByValue(k,1,count)
    return count
}

# qsortNumIndByValue(): Sort an array with contiguous numeric indices according
# to the values of its elements.
# Input variables:
# Arr[] is an array of elements with contiguous numeric indexes.
# start and end are the starting and ending indices of the range to be sorted.
# Output variables:
# Arr[] is returned with the same indices, but with the values assigned to them
# swapped in order to accomplish the sorting described above.
# A range of 0 or 1 elements (end <= start) is left untouched, and no
# elements outside start..end are referenced or created.
# Return value: None.
function qsortNumIndByValue(Arr,start,end,   left,right,sepval,tmp,tmpe,tmps) {
    # 0 or 1 elements: nothing to sort.  Returning here also keeps an empty
    # range (e.g. start=1, end=0) from creating Arr[0] or Arr[1].
    if ((end - start) < 1)
        return
    # handle two-element case explicitly for a tiny speedup
    if ((end - start) == 1) {
        if ((tmps = Arr[start]) > (tmpe = Arr[end])) {
            Arr[start] = tmpe
            Arr[end] = tmps
        }
        return
    }
    # Make sure comparisons act on these as numbers
    left = start+0
    right = end+0
    sepval = Arr[int((left + right) / 2)]
    # Move elements < sepval to the left and elements > sepval to the right
    while (left < right) {
        while (Arr[left] < sepval)
            left++
        while (Arr[right] > sepval)
            right--
        if (left <= right) {
            tmp = Arr[left]
            Arr[left++] = Arr[right]
            Arr[right--] = tmp
        }
    }
    if (start < right)
        qsortNumIndByValue(Arr,start,right)
    if (left < end)
        qsortNumIndByValue(Arr,left,end)
}
### End-lib qsort
### Begin-lib DrawTrees
#
# The current version of this library can be found by searching
# http://www.armory.com/~ftp/
#
# @(#) DrawTrees 1.2 2003-11-02
# 1996-11-30 john h. dubois iii (john@armory.com)
# 1997-07-19 1.1 Added preOrderTraversal(), indentation equalization.
# 2003-11-02 1.2 Added addcr

# This library is used to draw character-cell representations of trees.
# todo: Maybe a generalized tree printing routine.

# DrawTrees(): Draw complete trees from pre-generated data.
# Input variables:
# Data[] is a tree of data to draw.  The indexes consist of one or more
# integer values separated by SUBSEP.  The "depth" of the element determines
# how many integers (dimensions) are contained in the index.  For each set
# of node siblings, the integer describing the varying dimension varies from
# 1 through n where n is the number of siblings.
# This shows the indexes used for the elements of a small tree with depth 3:
# 1----+-1,1--+-1,1,1
#      |      |-1,1,2
#      |      \-1,1,3
#      \-1,2--+-1,2,1
#             \-1,2,2
# 2------2,1--+-2,1,1
#             \-2,1,2
# ^----^--see below
# The values of the elements are lines of data which constitute the nodes of
# the tree; it is these lines that will be printed as part of the tree.
#
# Offset & Width: By default, the tree is drawn with each node on a separate
# line.  Offset is the horizontal offset of each child from its parent.  It
# must be at least 1.  If Width is non-0, the tree is instead drawn with the
# first child of each parent immediately to the right of its parent.  Width
# is the number of characters allocated to the node data for each level.  If
# the data for an interior node is longer than Width, the value is truncated
# to Width-1 characters and a left-tee is appended to indicate the
# truncation, so Width should be at least two.  If this style is used,
# Offset is the number of characters of additional horizontal separation to
# use after the "split point"; in the example tree above, Width is set to 1,
# causing the addition of the characters at the positions marked by ^ on the
# "see below" line.
#
# AltChars[]: The tree is drawn using box-drawing characters appropriate to
# the terminal if they are available, and a default set of ASCII characters if
# not.  If AltChars[] contains all of the following elements, they are used to
# draw the tree.  I is the index to use; A is the ASCII default.
# I  A  Description
# x  |  Vertical bar
# q  -  Horizontal bar
# l  /  top left corner
# m  \  bottom left corner
# w  +  Top tee
# t  }  Left tee
# +  >  Right arrow (optional)
# ~  *  Bullet (optional)
# If AltChars[] does not contain all of these elements and the alternate
# character set is used, AltChars[] is returned filled in with the
# characters used to draw the tree.
# The same array can then be passed back
# to DrawTrees(), avoiding the need for it to use tput again to get the
# alternate character set capabilities of the terminal.
#
# If Spaces is true, indentation is done with spaces only; the effect is to
# set all of the above characters to be a space.
#
# If term is passed, it overrides the TERM environment variable.  Pass "dumb"
# to force the ASCII values to be used.
#
# If useArrow is true and the terminal has a right-arrow character defined,
# it is used for the branch character to the left of node data.
#
# If maxLength is non-0, output lines are truncated to maxLength characters.
#
# If AddInd is true, in the output each value is preceded by its index.
#
# If Sort is true, the tree is sorted by the lexicographical values of its
# elements, and the qsort library must be included in the program.
#
# If maxBranchInd is non-0, an attempt is made to equalize the total
# indentation of each line due to the tree-drawing characters by using a
# string of horizontal bar characters, so that the lines in Data[] will
# be aligned with each other.  Width must be set to 0.  If maxBranchInd is
# -1, all lines will be indented to the same distance that the furthest
# indented line is.  If maxBranchInd is positive, it places a limit on how
# much indentation equalization is done.  The value sets the number of
# indents (each indent being Offset characters long) that may be used.  If the
# deepest indent is less than or equal to the value given, the number of
# indents used will be the same as the deepest indent.  If the deepest
# indent is larger than the value given, those lines indented further will
# not line up, resulting in ragged output.  When scanning the tree to determine
# its maximum depth, the indexes are examined directly rather than doing a
# proper tree traversal, so Data[] should not contain any extraneous elements
# except an optional "HEADER" element.
# If Data["HEADER"] is set, it is printed
# blank-indented to the equalization distance.  If "HEADER" is set, it will be
# printed even if maxBranchInd is 0, but will only be aligned with the data
# for the root nodes.
#
# If addcr is true, each line is terminated with a cr/lf instead of just a lf.
#
# Return value:
# A negative value is returned on error (-1 if maxBranchInd is non-0 while
# Width is also non-0); 0 is returned otherwise.
#
# All names after addcr in the parameter list are local variables.
function DrawTrees(Data,Offset,Width,AltChars,Spaces,term,useArrow,maxLength,
AddInd,Sort,maxBranchInd,addcr,
i,tinfo,Strings,smacs,rmacs,BranchIndent,BlankIndent,bTail,veBar,hoBar,
bLeft,topTee,lTee,arrow,bullet,WidthIndent,OffsetIndent,ind,elem,n,maxDepth,
Header,CR,tLeft) {
    if (Spaces) {
        # tLeft is included so that the root prefix built for indentation
        # equalization has the same width as with the other character sets.
        veBar = hoBar = bLeft = tLeft = topTee = lTee = arrow = " "
        bullet = "*"
    }
    else {
        if ("x" in AltChars && "q" in AltChars && "m" in AltChars && \
        "w" in AltChars && "t" in AltChars) {
            # Either AltChars[] was passed filled in for us, or we filled it in
            # in a previous call.  If the latter, some extra info has been
            # cached in it; extract it.
            if ("smacs" in AltChars)
                tinfo["smacs"] = AltChars["smacs"]
            if ("rmacs" in AltChars)
                tinfo["rmacs"] = AltChars["rmacs"]
            if ("enacs" in AltChars)
                tinfo["enacs"] = AltChars["enacs"]
        }
        else
            altInit(tinfo,term,1,AltChars)
        if ("x" in AltChars && "q" in AltChars && "m" in AltChars && \
        "w" in AltChars && "t" in AltChars) {
            # Cache rmacs/smacs/enacs in AltChars[] for next call.
            AltChars["smacs"] = smacs = Strings["smacs"] = tinfo["smacs"]
            AltChars["rmacs"] = rmacs = Strings["rmacs"] = tinfo["rmacs"]
            if ("enacs" in tinfo) {
                # enable alternate character set
                printf "%s",tinfo["enacs"]
                AltChars["enacs"] = tinfo["enacs"]
            }
            veBar = AltChars["x"]
            hoBar = AltChars["q"]
            bLeft = AltChars["m"]
            tLeft = AltChars["l"]
            topTee = AltChars["w"]
            lTee = AltChars["t"]
            arrow = ("+" in AltChars) ? AltChars["+"] : hoBar
            bullet = ("~" in AltChars) ? AltChars["~"] : lTee
        }
        else {
            # Do not attempt mixing of alt & regular char sets for tree drawing
            veBar = "|"
            hoBar = "-"
            bLeft = "\\"
            tLeft = "/"
            topTee = "+"
            # {  (balances the brace in the next string for editors)
            lTee = "}"
            arrow = ">"
            bullet = "*"
        }
    }
    # b: blank indent.  Will be preceded by newline & followed by branch char.
    # v: indent that includes a vertical branch on the left: "|   "
    #    Will be preceded by newline or whitespace & followed by branch char.
    # l: lower left horizontal branch indent.  "\---"
    # t: left tee horizontal branch indent.  "}---"
    #    l & t will be preceded by newline or whitespace & followed by either
    #    branch tail (">") or indentation equalization.
    # p: Node padding.  Must be adjusted to fit, so is not
    #    surrounded by smacs/rmacs.  Preceded by node data; followed by branch.
    #    If Width is 0, the node padding will equal to the offset, so it is
    #    also used for unbranched indentation equalization.
    # bie: Branched indentation equalization.  Like p but with a top tee on the
    #    left.  "-+--"
    # n: Internode branch.  Preceded by branch; followed by node data.  "-->"
    # tn: Teed internode branch.  Preceded b/branch; followed b/node data."+->"
    # c: Node data truncation character.  Will be followed by branch char.
    #    n, tn, and c are used only in "width" mode.
    # lt: Line truncation character.
    # bt: Branch tail.  ">"
    # r: Tree root.  "/"
    for (i = Offset + Width; i > 0; i-=1) {
        BlankIndent = BlankIndent " "
        BranchIndent = BranchIndent hoBar
    }
    WidthIndent = substr(BlankIndent,1,Width)
    OffsetIndent = substr(BranchIndent,1,Offset)
    if (BranchIndent != "" && Offset > 1)
        bTail = useArrow ? arrow : hoBar
    Strings["c"] = smacs lTee
    Strings["lt"] = smacs bullet rmacs
    Strings["p"] = BranchIndent
    Strings["n"] = substr(BranchIndent,1,Offset-1) bTail rmacs
    Strings["tn"] = topTee substr(BranchIndent,1,Offset-2) bTail rmacs
    Strings["b"] = BlankIndent
    Strings["v"] = WidthIndent smacs veBar rmacs substr(BlankIndent,1,Offset-1)
    Strings["l"] = WidthIndent smacs bLeft substr(OffsetIndent,3)
    Strings["t"] = WidthIndent smacs lTee substr(OffsetIndent,3)
    Strings["bt"] = bTail rmacs
    if ("HEADER" in Data)
        Header = Data["HEADER"]
    if (maxBranchInd) {
        # Indentation equalization is not available in "width" mode
        if (Width)
            return -1
        Strings["bie"] = \
        (Offset > 1 ? hoBar : "") topTee substr(BranchIndent,3)
        # The depth of a node is the number of SUBSEPs in its index
        for (ind in Data) {
            n = split(ind,elem,SUBSEP)-1
            if (n > maxDepth)
                maxDepth = n
        }
        if (maxBranchInd > 0 && maxBranchInd < maxDepth)
            maxDepth = maxBranchInd
        # Build the filler for root lines, and indent the header to match
        for (i = 1; i < maxDepth; i++) {
            Strings["r"] = Strings["r"] BranchIndent
            if (Header != "")
                printf "%s",BlankIndent
        }
        Strings["r"] = \
        smacs tLeft substr(BranchIndent,3) Strings["r"] bTail rmacs
        if (Header != "")
            printf "%s",BlankIndent
    }
    if (addcr)
        CR = "\r"
    if (Header != "")
        print Header CR
    dtTraverse(Data,"",Strings,0,"",Width,maxLength,Offset+Width,AddInd,Sort,
    maxDepth,CR)
    return 0
}

# dtTraverse(): Traverse and print a subtree.
# Data[], Width, AddInd, Sort: as described for DrawTrees().
# catind: index into Data[] for the parent of this node, followed by a SUBSEP
# char.
# Strings[]: Line drawing characters, and rmacs/smacs strings.
# level: The depth of this node, with tree roots at level 0.
# branch: An indentation string to print the vertical components of the
# branches of the siblings of the parents of this node.
# Length: How many further characters may be added at this indentation level.
# levelWidth: How many characters the indentation for each level occupies.
# equIndent: How many levels' worth of indentation equalization to do.
# CR: string printed at the end of each line before the newline.
# All names after CR in the parameter list are local variables.
# Each call prints every child found under catind, recursing into each child
# that itself has children.
function dtTraverse(Data,catind,Strings,level,branch,Width,Length,levelWidth,
AddInd,Sort,equIndent,CR,
childNum,ind,siblings,children,nbranch,len,s,subLength,value,k,Arr,i) {
    # subLength is the room left for the next level down.  It is computed
    # only when a length limit is in force.
    if (Length && (subLength = Length - levelWidth) < 1)
        # Make sure subLength does not end up 0, which indicates no limit
        subLength = -1
    if (Sort) {   # build a subtree level to sort
        for (childNum = 1; (ind = catind childNum) in Data; childNum++)
            Arr[ind] = Data[ind]
        # k[1..n] receives the sibling indexes ordered by their values
        qsortArbIndByValue(Arr,k)
    }
    # If doing indentation equalization, there is less room for data
    if (level < equIndent)
        Length -= (equIndent - level) * levelWidth
    # A single instance of this function handles all of the immediate children
    # of a particular process.  They are iterated over here.
    for (childNum = 1; (ind = catind childNum) in Data; childNum++) {
        if (Sort)
            ind = k[childNum]
        # A node has children if an index one level deeper exists for it
        children = (ind,1) in Data
        if (level) {   # If this is not a root node, draw indentation string
            # Determine whether this child has further siblings
            siblings = (catind (childNum+1)) in Data
            # If printing one node per line or this is not the first child,
            # indent string for this node was not drawn when the node data for
            # its parent was printed, so print indent string now.
            # If child has further siblings, need a left tee indent;
            # otherwise need a lower left indent.
            if (!Width || childNum != 1) {
                printf "%s",branch Strings[siblings ? "t" : "l"]
                if (level < equIndent) {
                    # Pad shallow nodes out to the equalization depth
                    printf "%s",children ? Strings["bie"] : Strings["p"]
                    for (i = level+1; i < equIndent; i++)
                        printf "%s",Strings["p"]
                }
                printf "%s", Strings["bt"]
            }
        }
        else if (equIndent)
            # Root node with equalization: print the root filler
            printf "%s",Strings["r"]
        # Done printing whatever indentation strings need to precede the
        # node data for this child; now print node data and indentation for its
        # children, if any.
        value = Data[ind]   # Get node data
        if (AddInd)
            value = ind ":" value
        if (Width && children) {
            if (subLength == -1)
                # No room left to show children; indicate that by terminating
                # with line truncation char
                printf "%.*s%s%s\n",Length-1,value,Strings["lt"],CR
            else {
                if ((len = length(value)) > Width)
                    printf "%.*s%s",Width-1,value,Strings["c"]   # truncate
                else
                    printf "%s%s%.*s",value,Strings["smacs"],
                    Width-len, Strings["p"]   # pad on right
                # The first child follows on the same line; use the teed
                # branch when there is a second child, the plain one otherwise
                printf "%s",Strings[((ind,2) in Data) ? "tn" : "n"]
            }
        }
        else if (Length) {
            # Length-limited line: truncate with a marker if it does not fit
            if (length(value) > Length)
                printf "%.*s%s%s\n",Length-1,value,Strings["lt"],CR
            else
                printf "%.*s%s\n",Length,value,CR
        }
        else
            print value CR
        if (children && subLength != -1) {
            # Below a non-root node, extend the branch string with a vertical
            # bar if this node has further siblings, or blanks if it does not
            if (level)
                nbranch = branch Strings[siblings ? "v" : "b"]
            dtTraverse(Data,ind SUBSEP,Strings,level+1,nbranch,Width,subLength,
            levelWidth,AddInd,Sort,equIndent,CR)
        }
    }
}

# buildTree: Add nodes to a tree, find each of their children, and call
# buildTree() recursively for each set of children.  This is typically used
# for cases where an array of data exists, along with a separate array mapping
# the parent/child relationships in the first array.
# Output variables:
# Tree[] is the tree being built, in the style described for DrawTrees().
# Input variables:
# treeData[1..n] contain the top level of data that should be added to Tree[].
# Strings in treeData[] may be modified by getChildren() if it is called for
# a node.
# Prefix is the string that the index of each element in treeData[] should be
# prefixed with when it is copied to Tree[], if any.
# Depth is the current depth within the tree, with the top node at depth 1.
# It is used only to be passed to getChildren() in case it cares.
# childData[1..n] has two purposes.  First, buildTree() will only call
# getChildren() for those indexes of treeData[] that also exist in childData[].
# Second, additional data may be passed to getChildren() for a node by
# assigning a value to the node index in childData[].
#
# For each element in childData[], the function getChildren() is called with
# the parameters (treeData,childData[i],cTreeData,cChildData,i,Depth).
# cTreeData[] and cChildData[] are arrays which should be filled if the node
# has any children.  i is a numeric index in treeData[].
# The return value of getChildren() should be the number of children found.
# treeData[] is passed rather than the value of one of its elements so that
# the value of the element being processed may be modified before it is
# copied to Tree[].  If it is deleted from the array, it is skipped (not
# copied to Tree[]); in this case no children should be added.
# getChildren() must be defined elsewhere in the program.
function buildTree(Tree,treeData,childData,Prefix,Depth,
node,kidTreeData,kidChildData,slot) {
    slot = 1   # position of the next node actually stored at this level
    node = 1
    while (node in treeData) {
        # Start each node with empty child arrays
        split("",kidTreeData)
        split("",kidChildData)
        if (node in childData) {
            if (getChildren(treeData,childData[node],kidTreeData,
            kidChildData,node,Depth)) {
                # getChildren() may have deleted the node to have it skipped
                if (node in treeData)
                    buildTree(Tree,kidTreeData,kidChildData,
                    Prefix slot SUBSEP,Depth+1)
            }
        }
        # Only modification of Tree[] takes place here
        if (node in treeData) {
            Tree[Prefix slot] = treeData[node]
            slot++
        }
        node++
    }
}

# Example of the use of buildTree():
# BEGIN {
#     SUBSEP = ","
#     info["a"] = "Node 1"; info["b"] = "Node 2"; info["c"] = "Node 3"
#     info["d"] = "Node 4"; info["e"] = "Node 5"; info["f"] = "Node 6"
#     info["g"] = "Node 7"
#     treeData[1] = "a"; treeData[2] = "b"   # top level nodes
#     children["a"] = "c:d"; children ["b"] = "e:f"   # 2nd level nodes
#     children["f"] = "g"
#     childData[1]; childData[2]
#     buildTree(Tree,treeData,childData)
#     split("",junk)   # convince awk this is an array
#     for (t in Tree)
#         printf "%s:%s\n",t,Tree[t]
#     DrawTrees(Tree,4,0,junk,1)
# }
#
# function getChildren(treeData,childInfo,childTreeData,childChildData,ind,
# elem,i,n) {
#     n = split(children[treeData[ind]],childTreeData,":")
#     for (i in childTreeData)
#         childChildData[i]
#     treeData[ind] = info[treeData[ind]]
#     return n
# }

# Breadth-first-search version of buildTree().  This is intended to flatten
# the tree representation of a possibly cyclic graph as much as possible.
# All nodes at each depth are visited before the nodes at the next depth are
# visited.
# All parameters are as for buildTree() except that the scalar Prefix is
# replaced by the array Prefixes[].  It has an element for each value in
# treeData[] (with the same index), with the value being the prefix of the
# index under which that element should be stored in Tree[].
# getChildren() is called as by buildTree(), except that there is an additional
# argument telling getChildren() the first index in cTreeData[] and
# cChildData[] to use (instead of starting at 1).
function bfBuildTree(Tree,treeData,childData,Prefixes,Depth,
node,nextTreeData,nextChildData,slot,nextPos,nextPrefixes,kids,kidPrefix,c) {
    nextPos = 1   # next free index in the arrays for the following level
    node = 1
    while (node in treeData) {
        if (node in childData)
            kids = getChildren(treeData,childData[node],nextTreeData,
            nextChildData,node,Depth,nextPos)
        else
            kids = 0
        if (node in treeData) {   # if not skipping this node
            # Sibling numbering restarts whenever the parent prefix changes
            if (node == 1 || Prefixes[node] != Prefixes[node-1])
                slot = 1
            # Every child of this node is filed under this node's own index
            kidPrefix = Prefixes[node] slot SUBSEP
            for (c = 1; c <= kids; c++) {
                nextPrefixes[nextPos] = kidPrefix
                nextPos++
            }
            Tree[Prefixes[node] slot] = treeData[node]
            slot++
        }
        node++
    }
    # Descend only if some node at this level reported children
    if (nextPos > 1)
        bfBuildTree(Tree,nextTreeData,nextChildData,nextPrefixes,Depth+1)
}

# preOrderTraversal: Do a pre-order traversal of the tree described by Tree[]
# (the parent node is visited before any children).
# Tree[] is indexed by node name; the data for each node is a comma-separated
# list of the children of that node.  Nodes that do not have children need not
# appear in Tree[].  Node names may be any string that does not include a
# comma.
# Node is the node to start at.
# If nSort is true, child node names are expected to be numbers and are
# visited in numeric order.
# Ind is used as the top level node in building indexes for a DrawTrees() style
# tree.
# The indexes are passed to preOrderVisit().  Ind would typically be 1.
# Data is an arbitrary parameter to be passed to the function that does node
# processing.  It may be a string or array, but must be consistent within a
# program, since awk will compile it as one or the other.
# Parents should be left null on the first call.
# For each node, preOrderVisit(node-id,node,ind,data) is called.
# node-id is the concatenation of all of the names of the nodes that lead from
# the initial node to the node being processed, separated by commas, e.g.:
# initialnode,internode,thisnode
# node is the node name by itself.
# ind is a DrawTrees() style node index, for use if preOrderVisit() is building
# a tree for DrawTrees().  ind starts at 1 and is incremented by the amount
# that preOrderVisit() returns when called to process a subtree.  This is done
# by having preOrderTraverse() return whatever value the preOrderVisit() it
# calls returns.  preOrderVisit() would typically return 0 if the index passed
# to it was not used, and 1 if it was used.
# Data is passed from the arguments to this function.
# If preOrderVisit() returns true, its children (if any) will be visited;
# if it returns false, they will not.
# preOrderVisit() should be constructed to process the node appropriately.
# All names after Parents in the parameter list are local variables; the loop
# counter i is among them so that this recursive function does not modify a
# global variable of that name.
function preOrderTraverse(Node,Tree,nSort,Ind,Data,Parents,
Children,numChildren,childNum,subIndex,ret,i) {
    if ((ret = preOrderVisit(Parents Node,Node,Ind,Data)) && (Node in Tree)) {
        numChildren = split(Tree[Node],Children,",")
        if (nSort) {
            for (i = 1; i <= numChildren; i++)
                Children[i] += 0   # mark as numeric, for qsort
            qsortNumIndByValue(Children,1,numChildren)
        }
        Parents = Parents Node ","
        # subIndex only advances past children whose visit returned non-zero
        subIndex = 1
        for (childNum = 1; childNum <= numChildren; childNum++)
            subIndex += preOrderTraverse(Children[childNum],Tree,nSort,
            Ind SUBSEP subIndex,Data,Parents)
    }
    return ret
}
### End-lib DrawTrees
### Begin-lib xControl
#
# The current version of this library can be found by searching
# http://www.armory.com/~ftp/
#
# @(#) xControl 1.3 2004-04-27
# 1992-11-09 john h. dubois iii (john@armory.com)
# 1996-05-29 Added octal-only conversion.
# 1998-12-09 Added ToControl
# 2004-04-27 1.3 Do not treat characters >159 as control chars.
#            Added Override to Uncontrol.

# Uncontrol(S): Convert control characters in S to symbolic form.
# Characters in S with values < 32 and with value 127 are converted to the form
# ^X.  Characters with values 128 through 159 are converted to a backslash
# followed by the octal value of the character.
# The resulting string is returned.
# If OctalOnly is true, octal numbers are used for all symbolic values instead
# of ^X.
# If any characters are indexes of Override[], the values assigned to those
# indexes are used.
# A character that has no entry in the lookup tables is copied to the result
# unchanged rather than being dropped.
# Global variables: UncTable[] and char2octal[].
function Uncontrol(S,OctalOnly,Override,   i,len,Output,c) {
    len = length(S)
    Output = ""
    # Build the lookup tables on first use
    if (!("a" in UncTable))
        MakeUncontrolTable()
    for (i = 1; i <= len; i++) {
        c = substr(S,i,1)
        if (c in Override)
            Output = Output Override[c]
        else if (OctalOnly)
            Output = Output ((c in char2octal) ? char2octal[c] : c)
        else
            Output = Output ((c in UncTable) ? UncTable[c] : c)
    }
    return Output
}

# MakeUncontrolTable: Make tables for use by Uncontrol().
# Global variables:
# UncTable[] is made into a character -> symbolic character lookup table.
# Characters with values < 32 map to ^X, the character with value 127 maps to
# ^?, characters with values 128-159 map to a backslash followed by their
# three-digit octal value, and all other characters map to themselves.
# char2octal[] is made into a similar table, except that the characters with
# values < 32 map to a backslash followed by their three-digit octal value,
# and the character with value 127 maps to \0177.
# NOTE(review): 127 is written with four digits (\0177) while every other
# octal form here has three -- confirm which form callers expect.
function MakeUncontrolTable(   code,ch,octal) {
    for (code = 0; code <= 255; code++) {
        ch = sprintf("%c",code)
        if (code < 32) {
            # C0 control characters
            UncTable[ch] = "^" sprintf("%c",code + 64)
            char2octal[ch] = "\\" sprintf("%03o",code)
        }
        else if (code == 127) {
            # DEL
            UncTable[ch] = "^?"
            char2octal[ch] = "\\0177"
        }
        else if (code >= 128 && code < 160) {
            # Same octal form in both tables
            octal = "\\" sprintf("%03o",code)
            UncTable[ch] = octal
            char2octal[ch] = octal
        }
        else {
            # 32-126 and 160-255: each character stands for itself
            UncTable[ch] = ch
            char2octal[ch] = ch
        }
    }
}

# MakeToControlTable: Fill the global _ToControl_Map[] for use by ToControl().
# Each index is the character X of a ^X sequence; the value is the
# corresponding control character.  Letters are accepted in either case.
function MakeToControlTable(   n,ctl,upper) {
    for (n = 1; n <= 26; n++) {
        ctl = sprintf("%c",n)
        upper = substr("ABCDEFGHIJKLMNOPQRSTUVWXYZ",n,1)
        _ToControl_Map[upper] = ctl
        _ToControl_Map[tolower(upper)] = ctl
    }
    # The control characters that are not named by a letter
    _ToControl_Map["@"] = "\000"
    _ToControl_Map["["] = "\033"
    _ToControl_Map["\\"] = "\034"
    _ToControl_Map["]"] = "\035"
    _ToControl_Map["^"] = "\036"
    _ToControl_Map["_"] = "\037"
    _ToControl_Map["?"] = "\177"
}

# Convert character sequences in S of the form ^X to control character X
# and return the resulting string.
function ToControl(S,  total,pos,ch,follow,result)
{
    total = length(S)
    # Build the ^X lookup table on first use.
    if (!("@" in _ToControl_Map))
        MakeToControlTable()
    pos = 1
    while (pos <= total) {
        ch = substr(S,pos,1)
        if (ch == "^") {
            follow = substr(S,pos + 1,1)
            if (follow in _ToControl_Map) {
                # A recognized ^X pair: emit the control char, skip both.
                result = result _ToControl_Map[follow]
                pos += 2
                continue
            }
        }
        # Anything else (including a ^ with no valid follower) is copied.
        result = result ch
        pos++
    }
    return result
}

# UnEscape: convert sequences of the form \n, \nn or \nnn (where n is a
# digit) to the character whose value is DECIMAL n, nn or nnn.
# If the character following \ is not a digit, that character is taken
# literally (the backslash is dropped).
function UnEscape(S,  result,total,pos,ch,rest,digitRun)
{
    total = length(S)
    result = ""
    for (pos = 1; pos <= total; pos++) {
        ch = substr(S,pos,1)
        if (ch == "\\") {
            pos++
            ch = substr(S,pos,1)
            if (ch ~ "[0-9]") {
                rest = substr(S,pos)
                # Count of leading digits; -1 means digits run to the end.
                digitRun = match(rest,"[^0-9]") - 1
                if (digitRun > 3 || digitRun == -1)
                    digitRun = 3    # never use more than three digits
                ch = sprintf("%c",substr(rest,1,digitRun) + 0)
                pos += digitRun - 1
            }
        }
        result = result ch
    }
    return result
}

# cEsc 1.0 1998-08-02
# 1998-08-02 john h. dubois iii (john@armory.com)
# Performs these escape sequence conversions on S and returns the result:
# \\    backslash
# \a    alert or bell
# \b    backspace
# \f    formfeed
# \n    newline
# \r    carriage return
# \t    horizontal tab
# \v    vertical tab
# \d, \dd, \ddd    octal value of d, dd, or ddd
# A backslash followed by any other character yields that character.
function cEsc(S,  result,total,pos,ch,digit,more,code,where)
{
    total = length(S)
    result = ""
    for (pos = 1; pos <= total; pos++) {
        ch = substr(S,pos,1)
        if (ch == "\\") {
            pos++
            ch = substr(S,pos,1)
            if (ch ~ "[0-7]") {
                # One octal digit seen; accept up to two more.
                code = ch
                more = 2
                while (more-- > 0 && (digit = substr(S,pos + 1,1)) ~ "[0-7]") {
                    code = code * 8 + digit
                    pos++
                }
                ch = sprintf("%c",code + 0)
            }
            else {
                where = index("abfnrtv",ch)
                if (where)
                    ch = substr("\a\b\f\n\r\t\v",where,1)
            }
        }
        result = result ch
    }
    return result
}
### End-lib xControl
### Begin-lib tinfo
#
# The current version of this library can be found by searching
# http://www.armory.com/~ftp/
#
# @(#) tinfo 1.2 2003-07-04
# 1996-11-30 John H. DuBois III (john@armory.com)
# 1998-12-09 1.1 Added :437 magic, non-overwrite of values passed in tinfo[],
#            and tiCapInterp().
# 2002-11-24 1.1.1 Make tiget work reliably in old-awk.
# 2003-07-04 1.2 Started work on tparm()

# tinfo: Some routines for extracting & using capabilities from the terminfo
# database.

# todo: finish tparm()

# altInit(): Get alternate character set terminfo capabilities.
# term, noerror: see tiget(), except that if the magic termtype ":437" is
# given, instead of querying the terminfo database a built-in alternate
# character set description for the IBM 437 (as used by the PC console, many
# printers, terminals in PC Character Set mode, the Wyse 60, etc.) is used.
# tinfo[] is returned containing the acsc capability, and any of the enacs,
# smacs, and rmacs capabilities that are defined for the terminal.  Each is
# indexed by its capability name.
# acsc is the mapping of vt100 alternate character codes to those appropriate
# for the given terminal.
# enacs is used to enable the alternate character set.
# smacs starts it and rmacs ends it.
# If tinfo[] is passed already containing any of these capabilities, the
# passed strings are used instead of querying the terminfo database for them.
# AltMap is the acsc string broken down with each alternate character indexed
# by its vt100 equivalent.  num is an ordered list of the vt100 characters
# indexed starting with 1, for applications that need to know what order they
# were given in.
# The global _macs[] is set up with _macs[0] = rmacs & _macs[1] = smacs, for
# use by altPrint().
# Return value: 0 on success; otherwise the nonzero value returned by tiget()
# for the acsc capability.
# The alternate characters and their indexes (vt100 equivalents) are:
# 0 solid square block      a checker board         f degree symbol
# g plus/minus              h board of squares      j lower right corner
# k upper right corner      l upper left corner     m lower left corner
# n plus                    q horizontal line       t left tee
# u right tee               v bottom tee            w top tee
# x vertical line           + arrow pointing right  . arrow pointing down
# - arrow pointing up       , arrow pointing left   ` diamond
# ~ bullet                  I lantern symbol        o scan line 1
# s scan line 9
function altInit(tinfo,term,noerror,AltMap,num,
status,pairs,total,count,pos,key)
{
    if (term != ":437") {
        status = tiget("acsc",tinfo,term,0,1)
        if (status) {
            # All other types of errors cause tput to print an informative
            # message to stderr, which is not redirected.
            if (status == 1 && !noerror)
                print "Terminal has no acsc capability." >> "/dev/stderr"
            return status
        }
        tiget("enacs,smacs,rmacs",tinfo,term,0,1)
    }
    else if (!("acsc" in tinfo)) {
        # We avoid the use of the characters in the range 0-31 because many
        # terminals interpret them as control sequences.  Instead, up and
        # down arrow and diamond are skipped here, and the double < and
        # double > symbols are used for left and right arrow.
        tinfo["acsc"] = "0\333a\261g\361h\262j\331k\277l\332m\300n\305q\304t\303u\264v\301w\302x\263I\350~\372f\370,\256+\257"
    }

    # acsc is a sequence of (vt100 char, terminal char) pairs.
    pairs = tinfo["acsc"]
    total = length(pairs)
    count = 0
    for (pos = 1; pos < total; pos += 2) {
        key = substr(pairs,pos,1)
        num[++count] = key
        AltMap[key] = substr(pairs,pos + 1,1)
    }

    if ("rmacs" in tinfo)
        _macs[0] = tinfo["rmacs"]
    if ("smacs" in tinfo)
        _macs[1] = tinfo["smacs"]
    return 0
}

# altPrint: Print characters in either the alternate or standard character set.
# string is the string to print.
# alt should be 1 if string is in the alternate character set; 0 if in the
# standard character set.
# tinfo is accepted for compatibility; the smacs and rmacs strings actually
# used are the ones stored in the global _macs[] by altInit().
# altPrint keeps track of whether the terminal is in the standard or alternate
# character set, and issues smacs and rmacs as needed.
# It should always be called with alt false at the end of program execution to
# ensure that the terminal is left in the standard character set.
# Globals: The character set is tracked in _altPrintSet
function altPrint(string,alt,tinfo)
{
    if (alt == _altPrintSet) {
        # Already in the right character set.
        printf "%s",string
        return
    }
    # Switch character sets first, then remember the new state.
    printf "%s%s",_macs[alt],string
    _altPrintSet = alt
}

# tiget: get terminfo capabilities.
# capnames is a comma-separated list of terminfo capabilities to get.
# Each capability is put in tinfo[], indexed by capability name.
# A capability that could not be read is recorded by creating the index
# "<capname>.failed" in tinfo[] instead.
# If term is passed, it is the terminal type to get the capabilities for.
# If not, the value of the environment variable TERM is used.
# If noerror is true, error messages are suppressed.
# If nooverwrite is true and tinfo[] already has an index for a given
# capability (or for its ".failed" marker), the value is not overwritten.
# Return value: 0 if the last tput run succeeded and produced a value,
# a nonzero value (tput's exit status when it is available, else 1) if it
# failed, or -1 if term is not passed and there is no TERM environment
# variable.
# NOTE(review): term and capnames are interpolated into a shell command
# unquoted; callers must not pass untrusted values.
function tiget(capnames,tinfo,term,noerror,nooverwrite,
cmd,ret,names,capname,i,oRS,cap,got,status)
{
    if (term == "") {
        if (!("TERM" in ENVIRON))
            return -1
        term = ENVIRON["TERM"]
    }
    split(capnames,names,",")
    oRS = RS    # Save old value.  mawk chokes on RS in function parameter list.
    RS = ""     # this makes the record separator be "\n\n", which hopefully
                # is not very common in terminfo capabilities
    ret = 0
    tinfo[""]           # mark this as an array, for lame awks
    delete tinfo[""]    # ...and remove the dummy element again
    for (i = 1; i in names; i++) {
        capname = names[i]
        if (nooverwrite && (capname in tinfo || (capname ".failed") in tinfo))
            continue
        cmd = "exec tput -T " term " " capname
        if (noerror)
            cmd = cmd " 2>/dev/null"
        # If there is no such capability, tput will exit 1.
        # But old-awk does not return the exit status in close,
        # so also check for read success & null string.
        got = (cmd | getline cap)
        # Always close the pipe, even if nothing could be read, so that it
        # is not leaked and its exit status is not lost.
        status = close(cmd)
        if (got == 1 && !status && cap != "") {
            tinfo[capname] = cap
            ret = 0
        }
        else {
            tinfo[capname ".failed"]
            # Make sure a failure is never reported as 0.
            ret = status ? status : 1
        }
    }
    RS = oRS
    return ret
}

# tiget1: return the value of the single capability capname for terminal
# type term, or a null string if it cannot be obtained.
# Caches capabilities in _capabilities[]
function tiget1(capname,term,noerror,  capnames,tinfo)
{
    if ((term,capname) in _capabilities)
        return _capabilities[term,capname]
    else {
        tiget(capname,tinfo,term,noerror)
        # Cache result, whether successful or not (null)
        return _capabilities[term,capname] = tinfo[capname]
    }
}

# tinfo_seterr: set the noerror flag that tparm() passes to tiget1().
function tinfo_seterr(noerror)
{
    _tinfo_noerror = noerror
}

# setterm: set the terminal type that tparm() passes to tiget1().
function setterm(termtype)
{
    _tinfo_term = termtype
}

# Push value onto the global tparm() parameter stack.
function _tparm_push(value)
{
    _tparm_stack[++_tparm_sp] = value
}

# Pop and return the top of the global tparm() parameter stack.
function _tparm_pop()
{
    return _tparm_stack[_tparm_sp--]
}

# UNFINISHED.  Actually, barely started...
# tparm: Get and interpret a parameterized capability.
# Only %%, %p<n> and simple printf-style output conversions are handled.
function tparm(capname, p1, p2, p3, p4, p5, p6, p7, p8, p9,
        len, cap, i, c, out, p)
{
    p[1] = p1; p[2] = p2; p[3] = p3; p[4] = p4; p[5] = p5
    p[6] = p6; p[7] = p7; p[8] = p8; p[9] = p9
    _tparm_sp = 0
    len = length(cap = tiget1(capname,_tinfo_term,_tinfo_noerror))
    for (i = 1; i <= len; i++) {
        c = substr(cap,i,1)
        if (c == "%") {
            c = substr(cap,++i,1)
            if (c == "%")
                out = out "%"
            else if (c == "p")
                _tparm_push(p[substr(cap,++i,1)])
            else if (c ~ "[+# :doxXs0-9.]") {   # printf-style fmt string
                # Format: %[[:]flags][width[.precision]][doxXs]
                # (flags are -+#)
                if (c == ":")
                    i++
                # The precision separator is a literal dot.
                match(substr(cap,i), /[-+#]*([0-9]+(\.[0-9]+)?)?[doxXs]/)
                out = out sprintf("%" substr(cap, i+RSTART-1, RLENGTH),
                    _tparm_pop())
                i += RLENGTH-1
            }
        }
        else
            out = out c
    }
    return out
}

# tiCapInterp: Interpret raw terminfo strings the same way tic does.
# \b    backspace
# \f    formfeed
# \n    newline
# \r    return
# \t    tab
# \l    linefeed (newline)
# \s    space
# \^    caret
# \\    backslash
# \,    comma
# \:    colon
# \0    null (actually \200)
# \E    escape
# \e    escape
# \bbb  Octal character bbb
# \any-other-char, including \bb and \b (other than \0), are interpreted literally
# (the \ is included in the resulting string).
# ^x    control-x, for any of the usual values of x
# ^any-other-char is interpreted literally
function tiCapInterp(S,  result,total,pos,ch,where,octal)
{
    total = length(S)
    result = ""
    # Build the ^X lookup table on first use.
    if (!("@" in _ToControl_Map))
        MakeToControlTable()
    pos = 1
    while (pos <= total) {
        ch = substr(S,pos,1)
        if (ch == "\\") {
            # Backslash escape: look at what follows.
            pos++
            ch = substr(S,pos,1)
            octal = substr(S,pos,3)
            if (octal ~ "[0-7][0-7][0-7]") {
                # Exactly three octal digits.
                ch = sprintf("%c",substr(octal,1,1)*64 + substr(octal,2,1)*8 + substr(octal,3,1))
                pos += 2
            }
            else {
                where = index("bfnrtls^\\,:0eE",ch)
                if (where)
                    ch = substr("\b\f\n\r\t\n ^\\,:\200\033\033",where,1)
                else
                    ch = "\\" ch    # unknown escape: keep the backslash
            }
        }
        else if (ch == "^") {
            pos++
            ch = substr(S,pos,1)
            ch = (ch in _ToControl_Map) ? _ToControl_Map[ch] : ("^" ch)
        }
        result = result ch
        pos++
    }
    return result
}
### End-lib tinfo
### Begin-lib tab
# @(#) tab 1.1 2001-01-08
# 1995 John H. DuBois III (john@armory.com)
# 2001-01-08 Added tabsize parameter to TabEx().

# Expand all tabs in Line to spaces and return the result.
# If tabsize is passed, tabs are expanded as though tabstops are set every
# tabsize character cells.  The default for tabsize is 8.
# A tabsize of 0 simply removes the tabs.
function TabEx(Line,tabsize,  Parts,k,nParts,seg,expanded)
{
    if (tabsize == "")
        tabsize = 8
    nParts = split(Line,Parts,"\t")
    expanded = ""
    # Every part except the last was followed by a tab.  Each part starts at
    # a tabstop, so its own length determines the padding needed.
    for (k = 1; k < nParts; k++) {
        seg = Parts[k]
        expanded = expanded seg
        if (tabsize)
            expanded = expanded sprintf("%*s",tabsize - length(seg) % tabsize,"")
    }
    return expanded Parts[nParts]
}

# In multiline string OptDesc, insert string Tab at the start of every line
# that begins with any of the characters in TabChars.
# If the first character in Tab also occurs in TabChars, make sure it is the
# last character in TabChars to avoid double indenting.
# NOTE(review): each character of TabChars is used directly as part of a
# regular expression, and Tab as part of a sub() replacement, so regex
# metacharacters in TabChars and "&" in Tab are not taken literally.
function TabOpts(OptDesc,Tab,TabChars,  i,len,c)
{
    len = length(TabChars)
    for (i = 1; i <= len; i++) {
        c = substr(TabChars,i,1)
        sub("^" c,Tab c,OptDesc)            # first line
        gsub("\n" c,"\n" Tab c,OptDesc)     # all following lines
    }
    return OptDesc
}

# TabPad: pad S to TabLen tabwidths with tabs
# A tabwidth is taken to be 8 characters.  S is returned unchanged if it is
# already at least TabLen tabwidths long.
function TabPad(S,TabLen)
{
    for (TabLen -= int(length(S)/8); TabLen >= 1; TabLen--)
        S = S "\t"
    return S
}

# TabInsPad: Replace the leftmost string of tabs in S with a string of tabs
# such that the part of the string to the left of the tabs has been padded to
# TabLen tabwidths.  If the string has no tabs, the entire string is taken to
# be the part to the left of the tabs.  If the part of the string to the left
# of the tabs is already long enough, a single tab is still added.
function TabInsPad(S,TabLen,  left,right,tablen,pos,tabs)
{
    pos = index(S,"\t")
    if (!pos)
        left = S
    else {
        left = substr(S,1,pos-1)
        right = substr(S,pos+1)
    }
    # Get rid of the rest of the leftmost run of tabs.  The pattern is
    # anchored so that a later, separate run of tabs is left alone.
    sub("^\t+","",right)
    tabs = TabLen - int(length(left)/8)
    # Always add at least one tab.  This also covers the case where left is
    # longer than TabLen tabwidths, which used to make the count negative
    # and the loop below run forever.
    if (tabs < 1)
        tabs = 1
    for (; tabs >= 1; tabs--)
        left = left "\t"
    return left right
}
### End-lib tab
### Begin-lib ProcArgs
#
# The current version of this library can be found by searching
# http://www.armory.com/~ftp/
#
# @(#) ProcArgs 1.8 2003-10-07
# 1992-02-29 john h. dubois iii (john@armory.com)
# 1993-07-18 Added "#" arg type
# 1993-09-26 Do not count h option against MinArgs
# 1994-01-01 Stop scanning at first non-option arg.  Added ">" option type.
#            Removed meaning of "+" or "-" by itself.
# 1994-03-08 Added & option and *()< option types.
# 1994-04-02 Added NoRCopt to Opts()
# 1994-06-11 Mark numeric variables as such.
# 1994-07-08 Opts(): Do not require any args if h option is given.
# 1995-01-22 Record options given more than once.  Record option num in argv.
# 1995-06-08 Added ExclusiveOptions().
# 1996-01-20 Let rcfiles be a colon-separated list of filenames.
#            Expand $VARNAME at the start of its filenames.
#            Let varname=0 and [-+]option- turn off an option.
# 1996-05-05 Changed meaning of 7th arg to Opts; now can specify exactly how # many of the vars should be searched for in the environment. # Check for duplicate rcfiles. # 1996-05-13 Return more specific error values. Added AllowUnrecOpt. # 1996-05-23 Check type given for & option # 1996-06-15 Re-port to awk # 1996-10-01 Moved file-reading code into ReadConfigFile(), so that it can be # used by other functions. # 1996-10-15 Added OptIntroChars # 1996-11-01 Added exOpts arg to Opts() # 1996-11-16 Added ; type # 1996-12-08 Added Opt2Set() & Opt2Sets() # 1996-12-27 Added CmdLineOpt() # 1997-02-22 Remove packed elements. # 1997-02-28 Make sequence # for rcfiles & environ be "f" and "e". # Replaced CmdLineOpt() with OptsGiven(). # 1997-05-26 Added mangleHelp(). # 1997-08-20 Added end-of-line escape char processing to ReadConfigFile() # 1997-12-21 Improved mangleHelp() # 1998-04-19 1.2 Moved default file reading code into ReadLogicalLines[]. # Added quote processing. # 1999-02-01 1.2.1 Added Opt2Arr() # 1999-03-25 1.2.2 Removed compress argument to ProcArgs(). # Opts() now compresses ARGV. # 1999-05-03 1.2.3 Declare Lines[] in InitOpts() # 1999-05-05 1.2.4 Bugfix in option deletion # 2000-01-19 1.2.5 If -x is given, always print value assignments. # 2000-07-23 1.2.6 Fixed return value of Opt2Arr() # 2000-10-15 1.2.7 Fix test for ) value # 2001-04-05 1.2.8 Improved mangleHelp(). ProcArgs() sets global _optIntroChar. # 2001-06-05 1.3 Added Sep option to Opt2Arr(). Set (optname,1). # Added addPseudoOpt(), prioOpt(), elideOpts(). # Made OptsGiven() return a number rather than just true/false # and added Results[] parameter. # Allow unrecognized options only if they are the first option # in an option string. # 2001-06-13 1.3.1 Minor tweaks to reduce the number of complaints from # gawk --lint # 2001-10-26 1.4 Separated recording of sequence number from recording of data # source. Allow multiple instances of options to be set in a # config file. 
Record file for each option instance. # Added "a" and "p" source types. # 2002-02-26 1.4.1 Fixed return value of Opt2Set() # 2002-06-11 1.4.2 Global var fix # 2002-11-23 1.4.3 Convert \- to - in mangleHelp(). # 2003-04-05 1.4.4 Fixed some global vars to be local. # Fixed extra newline added by mangleHelp(). # 2003-05-13 1.5 Added Pat parameter to varValOpt() # 2003-06-06 1.6 Changed the way multiple instance of Boolean options are dealt # with. # 2003-09-30 1.7 Renamed ProcArgs to _procArgv and changed the way it processes # it option-list, as the first step towards a new # option-specification syntax. # 2003-10-07 1.8 Added errVar parm to varValOpt() # todo: Let gnu-style options (--varname, --varname=value) be given, where # todo: varname is the variable name as used by Opts(). # todo: Make --help equivalent to -h and implement --version as universal # todo: option, for help2man. # todo: Need some way of unsetting options that take values (not just setting # them to a null value), so cmd line opts can unset rcfile assignments. # Let a value of "-" unset numeric options, and a null string unset # strings that are not allowed to be 0 length. # For strings that are allowed to b 0 length, maybe +opt (but should be # compatible with gnu). # todo: If -x is given, also print assignments to options that do not have # variables. Maybe make option letter be default variable name. # todo: Use exclusive-option info to prevent setting of options found in # lower-priority sources that are incompatible with options found in # higher-priority sources. # todo: Ignore case in variable names. # todo: New option/value-name syntax, e.g. "x>debug|r;reports|c=autocase" # (varname always required, = specifying a flag) - other flags to # indicate whether option should be allowed on # cmd-line/rcfile/environment/cgi-environment # todo: Add some syntax for indicating that a single instance of an option may # include multiple values, and the pattern/chars that will separate them. 
# todo: If no x option specified, use it to turn on debugging and set global # Debug flag. If no n option, use it to prevent reading of rcfile. # todo: Opts() should really just set ARGC directly. # todo: Ability to extract named options from the environment as QUERY_varname # instead of just varname, to make use of awk programs directly as cgi # programs easier. # todo: Let integer options take values in alternate bases. # todo: Add a character type, understanding escape sequences, var. integer # value specs, etc. # todo: Make -? a synonym for -h. # _procArgv(): Process arguments given on the command line. # This function retrieves options given as initial arguments on the command # line. # # Strings in argv[] which begin with "-" or "+" are taken to be # strings of options, except that a string which consists solely of "-" # or "+" is taken to be a non-option string; like other non-option strings, # it stops the scanning of argv and is left in argv[]. # An argument of "--" or "++" also stops the scanning of argv[] but is removed. # If an option takes an argument, the argument may either immediately # follow it or be given separately. # "-" and "+" options are treated the same. "+" is allowed because most awks # take any -options to be arguments to themselves. gawk 2.15 was enhanced to # stop scanning when it encounters an unrecognized option, though until 2.15.5 # this feature had a flaw that caused problems in some cases. See the # OptIntroChars parameter to explicitly set the option-introduction characters. # Note that POSIX 1003.1-2001 generally prohibits introducing options with "+". # # Option processing will stop with the first unrecognized option, just as # though -- or ++ was given except that the unrecognized option will not be # removed from ARGV[]. Normally an error value is returned in this case, # but see the description of AllowUnrecOpt. # Input variables: # argv[] contains the arguments. # argc is the number of elements in argv[]. 
#
# OptParms[] describes the legal options and their type.
# See _parseGetoptsOptList() for further description.
#
# If AllowUnrecOpt is true, it is not an error for an unrecognized option to
# be found as long as it is the first option in an option string; instead
# it simply causes option processing to stop without an error as though the
# argument that contained the unrecognized option did not begin with an option
# introduction character.
#
# If OptIntroChars is not a null string, it is the set of characters that
# indicate that an argument is an option string if the string begins with one
# of the characters.  The default is "-+".  A string consisting solely of two
# of the same option-introduction characters stops the scanning of argv[].
# Output variables:
# Options and their arguments are deleted from argv[].
# Note that this means that there may be gaps left in the indices of argv[].
# All option data is stored in Options[], as follows:
#
# The first time a particular option is given, Options[option-name] and
# Options[option-name,1] are set to the value of the argument given
# for it, and Options[option-name,"count"] is (initially) set to 1.
# If it is given more than once, for the second and later instances
# Options[option-name,"count"] is incremented and the value of the argument
# given for it is assigned to Options[option-name,instance] where instance is 2
# for the second occurrence of the option, etc.
# In other words, the first time an option with a value is encountered, the
# value is assigned both to an index consisting only of its name and to an
# index consisting of its name and instance number; for any further
# occurrences of the option, the value is assigned only to the index that
# includes the instance number.  All values given for an option are stored.
# If a program uses multiple instances of an option but should only pay
# attention to instances that occur in the first venue in which an option is
# found, it must do so explicitly.
# The value of a Boolean (flag) option is 1 if it is given normally, and 0 if # it is turned off by being followed with a "-". In other words, if -x is # given, the "x" option gets a value of 1; if -x- is given, the "x" option gets # a value of 0. Other than this, the only difference between Boolean options # and value options is that Options[option-name] is left unset if the first # instance of an option is one that is turned off. This allows an option to be # checked for with the test "if (option-name in Options)", while also allowing # an option that is turned on in a config file to be turned off on the command # line. The values for Boolean options that include instance numbers, # including that for instance 1, are given the 1 or 0 value. To explicitly # check for whether the first instance of an option was turned off (as opposed # to the option not being given at all), check for instance 1 of it. This can # be useful for Boolean options that are turned on by default. # Options[option-name,"source",instance] is set to the source of data for each # instance of the option (the venue in which that instance of the option was # encountered). The possible values are "f" (config file), "e" (environment), # "a" (argv[]), and "p" (pseudo-opt). # If an option is set from a config file, Options[option-name,"file",instance] # is set to the filename. # The sequence number for each option is stored in # Options[option-name,"num",instance], where instance is 1 for the first # occurrence of the option, etc. The sequence number tells the position in a # given venue (source) in which the option occurred; it starts at 1 and is # incremented for each option found, both those that have a value and those # that do not. It starts over at 1 when the next venue is processed. # Return value: # The number of arguments left in argv is returned (should always be at least # 1). 
# If an error occurs, the global string _procArgv_err is set to an error
# message and a negative value is returned.
#
# Globals:
# _procArgv_err (see above).
# The last option-introduction character processed is stored in the global
# _optIntroChar for use by error messages.
# Current error values:
# -1: option that required an argument did not get it.
# -2: argument of incorrect type supplied for an option.
# -3: unrecognized (invalid) option.
function _procArgv(argc,argv,OptParms,Options,AllowUnrecOpt,OptIntroChars,
ArgNum,ArgsLeft,Arg,ArgLen,ArgInd,Option,NumOpt,Value,HadValue,
NeedNextOpt,GotValue,OptionNum,c,OptTerm,OptIntroCharSet)
{
    # ArgNum is the index of the argument being processed.
    # ArgsLeft is the number of arguments left in argv.
    # Arg is the argument being processed.
    # ArgLen is the length of the argument being processed.
    # ArgInd is the position of the character in Arg being processed.
    # Option is the character in Arg being processed.
    # NumOpt is true if a numeric option may be given.
    # Value is the value given with an option.
    # OptionNum counts the options found so far (their sequence number).

    ArgsLeft = argc
    # Option parameters are stored under [option,parm] indexes, so the
    # numeric-option shorthand is enabled when "&" has a type recorded.
    NumOpt = ("&","type") in OptParms
    OptionNum = 0
    # Build option specifier sets
    if (OptIntroChars == "")
        OptIntroChars = "-+"
    while (OptIntroChars != "") {
        c = substr(OptIntroChars,1,1)
        OptIntroChars = substr(OptIntroChars,2)
        OptIntroCharSet[c]      # Option introduction character set
        OptTerm[c c]            # Option terminator string set
    }
    for (ArgNum = 1; ArgNum < argc; ArgNum++) {
        Arg = argv[ArgNum]
        if (length(Arg) < 2 || \
        !((_optIntroChar = substr(Arg,1,1)) in OptIntroCharSet))
            break       # Not an option; quit
        if (Arg in OptTerm) {   # Option list explicitly terminated
            delete argv[ArgNum] # Discard Option List Terminator
            ArgsLeft--
            break
        }
        ArgLen = length(Arg)
        # For each character in this string after the option intro character...
        for (ArgInd = 2; ArgInd <= ArgLen; ArgInd++) {
            Option = substr(Arg,ArgInd,1)
            if (NumOpt && Option ~ /[-+.0-9]/) {
                # If this option is a numeric option, make its flag be &
                Option = "&"
                # Prefix Arg with a char so that ArgInd will point to the
                # first char of the numeric option.
                Arg = "&" Arg
                ArgLen++
            }
            # Get type of option.  Disallow & as literal flag.
            else if (!((Option,"type") in OptParms) || Option == "&") {
                # Unrecognized options may be allowed, but only if they
                # occur at the start of an option string, because there is
                # no means to pass the offset of the problematic option back.
                if (AllowUnrecOpt && ArgInd == 2)
                    return ArgsLeft
                else {
                    _procArgv_err = "Invalid option: " _optIntroChar Option
                    return -3
                }
            }

            # Find what the value of the option will be if it takes one.
            # NeedNextOpt is true if the option specifier is the last char of
            # this arg, which means that if the option requires a value it is
            # the following arg.
            if (NeedNextOpt = (ArgInd >= ArgLen)) { # Value is the following arg
                if (GotValue = ((ArgNum + 1) < argc))
                    Value = argv[ArgNum+1]
            }
            else {      # Value is included with option
                Value = substr(Arg,ArgInd + 1)
                GotValue = 1
            }

            # _assignVal returns: negative value on error,
            # 0 if option did not require an argument,
            # 1 if it did & used the whole arg,
            # 2 if it required just one char of the arg.
            if (HadValue = _assignVal(Option, Value, Options, OptParms,
                    GotValue, "", ++OptionNum, "a", "", !NeedNextOpt,
                    _optIntroChar)) {
                if (HadValue < 0)       # error occurred
                    return HadValue
                if (HadValue == 2)
                    ArgInd++    # Account for the single-char value we used.
                else {
                    if (NeedNextOpt) {
                        # Last option spec was the last char of its arg.
                        # Since this option required a value,
                        # it took the following arg as value.
                        # Discard Option; code we break to will discard value
                        delete argv[ArgNum++]
                        ArgsLeft--
                    }
                    break       # This option has been used up
                }
            }
        }
        # Do not delete arg until after processing of it, so that if it is not
        # recognized it can be left in ARGV[].
        delete argv[ArgNum]     # Discard Option or Option Value
        ArgsLeft--
    }
    return ArgsLeft
}

# _parseGetoptsOptList(): Parse a getopts-style option list.
# The option list is a string which contains all of the possible command line
# options.
# A character followed by certain characters indicates that the option takes
# an argument, with type as follows:
#    :  String argument
#    ;  Non-empty string argument
#    *  Numeric argument (may include decimal point and fraction)
#    (  Non-negative numeric argument
#    )  Positive numeric argument
#    #  Integer argument
#    <  Non-negative integer argument
#    >  Positive integer argument
# The only difference the type of argument makes is in the runtime argument
# error checking that is done.
#
# The & option is a special case used to get numeric options without the
# user having to give an option character.  It is shorthand for [-+.0-9].
# If & is included in optlist and an option string that begins with one of
# these characters is seen, the value given to "&" will include the first
# char of the option.  & must be followed by a type character other than ":"
# or ";".
# Note that if e.g. &> is given, an option of -.5 will produce an error.
# Also note that POSIX 1003.1-2001 generally prohibits options consisting of
# digit-strings.
# In addition to letters and numerals, ~@%_=,./ are reasonable characters to
# use as options.
# Input variables:
#
# OptList is the getopts-style option list.
#
# Output variables:
#
# For each option specified, the type of the option is stored in OptParms[] by
# assigning values to various indexes of the form OptParms[option-letter,parm]
# as follows:
#
# Type   Indexes set
# -----  ----------------------------
# flag   "type" = "flag"
# :      "type" = "string", "minlen" = 0
# ;      "type" = "string", "minlen" = 1
# *      "type" = "number"
# (      "type" = "number", "ge" = 0
# )      "type" = "number", "gt" = 0
# #      "type" = "integer"
# <      "type" = "integer", "ge" = 0
# >      "type" = "integer", "gt" = 0
#
# Return value:
# On success, a null string.
# On failure, an error description.
function _parseGetoptsOptList(OptList, OptParms,
        pos, listLen, opt, follow, kind)
{
    listLen = length(OptList)
    pos = 1
    while (pos <= listLen) {
        opt = substr(OptList, pos, 1)
        # Type characters and a few others may not be used as options.
        if (index(":;*()#<>-+;|-", opt))
            return "Disallowed option character: " opt

        # The character after the option decides its type.  Anything that is
        # not a type character (or the end of the list) means a plain flag.
        follow = substr(OptList, pos + 1, 1)
        kind = "flag"
        if (follow != "") {
            if (index(":;", follow)) {
                kind = "string"
                OptParms[opt, "minlen"] = (follow == ":") ? 0 : 1
            }
            else if (index("*()", follow))
                kind = "number"
            else if (index("#<>", follow))
                kind = "integer"
        }

        if (opt == "&" && kind != "integer" && kind != "number")
            return "Option & must have integer or number type"

        # Record the lower bound implied by the type character, if any.
        if (index("(<", follow))
            OptParms[opt, "ge"] = 0
        else if (index(")>", follow))
            OptParms[opt, "gt"] = 0
        OptParms[opt, "type"] = kind

        # A type character is consumed together with its option.
        pos += (kind == "flag") ? 1 : 2
    }
    return ""
}

# _parseVarDesc(): Parse a variable description.
# Variable descriptions have this format:
# option-character,option-name,option-type[,extra-parameter-name=value,...]
# option-character is a single character that may be used to give the option
#     on the command line either in traditional POSIX syntax
#     (-option-letter[value]) or in GNU-style syntax (--option-letter[=value])
# option-name is a name that may be used to give the option in any venue:
#     on the command line as a GNU-style option, in a config file, or (if
#     allowed) in the environment.
# At least one of option-character and option-name must be given.
# If a single-character option-name is given, it must not be the same as
# the option-character for any other option.
# option-type is the type of the option.  It is required.  The types are:
#     string - any string
#     number - any number representable in a double
#     integer - any whole number representable in an int
# Any parameters given after the third have the form "name=value".
# Only specific names are allowed, and some names are only allowed for
# certain types.
# The parameters that may be specified are:
# ge: A value that a number/integer must be greater than or equal to
# gt: A value that a number/integer must be greater than
# minlen: A minimum length for strings
# sources: A string of characters representing the venues in which the
#     option-name may be given.  If not given, the option may be given
#     either in a config file (if any are specified) or on the command
#     line.  The characters are:
#     f: The option may be specified in a config file
#     a: The option may be given as an argument on the command line
#     e: The option may be given in the environment
#
# Input variables:
# varDesc is the variable description.
#
# Output variables:
# The option parameters are stored in OptParms[].
# For each option specified, the type of the option is stored in OptParms[] by
# assigning values to various indexes of the form OptParms[option-letter,parm]
# as follows:
#
# !!! Fill this in
#
# Nothing is stored in OptParms[] if the description is found to be invalid.
#
# Return value:
# On success, a null string.
# On failure, a string describing the problem.
function _parseVarDesc(varDesc, OptParms,
        parms, parmFields, numFields, char, name, type, pos, parmName, val, err,
        source, i, len, optNum, fieldNum)
{
    numFields = split(varDesc, parmFields, ",")
    char = parmFields[1]
    name = parmFields[2]
    type = parmFields[3]
    if (char == "" && name == "" || type == "")
        return "Invalid option description; "\
            "must include a type and a character or name"
    if (type != "string" && type != "number" && type != "integer")
        return "Invalid type (must be string, number, or integer): " type
    parms["type"] = type

    # Process parameters and check the values assigned to them.
    # The results are stored in parms[] and in the variable "source".
    for (fieldNum = 4; fieldNum <= numFields; fieldNum++) {
        # Split parameter into name and value
        parmName = parmFields[fieldNum]
        pos = index(parmName, "=")
        if (!pos)
            return "No value assigned to parameter \"" parmName "\""
        val = substr(parmName, pos+1)
        parmName = substr(parmName, 1, pos-1)
        if (parmName == "ge" || parmName == "gt") {
            if (type != "integer" && type != "number")
                return "Parameter \"" parmName \
                    "\" cannot be applied to type " type
            if (val !~ /^[-+]?([0-9]+|[0-9]*\.[0-9]+|[0-9]+\.)$/)
                return "Parameter \"" parmName "\" must have numeric value"
            parms[parmName] = val+0
        }
        else if (parmName == "minlen") {
            if (type != "string")
                return "Parameter \"" parmName \
                    "\" cannot be applied to type " type
            if (val !~ /^[0-9]+$/)
                return "Parameter \"" parmName \
                    "\" must have non-negative integer value"
            parms[parmName] = val+0
        }
        else if (parmName == "sources") {
            if (val !~ /^[fae]+$/)
                return "Parameter \"" parmName "\" must have a value "\
                    "consisting of one or more of the letters: fae"
            source = val
        }
        else
            return "Unknown parameter name \"" parmName "\""
    }

    # Validate the option character and name before anything is stored, so
    # that a rejected description leaves OptParms[] untouched.
    if (char != "" && index(":;*()#<>-+;|-", char))
        return "Disallowed option character: " char
    if (name != "" && name !~ /^[a-z0-9][-_a-z0-9]*$/)
        return "Invalid option name: " name

    optNum = ++OptParms["numopts"]
    if (char != "") {
        OptParms[char,"arg"]
        _storeParms(char, parms, OptParms)
        OptParms["options", optNum, "char"] = char
    }
    if (name != "") {
        # Named parameters may by default be given in a file or on the command
        # line.
        if (source == "")
            source = "fa"
        len = length(source)
        # NOTE(review): the venues are recorded under the option character
        # (which may be null), not under the name - confirm this is what the
        # consumers of OptParms[] expect.
        for (i = 1; i <= len; i++)
            OptParms[char,substr(source,i,1)]
        _storeParms(name, parms, OptParms)
        OptParms["options", optNum, "name"] = name
    }
    # Cross-reference the two spellings of the option.
    if (char != "" && name != "") {
        OptParms[char,"alt"] = name
        OptParms[name,"alt"] = char
    }
    return ""
}

# _storeParms(): copy every element of parms[] into out[] under the index
# (name, parameter-name).
function _storeParms(name, parms, out,
        parm)
{
    for (parm in parms)
        out[name, parm] = parms[parm]
}

# _assignVal(): Store an option.
# Assignment to values in Options[] occurs only in this function.
#
# Input variables:
# Option: Option specifier character.
# Value: Value to be assigned to option, if it takes a value.
# OptParms[]: Option descriptions.  See _parseGetoptsOptList().
# GotValue: Whether any value is available to be assigned to this option.
# Name: Name of option being processed.
# OptionNum: Number of this option (starting with 1) for the source it comes
#     from.  For example, each option found on the command line should get a
#     successive integer, as should each option found in a given config file.
# Source: The letter recorded as the origin of this instance.  "a" is
#     treated as the command line; "f" causes File to be recorded as well.
# File: The name stored for this instance when Source is "f".
# SingleOpt: true if the value (if any) that is available for this option was
#     given as part of the same command line arg as the option.  Used only for
#     options from the command line.
# specGiven is the option specifier character used, if any (e.g. - or +),
#     for use in error messages.
#
# Output variables:
# Options[]: Options array to return values in.  For instance number N of
#     the option (N = the new value of Options[Option,"count"]), the value is
#     stored in Options[Option,N], the option number in
#     Options[Option,"num",N], and the source in Options[Option,"source",N].
#     The first instance is also stored in Options[Option], unless it is a
#     flag that is being turned off.
#
# Global variables:
# On failure, a description of the error is stored in _procArgv_err
#
# Return value:
# On success:
#  0 if option did not require an argument
#  1 if option did require an argument and used the entire argument
#  2 if option required just one character of the argument
# On failure: A negative value.
# Current error values:
# -1: Option that required an argument did not get it.
# -2: Value of incorrect type supplied for option.
# -3: Assertion failure
function _assignVal(Option,Value,Options,OptParms,GotValue,Name,OptionNum,
    Source,File,SingleOpt,specGiven,
    Instance,UsedValue,Err,numericType,type)
{
    # A type is recorded for every option when the option list is parsed, so
    # a missing one is an internal error.
    if (!((Option,"type") in OptParms)) {
        _procArgv_err = "_assignVal(): No type for option '" Option "'!"
        return -3
    }
    type = OptParms[Option, "type"]

    if (type == "flag") {
        if (Source != "a" && Value != "") {
            # An environment or rcfile assignment gave the flag a value:
            # "0" and "-" turn it off, anything else turns it on.
            Value = (Value != "0" && Value != "-")
            UsedValue = 1
        }
        else if (OptionNum && SingleOpt && substr(Value,1,1) == "-") {
            # A command line flag followed by - in the same arg is being
            # turned off; the - is the one character consumed.
            Value = 0
            UsedValue = 2
        }
        else {
            # A bare flag is being turned on.
            Value = 1
            UsedValue = 0
        }
    }
    else {
        # Every other type takes a value.
        if (!GotValue) {
            if (Name == "")
                _procArgv_err = "option requires an argument -- " Option
            else
                _procArgv_err = "Variable requires a value -- " Name
            return -1
        }
        Err = _checkValue(OptParms,Value,Option,Name,specGiven)
        if (Err != "") {
            _procArgv_err = Err
            return -2
        }
        # Force numeric options to be numbers, so that the value stored in
        # Options[] is numeric too.
        numericType = (type == "integer" || type == "number")
        if (numericType)
            Value += 0
        UsedValue = 1
    }

    # Only the first instance sets the un-indexed value, and a flag sets it
    # only when it is being turned on.
    Instance = ++Options[Option,"count"]
    if (Instance == 1 && (type != "flag" || Value))
        Options[Option] = Value
    Options[Option,Instance] = Value
    Options[Option,"num",Instance] = OptionNum
    Options[Option,"source",Instance] = Source
    if (Source == "f")
        Options[Option,"file",Instance] = File
    return UsedValue
}

# _checkValue is used to check that an appropriate value is assigned to an
# option that takes a value.
# OptChar is the option letter
# Value is the value being assigned
# Name is the var name of the option, if any
# OptParms[] describes the options
# specGiven is the option specifier character use, if any (e.g. - or +),
# for use in error messages.
# Return value:
# Null on success, error string on error
#
# Checks performed, by OptParms[OptChar,"type"]:
# string: length must be at least OptParms[OptChar,"minlen"], if set.
# integer/number: must look like a number (integers may not contain a "."),
#     and must satisfy OptParms[OptChar,"gt"] if present, otherwise
#     OptParms[OptChar,"ge"] if present.
# The error string names the variable (Name) in full when one is given,
# otherwise the option character preceded by specGiven.
function _checkValue(OptParms,Value,OptChar,Name,specGiven,
    Err,ErrStr,V,type,minlen,ge,gt)
{
    type = OptParms[OptChar, "type"]
    if (type == "string") {
        if (length(Value) < (minlen = OptParms[OptChar,"minlen"])) {
            if (minlen == 1)
                Err = "must be a non-empty string"
            else
                Err = "must be a string of length at least " minlen
        }
    }
    else if (type != "integer" && type != "number")
        # This should never happen
        return "_checkValue(): Invalid type '" type "' for option '" OptChar \
            "'!"
    # All remaining types are numeric.
    # A number begins with optional + or -, and is followed by a string of
    # digits or a decimal with digits before it, after it, or both
    else if (Value !~ /^[-+]?([0-9]+|[0-9]*\.[0-9]+|[0-9]+\.)$/)
        Err = "must be a number"
    else if (type == "integer" && index(Value,"."))
        Err = "may not include a fraction"
    else {
        V = Value + 0
        # Only one bound is consulted; gt takes precedence over ge.
        if ((OptChar, "gt") in OptParms)
            gt = OptParms[OptChar, "gt"]
        else if ((OptChar, "ge") in OptParms)
            ge = OptParms[OptChar, "ge"]
        if (V < 0 && ge != "" && ge == 0)           # >= 0
            Err = "may not be negative"
        else if (V <= 0 && gt != "" && gt == 0)     # > 0
            Err = "must be a positive number"
        else if (ge != "" && V < ge)
            Err = "must be a number >= " ge
        else if (gt != "" && V <= gt)
            Err = "must be a number > " gt
    }
    if (Err != "") {
        ErrStr = "Bad value \"" Value "\". Value assigned to "
        if (Name != "")
            # Report the whole variable name, not just its first character.
            return ErrStr "variable " Name " " Err
        else {
            if (OptChar == "&")
                OptChar = Value
            return ErrStr "option " specGiven substr(OptChar,1,1) " " Err
        }
    }
    else
        return ""
}

# Note: only the above functions are needed by _procArgv.
# The rest of these functions call _procArgv() and also do other
# option-processing stuff.

# Opts: Process command line arguments.
# Opts processes command line arguments using _procArgv()
# and checks for errors.  If an error occurs, a message is printed
# and the program is exited.
#
# Input variables:
# Name is the name of the program, for error messages.
# Usage is a usage message, for error messages.
# OptList is the option description string, as used by _procArgv().
# MinArgs is the minimum number of non-option arguments that this
# program should have, not including ARGV[0] and +h.
# If the program does not require any non-option arguments,
# MinArgs should be omitted or given as 0.
# rcFiles, if given, is a colon-separated list of filenames to read for
# variable initialization.  If a filename begins with ~/, the ~ is replaced
# by the value of the environment variable HOME.  If a filename begins with
# $, the part from the character after the $ up until (but not including)
# the first character not in [a-zA-Z0-9_] will be searched for in the
# environment; if found its value will be substituted, if not the filename will
# be discarded.
# rcfiles are read in the order given.
# Values given in them will not override values given on the command line,
# and values given in later files will not override those set in earlier
# files, because _assignVal() will store each with a different instance index.
# The first instance of each variable, either on the command line or in an
# rcfile, will be stored with no instance index, and this is the value
# normally used by programs that call this function.
# VarNames is a comma-separated list of variable names to map to options,
# in the same order as the options are given in OptList.
# If EnvSearch is given and nonzero, the first EnvSearch variables will also be
# searched for in the environment.  If set to -1, all values will be searched
# for in the environment.  Values given in the environment will override
# those given in the rcfiles but not those given on the command line.
# NoRCopt, if given, is an additional letter option that if given on the
# command line prevents the rcfiles and environment from being read.
# See _procArgv() for a description of AllowUnRecOpt and optIntroChars, and
# ExclusiveOptions() for a description of exOpts.
# Special options:
# If x is made an option and is given, some debugging info is output.
# h is assumed to be the help option.
# Global variables:
# The command line arguments are taken from ARGV[].
# The arguments that are option specifiers and values are removed from
# ARGV[], leaving only ARGV[0] and the non-option arguments, with the
# non-option arguments moved down so that they start at index 1.
# The number of elements in ARGV[] should be in ARGC.
# After processing, ARGC is set to the number of elements left in ARGV[].
# The option values are put in Options[].
# On error, Err is set to a positive integer value so it can be checked for in
# an END block.
# _procArgv_err may be set by this function, and is also used by it when it is
# set by other functions (InitOpts()).
# Return value: The number of elements left in ARGV is returned.
# todo: Maybe make variable names case-independent?
function Opts(Name,Usage,OptList,MinArgs,rcFiles,VarNames,EnvSearch,NoRCopt,
    AllowUnrecOpt,optIntroChars,exOpts,
    Var, Var2Char, VarInfo, ArgsLeft, e, Debug, OptParms, errStr)
{
    if (MinArgs == "")
        MinArgs = 0
    if ((errStr = _parseGetoptsOptList(OptList NoRCopt, OptParms)) != "") {
        printf "%s: Option list specification error: %s.\n", Name, errStr >> "/dev/stderr"
        Err = 1
        exit 1
    }
    # Process command line arguments
    ArgsLeft = _procArgv(ARGC, ARGV, OptParms, Options, AllowUnrecOpt,
        optIntroChars)
    if ("x" in Options) {
        Debug = Options["x"]
        if (Debug == "")
            Debug = 1
    }
    else
        Debug = 0
    if (ArgsLeft < (MinArgs+1) && !("h" in Options)) {
        if (ArgsLeft >= 0) {
            _procArgv_err = "Not enough arguments"
            Err = 4
        }
        else
            # A negative count from _procArgv() is an error indication.
            Err = -ArgsLeft
        print mangleHelp(sprintf("%s: %s.\nUse -h for help.\n%s", Name,_procArgv_err,Usage)," \t\n[") >> "/dev/stderr"
        exit 1
    }
    # Move the remaining non-option arguments down to ARGV[1..ArgsLeft-1].
    if (ArgsLeft > 1)
        paPackArr(ARGV,ArgsLeft-1,1)
    split("",Var2Char);  # Let awk know this is an array
    split("",VarInfo);   # Let awk know this is an array
    mkVarMap(OptList,VarNames,EnvSearch,Var2Char,VarInfo)
    # Process environment & rcfiles
    if (rcFiles != "" && (NoRCopt == "" || !(NoRCopt in Options)) &&
        (e = InitOpts(rcFiles,Options,Var2Char,OptParms,VarInfo,Debug)) < 0) {
        print Name ": " _procArgv_err ".\nUse -h for help." >> "/dev/stderr"
        Err = -e
        exit 1
    }
    if (Debug)
        for (Var in Var2Char)
            if (Var2Char[Var] in Options)
                printf "(%s) %s=%s\n", Var2Char[Var],Var,Options[Var2Char[Var]] >> "/dev/stderr"
            else
                printf "(%s) %s not set\n",Var2Char[Var],Var >> "/dev/stderr"
    if ((exOpts != "") &&
        ((_procArgv_err = ExclusiveOptions(exOpts,Options)) != "")) {
        printf "%s: Error: %s\n",Name,_procArgv_err >> "/dev/stderr"
        Err = 1
        exit 1
    }
    return ArgsLeft
}

# Packs Arr[], which should have integer indices starting at or above n,
# to contiguous integer indices starting with n.
# If n is not given it defaults to 0.
# Num should be the number of elements in Arr.  Nothing is done if Num is
# not positive.  Num must not exceed the number of elements actually present
# at or above index n, since the search for the next element is unbounded.
function paPackArr(Arr,Num,n,
    NewInd,OldInd)
{
    NewInd = OldInd = n+0
    # "Num > 0" rather than "Num" so that a negative or fractional count
    # cannot step past zero and loop forever.
    for (; Num > 0; Num--) {
        while (!(OldInd in Arr))
            OldInd++
        if (NewInd != OldInd) {
            Arr[NewInd] = Arr[OldInd]
            delete Arr[OldInd]
        }
        OldInd++
        NewInd++
    }
}

# mangleHelp(): Convert a help message so that options are displayed with
# the appropriate option introduction character.
# If the last option given was introduced with "+" or this is awk and no
# options were given, convert -x options in a help message to +x.
# If whitespace is non-null, it is the set of characters that may precede an
# option indicator to indicate that it is such.
# The default is newline, space, or tab.
# -x options may be escaped so that they are not acted on by preceding them
# with "\".  All instances of \- are converted to -.
# The output will *not* have a trailing newline, even if the input did.
function mangleHelp(message,whitespace,
    i,wChar,elem,j,n)
{
    if (_optIntroChar == "+" || _optIntroChar == "" && ARGV[0] == "awk") {
        if (whitespace == "")
            whitespace = " \t\n"
        # For each possible whitespace character...
        for (i = 1; (wChar = substr(whitespace,i,1)) != ""; i++) {
            # The character is backslash-escaped because it is used in a
            # regular expression and may be special there (e.g. "[").
            n = split(message,elem,"\\" wChar "-")
            message = elem[1]
            # Replace all -x except -- with +x
            for (j = 2; j <= n; j++)
                if (substr(elem[j],1,1) == "-")
                    message = message wChar "-" elem[j]
                else
                    message = message wChar "+" elem[j]
        }
    }
    # The \-x -> -x conversion needs to be done regardless of whether the
    # option introduction characters are being converted.
    # gsub is done line at a time rather than on the whole message at once
    # because some awks die when trying to do a gsub on a very long string.
    # Yuck!
    n = split(message,elem,"\n")
    # Drop the empty final element left by a trailing newline.
    if (elem[n] == "")
        n--
    message = ""
    for (i = 1; i <= n; i++) {
        gsub(/\\-/,"-",elem[i])
        message = message elem[i]
        if (i < n)
            message = message "\n"
    }
    return message
}

# Remove all of the options in optList from a "Usage:" (not help) message.
# optList is a string of option characters; usageMessage is the message to
# edit.  The edited message is returned.  Uses gensub(), so gawk is required.
function elideOpts(optList,usageMessage,
    i,c,len)
{
    len = length(optList)
    for (i = 1; i <= len; i++) {
        c = substr(optList,i,1)
        # Remove character from flags list
        usageMessage = \
            gensub("(\\[[^<>]*)" c "([^<>]*\\])","\\1\\2",1,usageMessage)
        # Remove option + value description: a bracketed group that starts
        # with -c and runs to the next ]
        sub("\\[-" c "[^]]*\\] *","",usageMessage)
    }
    # Remove empty flags lists
    gsub(/-?\[-?\] */,"",usageMessage)
    # Remove now-empty lines
    gsub(/\n[ \t]+\n/,"\n",usageMessage)
    return usageMessage
}

# ReadConfigFile(): Read a file containing var/value assignments, in the form
# of a variable name, followed by the assignment string, followed by the value.
# Whitespace (spaces and tabs) around a variable (leading whitespace on the
# line and whitespace between the variable name and the assignment character)
# is stripped.  Lines that do not contain an assignment operator or which
# contain a null variable name are ignored, other than possibly being noted in
# the return value.
# Input variables:
# File is the file to read.
# AssignOp is the assignment string.  The first instance of AssignOp on a line
# separates the variable name from its value.
# If StripWhite is true, whitespace around the value (whitespace between the
# assignment char and the value, and whitespace at the end of the logical
# line) is stripped.
# VarPat is a pattern that variable names must match.
# Example: "^[a-zA-Z][a-zA-Z0-9]+$"
# It should ensure that variable names cannot include SUBSEP.
# If FlagsOK is true, variables are allowed to be "set" by being put alone on
# a line; no assignment operator is needed.  These variables are set in
# the output array with a null value.  Lines containing nothing but
# whitespace are still ignored.
# Comment, Escape, Quote: See ReadLogicalLine().
# Output variables:
# Values[] contains the assignments.  The first time an assignment to a given
# variable name is encountered, it is stored under two indexes,
# Values[name] and Values[name,1].  Any further assignments to a
# given variable are stored under Values[name,N], where N is the instance
# number.
# Lines[] contains all of the same indexes that Values[] does, with the value
# being the line number that the variable assignment occurred on.  A flag
# set is recorded by giving it an index in Lines[] but not in Values[].
# Counts[name] is set to the number of times a given variable was
# encountered.  Its indexes form the set of all variable names encountered.
# Return value:
# If any errors occur, a string consisting of descriptions of the errors
# separated by newlines is returned, with each string preceded by a numeric
# value and a colon.  The following strings may change and new values may be
# added but the currently assigned numeric values will not:
# -1:IO error reading from file
# -2:Bad variable name
# -3:Bad assignment
# -4:Last line of file ended with line escape char or within quoted section
# If no errors occur, the number of physical lines read is returned.
# Parse the logical lines of File into Values[], Lines[], and Counts[].
# Lines[] is emptied first; Values[] and Counts[] are added to as given.
# The text before the first AssignOp on a line is the variable name, and the
# text after it is the value; a line with no AssignOp is a flag (the whole
# line is the name).  Returns the error string from ReadLogicalLines() if the
# file could not be read, the accumulated "-2:"/"-3:" messages if any line
# was rejected, and otherwise the starting line number of the last logical
# line processed.
function ReadConfigFile(Values,Lines,Counts,File,Comment,AssignOp,Escape,Quote,
    StripWhite,VarPat,FlagsOK,Debug,
    Line,Errs,AssignLen,LineNum,Var,Val,inLines,lineNums,num,Pos,instance, i)
{
    # Let awk know that these are arrays
    split("",Lines)
    split("",inLines)
    split("",lineNums)

    Errs = ""
    AssignLen = length(AssignOp)
    # With no pattern given, require only that the name be non-null.
    if (VarPat == "")
        VarPat = "."

    num = ReadLogicalLines(File,Comment,Escape,Quote,inLines,lineNums,Debug)
    if (num < 0)
        return num
    if (Debug)
        printf "Processing configuration file %s\n",File >> "/dev/stderr"

    i = 0
    while (++i <= num) {
        Line = inLines[i]
        LineNum = lineNums[i]
        Pos = index(Line,AssignOp)
        if (!Pos) {
            # No assignment operator: the variable is the entire line
            Var = Line
            Val = ""
        }
        else {
            Var = substr(Line,1,Pos-1)
            Val = substr(Line,Pos+AssignLen)
            if (StripWhite) {
                sub("^[ \t]+","",Val)
                sub("[ \t]+$","",Val)
            }
        }

        if (!FlagsOK && Val == "")
            Errs = Errs sprintf("\n-3:Bad assignment on line %d of file %s: %s", LineNum,File,Line)
        else {
            sub("[ \t]+$","",Var)
            if (Var !~ VarPat)
                Errs = Errs sprintf("\n-2:Bad variable name on line %d of file %s: %s", LineNum,File,Var)
            else {
                instance = ++Counts[Var]
                # The first instance is also stored without an instance index.
                if (instance == 1)
                    Lines[Var] = LineNum
                Lines[Var,instance] = LineNum
                # Flags get entries in Lines[] only.
                if (Pos) {
                    if (instance == 1)
                        Values[Var] = Val
                    Values[Var,instance] = Val
                }
            }
        }
    }

    if (Errs == "")
        return LineNum
    # Skip the newline that precedes the first message
    return substr(Errs,2)
}

# ReadLogicalLine: Read a logical line of input from File.  In the simplest
# case, the next physical line is read and returned.  Leading whitespace is
# removed.  Blank lines (lines that are empty or contain nothing but
# whitespace) are skipped.  Reading of a logical line is further modified by
# the following variables:
# Comment is the line-comment character.  If it is found as the first non-
# whitespace character on a line, the line is ignored.
# Escape is the escape character.
# In general, the escape character removes the special meaning of the character
# that follows it; the escape character itself is removed.  Currently, there
# are three characters with special meanings: newlines, quotes, and the escape
# character itself.  If an escape character is followed by a newline, the line
# is joined to the following line and the escape character and newline are
# removed.  An escape character at the end of a comment line does NOT extend
# the comment line.  A comment character at the start of a physical line that
# is being joined to a previous line does not cause the line to be ignored.
# If an escape character is followed by another escape character, they are
# replaced by one escape character.  See below for a description of escaping a
# quote character.  If an escape character is followed by any other character,
# the escape character is simply removed.
# Quote is the quote character.  From one instance of the quote character
# to the next, newlines are preserved.  If a physical line ends with quoting in
# effect, the next line is joined to it, with a newline embedded between them.
# This is different from escaping a newline; in both cases, the lines are
# joined, but in quotes the newline is preserved while an escaped newline is
# deleted.  A quote character may be escaped by the escape character either
# inside or outside of a quoted section; an escaped quote character is taken
# literally, so it neither starts nor ends a quoted section.  Within a quoted
# section, this is the only special meaning of the escape character; if the
# escape character is followed by any other character, it is treated as a
# literal character and included in the returned value.
# Comment, Escape, and Quote behave roughly like the #, \, and " characters
# in the Bourne shell.  If any of them are null, the associated functionality
# does not exist.
# Global variables: _rll_start is set to the physical line number that the
# returned logical line started on.
# _rll_lnum[] is used to track the current line number.
# Return value: The logical line read.  At EOF, a null string is returned.
# On error, a space followed by an error message is returned.  The error
# message begins with a numeric designation, separated from the rest of the
# message by a colon.  The designations are always positive:
#  1: Error reading from the file
#  4: The file ended after a line escape character or within a quoted section
# File is closed, and its line counter reset, whenever EOF or an error is
# encountered.
function ReadLogicalLine(File,Comment,Escape,Quote,Debug,
    line,status,out,inQuote,inEsc,len,i,c)
{
    # Get first line
    while ((status = (getline line < File)) == 1) {
        _rll_lnum[File]++
        sub("^[ \t]+","",line)  # discard leading whitespace
        # Skip blank & comment lines (break loop only for content lines)
        if (line != "" && substr(line,1,1) != Comment)
            break
        else
            # Make sure we do not exit with line set to a comment if a comment
            # is the last line in the file.
            line = ""
    }
    if (status <= 0) {
        if (Debug > 8)
            printf "Closing %s (read status %d)\n",File,status >> "/dev/stderr"
        close(File)
        _rll_lnum[File] = 0
    }
    if (status < 0)
        return " 1:Error reading from file " File ":" ERRNO
    _rll_start = _rll_lnum[File]
    # Without escape or quote processing the physical line is the result.
    if (Escape == "" && Quote == "")
        return line

    len = length(line)
    inEsc = inQuote = 0
    out = ""
    for (i = 1; i <= len; i++) {
        c = substr(line,i,1)
        if (inEsc) {
            out = out c
            inEsc = 0
        }
        # Within quotes, the escape character is special only before a quote.
        else if (c == Escape && (!inQuote || substr(line,i+1,1) == Quote))
            inEsc = 1
        else if (c == Quote)
            inQuote = !inQuote
        else
            out = out c
        # The physical line ended with an escape pending or quoting in
        # effect, so the logical line continues on the next physical line.
        if (i == len && (inEsc || inQuote)) {
            if (inQuote)
                out = out "\n"
            # If in quotes, want the next non-null line
            while ((status = (getline line < File)) == 1 && inQuote && \
                !length(line))
                _rll_lnum[File]++
            if (status == 1)
                _rll_lnum[File]++
            else {
                close(File)
                _rll_lnum[File] = 0
            }
            # The designations below are positive, like the one used for a
            # failure of the first read: ReadLogicalLines() supplies the
            # minus sign, and a designation that already had one would turn
            # into "--1:...", which evaluates to 0 and is taken for success.
            if (status < 0)
                return " 1:Error reading from file " File ":" ERRNO
            if (!status) {
                if (inEsc)
                    return \
                        " 4:Error in file " File ": Escape character at EOF"
                else
                    return \
                        " 4:Error in file " File ": EOF within quoted section"
            }
            # Restart the scan at the beginning of the new physical line.
            len = length(line)
            inEsc = i = 0
        }
    }
    return out
}

# ReadLogicalLines: Read a file and return its contents in Lines[] with one
# logical line per integer index starting at 1.
# See ReadLogicalLine() for a description of the other parameters.
# The physical line number that each logical line started on is returned in
# LineNums[].
# Return value: On success, the number of logical lines read (the highest
# index in Lines[]).  On failure, a string describing the error, starting with
# a negative number.
function ReadLogicalLines(File,Comment,Escape,Quote,Lines,LineNums,Debug,
    num,line)
{
    num = 0
    for (;;) {
        line = ReadLogicalLine(File,Comment,Escape,Quote,Debug)
        # A null string means end of file
        if (line == "")
            break
        if (Debug > 9)
            printf "Got config line: %s\n",line >> "/dev/stderr"
        # Leading whitespace is stripped from logical lines, so a leading
        # space marks an error message.  Replace the space with a minus sign.
        if (substr(line,1,1) == " ")
            return "-" substr(line,2)
        num++
        Lines[num] = line
        LineNums[num] = _rll_start
    }
    return num
}

# mkVarMap: Make a lookup table to map option variable names to option letters.
#
# Input variables:
#
# OptList, VarNames, and EnvSearch are as described for Opts().
#
# Output variables:
#
# Var2Char[]: For each variable name in VarNames, an element is stored in
# Var2Char[] that maps that variable name to the corresponding option letter.
#
# VarInfo[]: If OptList specifies a (getopts-style) type character for the
# option, that character is stored in VarInfo[] under the same variable-name
# index.  For variables that should be searched for in the environment (per
# EnvSearch), the index VarInfo[variable-name,"e"] is also created, with no
# value.
#
# Globals, return value: None.
function mkVarMap(OptList, VarNames, EnvSearch, Var2Char, VarInfo,
    NumVars,i,optListInd,Vars,Type,Var)
{
    NumVars = split(VarNames,Vars,",")
    # -1 means that every variable is to be searched for in the environment
    if (EnvSearch == -1)
        EnvSearch = NumVars

    # Walk OptList in step with the variable names; each option occupies one
    # character, plus one more if it is followed by a type character.
    optListInd = 1
    i = 0
    while (++i <= NumVars) {
        Var = Vars[i]
        Var2Char[Var] = substr(OptList,optListInd,1)
        optListInd++
        Type = substr(OptList,optListInd,1)
        if (Type ~ "^[:;*()#<>&]$") {
            VarInfo[Var] = Type
            optListInd++
        }
        if (i <= EnvSearch)
            VarInfo[Var,"e"]
    }
}

# Get variable assignments from environment and rcfiles.
#
# Input variables:
#
# rcFiles is as described for Opts().
#
# Var2Char[] maps variable names to option characters.
#
# OptParms[]: Option parameter information (option type and bounds), as
# generated by _parseGetoptsOptList()/_parseVarDesc() and used by _assignVal().
#
# VarInfo[] maps variable names to type characters, and also indicates whether
# each variable should be searched for in the environment (see mkVarMap()).
#
# Typically, mkVarMap() will be used to convert an option-list and
# variable-name-list into the Var2Char[] and VarInfo[] datasets required by
# this function.
#
# If Debug is true, debugging information is printed to stderr.
#
# Output variables:
#
# Data is stored in Options[].
#
# Global variables:
#
# Sets _procArgv_err.  Uses ENVIRON[].
#
# If anything is read from any of the rcfiles, sets global READ_RCFILE to 1.
#
# Return value: An integer.
#
# On failure, one of negative values returned by _assignVal(), or -1 for other
# failures.
#
# On success, 0.
function InitOpts(rcFiles, Options, Var2Char, OptParms, VarInfo, Debug,
    Line, Var, Pos, Type, Ret, i, rcFile, fNames, numrcFiles, filesRead, Err,
    Values, retStr, Lines, Counts, OptNum, instance, numinst, envvar)
{
    split("",filesRead,"")      # make awk know this is an array
    split("",Counts,"")         # make awk know this is an array
    Ret = 0
    # Process environment first, since values assigned there should take
    # priority
    OptNum = 0
    for (Var in Var2Char) {
        if ((Var,"e") in VarInfo && Var in ENVIRON &&
            (Err = _assignVal(Var2Char[Var],ENVIRON[Var],Options,OptParms,1,
            Var,++OptNum,"e")) < 0)
            return Err
    }

    numrcFiles = split(rcFiles,fNames,":")
    split("",Values);   # Let awk know this is an array
    split("",Lines);    # Let awk know this is an array
    for (i = 1; i <= numrcFiles; i++) {
        rcFile = fNames[i]
        if (substr(rcFile,1,2) == "~/")
            rcFile = ENVIRON["HOME"] substr(rcFile,2)
        else if (substr(rcFile,1,1) == "$") {
            # Replace a leading $NAME with the value of that environment
            # variable; skip the file if the variable is not set.
            rcFile = substr(rcFile,2)
            match(rcFile,"^[a-zA-Z0-9_]*")
            envvar = substr(rcFile,1,RLENGTH)
            if (envvar in ENVIRON)
                rcFile = ENVIRON[envvar] substr(rcFile,RLENGTH+1)
            else
                continue
        }
        if (rcFile in filesRead)
            continue
        # rcfiles are liable to be given more than once, e.g. UHOME and HOME
        # may be the same
        filesRead[rcFile]

        # Start each file with empty result arrays.  ReadConfigFile() adds to
        # Values[] and Counts[], so without this the assignments from earlier
        # files would be applied again (and attributed to this file) below,
        # even when this file cannot be read.
        split("",Values)
        split("",Lines)
        split("",Counts)
        retStr = \
            ReadConfigFile(Values,Lines,Counts,rcFile,"#","=","\\","\"",0,"",1,
            Debug)
        if (Debug > 3)
            printf "Done processing configuration file.\n" >> "/dev/stderr"
        if ((retStr+0) > 0) {
            READ_RCFILE = 1
            READ_RCFILE += 0    # so awklint will not complain about single use
        }
        else if (retStr+0 < -1) {
            # If any failure other than cannot read file
            sub(/^[^:]*:/,"",retStr)
            _procArgv_err = retStr
            Ret = -1
        }
        OptNum = 0
        for (Var in Counts)
            if (Var in Var2Char) {
                numinst = Counts[Var]
                # Whether a value was given is determined per instance, since
                # a variable may appear both with and without an assignment.
                for (instance = 1; instance <= numinst; instance++)
                    if ((Err = _assignVal(Var2Char[Var],
                        (Var,instance) in Values ? Values[Var,instance] : "",
                        Options,OptParms,(Var,instance) in Values,Var,
                        ++OptNum,"f",rcFile)) < 0)
                        return Err
            }
            else {
                _procArgv_err = sprintf(\
                    "Unknown var \"%s\" assigned to on line %d\nof file %s",Var,
                    Lines[Var],rcFile)
                Ret = -1
            }
    }
    return Ret
}

# OptSets is a semicolon-separated list of sets of option sets.
# Within a list of option sets, the option sets are separated by commas.  For
# each set of sets, if any option in one of the sets is in Options[] AND any
# option in one of the other sets is in Options[], an error string is returned.
# If no conflicts are found, nothing is returned.
# Example: if OptSets = "ab,def,g;i,j", an error will be returned due to
# the exclusions presented by the first set of sets (ab,def,g) if:
# (a or b is in Options[]) AND (d, e, or f is in Options[]) OR
# (a or b is in Options[]) AND (g is in Options) OR
# (d, e, or f is in Options[]) AND (g is in Options)
# An error will be returned due to the exclusions presented by the second set
# of sets (i,j) if: (i is in Options[]) AND (j is in Options[]).
# todo: make options given on command line unset options given in config file
# todo: that they conflict with.
# todo: Let a null string given for a ; option unset the option.
function ExclusiveOptions(OptSets, Options,
    groups, numGroups, g, sets, numSets, a, b, lenA, lenB, p, q, optA, optB,
    msgs)
{
    numGroups = split(OptSets, groups, ";")
    # Walk each set of sets independently.
    for (g = 1; g <= numGroups; g++) {
        numSets = split(groups[g], sets, ",")
        # Each set is compared only against the sets that follow it; earlier
        # sets have already been checked against this one.
        for (a = 1; a < numSets; a++) {
            lenA = length(sets[a])
            for (p = 1; p <= lenA; p++) {
                optA = substr(sets[a], p, 1)
                if (!(optA in Options))
                    continue
                for (b = a + 1; b <= numSets; b++) {
                    lenB = length(sets[b])
                    for (q = 1; q <= lenB; q++) {
                        optB = substr(sets[b], q, 1)
                        if (optB in Options)
                            msgs = msgs "\n" \
                                sprintf("Cannot give both %s and %s options.",
                                optA, optB)
                    }
                }
            }
        }
    }
    # Every message was prefixed with a newline; drop the first one.  With no
    # conflicts this yields the null string.
    return substr(msgs, 2)
}

# Opt2Set(): make the value of each instance of option Opt recorded in
# Options[] an index of Set[].
# Return value: the number of instances of Opt in Options[] (0 if Opt was not
# given).
function Opt2Set(Options, Opt, Set,    total, n)
{
    if (!(Opt in Options))
        return 0
    total = Options[Opt,"count"]
    n = total
    while (n >= 1) {
        Set[Options[Opt,n]]
        n--
    }
    return total
}

# Opt2Sets(): like Opt2Set(), but values that begin with "!" become indexes
# of nSet[] (with the "!" stripped) and all other values become indexes of
# Set[].
# Return value: the number of instances of Opt in Options[].
function Opt2Sets(Options, Opt, Set, nSet,    allVals, numInst, v)
{
    numInst = Opt2Set(Options, Opt, allVals)
    for (v in allVals) {
        if (v ~ /^!/)
            nSet[substr(v, 2)]
        else
            Set[v]
    }
    return numInst
}

# The value of each instance of option Opt that occurs in Options[] is made an
# element of OptVals[], with indexes starting with 1.
# If Sep is given, the value of each instance is split on this pattern,
# and each resulting string is treated as a separate value.
# Return value: The number of values stored in OptVals[].  This is the number
# of instances of Opt in Options[] unless Sep split an instance into more (or
# fewer) than one value.
function Opt2Arr(Options, Opt, OptVals, Sep,
    total, inst, stored, pieces, numPieces, p)
{
    if (!(Opt in Options))
        return 0
    stored = 0
    total = Options[Opt,"count"]
    if (!Sep) {
        # No separator: one stored value per instance.
        for (inst = 1; inst <= total; inst++)
            OptVals[++stored] = Options[Opt,inst]
        return stored
    }
    for (inst = 1; inst <= total; inst++) {
        numPieces = split(Options[Opt,inst], pieces, Sep)
        for (p = 1; p <= numPieces; p++)
            OptVals[++stored] = pieces[p]
    }
    return stored
}

# Returns the number of options in the string OptList that were given,
# as indicated by the data in Options[].
# If any of Arg, Env, or File are true, the given opts are only considered to
# have been set if they were set in the command line arguments, environment,
# or in a configuration file, respectively.  If none of them is true, all
# three sources are accepted.
# The first instance of each option that was set is set in Results[], with the
# values (if any) assigned to it.
# Results["list"] is set to the list of options found (appended to any value
# already stored there).
function OptsGiven(Options, OptList, Arg, Env, File, Results,
    accept, pos, listLen, optChar, inst, src, found)
{
    if (!Arg && !Env && !File)
        Arg = Env = File = 1
    # Map each source code recorded in Options[] to whether it is acceptable.
    accept["a"] = Arg
    accept["f"] = File
    accept["e"] = Env

    found = 0
    listLen = length(OptList)
    for (pos = 1; pos <= listLen; pos++) {
        optChar = substr(OptList, pos, 1)
        inst = 1
        while ((optChar,"num",inst) in Options) {
            src = Options[optChar,"source",inst]
            if ((src in accept) && accept[src]) {
                # Only the first acceptable instance is recorded.
                Results[optChar] = Options[optChar,inst]
                Results["list"] = Results["list"] optChar
                found++
                break
            }
            inst++
        }
    }
    return found
}

# A list of options is passed in the string OptList.
# These options are searched for first in the options set on the command
# line; if no such were given, then in the options set in the environment;
# and if no such were given, then in the options set in the configuration
# file(s).
# The resulting options are set in the array Results[], with their
# values (if any) assigned to them.  Only the first instances of options are
# copied to Results[].
# In other words, when a source is found that has any of the options set in it,
# any options set in that source are copied to Results[] and searching ceases.
# Results["list"] is set to the list of options found.
# Return value: The number of options stored in Results[].
function prioOpt(Options, OptList, Results,    found)
{
    # Sources are tried in priority order: command line, environment, then
    # configuration file.  The first source that yields anything wins.
    found = OptsGiven(Options, OptList, 1, 0, 0, Results)
    if (!found)
        found = OptsGiven(Options, OptList, 0, 1, 0, Results)
    if (!found)
        found = OptsGiven(Options, OptList, 0, 0, 1, Results)
    return found ? found : 0
}

# addPseudoOpt(): Add a pseudo-option.
# Input variables:
# varDesc is the option description.
# Value is the value to assign to it.
# Output variables:
# The option value is stored in Options[]
# Return value:
# On success, a null string.
# On failure, a string describing the error.
function addPseudoOpt(varDesc, Value, Options,
    optParms, parseErr, optChar, optName, status)
{
    parseErr = _parseVarDesc(varDesc, optParms)
    if (parseErr != "")
        return "Error in pseudo-option description \"" varDesc "\":\n" parseErr

    # The description is expected to yield exactly one option; assign the
    # value to it with source code "p".
    optChar = optParms["options",1,"char"]
    optName = optParms["options",1,"name"]
    status = _assignVal(optChar, Value, Options, optParms, 1, optName, 0, "p")
    if (status < 0)
        return _procArgv_err "."
    return ""
}

# varValOpt: Process all instances of an option that takes an option-value of
# the form var=value.
# Input variables:
# Options[] is the option data.
# Opt is the option to process.
# If Sep is given, it is the var/value separator.  It defaults to "=",
# and is treated as a regular expression.
# If Lower is given, all variable names are pushed to lower case.
# If Pat is given, all variable names are required to match it (after
# being pushed to lower case, if Lower is true).
# If errVar is non-null, this function will not abort and return an error if an
# option instance does not include the var/val separator or a bad variable
# name is given.
# Instead, the entire value of the option instance is
# assigned to the pseudo-variable name given by errVar.
# Output variables:
# The results are stored in varVals[] and allVarVals[].
# allVarVals[varname,n] is set to the value given for the nth instance of
# varname that is encountered.
# allVarVals[varname,"count"] is set to the number of instances of varname that
# are given.
# varVals[varname] is set identically to allVarVals[varname,1].
# Return value:
# On success, the number of instances of Opt that were encountered.
# On failure, -1.  In this case, the global variable _varValOpt_err is set to a
# string describing the problem.
# If errVar is passed, no failure return will occur.
function varValOpt(Options,Opt,varVals,allVarVals,Sep,Lower,Pat,errVar,
    assignment,OptVals,numOpt,i,varname,value,n)
{
    if (Sep == "")
        Sep = "="
    numOpt = Opt2Arr(Options,Opt,OptVals)
    for (i = 1; i <= numOpt; i++) {
        assignment = OptVals[i]
        if (!match(assignment,Sep)) {
            # No separator at all.
            if (errVar == "") {
                _varValOpt_err = "No instance of the separator '" Sep \
                    "' in this assignment: " assignment
                return -1
            }
            varname = errVar
            value = assignment
        }
        else {
            # RSTART/RLENGTH describe the separator found by match() above.
            varname = substr(assignment,1,RSTART-1)
            value = substr(assignment,RSTART+RLENGTH)
            if (Lower)
                varname = tolower(varname)
            if (Pat != "" && varname !~ Pat) {
                if (errVar == "") {
                    _varValOpt_err = \
                        "Invalid variable name in this assignment: " \
                        assignment
                    return -1
                }
                # Bad variable name: as documented, the *entire* option value
                # (not just the part after the separator) is filed under the
                # errVar pseudo-variable.
                varname = errVar
                value = assignment
            }
        }
        n = ++allVarVals[varname,"count"]
        allVarVals[varname,n] = value
        if (n == 1)
            varVals[varname] = value
    }
    return numOpt
}
### End-lib ProcArgs
### Begin-lib ps
#
# The current version of this library can be found by searching
# http://www.armory.com/~ftp/
#
# @(#) ps 2.0 2003-04-24
# 1996-10-09 john h. dubois iii (john@armory.com)
# 1996-02-11 Added Debug flag.
# 1996-05-09 Added COMM field.
# 1996-05-23 Added selection args, and saving of "ps" PID.
# 1996-05-25 Added makePSline()
# 1996-10-09 Added RUSER field.
# 1996-12-14 Added CMDT field.
# 1998-04-30 1.2 Added makePSformat() # 2001-06-14 1.2.1 Added _getPS_input_file. Avoid need for array lib. # 2003-04-24 2.0 getPS() rewritten to always use ps -o, # allowing any -o field to be requested. # getPS(): Gather information on currently running processes and store it # indexed by pid and field name. # # Input vars: # # Fields: Comma-separated list of fields to put in Procs[]. # The following is a list of ps fields that may be requested on SCO OpenServer. # Other fields are available under other operating systems. In the first # field of the description, O, L, and B indicate whether the field is available # under OpenServer, Linux, and BSD. An "l" indicates that the field may be # requested but the field will always return "-". # A request for a field that is not available will cause ps, and thus getPS(), # to fail. # A request for a field not listed here will produce incorrect results if the # value displayed by ps for the field includes spaces. # # OLB pid Process ID (always selected) # OLB ppid Parent process ID (always selected) # OLB pgid Process group ID # OLB uid Integer user ID # OLB user Effective user ID as name if available, else integer # OLB ruser Real user ID as name if available, else integer # OL group Effective group ID as name if available, else integer # OLB rgroup Real group ID as name if available, else integer # OLB nice Nice value # OLB pri Priority value # OLB pcpu % of CPU time recently used # OLB sess Session leader ID; under BSD, session pointer # OL size Size of data+stack, in KB # OLB vsz Size of data+stack+text, in KB # Ol addr Address of entry in proc table # Ol class Scheduler class # OLB time CPU time used, in seconds (with fraction) # OL stime Time when process started. Some formats in use are: hh:mm, # hh:mm:ss, [hh:]mm:ss.ss, month-day (e.g. Apr-11), # MonDay (e.g. Apr11). 
# OL etime Time since process started, as [[days-]hh:]mm:ss # OLB tty tty name ("/dev/" is stripped but leading "tty" is not) # OLB wchan Wait channel # OL S Process state # OL C Recent CPU usage indication # OL F Process flags # OL comm Process accounting name of process: the name of the executable # file, without path. # OL args Command name (argv[0]) + arguments (possibly truncated) # # These pseudo-fields may be requested: # _s_tty tty name with any leading "tty" removed. # _cmd First element of argument vector. # _cmdt Like cmd, but just the tail (leading path components removed), unless # the path ends with /, in which case it is identical to cmd. # _cmdta Like cmdt, but with arguments included. # _line Entire ps output line. # # These fields are returned as "-" in the case of a defunct process: # rgroup sz vsz addr class stime elapsed tty wchan # # These fields are returned as "" in the case of a defunct process: # comm args # # The list of fields requested is not case-sensitive. However, for historical # reasons, the field names used as indexes in Procs[] are always in upper case. # # If Debug is true, debugging info is output. # selectionArgs may be set to ps options that will report on selected processes # (e.g. -usomeone -ttty01). The default for selectionArgs is -e, which # causes information on all processes to be recorded under SCO UNIX and # Linux. On BSD systems, pass "-a" to get this effect. # # If writeLines is true, the raw ps output is written to the standard output. # This can be used to capture a dataset for use with _getPS_input_file. # # Output vars: # PIDs[]: the set of all PIDs seen. # Also, the element with index "ps" is set to the PID for the ps process. # Procs[pid,fieldname]: The value of each selected field for each process. # fieldname is always in upper case. # The header line read is also put in Procs with the index "Header". # The PIDs of the children of each process are put in a comma-separated list # in Children[pid]. 
# If selectionArgs is used, this list may be incomplete.
#
# Return value:
# On success, the number of processes found.
# -1 if an error occurs reading from ps.
# -2 if ps indicates an invocation error (for example, as a result of an
#    invalid field name being passed).  In this case, the error returned
#    by ps is stored in the global _getPS_ps_err.
#
# Globals:
# FS is set to " "
# If the global _getPS_input_file is set, its contents are used instead of
# ps output, for debugging purposes.  It should contain output in the form
# produced by the ps command issued by this function, *including* the
# initial ps pid line.  For sample output, do: sh -c "CMD"
# where CMD is the ps command printed when Debug is set.
#
# Note: this function reads with a plain getline, so $0, NF and NR are
# overwritten while it runs.
function getPS(PIDs, Procs, Fields, Children, Debug, selectionArgs, writeLines,
    e, i, fields_req, pfields_req, numFields, wanted, fieldReqArgs, cmd, header,
    fname2num, fieldNum, pid, ppid, pFieldMap, args_i, args, num_procs, elem,
    field, want_comm, want_args)
{
    split("", fname2num)    # Make awk know this is an array
    split("", Children)     # Make awk know this is an array
    FS = " "                # Make sure FS has its default behavior

    # Map pseudo-fields to the real ps field that supplies their data.
    # _line maps to a null field name: it needs no ps field of its own.
    split("_s_tty=tty,_cmd=args,_cmdt=args,_cmdta=args,_line=",e,",")
    for (i in e) {
        split(e[i],elem,"=")
        pFieldMap[elem[1]] = elem[2]
    }

    # Make pid and ppid always be included, and be fields 1 & 2
    Fields = "pid,ppid," tolower(Fields)
    if (Debug)
        print "Field list: " Fields > "/dev/stderr"
    numFields = split(Fields,wanted,",")
    fieldNum = 0
    # These are locals so that a request for comm/args in one call does not
    # leak into later calls.
    want_comm = want_args = 0
    for (i = 1; i <= numFields; i++) {
        field = wanted[i]
        if (field == "")
            continue
        if (field in pFieldMap) {
            pfields_req[field]
            field = pFieldMap[field]    # may map to null field name
            if (field == "")
                continue
        }
        else
            fields_req[field]
        if (field in fname2num)
            continue
        if (Debug)
            printf "Asked for field: %s\n",field > "/dev/stderr"
        if (field == "comm")
            want_comm = 1
        else if (field == "args")
            want_args = 1
        else {
            fieldReqArgs = fieldReqArgs " -o" field
            fname2num[field] = ++fieldNum   # map field name to field number
        }
    }
    # Put these two at the end because they may contain spaces
    if (want_comm) {
        fieldReqArgs = fieldReqArgs " -ocomm"
        fname2num["comm"] = ++fieldNum
    }
    if (want_args) {
        fieldReqArgs = fieldReqArgs " -oargs"
        fname2num["args"] = ++fieldNum
    }

    if (selectionArgs == "")
        selectionArgs = "-e"
    if (_getPS_input_file)
        cmd = "cat " _getPS_input_file
    else
        # The -o options are XPG4 extensions; on some OSes this requires
        # setting UNIX95=1 in the environment
        cmd = "echo $$; 2>&1 UNIX95=1 exec /bin/ps " selectionArgs " " \
            fieldReqArgs
    num_procs = 0
    if (Debug)
        printf "ps command is:\n%s\n",cmd > "/dev/stderr"

    # First line is the PID of the shell that execs ps, i.e. of ps itself.
    # The pipe is closed on every error return so that a later call issuing
    # the same command starts a fresh ps instead of resuming this one.
    if ((cmd | getline PIDs["ps"]) != 1) {
        close(cmd)
        return -1
    }
    if (writeLines)
        print PIDs["ps"]
    if ((cmd | getline header) != 1) {
        close(cmd)
        return -1
    }
    if (writeLines)
        print header
    # A first word ending in ":" is taken to be an error message from ps
    # rather than a header line.
    if (header ~ /^[^ ]*:/) {
        _getPS_ps_err = header
        close(cmd)
        return -2
    }
    Procs["Header"] = header

    # args is always requested last, so its column begins where the last
    # word of the header begins, whatever title ps gives that column.
    if (want_args)
        args_i = match(header,/[^ \t]+[ \t]*$/)

    while ((cmd | getline) == 1) {
        if (writeLines)
            print $0
        pid = $1
        ppid = $2
        PIDs[pid]
        if (Debug)
            printf "Process %d (%d fields): %s\n",pid,NF,$0 > "/dev/stderr"
        if (ppid in Children)
            Children[ppid] = Children[ppid] "," pid
        else
            Children[ppid] = pid
        for (field in fields_req)
            Procs[pid,toupper(field)] = $fname2num[field]
        # args may contain spaces, so take it by column position, not by
        # field number.
        if (want_args)
            args = substr($0,args_i)
        if ("args" in fields_req)
            Procs[pid,"ARGS"] = args
        if ("_cmd" in pfields_req)
            Procs[pid,"_CMD"] = $fname2num["args"]
        if ("_cmdt" in pfields_req) {
            Procs[pid,"_CMDT"] = $fname2num["args"]
            # Strip leading path components unless the path ends with /
            if (Procs[pid,"_CMDT"] !~ "/$")
                sub(".*/","",Procs[pid,"_CMDT"])
        }
        if ("_cmdta" in pfields_req) {
            Procs[pid,"_CMDTA"] = args
            # Same rule as _cmdt, applied to the first word only; the
            # command may or may not be followed by arguments.
            if (args !~ "^[^ ]+/( |$)")
                sub("^[^ ]*/","",Procs[pid,"_CMDTA"])
        }
        if ("_line" in pfields_req)
            Procs[pid,"_LINE"] = $0
        if ("_s_tty" in pfields_req) {
            Procs[pid,"_S_TTY"] = $fname2num["tty"]
            sub("^tty","",Procs[pid,"_S_TTY"])
        }
        if (Debug)
            print "" > "/dev/stderr"
        num_procs++
    }
    close(cmd)
    return num_procs
}

# makePSline: generate a line containing desired fields from ps data.
# pid is the ID of the process to generate a line for.
# If a pid of -1 is passed, a header line is returned.
# Procs[] is the ps data, as generated by getPS().
# Fields[] is the set of fields desired in the output, with indexes starting
# at 1.  The values are field names as e.g. passed to getPS().
# Sep is the separator to put between fields.  If null, a single space is used.
# Return value: a line consisting of the fields requested, in the order of
# their indices in Fields[].
# Column widths are kept in the global array _makePSlineWidths[], which is
# filled in on the first call.
# Example:
# split("USER,PID,PPID,C,STIME,TTY,TIME,_CMD",FieldNames,",")
# makePSline(pid,psOut,FieldNames)
function makePSline(pid, Procs, Fields, Sep,    n, fName, out, fmt, cell)
{
    if (Sep == "")
        Sep = " "

    # Populate the shared width table on first use only.  Negative widths
    # are left-adjusted, positive widths right-adjusted.
    if (!("PID" in _makePSlineWidths)) {
        _makePSlineWidths["USER"] = -8
        _makePSlineWidths["COMM"] = -8
        _makePSlineWidths["PID"] = 5
        _makePSlineWidths["PPID"] = 5
        # Make TIME be right-adj; some ps drop leading 0 fields from it.
        _makePSlineWidths["STIME"] = 8
        _makePSlineWidths["TIME"] = 8
        _makePSlineWidths["TTY"] = -4
    }

    for (n = 1; n in Fields; n++) {
        fName = Fields[n]
        # Fields without a table entry are printed with a bare %s.
        fmt = "%" ((fName in _makePSlineWidths) ? _makePSlineWidths[fName] : "") "s"
        if (pid == -1)
            cell = fName            # header line: the field name itself
        else if (fName == "PID")
            cell = pid
        else
            cell = Procs[pid,fName]
        if (fName == "TTY")
            sub("^tty","",cell)
        cell = sprintf(fmt, cell)
        out = (n == 1) ? cell : out Sep cell
    }
    return out ""
}

# Convert format string format into a substitution formatting template.
# This requires the subformat library.
# Input variables:
# format is the format string.
# %specs are mapped to PS fields as follows:
# u: USER: User ID; name if available, else number.
# r: RUSER: Real user ID; name if available, else number.
# p: PID.
# P: PPID: Parent process ID.
# S: C: CPU scheduling.
# s: STIME: Start time
# l: TTY: tty name
# L: _S_TTY: tty name with any leading "tty" removed.
# t: TIME: CPU time used.
# c: _CMD: First element of arg vector.
# T: _CMDT: Like _CMD, but just the tail.
# a: ARGS: Entire (truncated) arg vector (command + args). # A: _CMDTA: Like T, but with arguments included. # C: COMM: Name of the executable file, # Extra specifiers to allow along with the above may be given in extraFields. # Specifiers are given as sequences of 3/4tuples, with the elements of the # tuples separated by commas and the tuples separated from each other by # semicolons. The first element of each tuple is the specifier character, # the second is the default format, the third is the field name, and the 4th, # if given, is the name to use in a header. Example: # i,8s,IDLETIME;I,8s,IDLESINCE,IDLESINC # Output variables: # template[] is the template data for use with sf_doformat(). # If ret["header"] is set, its value is set to a header line. # fieldSet[] is made the set of names of the fields requested. # Return value: # On success, a comma-separated list of fields, for use by getPS(). # Extra fields from extraFields are not included. # On failure, a "-" followed by an error message. 
# makePSformat(): build a substitution template from a ps format string.
# template[], format, ret[], fieldSet[] and extraFields are as described in
# the comment preceding this function.  The built-in specifier table is
# extended with the tuples in extraFields; only built-in field names are
# included in the returned field list.  Debugging output is controlled by the
# global Debug (values above 8).
function makePSformat(template, format, ret, fieldSet, extraFields,
    specList, specs, numSpecs, s, parts, numParts, ch, defFmt, fieldOf,
    headOf, builtin, psFields, errpos, t, name, fields)
{
    # Each tuple is: specifier char, default format, field name and,
    # optionally, the name to show in a header.
    specList = "u,-8s,USER;r,-8s,RUSER;p,5d,PID;P,5d,PPID;S,2d,C;s,8s,STIME;l,-7s,TTY;"
    specList = specList "L,-4.4s,_S_TTY,TTY;t,8s,TIME;c,s,_CMD,CMD;T,s,_CMDT,CMD;a,s,ARGS;C,s,COMM;"
    specList = specList "A,s,_CMDTA,CMD"
    if (extraFields != "")
        specList = specList ";" extraFields
    numSpecs = split(specList, specs, ";")

    # Tuples up to and including the "A" specifier are the built-in ps
    # fields; anything after that came from extraFields.
    builtin = 1
    for (s = 1; s <= numSpecs; s++) {
        if (Debug > 8)
            printf "spec: %s\n",specs[s] > "/dev/stderr"
        numParts = split(specs[s], parts, ",")
        ch = parts[1]
        defFmt[ch] = parts[2]
        fieldOf[ch] = parts[3]
        headOf[ch] = parts[numParts]    # last element doubles as header name
        if (builtin)
            psFields[fieldOf[ch]]
        if (ch == "A")
            builtin = 0
    }

    errpos = sf_mktemplate(template, format, defFmt)
    if (errpos > 0)
        return "-" sf_mkErrMsg(format, errpos)

    # The header must be made before the specifier characters stored in
    # template[] are replaced by field names below.
    if ("header" in ret)
        ret["header"] = sf_mkheader(template, headOf)

    for (t = 1; (t,"i") in template; t++) {
        name = fieldOf[template[t,"i"]]
        if (Debug > 8)
            printf "name: %s\n",name > "/dev/stderr"
        template[t,"i"] = name
        fieldSet[name]
        if (name in psFields)
            fields = fields "," name
    }
    return substr(fields, 2)
}
### End-lib ps