Thursday, September 21, 2006

A general purpose script used for processing table data.

http://www.sun.com/bigadmin/scripts/submittedScripts/match.sh.txt

#!/bin/ksh
# @(#)match 1.3 06/05/06
# seppeler@yahoo.com
#
# match() is a general purpose script used for processing table data.
#
# A line of input constitutes a table row. Each line is a list of
# values which constitutes a table column. The first line of input
# is a list of field names which identifies each column of data. The
# /etc/passwd file is an example of a table without the field header.
#
# Example usage:
#
# (echo "name:passwd:uid:gid:gcos:home:shell"; cat /etc/passwd) |
# match IFS=: shell=/bin/false name uid
#
# The above example creates the input stream with a one line field descriptor
# followed by a table of data. This input stream is then filtered by
# the match script. The field separator used by the table is
# specified via 'IFS=:'. The table is then searched for all entries having
# the 'shell' field matching the value of '/bin/false'. Output is then
# confined to only printing the 'name' and 'uid' fields.
#
# NOTE1: match() can be used for filtering data, or it can be used as a means
# to cut/rearrange fields within a table. The order of command line arguments
# is not important, however the order of output field names is. Output
# will be in the order in which output field names appear. So to rearrange
# the /etc/passwd table such that 'uid' is printed first, followed by 'name'
# then 'gcos' information, one would do:
#
# (echo "name:passwd:uid:gid:gcos:home:shell"; cat /etc/passwd) |
# match IFS=: uid name gcos
#
# NOTE2: patterns are allowed as part of an expression, however patterns
# follow ksh syntax. Ksh patterns are not the same as regular expressions.
# In 'grep', zero or more characters is written '.*'. In ksh, zero or more
# characters is simply '*'.
#
# NOTE3: The trick to understanding this script is to understand
# how the following code works:
#
# (
# read vars
# while read ${vars}; do
# for var in ${vars}; do
# eval value=\$${var}
# print "${var}=${value} \c"
# done
# print
# done
# ) <<-.
# fruit color
# apple red
# grape purple
# banana yellow
# .
#


#
# This boolean function returns true (0) if the current row satisfies
# at least one of the expressions passed as arguments. If no expressions
# are passed, every row is considered a match.
#
# Arguments: zero or more words of the form 'field=pattern'. 'field' is
#            the name of the shell variable holding the current row's
#            value for that column; 'pattern' is a ksh pattern (not a
#            regular expression).
# Returns:   0 if any expression matches or none were given, 1 otherwise.
#
# Working variables carry a '_fm_' prefix. Columns are plain shell
# variables, so unprefixed names such as 'field' or 'value' would
# overwrite a table column of the same name while a row is being tested.
#
fields_match() {
    # No expressions at all: everything matches.
    if [[ -z "$*" ]]; then
        return 0
    fi

    for _fm_expr in "$@"; do
        _fm_name="${_fm_expr%%=*}"
        _fm_pattern="${_fm_expr#*=}"

        # The name is about to be handed to eval, so only a plain
        # identifier is accepted. Anything else cannot be a column
        # and is skipped rather than executed.
        case "${_fm_name}" in
            ''|[!A-Za-z_]*|*[!A-Za-z0-9_]*) continue ;;
        esac

        # Indirect lookup: fetch the value of the variable named by
        # ${_fm_name}.
        eval "_fm_actual=\"\${${_fm_name}}\""

        # The right-hand side is deliberately unquoted so that it is
        # treated as a pattern, not a literal string.
        if [[ "${_fm_actual}" == ${_fm_pattern} ]]; then
            return 0
        fi
    done

    return 1
}

#
# This output function prints, on one line, the values of the fields
# named by its arguments, in the order given. If no fields are named,
# the current ${line} is printed unchanged.
#
# Globals:   line (read) - the raw text of the current row
#            IFS  (read) - its first character separates the output values
#            one variable per requested field (read)
# Arguments: zero or more field names; each argument is split again on IFS,
#            so a single IFS-joined list is accepted as well.
# Outputs:   one line on stdout.
#
# Values are joined with the first character of IFS only. Joining with
# the whole of IFS would place space, tab and newline between every
# value when the default IFS is in effect.
#
# Working variables carry an '_out_' prefix so they cannot overwrite
# table columns called 'field', 'fields', 'value' or 'values'.
#
output() {
    # No field list: echo the row as it was read. printf '%s\n' is used
    # instead of print so that backslashes in the data are left alone.
    if [[ -z "$*" ]]; then
        printf '%s\n' "${line}"
        return
    fi

    _out_names="$*"
    # First character of IFS (empty if IFS is empty).
    _out_sep="${IFS%"${IFS#?}"}"
    _out_row=''
    _out_glue=''

    # Unquoted on purpose: the list is split on IFS into field names.
    for _out_name in ${_out_names}; do
        case "${_out_name}" in
            # Not an identifier, so it cannot be a column. Emit an empty
            # value instead of passing the text to eval.
            ''|[!A-Za-z_]*|*[!A-Za-z0-9_]*) _out_cell='' ;;
            *) eval "_out_cell=\"\${${_out_name}}\"" ;;
        esac

        # The separator is added before every value except the first,
        # even when earlier values are empty, so columns never shift.
        _out_row="${_out_row}${_out_glue}${_out_cell}"
        _out_glue="${_out_sep}"
    done

    printf '%s\n' "${_out_row}"
}

#
# Main function
#
# Input is read from stdin. The first line of input is a list of field
# names (the header). The field names are used as shell variable names
# within the script, so they must be unique and must be valid variable
# names. A header containing anything else is rejected with an error.
#
# Command line arguments are either an expression or a field. An
# expression is a 'field=pattern' pair; because of the way matching is
# performed in the fields_match() function, the pattern can be any valid
# ksh pattern. Plain field arguments select which fields output() prints
# for every line that matches any one of the expressions.
#
# The special expression 'IFS=<c>' is not used for matching. It sets the
# field separator used to split the header and every input line, and it
# takes effect before any other argument is looked at, so it may appear
# anywhere on the command line. The value is assigned literally (it is
# never passed to eval), so characters such as '|' or ';' are accepted.
#
# Output is a one line list of field names followed by each matching line
# of input. If specific field names were given, only those fields are
# printed, in the order given.
#
# Globals:  line (written) - raw text of the current row, read by output()
#           one variable per header field (written for every row)
# Returns:  1 after printing the usage message to stderr when called
#           without arguments, or when the header holds an invalid name.
#
# Note: a table column called 'line' overwrites the raw row text that
# output() prints when no fields are selected.
#
# The whole body runs in a subshell so that IFS, 'set -f' and the per-row
# variables never leak into a shell that sourced this file.
#
match() {
(
    if (( $# == 0 )); then
        printf '%s\n' 'usage: match [ IFS=<c> ] [ <field>=<pattern> [...] ] [ <field> [...] ]' >&2
        return 1
    fi

    # Pass 1: install the separator first, so that the position of
    # 'IFS=' among the arguments does not matter.
    for _m_arg in "$@"; do
        case "${_m_arg}" in
            IFS=*) IFS="${_m_arg#IFS=}" ;;
        esac
    done

    # Pass 2: sort the remaining arguments into expressions and fields.
    # Arrays keep every argument intact, whatever IFS is.
    unset _m_exprs _m_fields
    for _m_arg in "$@"; do
        case "${_m_arg}" in
            IFS=*) ;;
            *=*)   _m_exprs[${#_m_exprs[@]}]="${_m_arg}" ;;
            *)     _m_fields[${#_m_fields[@]}]="${_m_arg}" ;;
        esac
    done

    # The header is expanded unquoted below; make sure that is only
    # ever split on IFS and never glob-expanded.
    set -f

    # 'IFS= read -r' takes the line verbatim: no trimming and no
    # backslash processing.
    IFS= read -r _m_header

    # Every header word becomes a variable name for 'read'; refuse
    # anything that is not a plain identifier.
    for _m_col in ${_m_header}; do
        case "${_m_col}" in
            ''|[!A-Za-z_]*|*[!A-Za-z0-9_]*)
                printf 'match: invalid field name in header: %s\n' "${_m_col}" >&2
                return 1
                ;;
        esac
    done

    # "${array[*]}" joins on the first character of IFS.
    if (( ${#_m_fields[@]} > 0 )); then
        printf '%s\n' "${_m_fields[*]}"
    else
        printf '%s\n' "${_m_header}"
    fi

    # The '|| [[ -n ... ]]' part keeps a final line that lacks a newline.
    while IFS= read -r line || [[ -n "${line}" ]]; do

        # Split the row on IFS into one variable per header field. A
        # here-document is used instead of 'print | read' so that the
        # assignments happen in this shell under any shell's pipeline
        # semantics. Its body and terminator must start in column 0.
        read -r ${_m_header} <<_M_ROW_
${line}
_M_ROW_

        if fields_match "${_m_exprs[@]}"; then
            output "${_m_fields[@]}"
        fi
    done
)
}

#
# Main
#
# This is the entry point to the script. It serves as a wrapper
# around the match() function. The script may be executed directly:
#
# chmod a+x match.ksh; cat input | match.ksh field=value ...
#
# or it may be used as a ksh function by sourcing it from a ksh shell:
#
# . ./match.ksh
# cat input | match field=value
#
# match() is only invoked when the name this file was run under ($0 with
# any directory part removed) begins with 'match'. When the file is
# sourced from a shell whose $0 is something else, nothing runs and only
# the functions are defined.
#
if [[ "${0##*/}" == 'match'* ]]; then
    # "$@" hands every argument over exactly as it was received. An
    # unquoted $* would split arguments on IFS and glob-expand them.
    match "$@"
fi

No comments: