#!/usr/bin/mawk -We
# *********************************************************************
#  Written by and copyright Carlo Strozzi <carlos@linux.it>.
#
#  column: extracts specific columns (i.e. a "projection") from a
#          NoSQL table.
#  Copyright (C) 1998-2001 Carlo Strozzi <carlos@linux.it>
# 
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
# 
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
# 
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
#  2001-01-03 Ported to NoSQL v3
#  2001-02-07 Added option '-n'
#  2001-02-25 Added option '-L'
#  2001-04-17 Added inline help
#  2001-08-17 Added stdio portability
#
#  $Id$
# *********************************************************************

# Command-line parsing and setup; runs once before any input is read.
#
# Globals set here for the rules below:
#   pick_last     - '-l': use the last occurrence of a duplicated column
#   no_hdr        - '-N': do not print the header and dashline
#   nullval       - '-n': text printed in place of empty fields
#   o_file        - '-o': output file (defaults to "@STDOUT@")
#   command_cols  - requested column names in [1..n], count in [0]
#   j             - number of requested columns (0 means "all columns")
BEGIN {
  NULL = ""; FS = OFS = "\t";

  # Walk ARGV by index up to ARGC. Testing for an empty ARGV element
  # instead would end parsing at the first empty-string argument and
  # silently drop everything after it.
  for (i = 1; i < ARGC; i++) {
    arg = ARGV[i]

    if (arg == "-l" || arg == "--last") pick_last = 1
    else if (arg == "-N" || arg == "--no-header") no_hdr = 1
    # The options below take a value; a missing value reads as empty.
    else if (arg == "-n" || arg == "--null") nullval = ARGV[++i]
    else if (arg == "-L" || arg == "--like") l_file = ARGV[++i]
    else if (arg == "-i" || arg == "--input") i_file = ARGV[++i]
    else if (arg == "-o" || arg == "--output") o_file = ARGV[++i]
    else if (arg == "-h" || arg == "--help") {
       # Show the help text without its comment lines; exit status is 1.
       system("grep -v '^#' @NOSQLPATH@/nosql/help/column.txt")
       rc = 1
       exit(rc)
    }
    else {
      # Anything else is a column name.
      # Remove invalid characters from column name, just in case
      gsub(/[^A-Za-z0-9_]/, NULL, arg)

      # A name with no valid characters left would become an empty
      # column name in the header, so it is skipped.
      if (arg != NULL) command_cols[++j] = arg
    }
  }

  # '-L': take the column list from the first line of l_file, split on
  # FS (tab). This replaces any column names given on the command line.
  # If the line cannot be read, like_list stays empty, j becomes 0 and
  # all columns are printed.
  if (l_file != NULL) {
     getline like_list < l_file
     close(l_file)
     j = split(like_list,command_cols)
  }

  # If no columns are specified then print all columns (the list is
  # then built from the table header when the first record is read).
  if (j) command_cols[0] = j

  # Drop the parsed arguments so they are not opened as input files.
  ARGC = 1

  # "@STDOUT@" is presumably substituted at install time - TODO confirm.
  if (o_file == NULL) o_file = "@STDOUT@"

  # With '-i' the table is read from i_file instead of standard input.
  if (i_file != NULL) { ARGV[1] = i_file; ARGC = 2 }
}

#
# Main loop
#

NR == 1 {
  # Header line: record in P[] the field position of every column name.
  # Unless '-l' was given, the first occurrence of a duplicated name is
  # the one kept (duplicates may appear after a join).
  for (p = 1; p <= NF; p++) {
    first_seen = (P[$p] == NULL)

    # auto_col collects each distinct name once, in input order.
    if (first_seen) auto_col = auto_col " " $p

    if (pick_last || first_seen) P[$p] = p
  }

  # No column list was requested: use every distinct input column.
  if (!j) {
    sub(/^ */, NULL, auto_col)
    j = split(auto_col, command_cols, " ")
    command_cols[0] = j
  }

  # Build the output header. A requested name that is not in the input
  # is a new column: it is mapped to field NF+1, one past the last
  # header field.
  out_rec = NULL
  for (i = 1; i <= command_cols[0]; i++) {
    out_rec = (i == 1) ? command_cols[i] : (out_rec OFS command_cols[i])

    if (P[command_cols[i]] == NULL) P[command_cols[i]] = NF + 1
  }

  if (!no_hdr) {
    print out_rec > o_file

    # The dashline is the header with every non-tab character dashed.
    dash_rec = out_rec
    gsub(/[^\t]/, "-", dash_rec)
    print dash_rec > o_file
  }

  next
}

# The second input line is taken to be the table's dashline and is
# always discarded; when a header is wanted, a fresh dashline is printed
# together with the header while processing the first line.
NR == 2 { next }

# Table body: print the selected fields of each data row, in the
# requested order, replacing empty values with the '-n' text.
{
  # do-while rather than for: the first column is always emitted, even
  # if the column count in command_cols[0] is zero.
  i = 1
  do {
    cell = $P[command_cols[i]]
    if (cell == NULL) cell = nullval

    out_rec = (i == 1) ? cell : (out_rec OFS cell)
  } while (++i <= command_cols[0])

  print out_rec > o_file
}

#
# End of program.
#
