#!/usr/bin/mawk -We
# *********************************************************************
#  Written by and copyright Carlo Strozzi <carlos@linux.it>.
#
#  tabletolist: convert from NoSQL 'table' to 'list' format.
#  Copyright (C) 1998-2001 Carlo Strozzi <carlos@linux.it>
# 
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
# 
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
# 
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
#  2001-01-03 Ported to NoSQL v3
#  2001-04-17 Added inline help
#  2001-08-17 Added stdio portability
#
#  $Id$
# *********************************************************************

BEGIN {
  # Fields are tab-separated on input and output. Escaped tabs/newlines
  # are expanded by default; -n/--no-unescape turns that off.
  NULL = ""
  FS = OFS = "\t"
  unescape = 1

  # Walk the command line. The scan stops at the first empty (or
  # missing) ARGV slot. Options that take a value consume the next slot.
  for (i = 1; ARGV[i] != NULL; i++) {
     opt = ARGV[i]

     if (opt == "-p" || opt == "--prefix") pfx = ARGV[++i]
     else if (opt == "-i" || opt == "--input") i_file = ARGV[++i]
     else if (opt == "-o" || opt == "--output") o_file = ARGV[++i]
     else if (opt == "-n" || opt == "--no-unescape") unescape = 0
     else if (opt == "-C" || opt == "--chop") chop = 1
     else if (opt == "-c" || opt == "--chomp") chomp = 1
     else if (opt == "-t" || opt == "--trim") trim = 1
     else if (opt == "-h" || opt == "--help") {
        # Show the help file minus its comment lines, then leave.
        # exit still runs the END rule, which checks rc and bails out.
        system("grep -v '^#' @NOSQLPATH@/nosql/help/tabletolist.txt")
        rc = 1
        exit(rc)
     }
  }

  # Stop awk from opening the option words as input files: read the
  # file named with -i/--input if there is one, standard input otherwise.
  if (i_file == NULL) ARGC = 1
  else { ARGV[1] = i_file; ARGC = 2 }

  # Default output target. "@STDOUT@" looks like a placeholder that is
  # substituted when the script is installed -- TODO confirm.
  if (o_file == NULL) o_file = "@STDOUT@"

  # Consume the table header: C receives the column names, D the dash
  # fields, ncol the number of columns.
  ncol = table_header(C,D)
}

{
  # One output "list" record per input row: a blank separator line, then
  # one "<prefix><column name> TAB <value>" line per column.
  #
  # The separator in front of the very first record is dropped when
  # --chop is in effect. The first record is detected with a flag rather
  # than with NR == 3, because table_header() may consume more than two
  # input lines while looking for the dash line; NR would then already
  # be past 3 here and --chop would be silently ignored.
  if (!seen_record) {
     seen_record = 1
     if (!chop) print NULL > o_file

     # Column names never change, so trim them once, not once per row.
     if (trim) {
        for (i = 1; i <= ncol; i++) {
           sub(/^ +/, NULL, C[i]); sub(/ +$/, NULL, C[i])
        }
     }
  }  else print NULL > o_file

  for (i = 1; i <= ncol; i++) {

    # Unescape tabs and newlines first.

    if (unescape) $i = NoSQL_Unescape($i)

    # Add a trailing blank to newlines, so that continuation lines of a
    # multi-line value start with a blank in the output.
    gsub(/\n/, "\n ", $i)

    # Strip leading and trailing blanks from the value if requested.
    if (trim) {
       sub(/^ +/, NULL, $i); sub(/ +$/, NULL, $i)
    }
    printf("%s%s\t%s\n", pfx, C[i], $i) > o_file
  }
}

END {
  # rc is non-zero when BEGIN exited early (after showing the help
  # text): propagate the status and write nothing.
  if (rc) exit(rc)

  # Handle empty input table: only the two header lines were read, so
  # emit a single record made of the bare column names.
  if (NR == 2) {
    if (!chop) print NULL > o_file
    i = 0
    while (++i <= ncol)
       printf("%s%s\n", pfx, C[i]) > o_file
  }

  # Closing blank line, unless --chomp was given.
  if (!chomp) print NULL > o_file
}

#
# Function section.
#

# *********************************************************************
# table_header()
#
# Reads the input table header and builds two arrays out of it, with
# column names and dash fields respectively.
# *********************************************************************
function table_header(C,D,     header,dashes,n,dashline) {

  # Fills C with the column names and D with the dash fields, both
  # indexed from 1, and returns the number of dash fields found.
  # header, dashes, n and dashline are locals; callers pass C and D only.

  # A dash line is made up solely of tab-separated runs of '-'. The
  # pattern is anchored at both ends: unanchored, any line that merely
  # contains a '-' (for instance a column called "first-name") would be
  # mistaken for the dash line. A trailing carriage return is tolerated
  # so that CRLF input accepted by the unanchored pattern still matches.
  dashline = "^(-+\t)*-+\r?$"

  getline header
  getline dashes

  # If the second line is not a dash line, slide a two-line window down
  # the input until one is found or the input runs out; 'header' always
  # holds the line immediately preceding 'dashes'.
  while (dashes !~ dashline) {
     header = dashes
     if ((getline dashes) <= 0) break
  }

  split(header, C, /\t/)
  n = split(dashes, D, /\t/)

  return n
}

# *********************************************************************
# NoSQL_Unescape(string)
#
# Takes a string and translates any unescaped '\t' and '\n' strings into
# physical tabs and newlines respectively. Returns the converted string.
# *********************************************************************
function NoSQL_Unescape(s,      out,k,n,ch,c,pending) {

  # Returns a copy of s in which the two-character sequences '\n' and
  # '\t' are replaced by a real newline and a real tab. A backslash
  # followed by anything else (another backslash included) is copied
  # through together with that character, and a lone backslash at the
  # very end of the string is kept as is.
  # out, k, n, ch, c and pending are locals.

  out = ""
  n = split(s, ch, "")            # one array element per character

  for (k = 1; k <= n; k++) {
    c = ch[k]

    if (pending) {
      # Previous character was an unconsumed backslash.
      if (c == "n") out = out "\n"
      else if (c == "t") out = out "\t"
      else out = out "\\" c
      pending = 0
    }
    else if (c == "\\") pending = 1
    else out = out c
  }

  # Flush a backslash left dangling at the end of the input.
  if (pending) out = out "\\"

  return out
}

#
# End of program.
#
