#!/usr/bin/perl

### About This Script #########################################################
#
# Title:  CSV Parser
# Author: Nathan Shaw
#______________________________________________________________________________
#
### Disclaimer ################################################################
#
# This script is not supported by its author. It is provided AS IS without
# warranty of any kind. The entire risk arising out of the use of this script
# remains with you. In no event shall the author of this script be liable
# for any damages arising out of the use of or inability to use the script.
#
# As with anything, you should test this script in a separate development
# environment before using it anywhere close to your production network.
#______________________________________________________________________________
#
### Description ###############################################################
#
# Take a file that *sometimes* uses a double-quote as a field boundary and
# *sometimes does not* and output data with a different delimiter.
#
# You can also use it to print only select fields from a file and even to
# rearrange the order of the fields.
#
# Please see Version History for more information.
#______________________________________________________________________________
#
### Input/Output ##############################################################
#
# The script requires a file for input.
#______________________________________________________________________________
#
### Known Issues ##############################################################
#
# None yet...
#______________________________________________________________________________
#
### Version History ###########################################################
#
# v1.0.0 - 11/17/2009 - NShaw
#   Initial release.
# v1.0.1 - 12/14/2010 - NShaw
#   Added support for different input delimiters... So I guess this isn't
#   just a CSV parser now. :P The -d | --delimiter option has been
#   changed to -o | --outputdelim to make it more clear.
#   Added a debug option along with a few debug commands.
#   Changed to use Text::CSV_XS instead of Text::CSV (only because it's part
#   of ActivePerl's default install and works the same as Text::CSV for
#   what I'm doing).
# v1.0.2 - 10/06/2011
#   Will now print a message to STDERR if it can't parse a line in the file
#   instead of exiting.
#______________________________________________________________________________
#
# Future ideas
#
# Add an --output option. Make the debug messages go to the screen, but the
# delimited text file's output would go to the file specified.
#

use strict;
use warnings;

use Text::CSV_XS;
use Getopt::Long;

our $VERSION = "1.0.2";

# Process exit codes.
our $ERROR_INVALID_USAGE = 1;
our $ERROR_MISSING_CSV   = 5;
our $ERROR_BAD_CSV       = 17;    # Currently unused: unparsable lines are
                                  # reported on STDERR and skipped instead.

# Command-line option values (filled in by GetOptions below).
my $opt_inputfile;       # -c | --csv          path of the delimited input file
my $opt_output_delim;    # -o | --outputdelim  delimiter written between fields
my $opt_input_delim;     # -i | --inputdelim   delimiter expected in the input
my $opt_fields;          # -f | --fields       comma-separated 0-based indexes
my $opt_quotes;          # -q | --quotes       wrap every output field in "..."
my $opt_help;            # -h | --help
my $opt_debug;           # -d | --debug

GetOptions(
    "c|csv=s"         => \$opt_inputfile,
    "o|outputdelim=s" => \$opt_output_delim,
    "i|inputdelim=s"  => \$opt_input_delim,
    "f|fields=s"      => \$opt_fields,
    "q|quotes"        => \$opt_quotes,
    "d|debug"         => \$opt_debug,
    "h|help"          => \$opt_help,
) or usageHelp($ERROR_INVALID_USAGE);

usageHelp() if $opt_help;

# Test with defined/length rather than truthiness so that a value of "0"
# (a file named 0, or 0 used as a delimiter) is not mistaken for "missing".
usageHelp($ERROR_INVALID_USAGE, "You must specify a file to parse!")
    unless defined $opt_inputfile && length $opt_inputfile;

$opt_output_delim = ","
    unless defined $opt_output_delim && length $opt_output_delim;
$opt_input_delim = ","
    unless defined $opt_input_delim && length $opt_input_delim;

print "Output Delimiter: $opt_output_delim\n" if $opt_debug;
print "Input Delimiter: $opt_input_delim\n"   if $opt_debug;

# Build the list of field indexes to print. An empty list means "print all".
# Each entry must be a non-negative integer; validating here avoids silently
# treating a typo such as "a" as field 0 for every line.
my @printfields;
if (defined $opt_fields) {
    for my $field (split /,/, $opt_fields) {
        $field =~ s/\A\s+//;
        $field =~ s/\s+\z//;
        usageHelp($ERROR_INVALID_USAGE,
            "Field '$field' is not a valid field number!", 1)
            unless $field =~ /\A[0-9]+\z/;
        push @printfields, $field;
    }
}

if ($opt_debug) {
    print "\@printfields\n";
    print "$_\n" for @printfields;
    print "\n";
}

# Create CSV object. new() returns undef when the attributes are unusable,
# so check it instead of failing later with a method call on undef.
my $objCSV = Text::CSV_XS->new({ sep_char => $opt_input_delim })
    or usageHelp($ERROR_INVALID_USAGE,
        "Couldn't use input delimiter '$opt_input_delim': "
            . Text::CSV_XS->error_diag(), 1);

# Three-argument open with a lexical handle: the file name is never
# interpreted as a mode or a command.
open(my $input_fh, '<', $opt_inputfile)
    or usageHelp($ERROR_MISSING_CSV,
        "Couldn't open input file: $opt_inputfile, for reading! ($!)", 1);

# Each physical line is parsed on its own, so a quoted field containing an
# embedded newline is not supported.
my $linenum = 0;
while (my $line = <$input_fh>) {
    $linenum++;

    if (!$objCSV->parse($line)) {
        # Report and carry on with the next line rather than aborting.
        (my $shown = $line) =~ s/[\r\n]+\z//;
        print STDERR "ERROR parsing line $linenum: $shown\n";
        next;
    }

    # The values of each of the fields found on this line.
    my @csvfields = $objCSV->fields();

    if ($opt_debug) {
        print "\@csvfields\n";
        print "$_\n" for @csvfields;
        print "\n";
    }

    my @selected;    # fields to be printed, in output order
    if (@printfields) {
        # Print fields were defined, so only print those (in the order given).
        for my $index (@printfields) {
            usageHelp($ERROR_INVALID_USAGE,
                "Field $index is out of range for line $linenum!", 1)
                if $index > $#csvfields;
            push @selected, $csvfields[$index];
        }
    }
    else {
        # No print fields were defined, so print them all.
        @selected = @csvfields;
    }

    # NOTE: quoting is literal; a double-quote inside a field is not escaped.
    @selected = map { "\"$_\"" } @selected if $opt_quotes;

    # join() places the delimiter only between fields, so there is no
    # trailing delimiter to strip afterwards.
    print join($opt_output_delim, @selected), "\n";
}

close($input_fh);

# usageHelp(CODE, MESSAGE, HIDE_USAGE)
#
# Print an optional message and/or the usage text, then exit the script.
#
#   CODE        exit status (optional; defaults to 0)
#   MESSAGE     error message printed before the usage text (optional)
#   HIDE_USAGE  boolean; when true the usage text is not printed (optional)
#
# Output goes to STDERR when CODE is non-zero so that error text never mixes
# with the delimited data on STDOUT; plain help (CODE 0) goes to STDOUT.
# This sub never returns.
sub usageHelp {
    my ($code, $message, $hide_usage) = @_;
    $code = 0 unless defined $code;

    my $out = $code ? \*STDERR : \*STDOUT;

    print {$out} "$message\n" if defined $message && length $message;

    if (!$hide_usage) {
        print {$out}
            "\n",
            "CSV Parser v$VERSION - Parse data from a delimited text file.\n",
            "\nUsage: $0 -c [file]\n\n",
            "Required:\n",
            "  -c | --csv [File]\t\tSpecify a file with the input data\n",
            "\n",
            "Options:\n",
            "  -i | --inputdelim [char]\tInput file delimiter (Default: \",\")\n",
            "  -o | --outputdelim [char]\tOutput using different delimiter (Default: \",\")\n",
            "  -f | --fields [#,#,#,#]\tFields to print (base 0) - Order will be maintained. (Default: All)\n",
            "  -q | --quotes\t\tInclude quotes around the fields (Default: Off)\n",
            "  -h | --help\t\t\tHelp!\n",
            "  -d | --debug\t\t\tEnable debugging info\n",
            "\n";
    }

    print {$out} "\n";
    exit($code);
}