#!/v/guest/sw/bin/perl # ########################################################################### # # # Tool for sorting the records of two input files into two output files # # each (!) depending on wether the record has a unique key with respect # # to the two input files or a key shared by both of the two input files. # # Each line in the input files is considered to be a record, which is # # supposed to be divided into fields by some field separator character # # or string. Keys may consist of several (not necessarily adjacent!) # # fields. # # # ########################################################################### # # # Version 1.0 06.04.95 # # Version 1.0.1 08.04.95 # # Version 1.1 09.04.95 # # Version 1.1.1 06.11.95 # # # ########################################################################### # # # Copyright (c) 1995, 1996, 1997, 1998 by Steffen Beyer. # # All rights reserved. # # # # This program is free software; you can redistribute it # # and/or modify it under the same terms as Perl itself. # # # ########################################################################### # # Some important default settings... # $version = 'version 1.1.1'; # $self = $0; $self =~ s!^.*/!!; # $[ = 1; # = number of first index into arrays and strings # $FIELD_SEPARATOR = ':'; $FIELD_NUMBER_LIST = $[; # $field_separator = $FIELD_SEPARATOR; $field_number_list = $FIELD_NUMBER_LIST; # # Display usage if tool was called without parameters: # if (@ARGV == 0) { $help = 1; } # # Get and check command line options: # while (@ARGV) { $_ = shift; if (/^-F$/) { $field_separator = shift; } elsif (/^-L$/) { $field_number_list = shift; } elsif (/^-F.+$/) { $field_separator = substr($_,$[+2); } elsif (/^-L.+$/) { $field_number_list = substr($_,$[+2); } elsif (/^-\?$/) { $help = 1; } elsif (/^-h$/) { $help = 1; } elsif (/^-.*/) { $error = 1; unless (defined $option) { $option = $_; } } else { push(@filename, $_); } } # # Was help requested? # if ($help) { print <<"@@"; '$self' $version Usage: $self [ ]* where is one of the following: -F specifies the field separator (default is '$FIELD_SEPARATOR') (the field separator may be a character, a string or a regular expression!) -L specifies the numbers of the fields forming the key where ::= +(,+)* (default is '$FIELD_NUMBER_LIST', meaning the first field) (white space between option letter and value is optional) -h produces this help screen -? produces this help screen @@ exit; } # # Unknown option encountered? # if ($error) { die "Error: Unknown option '$option' encountered!\nEnter '$self -h' for help.\n"; } # # Check the syntax of the field number list: # if ($field_number_list !~ /^\d+(,\d+)*$/) { die "Syntax error in field number list!\nEnter '$self -h' for help.\n"; } # # Are there two filenames specified? # if (@filename != 2) { die "Error: You must specify two filenames!\nEnter '$self -h' for help.\n"; } # # Extract the two filenames: # $file_a = $filename[$[]; $file_b = $filename[$[+1]; # # Do the specified files exist? # unless (($file_a ne "") && (-f $file_a)) { die "Error: Can't find file '$file_a'!\n"; } unless (($file_b ne "") && (-f $file_b)) { die "Error: Can't find file '$file_b'!\n"; } # # Prepare index list: # @index_list = split(/,/, $field_number_list); # # Scan first file, Pass 1: # open(FILE_A, "<$file_a") || die "Can't open '$file_a': $!\n"; # while () { chop if /\n$/; undef $key; undef @field; @field = split(/$field_separator/o); foreach $index (@index_list) { if (defined $key) { $key .= "\n" . $field[$index]; } else { $key = $field[$index]; } } if ($intersection{$key} == 1) { if (@index_list == 1) { warn "Warning: Key '$key' in file '$file_a' is not unique!\n"; } else { warn "Warning: Key\n"; foreach $index (@index_list) { warn "'$field[$index]'\n"; } warn "in file '$file_a' is not unique!\n"; } } else { $intersection{$key} = 1; } } # close(FILE_A); # # Scan second file, Pass 1: # $empty_intersection = 1; # open(FILE_B, "<$file_b") || die "Can't open '$file_b': $!\n"; # while () { chop if /\n$/; undef $key; undef @field; @field = split(/$field_separator/o); foreach $index (@index_list) { if (defined $key) { $key .= "\n" . $field[$index]; } else { $key = $field[$index]; } } $code = $intersection{$key}; if ($code > 1) { if (@index_list == 1) { warn "Warning: Key '$key' in file '$file_b' is not unique!\n"; } else { warn "Warning: Key\n"; foreach $index (@index_list) { warn "'$field[$index]'\n"; } warn "in file '$file_b' is not unique!\n"; } } if ($code == 1) { $intersection{$key} = 3; $empty_intersection = 0; } else { if ($code != 3) { $intersection{$key} = 2; } } } # close(FILE_B); # # No common keys in the two files? # if ($empty_intersection) { printf("\nField separator = '%s'\n", $field_separator); printf("Key field number list = '%s'\n\n", $field_number_list); printf("The intersection of the two sets of keys of\n"); printf("file '%s'\n", $file_a); printf("and\n"); printf("file '%s'\n", $file_b); printf("is empty.\n\n"); exit; } # # Prepare output file names: # $file_a_0 = $file_a . '.0'; $file_a_1 = $file_a . '.1'; # $file_b_0 = $file_b . '.0'; $file_b_1 = $file_b . '.1'; # # Scan first file, Pass 2: # open(FILE_A, "<$file_a") || die "Can't open '$file_a': $!\n"; open(FILE_A_0, ">$file_a_0") || die "Can't write '$file_a_0': $!\n"; open(FILE_A_1, ">$file_a_1") || die "Can't write '$file_a_1': $!\n"; # while () { chop if /\n$/; undef $key; undef @field; @field = split(/$field_separator/o); foreach $index (@index_list) { if (defined $key) { $key .= "\n" . $field[$index]; } else { $key = $field[$index]; } } if ($intersection{$key} == 3) { print FILE_A_0 $_, "\n"; } else { print FILE_A_1 $_, "\n"; } } # close(FILE_A); close(FILE_A_0); close(FILE_A_1); # # Scan second file, Pass 2: # open(FILE_B, "<$file_b") || die "Can't open '$file_b': $!\n"; open(FILE_B_0, ">$file_b_0") || die "Can't write '$file_b_0': $!\n"; open(FILE_B_1, ">$file_b_1") || die "Can't write '$file_b_1': $!\n"; # while () { chop if /\n$/; undef $key; undef @field; @field = split(/$field_separator/o); foreach $index (@index_list) { if (defined $key) { $key .= "\n" . $field[$index]; } else { $key = $field[$index]; } } if ($intersection{$key} == 3) { print FILE_B_0 $_, "\n"; } else { print FILE_B_1 $_, "\n"; } } # close(FILE_B); close(FILE_B_0); close(FILE_B_1); # # Display results: # printf("\nField separator = '%s'\n", $field_separator); printf("Key field number list = '%s'\n\n", $field_number_list); printf("Wrote records of file '%s'\n", $file_a); printf("with shared keys into '%s'\n", $file_a_0); printf("with unique keys into '%s'\n\n", $file_a_1); printf("Wrote records of file '%s'\n", $file_b); printf("with shared keys into '%s'\n", $file_b_0); printf("with unique keys into '%s'\n\n", $file_b_1); # # Done. #