#!/opt/bin/perl
#
###########################################################################
#                                                                         #
#  Tool for sorting the records of two input files into two output files  #
#  each (!) depending on whether the record has a unique key with respect #
#  to the two input files or a key shared by both of the two input files. #
#  Each line in the input files is considered to be a record, which is    #
#  supposed to be divided into fields by ':'.                             #
#                                                                         #
#  A record has two keys: its first field and a normalised form of its    #
#  fifth field.  A record counts as "shared" if either key occurs in      #
#  both input files.                                                      #
#                                                                         #
###########################################################################
#                                                                         #
#  Version 1.0  -  Written 06.11.95 by Steffen Beyer                      #
#                                                                         #
###########################################################################
#                                                                         #
#  Copyright (C) 1995 by software design & management GmbH & Co. KG       #
#                                                                         #
###########################################################################

use strict;
use warnings;

#
# Bit flags recording in which input file(s) a key has been seen:
#
use constant IN_A    => 1;
use constant IN_B    => 2;
use constant IN_BOTH => 3;    # IN_A | IN_B

#
# Some important default settings...
#
my $version = 'version 1.0';

# Name the tool was invoked under, with any leading directory stripped.
my $self = $0;
$self =~ s!^.*/!!;

###########################################################################
# Helpers
###########################################################################

# Reduce a name field to its comparison form: control whitespace becomes a
# blank, runs of blanks collapse, surrounding whitespace is trimmed, a
# trailing "(...)" remark and a leading "Dr." are removed, and ASCII
# upper-case letters are folded to lower case.  An undefined name is
# treated as the empty string.
sub normalize_name {
    my ($name) = @_;

    $name = '' unless defined $name;

    $name =~ s/[\t\r\n\f]/ /g;
    $name =~ s/ +/ /g;
    $name =~ s/^\s*//;
    $name =~ s/\s*$//;
    $name =~ s/\s*\(.*\)\s*$//;
    $name =~ s/^\s*Dr\.\s*//;
    $name =~ tr/A-Z/a-z/;

    return $name;
}

# Split one record (without its line terminator) into its two keys: the
# first field verbatim and the normalised fifth field.  Fields missing
# from a short or empty record yield the empty string.
# NOTE(review): empty keys take part in matching like any other key, so
# records lacking a field in both files are classified as shared - confirm
# that this is intended.
sub record_keys {
    my ($record) = @_;

    my ($user, $name) = (split(/:/, $record, 6))[0, 4];
    $user = '' unless defined $user;

    return ($user, normalize_name($name));
}

# Pass 1: read every record of $path and OR $flag into the entries of its
# two keys in %$users and %$names.  Returns how many times a key was found
# to be present in both files during this scan (0 means no overlap).
# Dies if the file cannot be opened.
sub register_keys {
    my ($path, $flag, $users, $names) = @_;
    my $shared = 0;

    open(my $in, '<', $path) || die "Can't open '$path': $!\n";

    while (my $record = <$in>) {
        chomp $record;
        my ($user, $name) = record_keys($record);

        for my $slot ([$users, $user], [$names, $name]) {
            my ($seen, $key) = @$slot;

            # OR-ing (rather than assigning) keeps a key marked as shared
            # even if it occurs several times within the same file.
            $seen->{$key} = ($seen->{$key} || 0) | $flag;
            $shared++ if $seen->{$key} == IN_BOTH;
        }
    }

    close($in);

    return $shared;
}

# Pass 2: copy every record of $path to $shared_path if one of its keys
# occurs in both input files, and to $unique_path otherwise.  Each record
# is written followed by a newline.  Dies if a file cannot be opened or if
# an output file cannot be written completely.
sub split_records {
    my ($path, $shared_path, $unique_path, $users, $names) = @_;

    open(my $in, '<', $path)
        || die "Can't open '$path': $!\n";
    open(my $shared_out, '>', $shared_path)
        || die "Can't write '$shared_path': $!\n";
    open(my $unique_out, '>', $unique_path)
        || die "Can't write '$unique_path': $!\n";

    while (my $record = <$in>) {
        chomp $record;
        my ($user, $name) = record_keys($record);

        my $is_shared = (($users->{$user} || 0) == IN_BOTH)
                     || (($names->{$name} || 0) == IN_BOTH);

        my $out = $is_shared ? $shared_out : $unique_out;
        print {$out} $record, "\n";
    }

    close($in);

    # Buffered write errors only surface when the handle is closed.
    close($shared_out) || die "Can't write '$shared_path': $!\n";
    close($unique_out) || die "Can't write '$unique_path': $!\n";

    return;
}

###########################################################################
# Main program
###########################################################################

my $help  = 0;
my $error = 0;
my $option;      # first unknown option encountered, if any
my @filename;

#
# Display usage if tool was called without parameters:
#
$help = 1 if @ARGV == 0;

#
# Get and check command line options:
#
while (@ARGV) {
    my $arg = shift @ARGV;

    if ($arg =~ /^-[?h]$/) {
        $help = 1;
    }
    elsif ($arg =~ /^-/) {
        $error = 1;
        $option = $arg unless defined $option;
    }
    else {
        push(@filename, $arg);
    }
}

#
# Was help requested?
#
if ($help) {
    print <<"END_OF_USAGE";

'${self}' $version

Usage: ${self} [ <option> ]* <filename> <filename>

where <option> is one of the following:

    -h   produces this help screen
    -?   produces this help screen

END_OF_USAGE
    exit;
}

#
# Unknown option encountered?
#
if ($error) {
    die "Error: Unknown option '$option' encountered!\nEnter '$self -h' for help.\n";
}

#
# Are there two filenames specified?
#
if (@filename != 2) {
    die "Error: You must specify two filenames!\nEnter '$self -h' for help.\n";
}

#
# Extract the two filenames:
#
my ($file_a, $file_b) = @filename;

#
# Do the specified files exist?
#
for my $path ($file_a, $file_b) {
    unless (($path ne "") && (-f $path)) {
        die "Error: Can't find file '$path'!\n";
    }
}

#
# Pass 1: collect the keys of both files.
#
my (%users, %names);

register_keys($file_a, IN_A, \%users, \%names);
my $shared_keys = register_keys($file_b, IN_B, \%users, \%names);

#
# No common keys in the two files?
#
if ($shared_keys == 0) {
    printf("The intersection of the two sets of keys of\n");
    printf("file '%s'\n", $file_a);
    printf("and\n");
    printf("file '%s'\n", $file_b);
    printf("is empty.\n\n");
    exit;
}

#
# Prepare output file names:
#
my $file_a_0 = $file_a . '.0';
my $file_a_1 = $file_a . '.1';

my $file_b_0 = $file_b . '.0';
my $file_b_1 = $file_b . '.1';

#
# Pass 2: distribute the records of each file.
#
split_records($file_a, $file_a_0, $file_a_1, \%users, \%names);
split_records($file_b, $file_b_0, $file_b_1, \%users, \%names);

#
# Display results:
#
printf("Wrote records of file '%s'\n",   $file_a);
printf("with shared keys into '%s'\n",   $file_a_0);
printf("with unique keys into '%s'\n\n", $file_a_1);

printf("Wrote records of file '%s'\n",   $file_b);
printf("with shared keys into '%s'\n",   $file_b_0);
printf("with unique keys into '%s'\n\n", $file_b_1);

#
# Done.
#