#!/usr/bin/perl -w
# sha1_create.pl and sha1_compare.pl
# sha2_create.pl and sha2_compare.pl
#
# Script to create SHA1/SHA2 files or compare against them. Does not follow or examine
# symbolic links. Zero-length files are ignored. If multiple files have the same SHA1
# digest (i.e., identical content), all will be added to the SHA1 file but only the first
# will be reported as a match when a file is found in a different location with the same
# SHA1 digest.
#
# There are four categories of report from sha1_compare.pl:
# 1. Matches where the filename and SHA1 digest match the SHA1 file.
#    These are not reported by default, unless -v (verbose) or -m (matches only)
#    options are used.
# 2. Discrepancies where the filename matches a filename in the SHA1 file, but
#    the SHA1 digest is different. These are reported by default, unless -m
#    (matches only) option is used.
# 3. Matches where the filename does not match but the SHA1 digest matches a
#    file in the SHA1 file. These are reported by default and if -m (matches
#    only) option is used. These are not reported if -n (nonmatches only) is used.
# 4. Files that are not found in the SHA1 file. These are reported by default,
#    unless the -m (matches only) or -q (quiet) options are used.
#
# No options: Displays 2-4.
# -m: Displays 1 and 3.
# -m -q: Displays 1 and 3.
# -m -v: Displays 1 and 3.
# -v: Displays 1-4.
# -n: Displays 2 and 4.
# -n -q: Displays 2.
#
# -r (root) option is used to prepend a root directory name to all pathnames when creating
#    or comparing. Otherwise relative paths from the working directory are used. The
#    root_dir doesn't get displayed unless it's the path stored in the SHA1 file.
# -q (quiet) option is used to suppress non-fatal errors.
# -R (recurse) option is used to recurse through subdirectories.
# -i (interactive) option means that SHA1 digests are provided to STDIN, one per
#    line, with no file names on the command line. These may be in any of the
#    following formats:
#    1. SHA1 digest. No filename; no filename tests will occur. This format
#       is not permitted for sha1_create.pl.
#    2. SHA1 digest followed by whitespace followed by filename.
#    3. "sha1" command output format ("SHA1 (<filename>) = <digest>").
#
# To make a SHA1 file out of a subdirectory containing installation binaries for a
# particular version of an operating system, say, OpenBSD 3.5 i386 release binaries,
# do, from the base directory:
#    perl sha1_create.pl -R -r / OpenBSD_3.5_i386 *
# To test if a particular file matches the release binaries:
#    perl sha1_compare.pl -v OpenBSD_3.5_i386 /bin/date
# To test if an entire directory's contents match the release binaries, only
# displaying discrepancies:
#    perl sha1_compare.pl OpenBSD_3.5_i386 /bin
# To test an entire hierarchy:
#    perl sha1_compare.pl -R OpenBSD_3.5_i386 /usr/local
# To show all and only files in a directory that match SHA1 digests in the SHA1
# file:
#    perl sha1_compare.pl -m OpenBSD_3.5_i386 /etc
#
# To Do:
# * Allow creation/comparison against SHA1 databases (e.g., MySQL).
#   To do this most effectively, change all the hash references and "defined"
#   checks to use subroutines that either do those same checks or the
#   equivalent with a database--with a database there should also be a
#   description associated with each SHA1 digest.
# * Allow importation of SHA1 files into a SHA1 database. The description
#   field should be populated by default from the name of the SHA1 file,
#   minus the ".sha1" suffix.
# Adding those two features would allow trivial creation of a web
# application like Sun's Solaris Fingerprint Database
# (http://sunsolve.sun.com/pub-cgi/fileFingerprints.pl), though
# it wouldn't be too hard as is, except there's no field to identify
# OS version or source of the matching file.
# I should also probably replace the option processing with
# Getopt::Long and allow each of the match/nonmatch/partialmatch
# possibilities to be individually turned on and off, and to allow
# error levels to be verbose/normal/quiet.
# Finally, I should perhaps print some keyword at the front of each
# match/nonmatch/partial match line so that the output could easily be
# fed to a simple grep.

# Written 2004-08-27 by Jim Lippard.
# 2005-11-07: Added SHA2.

### Required packages.

use strict;
use Digest::SHA1;
use Digest::SHA2;
use File::Basename;
use Getopt::Std;

### Global constants.

my $CREATE = 0;
my $COMPARE = 1;

my $SHA1_FILE_SUFFIX = '.sha1';
my $SHA2_FILE_SUFFIX = '.sha2';
my $SHA_FILE_SUFFIX = '.sha2';
my $sha_name = 'SHA2';

# Number of hex characters in a digest of each kind. SHA1 digests are 160
# bits (40 hex characters); Digest::SHA2->new with no argument is assumed to
# produce its default 256-bit digest (64 hex characters).
my %HEX_DIGEST_LENGTH = (
    'SHA1' => 40,
    'SHA2' => 64,
);

my $WHOAMI = $0;
$WHOAMI = basename ($WHOAMI);

# The script behaves differently depending on the name it is invoked by:
# each known name maps to (mode, digest name, digest file suffix).
my %PERSONALITY = (
    'sha1_create.pl'  => [$CREATE,  'SHA1', $SHA1_FILE_SUFFIX],
    'sha1_compare.pl' => [$COMPARE, 'SHA1', $SHA1_FILE_SUFFIX],
    'sha2_create.pl'  => [$CREATE,  'SHA2', $SHA2_FILE_SUFFIX],
    'sha2_compare.pl' => [$COMPARE, 'SHA2', $SHA2_FILE_SUFFIX],
);

### Global variables.

my ($create_or_compare, $create, $compare, %opts, $verbose, $quiet, $interactive,
    $matches_only, $nonmatches_only, $recurse, $root_dir, $sha1_file,
    %sha1_to_path, %path_to_sha1, $recursion_level);

### Main program.

if (!exists ($PERSONALITY{$WHOAMI})) {
    die "I'm being invoked with something other than the names I know myself by, namely \"$0\".\n";
}
($create_or_compare, $sha_name, $SHA_FILE_SUFFIX) = @{$PERSONALITY{$WHOAMI}};
$create = ($create_or_compare == $CREATE);
$compare = ($create_or_compare == $COMPARE);

# Get options. A bad option is a usage error, so exit with a failure status.
if ($create) {
    getopts ('vqiRr:', \%opts) || exit (1);
}
else {
    getopts ('vqimnRr:', \%opts) || exit (1);
}

$verbose = $opts{'v'};
$quiet = $opts{'q'};
$interactive = $opts{'i'};
$matches_only = $opts{'m'};
$nonmatches_only = $opts{'n'};
$recurse = $opts{'R'};
$root_dir = $opts{'r'};

if ($interactive && $recurse) {
    die "-i and -R options are incompatible.\n";
}

if ($matches_only && $nonmatches_only) {
    die "-m and -n options are incompatible.\n";
}

# Non-interactive use needs the digest file plus at least one dir-or-file;
# interactive use takes the digest file only.
if ((!$interactive && @ARGV < 2) || ($interactive && @ARGV != 1)) {
    if ($create) {
        die "Usage: $WHOAMI [-vqiRr] ${sha_name}_file dirs-or-files (no dirs-or-files with -i)\n";
    }
    else {
        die "Usage: $WHOAMI [-vqimnRr] ${sha_name}_file dirs-or-files (no dirs-or-files with -i)\n";
    }
}

$sha1_file = shift (@ARGV);
$sha1_file = add_suffix ($sha1_file, $SHA_FILE_SUFFIX);

if ($create && -e $sha1_file) {
    die "$sha_name file already exists. $sha1_file\n";
}
elsif ($compare && !-e $sha1_file) {
    die "$sha_name file does not exist. $sha1_file\n";
}

if ($create) {
    create_sha1_file ($sha1_file, @ARGV);
}
else {
    compare_sha1_file ($sha1_file, @ARGV);
}

### Subroutines.

# Subroutine to create a SHA1 file.
#   $sha1_file - name of the digest file to write (suffix already added).
#   @files     - files/directories to digest; ignored with -i, where
#                digest lines are read from STDIN instead.
# Dies if the digest file cannot be opened or closed cleanly.
sub create_sha1_file {
    my ($sha1_file, @files) = @_;
    my ($user, $date);

    # SHA1_FILE stays a bareword handle because recursive_sha1 prints to it.
    open (SHA1_FILE, '>', $sha1_file) || die "Cannot open $sha_name file for writing. $! $sha1_file\n";

    print "Creating $sha_name file $sha1_file.\n" if ($verbose);

    # Fall back to the numeric uid if it has no passwd entry.
    $user = getpwuid ($<);
    $user = $< unless (defined ($user));
    $date = localtime (time ());

    print SHA1_FILE "# $sha_name file created with $WHOAMI by user $user on $date.\n";

    process_inputs (@files);

    # Buffered write errors only surface at close time.
    close (SHA1_FILE) || die "Cannot close $sha_name file. $! $sha1_file\n";
    return;
}

# Subroutine to compare to a SHA1 file.
#   $sha1_file - name of the digest file to read (suffix already added).
#   @files     - files/directories to check; ignored with -i, where
#                digest lines are read from STDIN instead.
# Fills %sha1_to_path and %path_to_sha1 from the digest file, then checks
# every input against them. Dies on a malformed digest file.
sub compare_sha1_file {
    my ($sha1_file, @files) = @_;
    my ($sha1, $path);
    my $hex_length = $HEX_DIGEST_LENGTH{$sha_name};

    # Read digests from SHA1 file.
    open (SHA1_FILE, '<', $sha1_file) || die "Cannot open $sha_name file for reading. $! $sha1_file\n";
    while (my $line = <SHA1_FILE>) {
        # Blank line or comment, ignore.
        next if ($line =~ /^\s*$|^\s*#/);

        # chomp rather than chop: a final line without a newline must not
        # lose its last character.
        chomp ($line);
        if ($line !~ /\s/) {
            die "Invalid line in $sha_name file. $line\n";
        }
        ($sha1, $path) = split (/\s+/, $line, 2);
        if (length ($sha1) != $hex_length || $sha1 !~ /^[a-f0-9]+$/) {
            die "Invalid SHA1 in $sha_name file. $sha1\n";
        }
        if (defined ($path_to_sha1{$path})) {
            die "Redundant path. $path\n";
        }
        # We allow redundant SHA1 digests, but only the first one in the file
        # goes into the sha1_to_path hash. Both go into the path_to_sha1 hash.
        $sha1_to_path{$sha1} = $path unless (defined ($sha1_to_path{$sha1}));
        $path_to_sha1{$path} = $sha1;
    }
    close (SHA1_FILE);

    process_inputs (@files);
    return;
}

# Subroutine shared by creation and comparison: feeds every input to
# recursive_sha1. With -i the inputs are digest lines read from STDIN
# (unparseable lines are skipped); otherwise they are the given files.
sub process_inputs {
    my (@files) = @_;

    if ($interactive) {
        while (my $line = <STDIN>) {
            my ($sha1, $file) = parse_sha1_line ($create_or_compare, $line);
            if (defined ($sha1)) {
                recursive_sha1 ($create_or_compare, $file, $sha1);
            }
        }
    }
    else {
        foreach my $file (@files) {
            recursive_sha1 ($create_or_compare, $file);
        }
    }
    return;
}

# Subroutine to parse a line from STDIN for SHA1 digest and filename (optional
# in compare mode).
#   $create_or_compare - $CREATE or $COMPARE; a bare digest with no filename
#                        is accepted only for $COMPARE.
#   $line              - the input line, with or without trailing newline.
# Returns ($sha1, $file), where $file is undefined for a bare digest, or an
# empty list if the line is not in a recognized format (a message is
# printed unless -q was given).
sub parse_sha1_line {
    my ($create_or_compare, $line) = @_;
    my ($sha1, $file);

    # The BSD tool for 256-bit digests labels its output "SHA256", so accept
    # that label as well as "SHA2" when working with SHA2 digests.
    my $label = ($sha_name eq 'SHA2') ? 'SHA(?:2|256)' : $sha_name;

    chomp ($line);

    if ($line =~ /^$label \((.*)\) = ([a-f0-9]+)$/) {
        $file = $1;
        $sha1 = $2;
    }
    elsif ($line =~ /^([a-f0-9]+)\s+(\S+)$/) {
        $file = $2;
        $sha1 = $1;
    }
    elsif ($create_or_compare == $COMPARE && $line =~ /^[a-f0-9]+$/) {
        $sha1 = $line;
    }
    else {
        print "Invalid format. $line\n" unless ($quiet);
        return;
    }

    if (length ($sha1) != $HEX_DIGEST_LENGTH{$sha_name}) {
        print "Invalid $sha_name digest. $sha1\n" unless ($quiet);
        return;
    }

    return ($sha1, $file);
}

# Subroutine to do creation or comparison, of files or interactively.
# The sha1 argument is only supplied for interactive use--we don't look
# at the file system at all in that case.
sub recursive_sha1 {
    # Arguments:
    #   $create_or_compare - $CREATE to append to the digest file, $COMPARE
    #                        to check against the loaded digest hashes.
    #   $file              - path to process; may be undefined with -i.
    #   $sha1              - digest supplied by the user (interactive only).
    # This wrapper only tracks the recursion depth; the work is done by
    # process_sha1_entry. Keeping the bookkeeping here guarantees the depth
    # is restored on every return path, so a missing or unreadable file does
    # not make later top-level directories look like nested ones.
    my ($create_or_compare, $file, $sha1) = @_;

    $recursion_level++;
    process_sha1_entry ($create_or_compare, $file, $sha1);
    $recursion_level--;
    return;
}

# Subroutine to process one entry (file, directory, or interactively
# supplied digest) on behalf of recursive_sha1. Takes the same arguments.
sub process_sha1_entry {
    my ($create_or_compare, $file, $sha1) = @_;
    my ($root_file, $ctx, $digest);

    $digest = $sha1 if ($interactive);

    # $root_file is used for comparison purposes; it has $root_dir prepended if present.
    # $file may be undefined if using -i option.
    if ($root_dir && defined ($file)) {
        if ($root_dir eq '/') {
            $root_file = $root_dir . $file;
        }
        else {
            $root_file = $root_dir . '/' . $file;
        }
    }
    else {
        $root_file = $file;
    }

    if (!$interactive) {
        # Ignore symbolic links. This is tested before existence so that a
        # dangling link is skipped silently like any other link.
        return if (-l $file);

        # If file doesn't exist, complain but keep going.
        if (!-e $file) {
            print "File does not exist. $file\n" unless ($quiet);
            return;
        }

        # Ignore zero-length files. Otherwise we may end up with redundant
        # SHA1 digests in the SHA1 file, or complaining about files not in
        # SHA1 file when comparing. Directories are exempt because some
        # file systems report a size of zero for them.
        return if (!-d $file && -z $file);

        # If it's a directory, list its contents and recurse, if -R has been
        # specified (top-level directories are always listed).
        if (-d $file && ($recursion_level == 1 || $recurse)) {
            my $dir_handle;
            if (opendir ($dir_handle, $file)) {
                # Read everything and close before recursing so that nested
                # calls never share a directory handle.
                my @entries = grep { $_ ne '.' && $_ ne '..' } readdir ($dir_handle);
                closedir ($dir_handle);
                foreach my $subfile (@entries) {
                    recursive_sha1 ($create_or_compare, "$file/$subfile");
                }
            }
            else {
                print "Could not open directory. $! $file\n" unless ($quiet);
            }
            return;
        }

        # Anything that is not a plain file (including a directory we are
        # not recursing into) is skipped.
        return unless (-f $file);

        # Three-argument open: the file name is never interpreted as a mode
        # or a command, whatever characters it contains.
        if (!open (FILE, '<', $file)) {
            print "Cannot open file. $! $file\n" unless ($quiet);
            return;
        }
        binmode (FILE);
        if ($sha_name eq 'SHA1') {
            $ctx = Digest::SHA1->new;
        }
        else {
            $ctx = Digest::SHA2->new;
        }
        $ctx->addfile (*FILE);
        $digest = $ctx->hexdigest;
        close (FILE);
    }

    # Either add the digest (computed, or supplied interactively) and
    # filename to the output file, or compare it to the hashes.
    if ($create_or_compare == $CREATE) {
        print SHA1_FILE "$digest $root_file\n";
    }
    elsif ($create_or_compare == $COMPARE) {
        if (defined ($file) && defined ($path_to_sha1{$root_file})) {
            if ($path_to_sha1{$root_file} eq $digest) {
                print "$file $sha_name digest matches $sha_name file.\n" if (!$nonmatches_only && ($verbose || $matches_only));
            }
            else {
                print "$file $sha_name digest does not match $sha_name file.\n" unless ($matches_only);
            }
        }
        elsif (defined ($sha1_to_path{$digest})) {
            # With -i the user may have supplied a digest with no filename;
            # identify the entry by its digest in that case.
            my $label = defined ($file) ? $file : $digest;
            print "$label $sha_name digest matches $sha_name file digest for $sha1_to_path{$digest}.\n" unless ($nonmatches_only);
        }
        elsif (defined ($file)) {
            print "$file is not in $sha_name file.\n" unless ($matches_only || $quiet);
        }
    }
    return;
}

# Subroutine to add a suffix to a filename.
#   $file   - the filename.
#   $suffix - the suffix, matched literally (e.g. '.sha1').
# Returns $file unchanged if it already ends with $suffix, otherwise $file
# with $suffix appended. Names shorter than the suffix are handled without
# warnings.
sub add_suffix {
    my ($file, $suffix) = @_;

    return ($file) if ($file =~ /\Q$suffix\E\z/);
    return ("$file$suffix");
}