md5check directories

July 12th, 2008 by webstersprodigy

This is a Python script that recursively MD5-sums all the files in one directory and compares the result with that of another directory.  It is similar to, and probably not as good as,

find /dirone -type f -print0 | md5sum

but this was coded to check whether a directory structure copied cleanly to a *Windows* box.  It seems to work OK.  TODO: only read line by line (or in chunks) if the file is over a certain size, else read the whole file at once like it does now.

#!/usr/bin/env python

import os, sys, getopt
from Crypto.Hash import MD5
from Crypto.Hash import SHA

def usage():
  """Print the command-line help text to standard output."""
  # A single parenthesised argument prints the same bytes whether `print` is
  # the Python 2 statement or the Python 3 function.
  print("""
  DESCRIPTION
    compares topdir1 to topdir2 using a hash algorithm

  USAGE
    hashsum.py -h
      prints this message
    hashsum.py topdir1 topdir2 [sha1|md5]

  """)

def sumcont(hasharg, dirname, fnames):
  """Feed the contents of the files in one directory into a running hash.

  The signature matches the visitor expected by os.path.walk
  (arg, dirname, names), but the function can also be called directly.

  hasharg -- hash object exposing update(); receives every file's raw bytes
  dirname -- directory that the entries in fnames live in
  fnames  -- names of the entries inside dirname; a list is sorted in place

  Subdirectories are skipped here; a recursive walk hashes their files when
  it visits them. Entries that cannot be read are reported on stderr and
  left out of the hash instead of aborting the run. One "*" is written to
  stdout per call as a progress marker.
  """
  # Directory listings come back in arbitrary, filesystem-dependent order, and
  # the digest depends on the order the bytes are fed in. Sorting the list in
  # place also makes os.path.walk descend into subdirectories in sorted order.
  if isinstance(fnames, list):
    fnames.sort()
  for name in sorted(fnames):
    path = os.path.join(dirname, name)
    if os.path.isdir(path):
      continue
    try:
      # Binary mode: text mode on Windows rewrites CRLF and stops at Ctrl-Z,
      # so the same bytes would hash differently on the two machines.
      with open(path, 'rb') as stream:
        # Fixed-size chunks keep memory use flat for very large files.
        for chunk in iter(lambda: stream.read(65536), b''):
          hasharg.update(chunk)
    except (IOError, OSError) as err:
      # Best effort: keep going, but make the skipped entry visible.
      sys.stderr.write("hashsum: skipping %s: %s\n" % (path, err))
  sys.stdout.write("* ")

# Script body: hash both directory trees and print the two digests.

# Both directory arguments are required; the algorithm name is optional.
if len(sys.argv) < 3:
  usage()
  sys.exit(-1)

dir_1 = sys.argv[1]
dir_2 = sys.argv[2]

# A missing or mistyped path would otherwise be hashed as an empty tree,
# which could make a failed copy look like a clean result.
for topdir in (dir_1, dir_2):
  if not os.path.isdir(topdir):
    sys.stderr.write("hashsum: not a directory: %s\n" % topdir)
    sys.exit(-1)

# md5 is the default; any third argument other than "sha1" also selects md5.
# Trivial to add other hashing algorithms here.
if len(sys.argv) > 3 and sys.argv[3].lower() == 'sha1':
  print("HASH ALGORITHM: sha1")
  hash_1 = SHA.new()
  hash_2 = SHA.new()
else:
  print("HASH ALGORITHM: md5")
  hash_1 = MD5.new()
  hash_2 = MD5.new()

# os.walk replaces os.path.walk, which no longer exists in Python 3.
# Directory and file names are sorted so both trees are fed to their hash
# objects in the same order regardless of how each filesystem lists entries.
for topdir, hasher in ((dir_1, hash_1), (dir_2, hash_2)):
  for dirpath, dirnames, filenames in os.walk(topdir):
    dirnames.sort()
    sumcont(hasher, dirpath, sorted(filenames))

digest_1 = hash_1.hexdigest()
digest_2 = hash_2.hexdigest()

print('\n')
print('First  dir ( %s ) hash : \n%s' % (dir_1, digest_1))
# The second line must report the second directory's own digest.
print('Second dir ( %s ) hash : \n%s' % (dir_2, digest_2))
if digest_1 == digest_2:
  print('RESULT: hashes match')
else:
  print('RESULT: hashes DIFFER')

Tags:

Leave a Reply


No computers were harmed in the 0.254 seconds it took to produce this page.