I had to get some statistics about file sizes today, but couldn’t really find a tool for the job, so naturally, I wrote one.
"""Print size statistics (count, average, median, min, max) for a file tree.

Usage: script [TARGET_DIR [PATH_REGEX]]

TARGET_DIR defaults to the current directory.  When PATH_REGEX is given,
only files whose joined path matches it (``re.search``) are counted.
"""
import os
import re
import sys
from os.path import join, getsize, exists


def median(numbers):
    """Return the median of a non-empty collection of numbers.

    For an odd number of values the middle element is returned unchanged.
    For an even number of values the mean of the two middle elements is
    returned as a float, unless both are equal, in which case that value
    is returned as-is.

    Raises:
        ValueError: if *numbers* is empty.
    """
    ordered = sorted(numbers)
    count = len(ordered)
    if count == 0:
        raise ValueError("median() requires at least one value")

    # Floor division keeps the index an int on both Python 2 and Python 3.
    mid = count // 2
    if count % 2:
        return ordered[mid]

    lower, upper = ordered[mid - 1], ordered[mid]
    if lower == upper:
        # Return the value itself so identical ints are not coerced to float.
        return lower
    # Parenthesised sum: the mean of the two middle values, not a + (b / 2).
    return (lower + upper) / 2.0


def collect_sizes(target, pattern=None):
    """Return the sizes in bytes of the files found under *target*.

    Args:
        target: directory to walk recursively with ``os.walk``.
        pattern: optional compiled regular expression; when given, only
            paths for which ``pattern.search(path)`` succeeds are counted.

    Files whose size cannot be read (for example broken symlinks, or files
    removed while the walk is in progress) are skipped.
    """
    sizes = []
    for root, _dirs, files in os.walk(target):
        for name in files:
            path = join(root, name)
            if pattern is not None and not pattern.search(path):
                continue
            try:
                sizes.append(getsize(path))
            except OSError:
                # Unreadable or vanished entry: leave it out of the stats.
                continue
    return sizes


def main(argv):
    """Collect file sizes according to *argv* and print the statistics.

    ``argv[1]`` is the directory to scan (default ``'.'``) and ``argv[2]``
    is an optional regular expression used to filter file paths.
    """
    target = argv[1] if len(argv) > 1 else '.'
    pattern = re.compile(argv[2]) if len(argv) > 2 else None

    sizes = collect_sizes(target, pattern)
    num = len(sizes)

    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("Num files: %d" % num)
    if not sizes:
        # Nothing matched: the remaining statistics are undefined.
        return

    total = sum(sizes)
    # float() keeps the fractional part of the mean on Python 2 as well.
    print("Average : %0.2f KB" % (total / float(num) / 1024.0))
    print("Median : %0.2f KB" % (median(sizes) / 1024.0))
    print("Min : %0.2f KB" % (min(sizes) / 1024.0))
    print("Max : %0.2f KB" % (max(sizes) / 1024.0))


if __name__ == "__main__":
    main(sys.argv)
Usage should be self-explanatory.