Never been to CodeSnippets before?

Snippets is a public source code repository. Easily build up your personal collection of code snippets, categorize them with tags / keywords, and share them with the world (or not — you can keep them private!).

Find largest files on Mac OS X

# find files greater than 50 MB, 100 MB, ...
# The threshold compared against kMDItemFSSize is a byte count (52428800 = 50*1024*1024).
mdfind 'kMDItemFSSize > 52428800'
# Same queries with the byte count computed by the shell.
# $(( )) is used instead of the deprecated $[ ] arithmetic form.
mdfind "kMDItemFSSize > $((50*1024*1024))"
mdfind "kMDItemFSSize > $((100*1024*1024))"


# find the 10 largest files on your system (> 50 MB)
# Open a root login shell first (presumably so that find may read every directory);
# the pipeline below is meant to be entered inside that shell.
/usr/bin/sudo -H -i
# -x keeps find on the file system of the start directory, -size +Nc matches files
# larger than N bytes. The -ls listing is sorted on its 7th column (the file size)
# in descending numeric order; head keeps the first 10 lines and nl numbers them.
# $(( )) is used instead of the deprecated $[ ] arithmetic form.
/usr/bin/find -x / -type f -size +$((50*1024*1024))c -ls 2>/dev/null | /usr/bin/sort -rn -k 7,7 | /usr/bin/head | /usr/bin/nl

# leave the root shell again
exit


# Create a 10 MB test file whose name contains colons and a backslash.
# ${HOME} is quoted so that a home directory containing whitespace is not word-split.
mkfile 10m "${HOME}"/Desktop/'te:st\file:'

# convert file sizes to megabyte with awk (> 1 MB)
# stat prints "<bytes>:  <path>" for every file found. awk splits each line on ":",
# prints field 1 converted to MB and re-joins all remaining fields with ":" so that
# colons inside a file name are preserved.
/usr/bin/find -x ~/Desktop -type f -size +$((1*1024*1024))c -print0 2>/dev/null | xargs -0 stat -f "%z:  %N" | \
      sort -rn | awk -F: '{printf "%-20s", $1/(1024*1024.0); for (i=2;i<=NF;i++) {printf "%s%s",$i,(i==NF) ? "\n" : ":"}}'


# find the 10 largest directories in the current directory
# Sizes of all direct subdirectories in human-readable units (unsorted).
/usr/bin/find -x . -mindepth 1 -maxdepth 1 -type d -print0 | xargs -0 du -hs
# Sizes in KB, largest first, limited to the first 10 lines.
/usr/bin/find -x . -mindepth 1 -maxdepth 1 -type d -print0 | xargs -0 du -ks | sort -rn | head

# awk: print the size converted to MB, followed by the directory path.
# du separates size and path with a tab, so the line is split on tabs only and the
# path fields are re-joined with tabs. Splitting on any whitespace would collapse
# runs of blanks inside a path and append a trailing space.
/usr/bin/find -x . -mindepth 1 -maxdepth 1 -type d -print0 | xargs -0 du -ks | sort -rn | head | \
    awk -F'\t' '{printf "%-20s", $1/1024.0; for (i=2;i<=NF;i++) {printf "%s%s",$i,(i<NF) ? "\t" : ""}; print ""}'


#--------------------------------------------------------------------


# sort file sizes & file paths containing newlines \n
# cf. Sorting arrays in Bash, http://codesnippets.joyent.com/posts/show/1592

# Create three 10 MB test files. Their names contain colons and backslashes, and the
# third name also contains an embedded newline.
# ${HOME} is quoted so that a home directory containing whitespace is not word-split.
mkfile 10m "${HOME}"/Desktop/'te:st\file'.txt
mkfile 10m "${HOME}"/Desktop/'te:st\file:'
mkfile 10m "${HOME}"/Desktop/$'te:s\nt\\file:'


declare -a file_path_array

# Jump to the first occurrence of "777" in the ruby man page
# (presumably to read about the -0777 switch).
man ruby | less -p '777'

# Split the command substitution on the \777 separator only, so that spaces, tabs and
# newlines inside a file name stay within one array element. xargs lets printf append
# that separator to every path found (at most 500 paths per printf call).
# NOTE(review): \777 does not fit into a single byte; confirm that bash and the printf
# run by xargs reduce it to the same separator byte.
# NOTE(review): the unquoted substitution is still subject to pathname expansion, so
# file names containing glob characters may be altered.
IFS=$'\777'
file_path_array=($(/usr/bin/find -x ~/Desktop -type f -size +$((1*1024*1024))c -print0 2>/dev/null | xargs -0 -n 500 printf "%s\777"))
# restore the default field separators
IFS=$' \t\n'


# Show the number of paths, the paths themselves, and every output line in ruby's
# inspect notation (escape sequences made visible).
echo "${#file_path_array[@]}"
echo "${file_path_array[@]}"
printf "%s\n" "${file_path_array[@]}"  | ruby -n -e 'p $_.to_s'


# Build file_path_array2: one "<size in MB>: <path>" record per entry of file_path_array.
declare -a file_path_array2
i=0
while (( i < ${#file_path_array[@]} )); do
   # stat prints the size in bytes; awk converts it to megabytes
   mbyte=$(/usr/bin/stat -f "%z" "${file_path_array[i]}" | awk '{print $1/(1024*1024.0);}')
   file_path_array2[i]="${mbyte}: ${file_path_array[i]}"
   (( i++ ))
done

# number of records, then the records themselves
printf '%s\n' "${#file_path_array2[@]}"
echo "${file_path_array2[@]}"



# Split on newlines only while the sorted records are read back into an array.
IFS=$'\n'

declare -a file_path_array2_sorted
# Pipeline: print every record followed by NUL + newline, fold all newlines into the
# token NEWLINE, turn the NULs back into newlines (one record per line) and strip the
# NEWLINE token left over at the start of each following record.
# sort compares the leading size as one decimal number, largest first. Splitting the
# line on "." and comparing the fraction as an integer would rank 10.25 above 10.5.
# LC_ALL=C makes sort treat "." as the decimal point.
file_path_array2_sorted=( $(printf "%s\000\n" "${file_path_array2[@]}" | sed -e :a -e '$!N; s/\n/NEWLINE/g; ta' | tr '\000' '\n' | \
      sed -e 's/^NEWLINE//' | LC_ALL=C sort -rn -k 1,1) )

# restore the default field separators
IFS=$' \t\n'

# numbered output with the NEWLINE tokens turned back into real newlines
for ((i=0; i < "${#file_path_array2_sorted[@]}"; i++)); do printf "%s\n" "${file_path_array2_sorted[$i]//NEWLINE/$'\n'}"; done | nl
# for comparison: the same loop with \n instead of $'\n' as replacement text
for ((i=0; i < "${#file_path_array2_sorted[@]}"; i++)); do printf "%s\n" "${file_path_array2_sorted[$i]//NEWLINE/\n}"; done | nl
# for comparison: each record dumped by ruby in inspect notation (-0777 reads the whole input at once)
for ((i=0; i < "${#file_path_array2_sorted[@]}"; i++)); do
   printf "%s" "${file_path_array2_sorted[$i]//NEWLINE/\n}" | ruby -0777 -n -e 'p $_.to_s'
done | nl


#-----------------


# alternative

declare -i i=0
declare -a file_path_array
# declare -a does not empty an array that already exists; start from a clean array
# so that no elements of an earlier run remain.
file_path_array=()


# Read the NUL-terminated paths written by find -print0 one at a time.
# IFS= keeps leading/trailing whitespace and -r keeps backslashes as they are, so the
# paths need no backslash-doubling pass through sed; -d '' reads up to the next NUL.
while IFS= read -r -d '' filepath; do
   # stat prints the size in bytes; awk converts it to megabytes
   mbyte=$(/usr/bin/stat -f "%z" "${filepath}" | awk '{print $1/(1024*1024.0);}')
   file_path_array[i]="${mbyte}:   ${filepath}"
   (( i += 1 ))
done < <(/usr/bin/find -x ~/Desktop -type f -size +$((1*1024*1024))c -print0 2>/dev/null)


# Split on newlines only while the sorted records are read back into an array.
IFS=$'\n'

declare -a file_path_array_sorted
# Pipeline: print every record followed by NUL + newline, fold all newlines into the
# token NEWLINE, turn the NULs back into newlines (one record per line) and strip the
# NEWLINE token left over at the start of each following record.
# sort compares the leading size as one decimal number, largest first. Splitting the
# line on "." and comparing the fraction as an integer would rank 10.25 above 10.5.
# LC_ALL=C makes sort treat "." as the decimal point.
file_path_array_sorted=( $(printf "%s\000\n" "${file_path_array[@]}" | sed -e :a -e '$!N; s/\n/NEWLINE/g; ta' | tr '\000' '\n' | \
      sed -e 's/^NEWLINE//' | LC_ALL=C sort -rn -k 1,1) )

# restore the default field separators
IFS=$' \t\n'


# numbered output with the NEWLINE tokens turned back into real newlines
for ((i=0; i < "${#file_path_array_sorted[@]}"; i++)); do printf "%s\n" "${file_path_array_sorted[$i]//NEWLINE/$'\n'}"; done | nl
# for comparison: the same loop with \n instead of $'\n' as replacement text
for ((i=0; i < "${#file_path_array_sorted[@]}"; i++)); do printf "%s\n" "${file_path_array_sorted[$i]//NEWLINE/\n}"; done | nl
# for comparison: each record dumped by ruby in inspect notation (-0777 reads the whole input at once)
for ((i=0; i < "${#file_path_array_sorted[@]}"; i++)); do
   printf "%s" "${file_path_array_sorted[$i]//NEWLINE/\n}" | ruby -0777 -n -e 'p $_.to_s'
done | nl


#----------------


# same for directory paths containing possible newlines \n

declare -i i=0
declare -a dir_path_array
# declare -a does not empty an array that already exists; start from a clean array
dir_path_array=()

# Read the NUL-terminated directory paths written by find -print0 one at a time.
# IFS= keeps leading/trailing whitespace and -r keeps backslashes as they are, so the
# paths need no backslash-doubling pass through sed; -d '' reads up to the next NUL.
while IFS= read -r -d '' dirpath; do
   # du prints the size in KB; awk keeps lines starting with digits and converts to MB
   dir_size=$(/usr/bin/du -ks "${dirpath}" | /usr/bin/awk '/^[[:digit:]]+/{print $1/1024.0}')
   dir_path_array[i]="${dir_size}     ${dirpath}"
   (( i += 1 ))
done < <(/usr/bin/find -x . -mindepth 1 -maxdepth 1 -type d -print0 2>/dev/null)

# Show the number of records, the records themselves, and every output line in ruby's
# inspect notation (escape sequences made visible).
echo "${#dir_path_array[@]}"
echo "${dir_path_array[@]}"
printf "%s\n" "${dir_path_array[@]}"  | ruby -n -e 'p $_.to_s'


# Split on newlines only while the sorted records are read back into an array.
IFS=$'\n'

declare -a dir_path_array_sorted
# Pipeline: print every record followed by NUL + newline, fold all newlines into the
# token NEWLINE, turn the NULs back into newlines (one record per line) and strip the
# NEWLINE token left over at the start of each following record.
# sort compares the leading size as one decimal number, largest first. Splitting the
# line on "." and comparing the fraction as an integer would rank 10.25 above 10.5.
# LC_ALL=C makes sort treat "." as the decimal point.
dir_path_array_sorted=( $(printf "%s\000\n" "${dir_path_array[@]}" | sed -e :a -e '$!N; s/\n/NEWLINE/g; ta' | tr '\000' '\n' | \
      sed -e 's/^NEWLINE//' | LC_ALL=C sort -rn -k 1,1) )

# restore the default field separators
IFS=$' \t\n'


# numbered output with the NEWLINE tokens turned back into real newlines
for ((i=0; i < "${#dir_path_array_sorted[@]}"; i++)); do printf "%s\n" "${dir_path_array_sorted[$i]//NEWLINE/$'\n'}"; done | nl
# for comparison: the same loop with \n instead of $'\n' as replacement text
for ((i=0; i < "${#dir_path_array_sorted[@]}"; i++)); do printf "%s\n" "${dir_path_array_sorted[$i]//NEWLINE/\n}"; done | nl
# for comparison: each record dumped by ruby in inspect notation (-0777 reads the whole input at once)
for ((i=0; i < "${#dir_path_array_sorted[@]}"; i++)); do
   printf "%s" "${dir_path_array_sorted[$i]//NEWLINE/\n}" | ruby -0777 -n -e 'p $_.to_s'
done | nl


#-------


# get file sizes via ls command
# -l long listing, -a include dot files, -S sort by size (largest first), -r reverse the order
ls -l -a -S
ls -l -a -S -r

# -h prints the sizes with unit suffixes
ls -l -a -h -S
ls -l -a -h -S -r

# -R additionally lists all subdirectories recursively
ls -l -a -S -r -R

Get remote file size, following redirects (PHP)

/**
 * Get the size of a remote file from its Content-Length header, following redirects.
 *
 * Sends an HTTP HEAD request over a raw socket and inspects the response headers.
 * A Location header always takes precedence over Content-Length, so the size of a
 * redirect response itself is never reported.
 *
 * @param string $url           Absolute http:// or https:// URL of the file
 *                              (https requires PHP's OpenSSL transport).
 * @param bool   $readable      When true, return a human-readable string such as
 *                              "12.5 KB" or "3.2 MB"; when false, return the raw
 *                              Content-Length value (bytes, as a string).
 * @param int    $max_redirects Maximum number of redirects that are followed.
 * @return string|false The size, or false when the URL has no host, the connection
 *                      fails, no Content-Length header is present, or the redirect
 *                      limit is exhausted.
 */
function get_remote_file_size($url, $readable = true, $max_redirects = 5){
    $parsed = parse_url($url);
    if (!is_array($parsed) || empty($parsed["host"])) return false;

    $host   = $parsed["host"];
    $secure = isset($parsed["scheme"]) && strtolower($parsed["scheme"]) === "https";
    $scheme = $secure ? "https" : "http";
    $default_port = $secure ? 443 : 80;
    // honour an explicit port in the URL instead of always connecting to port 80
    $port = isset($parsed["port"]) ? (int) $parsed["port"] : $default_port;
    $authority = ($port === $default_port) ? $host : "$host:$port";

    // request target in origin-form: path plus optional query string
    $target = (isset($parsed["path"]) && $parsed["path"] !== "") ? $parsed["path"] : "/";
    if (isset($parsed["query"])) $target .= "?" . $parsed["query"];

    $fp = @fsockopen(($secure ? "ssl://" : "") . $host, $port, $errno, $errstr, 20);
    if (!$fp) return false;

    @fputs($fp, "HEAD $target HTTP/1.1\r\n");
    @fputs($fp, "Host: $authority\r\n");
    @fputs($fp, "Connection: close\r\n\r\n");

    $headers = "";
    while (!@feof($fp)) {
        $chunk = @fgets($fp, 128);
        // stop on a read error or timeout instead of looping forever
        if ($chunk === false) break;
        $headers .= $chunk;
    }
    @fclose($fp);

    // Scan all header lines first; only the first occurrence of each header counts.
    $location = "";
    $length = false;
    foreach (explode("\n", $headers) as $header) {
        if ($location === "" && strncasecmp($header, "Location:", 9) == 0) {
            $location = trim(substr($header, 9));
        } elseif ($length === false && strncasecmp($header, "Content-Length:", 15) == 0) {
            $length = trim(substr($header, 15));
        }
    }

    // follow redirect
    if ($location !== "") {
        // give up instead of recursing endlessly on a redirect loop
        if ($max_redirects <= 0) return false;
        if (substr($location, 0, 2) === "//") {
            // protocol-relative redirect: reuse the current scheme
            $location = "$scheme:$location";
        } elseif ($location[0] === "/") {
            // host-relative redirect: reuse the current scheme, host and port
            $location = "$scheme://$authority$location";
        }
        return get_remote_file_size($location, $readable, $max_redirects - 1);
    }

    $return = $length;
    if ($return && $readable) {
        $size = round($return / 1024, 2);
        $sz = "KB"; // Size In KB
        if ($size > 1024) {
            $size = round($size / 1024, 2);
            $sz = "MB"; // Size in MB
        }
        $return = "$size $sz";
    }
    return $return;
}