Never been to CodeSnippets before?

Snippets is a public source code repository. Easily build up your personal collection of code snippets, categorize them with tags / keywords, and share them with the world (or not, you can keep them private!)

1 total

Batch download code snippets

Batch download snippets from http://codesnippets.joyent.com and convert them to text files using textutil (see man textutil; available on Mac OS X 10.4 or later).

Note: Old snippet versions will be automatically replaced by the downloaded snippets without a backup!

Author: jv
License: The MIT License, Copyright (c) 2008 jv

Usage:
# usage: bds [-p num] [-t tag] [-u user] tag
bds vim
bds -p 1280
bds -u jvs
bds -t plistbuddy
bds -t tar
bds -t ipfw -u jvs



#!/opt/local/bin/bash

# "batch download snippets" from http://codesnippets.joyent.com and
# convert them to text files using textutil (see man textutil; available on Mac OS X 10.4 or later).
#
# Note: Old snippet versions will be automatically replaced by the downloaded snippets without a backup!
#      An alternative to textutil is html2text, http://www.mbayer.de/html2text/ (which is available via MacPorts).
#
# Author: jv
# License: The MIT License, http://www.opensource.org/licenses/mit-license.php
# Copyright (c) 2008 jv
#
# cat /usr/local/bin/bds
#
# usage: bds [-p num] [-t tag] [-u user] tag


# --- configuration ---------------------------------------------------------
# Site root and local destination directory; a trailing slash, if any, is
# dropped so that paths can be joined with a single "/" later on.
declare BaseURL download_dir
BaseURL='http://codesnippets.joyent.com'
BaseURL="${BaseURL%/}"
download_dir="${HOME}/Desktop/Snippets"
download_dir="${download_dir%/}"

# URLs derived from the site root (single post, tag listing, user listing),
# again normalised to carry no trailing slash.
declare BasePostURL BaseTagURL BaseUserURL
BasePostURL="${BaseURL}/posts/show"
BasePostURL="${BasePostURL%/}"
BaseTagURL="${BaseURL}/tag"
BaseTagURL="${BaseTagURL%/}"
BaseUserURL="${BaseURL}/user"
BaseUserURL="${BaseUserURL%/}"


# encodings handed to textutil (-inputencoding / -encoding), cf. man textutil
declare InputEncoding='utf-8' OutputEncoding='utf-8'

# word splitting on space, tab and newline
IFS=$' \t\n'
export IFS


# Download a single post specified by its post number (bds -p num), convert the
# HTML page to plain text with textutil and tidy the text file in place with ed.
# cf. snippet, http://codesnippets.joyent.com/posts/show/1282
#
# Globals:   BasePostURL, download_dir, InputEncoding, OutputEncoding (read only)
# Arguments: $1 - post number; must be a non-empty string of decimal digits
# Outputs:   "saved as" / "couldn't access" lines on stdout, errors on stderr;
#            writes ${download_dir}/single-downloads/<num>_<title>.txt
# Returns:   0 on success; 1 on a bad argument, an inaccessible post or a failed
#            conversion.  A failed download terminates the script with status 1.

function snippet() {

   declare NL OPWD ed_target file outputfile postnum status target_dir title url

   # an empty argument contains no non-digit characters either, so test for it explicitly
   if [[ -z "${1}" ]] || [[ "${1//[[:digit:]]/}" != "" ]]; then
      echo "Argument error. No positive integer: ${1}" 1>&2
      return 1
   fi

   postnum="${1}"
   url="${BasePostURL}/${postnum}"

   # keep the target directory local: the global download_dir must not grow with every call
   target_dir="${download_dir}/single-downloads"
   /bin/mkdir -p "${target_dir}" || return 1
   OPWD="${PWD}"
   cd "${target_dir}" || return 1    # curl -O below writes into the current directory

   file="${target_dir}/${url##*/}"   # curl -O names the file after the last URL component

   # installed before the download so that an interrupted transfer leaves no partial page behind
   trap '/bin/rm -f "${file}"; exit 1' 1 2 13 15

   if ! /usr/bin/curl -L -O -s --max-time 25 "${url}"; then    # download snippet web page
      printf "\e[0K\e[31m%s\e[0m:  %s\n" "download failed" "${url}" 1>&2
      /bin/rm -f "${file}"
      exit 1
   fi

   status=0

   # get title of downloaded web page
   title="$(/usr/bin/egrep -m 1 -io '<title>.*</title>' "${file}" | /usr/bin/sed -E -e 's/^<title>[[:space:]]*|[[:space:]]*<\/title>$//g' \
             -e 's/\[[^][:space:]]*\]//g')"    # delete [xxx] tag elements of title

   # turn the title into a file name component
   title="${title//CodeSnippets:/}"
   title="${title//\//:}"
   title="${title// /_}"
   title="${title//[[:cntrl:]]/}"
   title="${title%"${title##*[!_]}"}"   # remove trailing underscores

   if [[ $title == '_CodeDrive_Snippets_courtesy_of_Peter_Coopers_handy_little_app' ]] || [[ -z "$title" ]]; then

      # only the generic site title (or none at all) was found: not a usable post page
      printf "\e[0K\e[31m%s\e[0m:  %s\n" "couldn't access" "${url}"
      status=1

   else

      outputfile="${target_dir}/${postnum}_${title}.txt"
      #outputfile="${target_dir}/${title}.txt"  # without post number prefix
      #outputfile="${outputfile//__/_}"  # uniq underscores

      printf "\n\e[0K\e[1;30m%s\e[0m:  %s\n\n" "saved as" "${outputfile}"

      if /usr/bin/textutil -output "${outputfile}" -convert txt -inputencoding "${InputEncoding}" -encoding "${OutputEncoding}" "${file}"; then

         # escape backslashes in the file name handed to ed
         # NOTE(review): presumably ed strips one level of backslash escapes from file names -- cf. man ed
         ed_target="${outputfile//\\/\\\\}"

         NL=$'\\\n'    # backslash + newline: inserts a line break inside an ed replacement

# edit the text file in-place with man ed
# first delete lines at the beginning & end,
# then remove the string 'See related posts' and add some newlines with $NL,
# then convert the line 'to...by...on' to line 'Author:...', line 'Date:...', line 'URL:...' and line 'Tags:...'
# and finally the last two ed commands insert two further newlines with $NL

/bin/ed -s "${ed_target}" <<EOF
H
,g/Snippets is a public source code repository/1,/Snippets is a public source code repository/d
,g/You need to create an account or log in to post comments to this site//You need to create an account or log in to post comments to this site/,\$d
,g|(See related posts)$|s|.See related posts.|${NL}${NL}|
,g|^to.* by.* on .*[[:digit:]]$|s|^to\(.*\) by\(.*\) on \(.*[[:digit:]]\)$|${NL}${NL}Author:\2${NL}Date: \3${NL}URL: ${url}${NL}Tags:\1${NL}|
,g|^Comments on this post$|s|\(Comments on this post\)|${NL}\1:|
,g| posts on .* at |s|\(.* posts on .* at .*\)|${NL}\1:|
w
EOF

# additional ed commands
# delete line numbers
# ,g|^[[:space:]]*[[:digit:]]\{1,\}[[:space:]]\{1,3\}|s|^[[:space:]]*[[:digit:]]\{1,\}[[:space:]]\{1,3\}\(.*\)$|\1|
# delete range of lines
# 4,11d

      else
         status=1    # conversion failed, there is no text file to edit
      fi

   fi

   # common epilogue for every path that got past the download
   /bin/rm -f "${file}"
   trap - 1 2 13 15
   cd "${OPWD}"
   return "${status}"

}



#----------------------------------------- end of function snippet



# --- command line handling ---------------------------------------------------
# Sets dir_name (sub-directory name below download_dir) and tagsite (listing URL
# to walk through) from the options / the single positional tag argument, or
# hands over to snippet() for -p.

declare pflag tflag uflag
declare cnt count dir_name file no_posts_check NL OPWD outputfile postnum tagsite title url urls website

# Print the given error line followed by the usage line on stderr and exit with status 1.
usage_error() {
   printf "%s\n%s\n" "${1}" "Usage: ${0##*/} [-p num] [-t tag] [-u user] tag" 1>&2
   exit 1
}

if [[ $# -eq 0 ]]; then
   usage_error 'No arguments given!'
fi


# leading ":" = silent mode: getopts reports an unknown option as "?" and a
# missing option argument as ":"; both are usage errors
while getopts ":p:t:u:" option
do
  case "${option}" in
    p) pflag="$OPTARG" ;;
    t) tflag="$OPTARG" ;;
    u) uflag="$OPTARG" ;;
    [?]|:) usage_error 'Argument error!' ;;
    *) ;;
  esac
done

shift $((OPTIND - 1))


if [[ $# -eq 1 ]]; then

   # bds tag
   dir_name="${1}"
   tagsite="${BaseTagURL}/${1}"

elif [[ $# -gt 1 ]]; then

   usage_error 'Too many arguments!'

elif [[ -n "${pflag}" ]]; then

   # bds -p num: single post; pass the function's status on to the caller of the script
   snippet "${pflag}"
   exit $?

elif [[ -n "${tflag}" ]] && [[ -n "${uflag}" ]]; then

   # bds -t tag -u user
   dir_name="${tflag}-${uflag}"
   tagsite="${BaseUserURL}/${uflag}/tag/${tflag}"

elif [[ -n "${tflag}" ]]; then

   # bds -t tag
   dir_name="${tflag}"
   tagsite="${BaseTagURL}/${tflag}"

elif [[ -n "${uflag}" ]]; then

   # bds -u user
   dir_name="${uflag}"
   tagsite="${BaseUserURL}/${uflag}"

else

   usage_error 'Argument error!'

fi


# --- set up counters and the download directory -------------------------------

tagsite="${tagsite%/}"

#echo $dir_name
#echo $tagsite

count=1              # number of the listing page requested next ("${tagsite}/${count}")
cnt=0                # running number printed in front of each saved snippet
curl_max_time=20     # passed to curl --max-time
website=''
no_posts_check=''
NL=$'\\\n'           # backslash + newline, used inside the ed replacement strings
download_dir="${download_dir}/${dir_name//\//:}"   # "/" in the name would create sub-directories
download_dir="${download_dir%/}"
OPWD="${PWD}"

# curl -O writes into the current directory while the files are later read from
# ${download_dir}, so going on after a failed mkdir/cd would be pointless
if ! /bin/mkdir -p "${download_dir}" || ! cd "${download_dir}"; then
   printf "\e[0K\e[31m%s\e[0m:  %s\n" "couldn't access" "${download_dir}" 1>&2
   exit 1
fi

# print download directory
printf "\n\e[0K\e[1;30m%s\e[0m:  %s\n\n" "download directory" "${download_dir}"


# --- walk through the listing pages and save every linked post ----------------
# Pages are requested as "${tagsite}/1", "${tagsite}/2", ...; the loop ends when
# a page contains the marker '>No posts<' or no post links at all.

# Remove a half-processed HTML page on exit or interruption.  The EXIT trap must
# not call "exit" itself, otherwise it would replace the script's exit status.
trap '/bin/rm -f "${file}"' 0
trap '/bin/rm -f "${file}"; exit 1' 1 2 13 15

while [[ -z "${no_posts_check}" ]]; do

   # download website of the form:
   # http://somewebsite.com/tag/bash/1,
   # http://somewebsite.com/user/name/1 or
   # http://somewebsite.com/user/name/tag/bash/1

   if ! website="$(/usr/bin/curl -L -s --max-time "${curl_max_time}" "${tagsite}/${count}")"; then
      printf "\e[0K\e[31m%s\e[0m:  %s\n" "curl_max_time ${curl_max_time}" "${tagsite}/${count}"
      exit 1
   fi

   if [[ "${website}" == *'>No posts<'* ]]; then
      no_posts_check='>No posts<'
      #printf "\e[0K\e[31m%s\e[0m:  %s\n" "no further posts" "${no_posts_check}"
      break
   fi

   # extract relevant post URLs: every  href="/posts/show/<digits>"  becomes  ${BaseURL}/posts/show/<digits>
   urls=()
   while IFS= read -r href; do
      href="${href#href=\"}"
      urls[${#urls[@]}]="${BaseURL}${href%\"}"
   done < <(printf "%s\n" "${website}" | /usr/bin/egrep -o 'href="/posts/show/[[:digit:]]+"')

   # a page without the marker and without any post link (e.g. an error page)
   # would otherwise be followed by an endless series of further requests
   if [[ ${#urls[@]} -eq 0 ]]; then
      break
   fi

   for url in "${urls[@]}"; do

      postnum="${url##*/}"
      file="${download_dir}/${postnum}"    # curl -O names the file after the last URL component

      if ! /usr/bin/curl -L -O -s --max-time "${curl_max_time}" "${url}"; then
         printf "\e[0K\e[31m%s\e[0m:  %s\n" "curl_max_time ${curl_max_time}" "${url}"
         /bin/rm -f "${file}"    # do not leave a partial page behind
         continue
      fi

      # get title of downloaded web page
      title="$(/usr/bin/egrep -m 1 -io '<title>.*</title>' "${file}" | /usr/bin/sed -E -e 's/^<title>[[:space:]]*|[[:space:]]*<\/title>$//g' \
             -e 's/\[[^][:space:]]*\]//g')"    # delete [xxx] tag elements of title

      # turn the title into a file name component
      title="${title//CodeSnippets:/}"
      title="${title//\//:}"
      title="${title// /_}"
      title="${title//[[:cntrl:]]/}"
      title="${title%"${title##*[!_]}"}"   # remove trailing underscores

      #printf "%s\n" "${title}"

      if [[ $title == '_CodeDrive_Snippets_courtesy_of_Peter_Coopers_handy_little_app' ]] || [[ -z "$title" ]]; then
         printf "\e[0K\e[31m%s\e[0m:  %s\n" "couldn't access" "${url}"
         /bin/rm -f "${file}"
         continue
      fi

      outputfile="${download_dir}/${postnum}_${title}.txt"
      #outputfile="${download_dir}/${title}.txt"  # without post number prefix
      #outputfile="${outputfile//__/_}"  # uniq underscores

      cnt=$((cnt + 1))
      printf "\e[0K\e[1;32m%-6s\e[0m  %s\n" "${cnt}" "${outputfile##*/}"

      if ! /usr/bin/textutil -output "${outputfile}" -convert txt -inputencoding "${InputEncoding}" -encoding "${OutputEncoding}" "${file}"; then
         # without a converted text file there is nothing for ed to edit
         printf "\e[0K\e[31m%s\e[0m:  %s\n" "couldn't convert" "${url}"
         /bin/rm -f "${file}"
         continue
      fi

      /bin/rm -f "${file}"

      # escape backslashes in the file name handed to ed
      # NOTE(review): presumably ed strips one level of backslash escapes from file names -- cf. man ed
      outputfile="${outputfile//\\/\\\\}"

# edit $outputfile in-place with man ed
# first delete lines at the beginning & end,
# then remove the string 'See related posts' and add some newlines with $NL,
# then convert the line 'to...by...on' to line 'Author:...', line 'Date:...', line 'URL:...' and line 'Tags:...'
# and finally the last two ed commands insert two further newlines with $NL

/bin/ed -s "${outputfile}" <<EOF
H
,g/Snippets is a public source code repository/1,/Snippets is a public source code repository/d
,g/You need to create an account or log in to post comments to this site//You need to create an account or log in to post comments to this site/,\$d
,g|(See related posts)$|s|.See related posts.|${NL}${NL}|
,g|^to.* by.* on .*[[:digit:]]$|s|^to\(.*\) by\(.*\) on \(.*[[:digit:]]\)$|${NL}${NL}Author:\2${NL}Date: \3${NL}URL: ${url}${NL}Tags:\1${NL}|
,g|^Comments on this post$|s|\(Comments on this post\)|${NL}\1:|
,g| posts on .* at |s|\(.* posts on .* at .*\)|${NL}\1:|
w
EOF

# additional ed commands
# delete line numbers
# ,g|^[[:space:]]*[[:digit:]]\{1,\}[[:space:]]\{1,3\}|s|^[[:space:]]*[[:digit:]]\{1,\}[[:space:]]\{1,3\}\(.*\)$|\1|
# delete range of lines
# 4,11d

   done  # for

   count=$((count + 1))    # next listing page

done   # while


cd "${OPWD}"


exit 0

1 total