#!/bin/bash

#-------------------------------------------------------------------------------
# "THE BEER-WARE LICENSE" (Revision 42):
# <atelszewski@gmail.com> wrote this file.  As long as you retain this notice,
# you can do whatever you want with this stuff.  If we meet some day, and you
# think this stuff is worth it, you can buy me a beer in return.
# See CONDITIONS for usage and redistributions conditions.
#-------------------------------------------------------------------------------

################################################################################
# @brief Prints a diagnostic message on stderr.
# @param $@ Message words; they are joined with single spaces.
# @print The message followed by a newline (on stderr).
################################################################################
function echoerr() {
################################################################################
  # "$*" joins the arguments with the first character of IFS; pin it to a
  # space so the output does not depend on the caller's IFS.
  local IFS=' '

  # printf instead of echo: echo would treat a message such as "-n" or "-e"
  # as an option and print nothing (or drop the newline).
  printf '%s\n' "$*" 1>&2
}

################################################################################
# @brief Prints the help message. The defaults shown for --arch, --release and
# --timeout are taken from the ARCH, RELEASE and TIMEOUT variables at the time
# of the call.
# @print Help message (on stdout).
################################################################################
function usage() {
################################################################################
  local prog=$0
  local arch_default=${ARCH:-detect}
  local release_default=$RELEASE
  local timeout_default=$TIMEOUT

  cat <<EOF
Usage: $prog [OPTIONS]

Options are:
  --text-diff-remote  Display textual diff between remote and local ChangeLog.
                      This is the default action.
  --text-diff-local   Display textual diff for local ChangeLog, that is,
                      the result of the most recent --text-diff-remote action.
  --rss-remote        Display remote ChangeLog in RSS format.
  --arch=ARCH         Slackware architecture (x86, x86_64, arm). Default: $arch_default.
  --release=RELEASE   Slackware release (current, 14.1, etc.). Default: $release_default.
  --timeout=TIMEOUT   Timeout (in seconds) for fetching the remote ChangeLog.
                      Default: $timeout_default seconds.
  --cache-dir=DIR     Specify an alternative directory for storing ChangeLog's data.
                      Default: \$HOME/.cache/slackware.ChangeLog.
  --mirror-file=FILE  Specify an alternative mirror list for remote ChangeLog.
                      Default: \$HOME/.config/slackchlog.mirror.txt.
                      In the case where the default file does not exist and
                      alternative file is not specified, upstream URLs are used.
  --verbose           Be verbose (quiet by default).
  --help              Display this help message and exit.
  --version           Display version and exit.

Note:
  It is perfectly acceptable to specify the same option multiple times.
  In such case, the last one takes precedence.
EOF
}

################################################################################
# @brief Parses the command line with getopt(1) (long options only) and stores
# the result in the global variables LOGTYPE, ARCH, RELEASE, TIMEOUT, CACHEDIR,
# MIRROR_FILE and VERBOSE. When an option is repeated, the last one wins.
# --help prints the usage and exits 0; --version prints VERSION and exits 0.
# Exits 1 on unknown options or on any non-option argument.
# @param $@ The script's command line arguments.
################################################################################
function get_command_line() {
################################################################################
  local long_opts
  local parsed

  # A trailing ':' marks an option that requires an argument.
  long_opts="text-diff-remote,text-diff-local,rss-remote"
  long_opts+=",arch:,release:,timeout:,cache-dir:,mirror-file:"
  long_opts+=",verbose,help,version"

  # "+" as the short option string: no short options, and scanning stops at
  # the first non-option argument.
  parsed=$(getopt --options "+" --longoptions "$long_opts" --name "$0" -- "$@") || exit 1
  # getopt emits a shell-quoted argument list; eval is the documented way to
  # load it back into the positional parameters.
  eval set -- "$parsed" || exit 1

  while true; do
    case "$1" in
      --text-diff-remote | --text-diff-local | --rss-remote)
        # The log type is the option name without the leading dashes.
        LOGTYPE=${1#--}
        shift
        ;;
      --arch)
        ARCH=$2
        shift 2
        ;;
      --release)
        RELEASE=$2
        shift 2
        ;;
      --timeout)
        TIMEOUT=$2
        shift 2
        ;;
      --cache-dir)
        CACHEDIR=$2
        shift 2
        ;;
      --mirror-file)
        MIRROR_FILE=$2
        shift 2
        ;;
      --verbose)
        VERBOSE=1
        shift
        ;;
      --help)
        usage
        exit 0
        ;;
      --version)
        printf '%s\n' "$VERSION"
        exit 0
        ;;
      --)
        # End of options marker, always emitted by getopt.
        shift
        break
        ;;
      *)
        echoerr "$0: getopt: internal error"
        exit 1
        ;;
    esac
  done

  # Whatever is left after "--" is a non-option argument; none are accepted.
  if [ $# -ne 0 ]; then
    echoerr "$0: excessive arguments given"
    exit 1
  fi
}

################################################################################
# @brief Maps the machine name reported by 'uname -m' to a Slackware
# architecture name.
# @print x86 for i?86, arm for arm*, otherwise the machine name unchanged
# (which covers x86_64).
################################################################################
function detect_arch() {
################################################################################
  local machine
  local slack_arch

  machine=$(uname -m)

  if [[ "$machine" == i?86 ]]; then
    slack_arch=x86
  elif [[ "$machine" == arm* ]]; then
    slack_arch=arm
  else
    # x86_64 and every unknown machine name are passed through as they are.
    slack_arch=$machine
  fi

  echo "$slack_arch"
}

################################################################################
# @brief Validates the architecture name.
# @param $1 Architecture to check.
# @return 0 if the architecture is one of x86, x86_64 or arm; otherwise an
# error is printed on stderr and the script exits with status 1.
################################################################################
function check_arch() {
################################################################################
  case "$1" in
    x86 | x86_64 | arm)
      # Explicit status: a bare 'return' would hand back whatever exit status
      # the caller's previous command happened to leave behind.
      return 0
      ;;
    *)
      echoerr "$0: error: unsupported arch: '$1'"
      exit 1
      ;;
  esac
}

################################################################################
# @brief Validates the release name: either the word 'current' or a
# MAJOR.MINOR number such as 14.1.
# @param $1 Release to check.
# @return 0 if the release is accepted; otherwise an error is printed on
# stderr and the script exits with status 1.
################################################################################
function check_release() {
################################################################################
  local release=$1
  local accepted='^((current)|([0-9]+\.[0-9]+))$'

  # Guard clause: nothing more to do for a well-formed release.
  [[ "$release" =~ $accepted ]] && return 0

  echoerr "$0: error: unsupported release: '$release'"
  exit 1
}

################################################################################
# @brief Validates the timeout: decimal digits only, first digit not zero.
# @param $1 Timeout to check.
# @return 0 if the timeout is accepted; otherwise an error is printed on
# stderr and the script exits with status 1.
################################################################################
function check_timeout() {
################################################################################
  local timeout=$1
  local accepted='^[1-9]+[0-9]*$'

  # Guard clause: nothing more to do for a well-formed timeout.
  [[ "$timeout" =~ $accepted ]] && return 0

  echoerr "$0: error: timeout must be positive integer value: '$timeout'"
  exit 1
}

################################################################################
# @brief Converts an architecture name into the suffix that follows the word
# 'slackware' in the URLs and cache file names built by this script.
# @param $1 Architecture (x86, x86_64, arm, ...).
# @print Empty line for x86, '64' for x86_64, otherwise $1 unchanged.
################################################################################
function c_arch() {
################################################################################
  # Every architecture maps to itself unless listed below.
  local suffix=$1

  if [[ "$1" == x86 ]]; then
    suffix=""
  elif [[ "$1" == x86_64 ]]; then
    suffix="64"
  fi

  echo "$suffix"
}

################################################################################
# @brief Converts a release name into the suffix that follows the architecture
# suffix in the URLs and cache file names built by this script.
# @param $1 Release (current, 14.1, etc.).
# @print The release prefixed with a dash, e.g. '-current' or '-14.1'.
################################################################################
function c_release() {
################################################################################
  # 'current' needs no special treatment: every release maps to "-<release>".
  echo "-${1}"
}

################################################################################
# @brief Loads the mirror URL for the given architecture from a mirror file and
# stores it in the global associative array MIRROR. The first line of the form
#   MIRROR[<arch>]="<url>"
# wins; all other lines are ignored.
# @param $1 Architecture (x86, x86_64, arm).
# @param $2 Path to the mirror file.
# @return 0 on success. Exits with status 1 if the file is not readable or
# contains no matching line.
################################################################################
function load_mirror() {
################################################################################
  local arch
  local mirror_file
  local mirror_re
  local line

  arch=$1
  mirror_file=$2

  if ! [ -r "$mirror_file" ]; then
    echoerr "$0: mirror file: '$mirror_file' is not readable"
    exit 1
  fi

  # Resulting regex, e.g. for x86:  ^MIRROR\[x86\]="(.+)"$
  mirror_re="^MIRROR\\[${arch}\\]=\"(.+)\"\$"

  # '|| [[ -n $line ]]' makes sure the last line is examined even if the file
  # does not end with a newline (read returns non-zero for it, but still
  # fills in the variable).
  while IFS= read -r line || [[ -n "$line" ]]; do
    if [[ "$line" =~ $mirror_re ]]; then
      MIRROR[$arch]=${BASH_REMATCH[1]}
      return 0
    fi
  done < "$mirror_file" || exit 1

  echoerr "$0: could not determine correct mirror, make sure mirror file has correct format:"
  echoerr "$0: example: MIRROR[$arch]=\"http://mirrors.us.kernel.org/slackware/\""
  exit 1
}

################################################################################
# @brief Downloads CHECKSUMS.md5 and extracts ChangeLog's checksum.
# Reads the global variables TIMEOUT (curl's --max-time and the timeout of the
# first line read) and VERBOSE (progress output when non-empty).
# @param $1 CHECKSUMS.md5 URL.
# @return 0 on success, 1 if no checksum line was found. Exits with status 1
# if the temporary directory/FIFO cannot be created or removed.
# @print ChangeLog's checksum.
################################################################################
function download_checksum() {
################################################################################
  local URL
  local SUMREGEX
  local DPIPE
  local FPIPE
  local CURLOPT
  local CURLPID
  local READ_TIMEOUT
  local CHECKSUM
  # NOTE(review): LINE, used by the read loop below, is not declared local and
  # is therefore assigned in the caller's scope.

  URL=$1

  # REGEX to match the ChangeLog's checksum in the CHECKSUMS.md5:
  # 32 characters of [a-z0-9], two spaces, then the file name.
  # NOTE(review): the dots are not escaped, so each one matches any character.
  SUMREGEX="^([a-z0-9]{32})  ./ChangeLog.txt$"

  # Create the pipe securely: a private temporary directory with a FIFO inside.
  DPIPE=$(mktemp -d) || exit 1
  FPIPE=$DPIPE/queue
  mkfifo "$FPIPE" || { rmdir "$DPIPE"; exit 1; }

  # The ChangeLog's checksum (MD5) can be found at the beginning of the
  # CHECKSUMS.md5. To conserve the bandwidth, CHECKSUMS.md5 is downloaded in
  # background and piped into a loop that searches for ChangeLog's checksum.
  # As soon as the ChangeLog's checksum is found, the background download is
  # stopped.
  [ -n "$VERBOSE" ] && echoerr "downloading checksum:"
  [ -z "$VERBOSE" ] && CURLOPT="--silent --show-error"
  # $CURLOPT is left unquoted on purpose: it holds two options that have to be
  # split into separate words (or nothing at all in verbose mode).
  curl --max-time $TIMEOUT --progress-bar $CURLOPT "$URL" > "$FPIPE" &
  CURLPID=$!

  # Original author's observations on how the loop below behaves:
  # 1. If curl succeeds and writes something to the FIFO, the loop can proceed
  #    because it won't block (because the FIFO has been opened for writing).
  # 2. If there is something in the FIFO, but this something has no LF in it,
  #    then the loop's read can successfully timeout if the timeout is exceeded.
  # 3. If curl fails for whatever reason, the loop is not executed and, more
  #    importantly, does not block infinitely.
  # NOTE(review): presumably (3) works because the '> "$FPIPE"' redirection is
  # set up for the background job before curl itself runs, so the FIFO always
  # gets a writer; once that writer goes away, read sees end-of-file instead
  # of blocking. This is standard FIFO behavior, not something this file
  # shows -- confirm before relying on it.
  READ_TIMEOUT=$TIMEOUT
  while IFS= read -t $READ_TIMEOUT -r LINE; do
    # Reduce the read timeout after the first line has been read.
    # 1 line / 2 seconds should be reasonable even on very slow connection.
    READ_TIMEOUT=2

    # ChangeLog's checksum has been found, kill the download background job.
    if [[ "$LINE" =~ $SUMREGEX ]]; then
      CHECKSUM=${BASH_REMATCH[1]}
      kill -TERM $CURLPID
      break
    fi
  done < "$FPIPE"

  # Reap the background job; its exit status and output are deliberately
  # ignored, success is judged only by whether a checksum has been found.
  wait $CURLPID &>/dev/null

  # Remove the FIFO and its temporary directory.
  rm    "$FPIPE" || exit 1
  rmdir "$DPIPE" || exit 1

  if [ -n "$CHECKSUM" ]; then
    echo "$CHECKSUM"
    return 0
  else
    echoerr "$0: could not determine the ChangeLog's checksum"
    return 1
  fi
}

################################################################################
# @brief Downloads the ChangeLog with curl. Reads the global variables TIMEOUT
# (curl's --max-time) and VERBOSE (progress output when non-empty).
# @param $1 ChangeLog's URL.
# @return 0 on success, 1 if curl fails.
# @print ChangeLog's text (on stdout; the caller captures it into a variable).
################################################################################
function download_changelog() {
################################################################################
  local url
  local -a curl_args

  url=$1

  # The options are collected in an array so that every option stays a
  # separate word without relying on unquoted word splitting.
  curl_args=(--max-time "$TIMEOUT" --progress-bar)

  if [ -n "$VERBOSE" ]; then
    echoerr "downloading ChangeLog:"
  else
    # Quiet mode: no progress bar, but errors are still reported.
    curl_args+=(--silent --show-error)
  fi

  curl "${curl_args[@]}" "$url" || return 1
}

################################################################################
function check_checksum() {
################################################################################
  local ICHECKSUM
  local CHANGELOGTEXT
  local OCHECKSUM

  ICHECKSUM=$1
  CHANGELOGTEXT=$2

  OCHECKSUM=$(md5sum <<< "$CHANGELOGTEXT") || exit 1
  OCHECKSUM=( $OCHECKSUM )

  if [ "$ICHECKSUM" != "$OCHECKSUM" ]; then
    echoerr "$0: checksum verification: FAILED"
    echoerr "$0: expected: $ICHECKSUM, got: $OCHECKSUM"
    return 1
  else
    [ -n "$VERBOSE" ] && echoerr "checksum verification: OK"
    return 0
  fi
}

################################################################################
# @brief Prints textual diff between remote and local ChangeLog.
# The complete remote ChangeLog is saved in the cache directory. The diff is
# every line of the remote ChangeLog that precedes the line equal to the first
# line (a UTC date) of the previously saved diff; a non-empty diff replaces
# the saved one. On the very first run the Epoch date is used as the marker.
# Reads the global variables CACHEDIR and VERBOSE.
# @param $1 Architecture (x86, x86_64, etc.).
# @param $2 Release (current, 14.1, etc.).
# @param $3 ChangeLog's text as single text variable.
# @print Textual diff between remote and local ChangeLog.
################################################################################
function changelog_diff_remote() {
################################################################################
  local changelog
  local arch_suffix
  local release_suffix
  local cache_file
  local diff_file
  local marker
  local fresh

  arch_suffix=$(c_arch "$1")
  release_suffix=$(c_release "$2")
  changelog=$3

  # Current ChangeLog file.
  cache_file=$CACHEDIR/slackware$arch_suffix$release_suffix.ChangeLog.txt
  # ChangeLog diff file.
  diff_file=$CACHEDIR/slackware$arch_suffix$release_suffix.ChangeLog.diff.txt

  mkdir -p "$CACHEDIR" || exit 1
  # Save the current ChangeLog.
  printf '%s\n' "$changelog" > "$cache_file" || exit 1

  # Create the local ChangeLog diff file if it does not exist yet.
  # Let's start with the Epoch Time (TM).
  if [ ! -f "$diff_file" ]; then
    # LC_ALL=C: the default output format of date depends on the locale, and
    # only the C locale format ("Thu Jan  1 00:00:00 UTC 1970") is certain to
    # pass the length check below and to look like a ChangeLog date.
    marker=$(LC_ALL=C date -u -d "@0") || exit 1
    printf '%s\n' "$marker" > "$diff_file" || exit 1
  fi

  # Fetch the first line of the ChangeLog diff (UTC date).
  marker=$(head -n1 "$diff_file") || exit 1

  # Sanity check (UTC date string length).
  if [ ${#marker} -ne 28 ]; then
    echoerr "$0: malformed local diff file: $diff_file"
    exit 1
  fi

  # Outputs text till first line equal to the marker (excluding that line).
  # The marker originates from downloaded data, so it is compared literally
  # (passed through the environment, never spliced into a sed/awk program).
  # Appending "" forces a string comparison even for numeric-looking lines.
  fresh=$(SLACKCHLOG_MARKER=$marker awk \
    '($0 "") == (ENVIRON["SLACKCHLOG_MARKER"] "") { exit } { print }' \
    <<< "$changelog")

  if [ -n "$fresh" ]; then
    printf '%s\n' "$fresh" > "$diff_file" || exit 1
    printf '%s\n' "$fresh"
  elif [ -n "$VERBOSE" ]; then
    echoerr "nothing to show"
  fi
}

################################################################################
# @brief Prints textual diff for local ChangeLog, that is, the diff file saved
# in the cache directory for the given architecture and release. Reads the
# global variables CACHEDIR and VERBOSE.
# @param $1 Architecture (x86, x86_64, etc.).
# @param $2 Release (current, 14.1, etc.).
# @print Textual diff for local ChangeLog.
################################################################################
function changelog_diff_local() {
################################################################################
  local diff_path

  # ChangeLog diff file.
  diff_path=$CACHEDIR/slackware$(c_arch "$1")$(c_release "$2").ChangeLog.diff.txt

  if [ ! -f "$diff_path" ]; then
    # No saved diff; this is only worth mentioning in verbose mode.
    if [ -n "$VERBOSE" ]; then
      echoerr "no local diff available: $diff_path"
    fi
  else
    cat "$diff_path" || exit 1
  fi
}

################################################################################
# @brief Converts ChangeLog from TEXT to RSS format. RSS item body is composed
# of the text found between opening (UTC date) and
# closing ('+', 26 '-', '+' signs) tags. Text found outside of an item (before
# the first opening tag or between a closing tag and the next opening tag) is
# skipped, as is a closing tag without a matching opening tag.
# @param $1 Architecture (x86, x86_64, etc.).
# @param $2 Release (current, 14.1, etc.).
# @param $3 ChangeLog's text as single text variable.
# @print Converted ChangeLog in RSS format.
################################################################################
function changelog_rss() {
################################################################################
  local changelog
  local arch
  local release
  local suffix
  local open_re
  local close_tag
  local rss
  local line
  local item_date
  local guid
  local pub_date

  arch=$1
  release=$2
  changelog=$3

  suffix=$(c_arch "$arch")$(c_release "$release")

  # Escape HTML special characters
  # (http://stackoverflow.com/questions/12873682/short-way-to-escape-html-in-bash)
  # An '&' is escaped unless it already starts '&amp;'. sed has no look-ahead,
  # so '&amp;' is parked as a newline (which cannot occur inside sed's pattern
  # space here) while the remaining '&' are escaped, and is restored afterwards.
  changelog=$(sed \
    -e 's/&amp;/\n/g' \
    -e 's/&/\&amp;/g' \
    -e 's/\n/\&amp;/g' \
    -e 's/</\&lt;/g' \
    -e 's/>/\&gt;/g' \
    -e 's/"/\&quot;/g' \
    -e "s/'/\\&#39;/g" <<< "$changelog")

  # Item opening tag (UTC date).
  open_re="^(Mon|Tue|Wed|Thu|Fri|Sat|Sun) (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) {1,2}[0-9]{1,2} [0-9]{2}:[0-9]{2}:[0-9]{2} UTC [0-9]{4}$"
  # Item closing tag ('+', 26 '-', '+' signs).
  close_tag="+--------------------------+"

  # Make sure there is a closing tag at the end of the ChangeLog's text.
  changelog=$changelog$'\n'$close_tag

  # RSS feed header.
  rss="<?xml version=\"1.0\" encoding=\"iso-8859-1\"?>"$'\n'
  rss+="<rss version=\"2.0\" xmlns:atom=\"http://www.w3.org/2005/Atom\">"$'\n'
  rss+="  <channel>"$'\n'
  rss+="    <title>slackware$suffix</title>"$'\n'
  rss+="    <link />"$'\n'
  rss+="    <description>Slackware$suffix ChangeLog</description>"$'\n'
  rss+="    <language>en</language>"$'\n'
  rss+="    <docs>http://www.rssboard.org/rss-specification</docs>"$'\n'
  rss+="    <generator>slackchlog</generator>"$'\n'

  # Scan ChangeLog's text for opening and closing tags and generate RSS item's body.
  while IFS= read -r line; do

    # An empty item_date means no item is open: look for an opening tag and
    # skip everything else, so that nothing ends up outside of an <item>.
    if [[ -z "$item_date" ]]; then
      [[ "$line" =~ $open_re ]] || continue

      item_date=$line
      # Some (all?) feed readers (e.g. Liferea 1.8.16) treat the item GUID
      # globally, even if the items belong to different subscriptions. This
      # means that, if two items, each in different subscription, have the same
      # GUID, then marking one of them as read/unread also marks the other item
      # (belonging to different subscription) as read/unread.
      # Composing the GUID of date, arch, and release solves the problem.
      printf -v guid '0x%016x-%s-%s' "$(date -d "$item_date" "+%s")" "$arch" "$release"
      # Date in RFC format as expected by RSS feed readers.
      pub_date=$(date -Rd "$item_date")

      # RSS item header.
      rss+="    <item>"$'\n'
      rss+="      <title>slackware$suffix changes for $item_date</title>"$'\n'
      rss+="      <guid>$guid</guid>"$'\n'
      rss+="      <pubDate>$pub_date</pubDate>"$'\n'
      rss+="      <description><![CDATA[<pre>"

      continue
    fi

    # An item is open: every line that is not the closing tag is item body.
    if [[ "$line" != "$close_tag" ]]; then
      rss+="$line"$'\n'
    # Closing tag has been found.
    else
      # Re-enable looking for opening tag in the next loop iteration.
      item_date=
      # Remove possible new line at the end of the item so that the item
      # looks prettier.
      [[ "${rss: -1}" == $'\n' ]] && rss=${rss%?}
      rss+="</pre>]]></description>"$'\n'
      rss+="    </item>"$'\n'
    fi

  done <<< "$changelog"

  # RSS feed footer.
  rss+="  </channel>"$'\n'
  rss+="</rss>"$'\n'

  # Print the ChangeLog in RSS format.
  printf '%s' "$rss"
}

################################################################################
## main() {
################################################################################
# Defaults; most of them can be overridden from the command line.
VERSION=0.1
LOGTYPE=text-diff-remote
ARCH=
RELEASE=current
TIMEOUT=10
VERBOSE=
CACHEDIR=
MIRROR_FILE=
CHECKSUMURL=
CHECKSUM=
CHANGELOGURL=
CHANGELOGTEXT=

# Default ChangeLog's URLs for given arch.
declare -A MIRROR
MIRROR[x86]="http://ftp.osuosl.org/pub/slackware/"
MIRROR[x86_64]="http://ftp.osuosl.org/pub/slackware/"
MIRROR[arm]="http://ftp.arm.slackware.com/slackwarearm/"

get_command_line "$@"

# No --arch given: fall back to the architecture of the running machine.
if [[ -z "$ARCH" ]]; then
  ARCH=$(detect_arch)
fi

check_arch    "$ARCH"
check_release "$RELEASE"
check_timeout "$TIMEOUT"

if [[ -z "$CACHEDIR" ]]; then
  if [[ -z "$HOME" ]]; then
    echoerr "$0: '\$HOME' is not set and no '--cache-dir' given"
    exit 1
  fi

  CACHEDIR=$HOME/.cache/slackware.ChangeLog
fi

# An explicitly given mirror file must be usable (load_mirror exits
# otherwise); the default mirror file is used only if it exists. Without
# either of them the built-in mirrors stay in effect.
if [[ -n "$MIRROR_FILE" ]]; then
  load_mirror "$ARCH" "$MIRROR_FILE"
elif [[ -f "$HOME/.config/slackchlog.mirror.txt" ]]; then
  MIRROR_FILE=$HOME/.config/slackchlog.mirror.txt
  load_mirror "$ARCH" "$MIRROR_FILE"
fi

# arch/release dependent repository checksum URL.
CHECKSUMURL=${MIRROR[$ARCH]}slackware$(c_arch "$ARCH")$(c_release "$RELEASE")/CHECKSUMS.md5
# arch/release dependent ChangeLog's URL.
CHANGELOGURL=${MIRROR[$ARCH]}slackware$(c_arch "$ARCH")$(c_release "$RELEASE")/ChangeLog.txt

if [[ -n "$VERBOSE" ]]; then
  echoerr "type:         $LOGTYPE"
  echoerr "arch:         $ARCH"
  echoerr "release:      $RELEASE"
  echoerr "timeout:      $TIMEOUT"
  echoerr "cache dir:    $CACHEDIR"
  echoerr "mirror file:  ${MIRROR_FILE:-built-in}"
  echoerr "md5:          $CHECKSUMURL"
  echoerr "url:          $CHANGELOGURL"
  echoerr ""
fi

# Both remote actions need the verified text of the remote ChangeLog.
if [[ "$LOGTYPE" == text-diff-remote || "$LOGTYPE" == rss-remote ]]; then
  CHECKSUM=$(download_checksum "$CHECKSUMURL") || exit 1
  if [[ -n "$VERBOSE" ]]; then
    echoerr ""
    echoerr "checksum: $CHECKSUM"
    echoerr ""
  fi

  # Download ChangeLog into single text variable.
  # See Note at the end of the script (preventing trailing newlines removal).
  CHANGELOGTEXT="$(download_changelog "$CHANGELOGURL" && printf "x")" || exit 1
  # Remove the 'x' guard from the end of the string ...
  CHANGELOGTEXT=${CHANGELOGTEXT%x}
  # ... and the ChangeLog's final newline: the text is later fed to md5sum
  # through a here-string and written to the cache with a trailing newline,
  # both of which put that newline back. Only a newline is removed, so a
  # download that does not end with one loses no data (and an empty download
  # no longer triggers a substring error).
  CHANGELOGTEXT=${CHANGELOGTEXT%$'\n'}

  [[ -n "$VERBOSE" ]] && echoerr ""
  check_checksum "$CHECKSUM" "$CHANGELOGTEXT" || exit 1
  [[ -n "$VERBOSE" ]] && echoerr ""
fi

# Dispatch the requested action.
case "$LOGTYPE" in
  text-diff-remote)
    changelog_diff_remote "$ARCH" "$RELEASE" "$CHANGELOGTEXT"
    ;;
  text-diff-local)
    changelog_diff_local "$ARCH" "$RELEASE"
    ;;
  rss-remote)
    changelog_rss "$ARCH" "$RELEASE" "$CHANGELOGTEXT"
    ;;
esac

################################################################################
# }
################################################################################

# Note:
# When I started writing this script, I came up with the brilliant idea of
# using the smallest possible number of disk files. For that reason, the
# ChangeLog is downloaded and stored in a variable. But as it turned out later
# (after a couple of hours of debugging), command substitution $() removes
# trailing newlines during assignment. In consequence, the trailing newlines
# (if there were several) were lost (a ChangeLog normally ends with a single
# newline, but some of them end with multiple newlines) and the md5 check was
# failing. To overcome the issue, an "x" is appended with "printf x" inside the
# command substitution and removed from the end of the string afterwards.
