generate

#!/bin/bash

# Print the command-line help text to stdout.
#
# The text is an unquoted here-document, so $0 expands to the name the script
# was invoked with. Callers that report an error redirect this to stderr.
usage() {
  cat <<USAGE
Usage: $0 [options]
       $0 -?|-h|--help|help

Convert a Confluence XML space export to GitHub flavoured Markdown pages.

Options

--confluence-url URL  Sets the URL for Confluence links. This should be the
              -u URL  root URL, as the transformation scripts will add /wiki
                      as required.
--jira-url URL        Sets the URL for JIRA links. Defaults to the Confluence
        -j URL        URL. The transformation scripts will add /browse as
                      required for ticket links.
--output PATH         Sets the output path.
      -o PATH
--input PATH          Sets the input path.
     -i PATH

--format, -F          If prettier is installed, runs prettier on the output.

--skip-extract, -E    Skip the page extraction and image mapping step.
--skip-images, -I     Skip the image copy step.
--skip-markdown, -M   Skip the page conversion step.
--clean, -C           Clean the output directory before execution. This will
                      only clean the output required for each step.
--force, -f           Force processing.
--debug, -d           Enable debug output. May be repeated to increase debug
                      level.

Debug Levels

0  (default)          No debug logging.
1  (-d)               Tracing will be enabled to show the parameters to
                      xsltproc.
2  (-d -d)            The output of the image copy mapping script will be
                      printed before execution. Unless --force is provided,
                      confirmation will be required.
3  (-d -d -d)         xsltproc verbose output will be enabled and the output
                      will be saved in 'OUTPUT_PATH/log'. This logging is very
                      verbose.
USAGE
}

# Global configuration shared by every step of the script.
#
# Each variable is given an explicit starting value so that a same-named
# variable inherited from the caller's environment cannot change behaviour.

# Extra arguments passed to every xsltproc invocation.
declare -a xslt_params=()

# Debug level; incremented once per --debug / -d on the command line.
declare -i debug=0

declare extract_pages copy_images convert_pages input_path output_path \
  force script_path confluence_url jira_url clean format
clean=false
confluence_url=''
convert_pages=true
copy_images=true
extract_pages=true
force=false
format=false
input_path=.
jira_url=''
output_path=out

# Physical directory containing this script. CDPATH is cleared for the cd
# because, when it is set, cd may print the directory it selected to stdout,
# which would end up inside the captured value.
script_path="$(
  CDPATH='' cd -- "$(dirname -- "$0")" || exit 1
  pwd -P
)" || {
  echo >&2 "Error setting script path."
  exit 1
}

# Abort with a readable message when an option is missing its value.
# Arguments: $1 - the option being parsed, $2 - its value (may be absent).
require_value() {
  if [[ -z "${2-}" ]]; then
    echo >&2 "Option '$1' requires a value."
    exit 1
  fi
}

# Parse the command line into the global configuration variables.
#
# Globals written: clean, debug, force, extract_pages, copy_images,
#                  convert_pages, format, confluence_url, jira_url,
#                  output_path, input_path, xslt_params.
# Arguments:       the script's command-line arguments.
# Exits:           0 after printing help; 1 on an unknown option, a missing
#                  option value, or an input path that is not a directory.
parse_args() {
  while (($#)); do
    case "$1" in
    -\? | -h | --help | help)
      usage
      exit 0
      ;;
    --clean | -C) clean=true ;;
    --debug | -d) debug=$((debug + 1)) ;;
    --force | -f) force=true ;;
    --skip-extract | -E) extract_pages=false ;;
    --skip-images | -I) copy_images=false ;;
    --skip-markdown | -M) convert_pages=false ;;
    --format | -F) format=true ;;
    --confluence-url | -u)
      require_value "$1" "${2-}"
      confluence_url="$2"
      xslt_params+=(--stringparam confluence-url "$2")
      shift
      ;;
    --jira-url | -j)
      require_value "$1" "${2-}"
      jira_url="$2"
      xslt_params+=(--stringparam jira-url "$2")
      shift
      ;;
    --output | -o)
      require_value "$1" "${2-}"
      output_path="$2"
      shift
      ;;
    --input | -i)
      require_value "$1" "${2-}"
      if ! [[ -d "$2" ]]; then
        echo >&2 "Input path $2 does not exist."
        exit 1
      fi

      input_path="$2"
      shift
      ;;
    *)
      echo >&2 "Unknown parameter or options '$1'."
      exit 1
      ;;
    esac

    shift
  done
}

parse_args "$@"

# Any debug level at all is forwarded to the stylesheets as debug=true.
if ((debug >= 1)); then
  xslt_params+=(--stringparam debug true)
fi

# xsltproc does all of the transformation work, so stop early without it.
command -v xsltproc >/dev/null 2>&1 || {
  echo >&2 "This requires xsltproc to be present."
  echo >&2 "Check your system instructions for installing libxslt and/or xsltproc."
  exit 1
}

# The input directory must hold a non-empty entities.xml.
[[ -s "${input_path}/entities.xml" ]] || {
  echo >&2 "Has a confluence space export been unzipped into ${input_path}?"
  echo >&2 "Cannot find 'entities.xml' in that directory."
  usage >&2
  exit 1
}

# Refuse to extract on top of pages left by a previous run unless the user
# opted in with --force or --clean.
#
# The guard applies only when the extraction step is going to run: when it is
# skipped, the existing pages are the input for the later steps. It looks for
# actual page files rather than the directory, because this script creates
# the page-xml directory itself even when nothing ends up being extracted.
if "${extract_pages}" && ! "${force}" && ! "${clean}"; then
  declare -a existing_pages=("${output_path}"/page-xml/*.xml)

  # An unmatched glob stays literal, so -e is false when no page exists.
  if [[ -e "${existing_pages[0]}" ]]; then
    echo >&2 "Extracted pages already exist. You may want to remove"
    echo >&2 "${output_path} before generating."
    echo >&2 "Halting. Use --force or --clean to continue anyway."
    usage >&2
    exit 1
  fi
fi

# Tell the stylesheets where the input, output and script directories are.
xslt_params+=(
  --stringparam input-path "${input_path}"
  --stringparam output-path "${output_path}"
  --stringparam script-path "${script_path}"
)

# Print a summary of the resolved paths, URLs and enabled steps to stdout.
#
# Globals read: input_path, output_path, confluence_url, jira_url,
#               extract_pages, copy_images, convert_pages, force.
print_summary() {
  # force always holds the non-empty string "true" or "false", so a
  # ${force:+...} expansion would print the note unconditionally; compare
  # the value instead.
  local overwrite_note=''
  if [[ "${force}" == true ]]; then
    overwrite_note='Overwrite enabled.'
  fi

  cat <<STEPS
Input          : ${input_path}/entities.xml
Output         : ${output_path}
Confluence URL : ${confluence_url:-Not supplied}
JIRA URL       : ${jira_url:-Not supplied}

Steps
        Extract: ${extract_pages}
     Image Copy: ${copy_images}
  Convert Pages: ${convert_pages}

${overwrite_note}

STEPS
}

print_summary

# Run a command, optionally redirecting its output, and exit the shell if it
# fails.
#
# Arguments:
#   capture=FILE  write the command's stdout to FILE
#   log=FILE      write the command's stderr to FILE
#   anything else is taken, in order, as the command and its arguments
# Globals read: debug
# Outputs: when debug > 0 the command line is traced to stderr. stdout is
#          deliberately left alone because callers pipe or capture it, and a
#          trace written there would be consumed as if it were command output.
# Exits:   with the command's status when that status is non-zero. When run
#          is used inside a pipeline this only ends the pipeline's subshell.
run() {
  local -a cmd=()
  local -i status=0
  local log='' capture=''
  local arg

  for arg in "$@"; do
    case "${arg}" in
    log=*) log="${arg#log=}" ;;
    capture=*) capture="${arg#capture=}" ;;
    *) cmd+=("${arg}") ;;
    esac
  done

  if ((debug > 0)); then
    printf '%s\n' "${cmd[*]}" >&2
  fi

  if [[ -n "${log}" && -n "${capture}" ]]; then
    "${cmd[@]}" >"${capture}" 2>"${log}"
  elif [[ -n "${capture}" ]]; then
    "${cmd[@]}" >"${capture}"
  elif [[ -n "${log}" ]]; then
    "${cmd[@]}" 2>"${log}"
  else
    "${cmd[@]}"
  fi

  # $? here is the status of whichever branch ran the command.
  status=$?

  if ((status > 0)); then
    echo >&2 "Error ${status} running: ${cmd[*]}"
    exit "${status}"
  fi
}

# Create every output directory up front, whichever steps are enabled.
mkdir -p "${output_path}"/{page-xml,logs,wiki/images}

# Step 1: run entities.xsl over the export's entities.xml.
if "${extract_pages}"; then
  if "${clean}"; then
    rm -rf "${output_path}/page-xml"
  fi
  mkdir -p "${output_path}/page-xml" "${output_path}/logs"
  printf "Extracting page XML and image mapping\n"

  # Arguments shared by the verbose and the normal invocation.
  declare -a extract_args=(
    "${xslt_params[@]}"
    entities.xsl
    "${input_path}"/entities.xml
  )

  if ((debug > 2)); then
    # Debug level 3: verbose xsltproc, with its stderr saved to a log file.
    run xsltproc -v "${extract_args[@]}" log="${output_path}/logs/entities.log"
  else
    run xsltproc "${extract_args[@]}"
  fi

  printf "Pages extracted.\n\n"
fi

# Step 2: transform image-mappings.xml with image-mappings.xsl and feed the
# result to bash, which performs the actual copy.
if "${copy_images}"; then
  "${clean}" && rm -rf "${output_path}/wiki/images"
  mkdir -p "${output_path}"/{wiki/images,logs}

  if ! [[ -s "${output_path}"/image-mappings.xml ]]; then
    echo >&2 "${output_path}/image-mappings.xml does not exist."
    echo >&2 "The page extraction step needs to be run at least once."
    exit 1
  fi

  if ! [[ -d "${input_path}/attachments" ]]; then
    echo >&2 "Warning: Cannot find ${input_path}/attachments. Attachment copy may fail."
  fi

  printf "Copying images from attachments\n"

  if ((debug > 1)); then
    # Debug level 2+: show the generated script before it is executed.
    xsltproc "${xslt_params[@]}" image-mappings.xsl "${output_path}/image-mappings.xml"
    # The result of read is not checked, so the copy goes ahead after RETURN
    # or once the 5 second timeout expires.
    "${force}" || read -t 5 -r -p "Press RETURN within 5 seconds to continue"
  fi

  # Build the generator command once; debug level 3 adds verbose output and
  # sends xsltproc's stderr to a log file via run's log= argument.
  declare -a mapping_cmd=(xsltproc)
  if ((debug > 2)); then
    mapping_cmd+=(-v)
  fi
  mapping_cmd+=(
    "${xslt_params[@]}"
    image-mappings.xsl
    "${output_path}/image-mappings.xml"
  )
  if ((debug > 2)); then
    mapping_cmd+=(log="${output_path}/logs/image-mapping.log")
  fi

  run "${mapping_cmd[@]}" | bash
  # Capture immediately: PIPESTATUS is overwritten by the next command.
  declare -a copy_status=("${PIPESTATUS[@]}")

  # run sits in a pipeline subshell, so its own exit cannot stop this script;
  # propagate the failure here. run has already reported it on stderr.
  if ((copy_status[0] > 0)); then
    exit "${copy_status[0]}"
  fi

  # Individual copies are best effort (see the attachments warning above), so
  # a failing copy script is reported without halting the run.
  if ((copy_status[1] > 0)); then
    echo >&2 "Warning: the image copy script exited with status ${copy_status[1]}."
  fi

  printf "Images copied.\n\n"
fi

# Step 3: convert every extracted page XML file to Markdown with page.xsl,
# then optionally format the result with prettier.
if "${convert_pages}"; then
  "${clean}" && rm -f "${output_path}"/wiki/*.md

  printf "Converting pages to markdown\n"

  # An unmatched glob stays literal. Without this check the loop would run
  # once for the pattern itself and create a file literally named '*.md'
  # before xsltproc failed on the missing input.
  declare -a page_files=("${output_path}"/page-xml/*.xml)
  if ! [[ -e "${page_files[0]}" ]]; then
    echo >&2 "No extracted pages found in ${output_path}/page-xml."
    echo >&2 "The page extraction step needs to be run at least once."
    exit 1
  fi

  for page in "${page_files[@]}"; do
    # File name without its directory; the .xml suffix is swapped below.
    xml="${page##*/}"
    markdown="${output_path}/wiki/${xml%.xml}.md"
    log="${output_path}/logs/${xml%.xml}.log"

    if ((debug > 2)); then
      # Debug level 3: verbose xsltproc, with a separate log file per page.
      run xsltproc -v --path "${PWD}" \
        "${xslt_params[@]}" \
        page.xsl "${page}" capture="${markdown}" log="${log}"
    else
      run xsltproc --path "${PWD}" \
        "${xslt_params[@]}" \
        page.xsl "${page}" capture="${markdown}"
    fi
  done

  printf "Pages converted to markdown.\n"

  if "${format}" && command -v prettier >/dev/null 2>/dev/null; then
    printf "Formatting markdown pages\n"

    # prettier runs a second time only if the first pass succeeded; the
    # output and exit status of both passes are discarded.
    prettier -w "${output_path}/wiki" >/dev/null 2>/dev/null &&
      prettier -w "${output_path}/wiki" >/dev/null 2>/dev/null

    printf "Formatted markdown.\n"
  fi
fi