Skip to content
This repository has been archived by the owner on Aug 23, 2022. It is now read-only.

Fix bash script to break files #113

Merged
merged 1 commit into from
Dec 21, 2016
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 11 additions & 25 deletions inst/pipeline/break_into.sh
Original file line number Diff line number Diff line change
@@ -1,12 +1,4 @@
#!/bin/bash

# Detect which sed flavour is installed: the test succeeds when the output of
# `sed --help` (stdout and stderr combined) contains the string "GNU".
# Either way a sed_i wrapper for in-place editing is defined, and the result
# is recorded in gnused (1 = GNU, 0 = anything else).
if [[ $(sed --help 2>&1 | grep GNU) ]]; then
# GNU branch: -i is passed on its own, followed by the caller's arguments.
sed_i () { sed -i "$@"; }
gnused=1
else
# Non-GNU branch: -i is followed by an explicit empty-string argument
# (presumably the empty backup suffix that BSD/macOS sed requires - confirm).
sed_i () { sed -i '' "$@"; }
gnused=0
fi
#!/bin/bash -xe

if [[ $# < 2 ]]
then
Expand All @@ -19,28 +11,22 @@ default_ext='partxml'
# <d:xx> or <xx> file?
dchar="d:"
dchar_exist=$(head -2 $1 | grep -c "<${dchar}")
if [ ${dchar_exist} -eq 0 ]
then
dchar=""
fi
[ ${dchar_exist} -eq 0 ] && dchar=""

subject="${dchar}subject"

# Add lines before and after each subject starts
sed -e 's|<'"${subject}"'>|\n<'"${subject}"'>\n|' ${1} > ${1}.tmp
sed -i.orig -e 's|<'"${subject}"'>|\n<'"${subject}"'>\n|' ${1}

# Replace the closing subject tag with a different marker - <cut_here> -
# so that it is not counted by the awk pattern below.
if [[ $gnused == 1 ]]; then
echo "gnu here"
sed -e 's|</'"${subject}"'>|<cut_here>\n|' ${1} > ${1}.tmp
else
sed -e 's|</'"${subject}"'>|<cut_here>\'$'\n|' ${1} > ${1}.tmp
fi
sed -i -e 's|</'"${subject}"'>|<cut_here>\n|' ${1}

# Remove output chunks left over from a previous run.
# Only the first chunk (${1}_0.${default_ext}) is tested for; when it exists,
# every path matching the glob ${1}*${default_ext} is deleted.
if [[ -e ${1}_0.${default_ext} ]]; then
rm ${1}*${default_ext}
fi

# Break the file into chunks where <${subject}> occurs.
# Each time <subject> is found, delim will increase
# if delim/maxpatients (2nd argument) == 1 then
Expand All @@ -51,7 +37,7 @@ awk 'BEGIN {delim=-1} \
/\<'"${subject}"'\>/ { delim++ } \
{file = sprintf("'${1}'_%s.'${default_ext}'", int(delim/'${2}'));\
print >> file; } \
END { print "'${1}' has ", delim+1, "subjects"}' ${1}.tmp
END { print "'${1}' has ", delim+1, "subjects"}' ${1}


# extract the header of the file with its meta.
Expand All @@ -60,10 +46,10 @@ awk 'BEGIN {delim=-1} \
# - remove all non-printing characters - it seems there's one that makes the
# insertion fail afterwards.
# head won't work because some files run over multiple lines
# firstlines=$(sed -n '1,/<'"${subject}"'>/p' ${1}_0.${default_ext} | \
# sed 's/<'"${subject}"'>//' | tr -dc '[:print:]')
firstlines=$(sed -n 's/\(.*\)<'"${subject}"'>/\1/p' ${1}_0.${default_ext} | tr -dc '[:print:]')
firstlines=$(sed -n '1,/<'"${subject}"'>/p' ${1}_0.${default_ext} | \
sed 's/<'"${subject}"'>//' | tr -dc '[:print:]')

# TODO: extract the footer automatically.
lastline="</${dchar}data></${dchar}context></${dchar}document>"
nfiles=$(ls "${1}"_* | wc -l)

Expand All @@ -89,4 +75,4 @@ do
done

# Remove the temporary file used
rm ${1}.tmp
mv ${1}.orig ${1}