#!/bin/bash
#
# Source: CofeehousePy/services/corenlp/scripts/sentiment/convert_sentiment.sh

# Convert five class sentiment data into text files of the type you
# might use a sentence classifier for.
# The default directories are what I set up on my local machine.
# -i and -o change the input and output dirs.
# Default locations of the input treebank and of the converted output.
# Both can be overridden on the command line with -i and -o.
INPUT_DIR=extern_data/sentiment/sentiment-treebank
OUTPUT_DIR=extern_data/sentiment/sst-processed

#######################################
# Parse the command-line options.
# Globals:   INPUT_DIR (written by -i), OUTPUT_DIR (written by -o)
# Arguments: the script's command-line arguments
# Outputs:   a usage line on stderr when an option is rejected
# Returns:   0 on success, 2 on an unknown option or a missing argument
#######################################
parse_options() {
  local OPTION OPTARG
  # A local OPTIND lets the function be called more than once in one shell.
  local OPTIND=1
  while getopts "i:o:" OPTION; do
    case "$OPTION" in
      i)
        INPUT_DIR=$OPTARG
        ;;
      o)
        OUTPUT_DIR=$OPTARG
        ;;
      *)
        # getopts has already reported the offending option on stderr;
        # stop here instead of silently running with the default directories.
        printf 'Usage: %s [-i input_dir] [-o output_dir]\n' "${0##*/}" >&2
        return 2
        ;;
    esac
  done
  return 0
}

#######################################
# Create the binary, fiveclass and threeclass directories under OUTPUT_DIR.
# Globals:   OUTPUT_DIR (read)
# Returns:   the exit status of mkdir
#######################################
make_output_dirs() {
  # Quoted so that a directory containing whitespace stays a single path.
  mkdir -p -- \
    "$OUTPUT_DIR/binary" \
    "$OUTPUT_DIR/fiveclass" \
    "$OUTPUT_DIR/threeclass"
}

parse_options "$@" || exit 2
printf '%s\n' "INPUT DIR: $INPUT_DIR"
printf '%s\n' "OUTPUT DIR: $OUTPUT_DIR"
make_output_dirs
#######################################
# Run OutputSubtrees on the fiveclass train/dev/test files, first without
# and then with -root_only, writing <split>-phrases.txt and <split>-roots.txt
# into the fiveclass output directory. Each output path is printed before
# the file is produced.
# Globals:   INPUT_DIR, OUTPUT_DIR (read)
# Outputs:   the path of every output file, one per line, on stdout
# Returns:   the exit status of the last java invocation
#######################################
convert_fiveclass() {
  local split output
  for split in train dev test; do
    output="$OUTPUT_DIR/fiveclass/${split}-phrases.txt"
    printf '%s\n' "$output"
    # Paths are quoted so directories containing whitespace still work,
    # both as the -input argument and as the redirect target.
    java edu.stanford.nlp.trees.OutputSubtrees \
      -input "$INPUT_DIR/fiveclass/${split}.txt" > "$output"
  done
  for split in train dev test; do
    output="$OUTPUT_DIR/fiveclass/${split}-roots.txt"
    printf '%s\n' "$output"
    java edu.stanford.nlp.trees.OutputSubtrees \
      -input "$INPUT_DIR/fiveclass/${split}.txt" -root_only > "$output"
  done
}

convert_fiveclass
#######################################
# Run OutputSubtrees on the fiveclass input files with label 2 passed to
# -ignore_labels and the remaining labels remapped, writing the results
# into the binary output directory. Phrase files are written for train,
# dev and test; -root_only files are written for dev and test only.
# Each output path is printed before the file is produced.
# Globals:   INPUT_DIR, OUTPUT_DIR (read)
# Outputs:   the path of every output file, one per line, on stdout
# Returns:   the exit status of the last java invocation
#######################################
convert_binary() {
  # NOTE(review): presumably folds labels 0/1 into class 0 and 3/4 into
  # class 1 while dropping label 2 -- confirm against OutputSubtrees.
  local -r remap="1=0,2=-1,3=1,4=1"
  local split output
  for split in train dev test; do
    output="$OUTPUT_DIR/binary/${split}-binary-phrases.txt"
    printf '%s\n' "$output"
    # Paths are quoted so directories containing whitespace still work,
    # both as the -input argument and as the redirect target.
    java edu.stanford.nlp.trees.OutputSubtrees \
      -input "$INPUT_DIR/fiveclass/${split}.txt" \
      -ignore_labels 2 -remap_labels "$remap" > "$output"
  done
  for split in dev test; do
    output="$OUTPUT_DIR/binary/${split}-binary-roots.txt"
    printf '%s\n' "$output"
    java edu.stanford.nlp.trees.OutputSubtrees \
      -input "$INPUT_DIR/fiveclass/${split}.txt" -root_only \
      -ignore_labels 2 -remap_labels "$remap" > "$output"
  done
}

convert_binary
#######################################
# Run OutputSubtrees on the fiveclass input files with the five labels
# remapped onto 0, 1 and 2, writing the results into the threeclass
# output directory. Phrase files are written for train, dev and test;
# -root_only files are written for dev and test only. Each output path is
# printed before the file is produced.
# Globals:   INPUT_DIR, OUTPUT_DIR (read)
# Outputs:   the path of every output file, one per line, on stdout
# Returns:   the exit status of the last java invocation
#######################################
convert_threeclass() {
  # Labels 0 and 1 map to 0, label 2 maps to 1, labels 3 and 4 map to 2.
  local -r remap="0=0,1=0,2=1,3=2,4=2"
  local split output
  for split in train dev test; do
    output="$OUTPUT_DIR/threeclass/${split}-threeclass-phrases.txt"
    printf '%s\n' "$output"
    # Paths are quoted so directories containing whitespace still work,
    # both as the -input argument and as the redirect target.
    java edu.stanford.nlp.trees.OutputSubtrees \
      -input "$INPUT_DIR/fiveclass/${split}.txt" \
      -remap_labels "$remap" > "$output"
  done
  for split in dev test; do
    output="$OUTPUT_DIR/threeclass/${split}-threeclass-roots.txt"
    printf '%s\n' "$output"
    java edu.stanford.nlp.trees.OutputSubtrees \
      -input "$INPUT_DIR/fiveclass/${split}.txt" -root_only \
      -remap_labels "$remap" > "$output"
  done
}

convert_threeclass