# Convert the five-class sentiment treebank data into plain text files of
# the kind you might feed to a sentence classifier.

# The default directories are the ones set up on the author's local machine.
# Use -i and -o to change the input and output directories.

# Default locations; override with -i (input dir) and -o (output dir).
INPUT_DIR=extern_data/sentiment/sentiment-treebank
OUTPUT_DIR=extern_data/sentiment/sst-processed

# Parse command-line flags. Both options require a directory argument.
while getopts "i:o:" OPTION; do
  case "$OPTION" in
    i)
      INPUT_DIR=$OPTARG
      ;;
    o)
      OUTPUT_DIR=$OPTARG
      ;;
    *)
      # getopts has already printed its own diagnostic for an unknown option
      # or a missing argument; stop here instead of silently carrying on
      # with the default directories.
      echo "Usage: $0 [-i input_dir] [-o output_dir]" >&2
      exit 2
      ;;
  esac
done

# Report the directories in use so the log shows where data is read from
# and written to.
echo "INPUT DIR: $INPUT_DIR"
echo "OUTPUT DIR: $OUTPUT_DIR"

# Create one output subdirectory per label scheme. The ${...:?} guard aborts
# if OUTPUT_DIR is empty or unset (e.g. -o ""), which would otherwise create
# the directories directly under the filesystem root.
mkdir -p -- \
  "${OUTPUT_DIR:?output directory must not be empty}/binary" \
  "${OUTPUT_DIR}/fiveclass" \
  "${OUTPUT_DIR}/threeclass" \
  || exit 1

# Five-class phrase files: run OutputSubtrees on each split of the
# five-class data with no label options and write <split>-phrases.txt.
# NOTE(review): presumably this emits every labeled subtree of each tree;
# confirm against the OutputSubtrees class.
for split in train dev test; do
  out="$OUTPUT_DIR/fiveclass/${split}-phrases.txt"
  # Announce the file being produced, then generate it.
  echo "$out"
  java edu.stanford.nlp.trees.OutputSubtrees \
    -input "$INPUT_DIR/fiveclass/${split}.txt" > "$out" \
    || { echo "OutputSubtrees failed while writing $out" >&2; exit 1; }
done

# Five-class root files: same as the phrase files but with -root_only, and
# written to <split>-roots.txt.
# NOTE(review): -root_only presumably restricts output to the full-sentence
# (root) label of each tree; confirm against the OutputSubtrees class.
for split in train dev test; do
  out="$OUTPUT_DIR/fiveclass/${split}-roots.txt"
  # Announce the file being produced, then generate it.
  echo "$out"
  java edu.stanford.nlp.trees.OutputSubtrees \
    -input "$INPUT_DIR/fiveclass/${split}.txt" -root_only > "$out" \
    || { echo "OutputSubtrees failed while writing $out" >&2; exit 1; }
done

# Binary outputs, derived from the five-class input files.
# Every command passes "-ignore_labels 2" together with this label map
# (1->0, 2->-1, 3->1, 4->1). Label 0 has no entry in the map.
# NOTE(review): presumably unmapped labels are kept as-is; confirm against
# the OutputSubtrees class.
binary_remap="1=0,2=-1,3=1,4=1"

# Phrase files for all three splits.
for split in train dev test; do
  out="$OUTPUT_DIR/binary/${split}-binary-phrases.txt"
  echo "$out"
  java edu.stanford.nlp.trees.OutputSubtrees \
    -input "$INPUT_DIR/fiveclass/${split}.txt" \
    -ignore_labels 2 -remap_labels "$binary_remap" > "$out" \
    || { echo "OutputSubtrees failed while writing $out" >&2; exit 1; }
done

# Root-only files. Only dev and test are produced for this scheme; train
# gets a phrases file only.
for split in dev test; do
  out="$OUTPUT_DIR/binary/${split}-binary-roots.txt"
  echo "$out"
  java edu.stanford.nlp.trees.OutputSubtrees \
    -input "$INPUT_DIR/fiveclass/${split}.txt" -root_only \
    -ignore_labels 2 -remap_labels "$binary_remap" > "$out" \
    || { echo "OutputSubtrees failed while writing $out" >&2; exit 1; }
done

# Three-class outputs, derived from the five-class input files by collapsing
# labels: 0 and 1 -> 0, 2 -> 1, 3 and 4 -> 2.
threeclass_remap="0=0,1=0,2=1,3=2,4=2"

# Phrase files for all three splits.
for split in train dev test; do
  out="$OUTPUT_DIR/threeclass/${split}-threeclass-phrases.txt"
  echo "$out"
  java edu.stanford.nlp.trees.OutputSubtrees \
    -input "$INPUT_DIR/fiveclass/${split}.txt" \
    -remap_labels "$threeclass_remap" > "$out" \
    || { echo "OutputSubtrees failed while writing $out" >&2; exit 1; }
done

# Root-only files. Only dev and test are produced for this scheme; train
# gets a phrases file only.
for split in dev test; do
  out="$OUTPUT_DIR/threeclass/${split}-threeclass-roots.txt"
  echo "$out"
  java edu.stanford.nlp.trees.OutputSubtrees \
    -input "$INPUT_DIR/fiveclass/${split}.txt" -root_only \
    -remap_labels "$threeclass_remap" > "$out" \
    || { echo "OutputSubtrees failed while writing $out" >&2; exit 1; }
done