
# To use this script under Windows:
# 1. install a Unix-like environment that provides bash and awk, e.g. Cygwin
# 2. set "src_dir" to the path of the folder containing the UNZIPPED quote and trade files
# 3. set "des_dir" to the path of the output folder
# 4. set "ticker", "year" and "month"
# 5. save the script and run it with bash (it uses bash-only syntax)
# 6. problems? contact polaktox@wiwi.hu-berlin.de

# Input folder: contains the unzipped quote and trade files,
# e.g. "GOOG_2008_01_q.txt" and "GOOG_2008_01_t.txt".
# Example: src_dir="/cygdrive/d/RTAQ"
src_dir='/cygdrive/...'

# Output folder: the new folder structure for the split files is created here.
# Example: des_dir="/cygdrive/d/RTAQ/split"
des_dir='/cygdrive/.../split'

# Ticker, year and month of the files to process; they are combined into the
# input file names <ticker>_<year>_<month>_t.txt and <ticker>_<year>_<month>_q.txt.
ticker='AA'
year='2008'
month='01'

# Create the output root folder. With -p this is a silent no-op when the folder
# already exists (e.g. when the script is re-run for another ticker or month),
# and missing parent folders are created too. The path is quoted so that
# folders containing spaces work.
mkdir -p -- "$des_dir"

# Create one folder per day, named <year>-<month>-<DD>. Days 01..31 are always
# created regardless of the month, so folders for weekends, holidays and
# non-existent dates are created as well.
for (( nday = 1; nday <= 31; nday++ )); do
  # Zero-pad the day number to two digits (1 -> 01).
  printf -v day '%02d' "$nday"
  mkdir -p -- "$des_dir/$year-$month-$day"
done

# Remove the first line (the header) from the monthly trades ("_t") and quotes
# ("_q") files. The remaining lines are written to intermediate "_tt" / "_qq"
# files in the same folder; the input files themselves are not modified here.
# Paths are written as fully double-quoted strings so that folders containing
# spaces work and no bash-specific $"..." (locale translation) quoting is used.
awk 'FNR>1{print}' "$src_dir/${ticker}_${year}_${month}_t.txt" > "$src_dir/${ticker}_${year}_${month}_tt.txt"
awk 'FNR>1{print}' "$src_dir/${ticker}_${year}_${month}_q.txt" > "$src_dir/${ticker}_${year}_${month}_qq.txt"

# Split the header-less trades ("_tt") and quotes ("_qq") files into the
# per-day folder structure. Every record is written to
#   <des_dir>/<year>-<month>-<DD>/<ticker>_trades.txt   (trades)
#   <des_dir>/<year>-<month>-<DD>/<ticker>_quotes.txt   (quotes)
# where <DD> is taken from the record itself: field 3 of a trades record
# (fields separated by runs of spaces/hyphens) and field 4 of a quotes record
# (fields separated by runs of tabs/hyphens).
# NOTE(review): presumably that field is the day-of-month of the record's
# date - confirm against the actual file layout.
#
# Shell values are handed to awk with -v instead of being spliced into the
# program text, so paths containing spaces cannot break the awk program.
# (awk interprets backslash escapes in -v values; use forward-slash paths.)
# The output path is built in a variable first because an unparenthesized
# concatenation after ">" is ambiguous in awk.
awk -F '[ -]+' \
  -v des_dir="$des_dir" -v year="$year" -v month="$month" -v ticker="$ticker" '
  {
    out = des_dir "/" year "-" month "-" $3 "/" ticker "_trades.txt"
    print > out
  }
' "$src_dir/${ticker}_${year}_${month}_tt.txt"
awk -F '[\t-]+' \
  -v des_dir="$des_dir" -v year="$year" -v month="$month" -v ticker="$ticker" '
  {
    out = des_dir "/" year "-" month "-" $4 "/" ticker "_quotes.txt"
    print > out
  }
' "$src_dir/${ticker}_${year}_${month}_qq.txt"

# Remove files that are no longer needed: the intermediate header-less files
# ("_tt", "_qq") and - WARNING - also the original monthly input files
# ("_t", "_q") in src_dir. Keep a copy (e.g. the zipped originals) if the
# inputs are needed again.
# Paths are fully quoted so that folders containing spaces work; "--" stops
# option parsing in case a path ever starts with "-".
rm -- "$src_dir/${ticker}_${year}_${month}_t.txt"
rm -- "$src_dir/${ticker}_${year}_${month}_tt.txt"
rm -- "$src_dir/${ticker}_${year}_${month}_q.txt"
rm -- "$src_dir/${ticker}_${year}_${month}_qq.txt"

