## =======================================================================================
##
##   Command line script for the sequence analysis as presented in
##   Cadot et al. (2020) in JOURNAL
## 
##   21.03.2020 | Klaus Schlaeppi, klaus.schlaeppi@ips.unibe.ch 
##
## =======================================================================================




## =======================================================================================
## =======================================================================================
## prep | environment
## =======================================================================================
## =======================================================================================



#### MiSeq run #2 | CHANGINS 16S (Data from Hu et al. 2018, Nat Comm)
## =======================================================================================
## The bacteria at the Changins location were sequenced in the MiSeq run #2.
## This MiSeq library contains additional samples from experiments that are not related to this study.
## The raw data of MiSeq run #2 was stored at ENA under the study accession PRJEB15152 (Sample: SAMEA54297418).

## Create the data folders.
## -p also creates MiSeq_run02/ when it is missing and does not fail on a re-run.
mkdir -p MiSeq_run02/a_data/gz

## Raw files expected in /yourpath/MiSeq_run02/a_data/gz/
## (listed for reference only - these are file names, not commands):
#   s1-amplicon_S1_L001_R1_001.fastq.gz
#   s1-amplicon_S1_L001_R2_001.fastq.gz

## Copy the raw reads into the working tree
cp ../MiSeq_run02/a_data/gz/s1-amplicon_S1_L001_R1_001.fastq.gz MiSeq_run02/a_data/gz/
cp ../MiSeq_run02/a_data/gz/s1-amplicon_S1_L001_R2_001.fastq.gz MiSeq_run02/a_data/gz/



#### MiSeq run #9 | ZURICH 16S
## =======================================================================================
## The bacteria at the Zurich location were sequenced in the MiSeq run #9.
## This MiSeq library contains additional samples from experiments that are not related to this study.
## The raw data of the MiSeq run #9 is stored at ENA under the study accession ***TBD*** (Sample: ***TBD***).


## Create the data folders.
## -p also creates MiSeq_run09/ when it is missing and does not fail on a re-run.
mkdir -p MiSeq_run09/a_data/gz

## Raw files expected in /yourpath/MiSeq_run09/a_data/gz/
## (listed for reference only - these are file names, not commands):
#   MiSeq_run09_R1.fastq.gz
#   MiSeq_run09_R2.fastq.gz

## Copy the raw reads into the working tree
cp ../MiSeq_run09/a_data/gz/MiSeq_run09_R1.fastq.gz MiSeq_run09/a_data/gz/
cp ../MiSeq_run09/a_data/gz/MiSeq_run09_R2.fastq.gz MiSeq_run09/a_data/gz/



#### MiSeq run #11 | CHANGINS ITS (Data from Hu et al. 2018, Nat Comm)
#### MiSeq run #11 | ITHACA 16S (this study)
## =======================================================================================
## MiSeq run #11 contained the fungal data as presented in Hu et al. (2018) ...
## ... plus the bacterial data from the Ithaca location of this study.
## The raw data of the MiSeq run #11 is stored at ENA under the study accession PRJEB20127 (Sample: SAMEA4698767).

## Working directory: /yourpath (placeholder - change into your project folder first)

## Create the data folders.
## -p creates MiSeq_run11/ and all sub-folders in one go and does not fail on a re-run.
mkdir -p MiSeq_run11/a_data/gz

## Raw files expected in /yourpath/MiSeq_run11/a_data/gz/
## (listed for reference only - these are file names, not commands):
#   p1617-3554-01_S1_L001_R1_001.fastq.gz
#   p1617-3554-01_S1_L001_R2_001.fastq.gz

## Copy the raw reads into the working tree
cp ../MiSeq_run11/a_data/gz/p1617-3554-01_S1_L001_R1_001.fastq.gz MiSeq_run11/a_data/gz/
cp ../MiSeq_run11/a_data/gz/p1617-3554-01_S1_L001_R2_001.fastq.gz MiSeq_run11/a_data/gz/



#### MiSeq run #12 | ITHACA ITS 
## =======================================================================================
## The fungi at the Ithaca location were sequenced in the MiSeq run #12.
## This MiSeq library contains additional samples from experiments that are not related to this study.
## The raw data of the MiSeq run #12 is stored at ENA under the study accession ***TBD*** (Sample: ***TBD***).

## Create the data folders.
## -p also creates MiSeq_run12/ when it is missing and does not fail on a re-run.
mkdir -p MiSeq_run12/a_data/gz

## Raw files expected in /yourpath/MiSeq_run12/a_data/gz/
## (listed for reference only - these are file names, not commands):
#   miseq12_S1_L001_R1_001.fastq.gz
#   miseq12_S1_L001_R2_001.fastq.gz

## Copy the raw reads into the working tree
cp ../MiSeq_run12/a_data/gz/miseq12_S1_L001_R1_001.fastq.gz MiSeq_run12/a_data/gz/
cp ../MiSeq_run12/a_data/gz/miseq12_S1_L001_R2_001.fastq.gz MiSeq_run12/a_data/gz/



#### MiSeq run #13 | ZURICH ITS 
## =======================================================================================
## The fungi at the Zurich location were sequenced in the MiSeq run #13.
## This MiSeq library contains additional samples from experiments that are not related to this study.
## The raw data of the MiSeq run #13 is stored at ENA under the study accession ***TBD*** (Sample: ***TBD***).

## Create the data folders.
## -p also creates MiSeq_run13/ when it is missing and does not fail on a re-run.
mkdir -p MiSeq_run13/a_data/gz

## Raw files expected in /yourpath/MiSeq_run13/a_data/gz/
## (listed for reference only - these are file names, not commands):
#   p2337o4164-S1_S1_L001_R1_001.fastq.gz
#   p2337o4164-S1_S1_L001_R2_001.fastq.gz

## Copy the raw reads into the working tree
cp ../MiSeq_run13/a_data/gz/p2337o4164-S1_S1_L001_R1_001.fastq.gz MiSeq_run13/a_data/gz/
cp ../MiSeq_run13/a_data/gz/p2337o4164-S1_S1_L001_R2_001.fastq.gz MiSeq_run13/a_data/gz/










## =======================================================================================
## =======================================================================================
## A | QC and gz2fq
## =======================================================================================
## =======================================================================================



## ---------------------------------------------------------------------------------------
## A1 | QC - FastQC v0.11.2
## ---------------------------------------------------------------------------------------


## Scratch folder for QC trials inside every folder that already holds an a_data/ directory
for f in */
do
[ -d "${f}a_data" ] || continue     # skips the literal "*/" of an unmatched glob and folders without a_data/
mkdir -p "${f}a_data/qc_test/"
done

## One FastQC job per MiSeq run; the five jobs are started in parallel
mkdir -p MiSeq_run02/a_data/qc/
fastqc -t 20 -k 8 -q MiSeq_run02/a_data/gz/s1-amplicon_S1_L001_R1_001.fastq.gz MiSeq_run02/a_data/gz/s1-amplicon_S1_L001_R2_001.fastq.gz -o MiSeq_run02/a_data/qc/ &

mkdir -p MiSeq_run09/a_data/qc/
fastqc -t 20 -k 8 -q MiSeq_run09/a_data/gz/MiSeq_run09_R1.fastq.gz MiSeq_run09/a_data/gz/MiSeq_run09_R2.fastq.gz -o MiSeq_run09/a_data/qc/ &

mkdir -p MiSeq_run11/a_data/qc/
fastqc -t 20 -k 8 -q MiSeq_run11/a_data/gz/p1617-3554-01_S1_L001_R1_001.fastq.gz MiSeq_run11/a_data/gz/p1617-3554-01_S1_L001_R2_001.fastq.gz -o MiSeq_run11/a_data/qc/ &

mkdir -p MiSeq_run12/a_data/qc/
fastqc -t 20 -k 8 -q MiSeq_run12/a_data/gz/miseq12_S1_L001_R1_001.fastq.gz MiSeq_run12/a_data/gz/miseq12_S1_L001_R2_001.fastq.gz -o MiSeq_run12/a_data/qc/ &

mkdir -p MiSeq_run13/a_data/qc/
fastqc -t 20 -k 8 -q MiSeq_run13/a_data/gz/p2337o4164-S1_S1_L001_R1_001.fastq.gz MiSeq_run13/a_data/gz/p2337o4164-S1_S1_L001_R2_001.fastq.gz -o MiSeq_run13/a_data/qc/ &


## -t = number of files to process at once
## -k = kmer size
## -q = quiet (no progress messages)
## -o = output directory

## The FastQC jobs run in the background: wait until all reports are written
## before touching their output.
wait

## disk space
# Remove the zipped FastQC reports - these files are not needed
# (paths are relative to the base folder that holds the MiSeq_run* folders)
rm -f MiSeq_run02/a_data/qc/*.zip
rm -f MiSeq_run09/a_data/qc/*.zip
rm -f MiSeq_run11/a_data/qc/*.zip
rm -f MiSeq_run12/a_data/qc/*.zip
rm -f MiSeq_run13/a_data/qc/*.zip



## ---------------------------------------------------------------------------------------
## A2 | GZ > FQ
## ---------------------------------------------------------------------------------------

## Output folders for the unzipped reads (-p: no error when they already exist)
mkdir -p MiSeq_run02/a_data/fq/ MiSeq_run09/a_data/fq/ MiSeq_run11/a_data/fq/ MiSeq_run12/a_data/fq/ MiSeq_run13/a_data/fq/

# Unzip the files but keep a copy (gunzip -c writes to stdout and leaves the .gz untouched).
# The output files get a uniform name: runXX_S1_L001_R[12]_001.fastq
# All ten jobs run in parallel in the background.
gunzip -c MiSeq_run02/a_data/gz/s1-amplicon_S1_L001_R1_001.fastq.gz > MiSeq_run02/a_data/fq/run02_S1_L001_R1_001.fastq &
gunzip -c MiSeq_run02/a_data/gz/s1-amplicon_S1_L001_R2_001.fastq.gz > MiSeq_run02/a_data/fq/run02_S1_L001_R2_001.fastq &

gunzip -c MiSeq_run09/a_data/gz/MiSeq_run09_R1.fastq.gz > MiSeq_run09/a_data/fq/run09_S1_L001_R1_001.fastq &
gunzip -c MiSeq_run09/a_data/gz/MiSeq_run09_R2.fastq.gz > MiSeq_run09/a_data/fq/run09_S1_L001_R2_001.fastq &

gunzip -c MiSeq_run11/a_data/gz/p1617-3554-01_S1_L001_R1_001.fastq.gz > MiSeq_run11/a_data/fq/run11_S1_L001_R1_001.fastq &
gunzip -c MiSeq_run11/a_data/gz/p1617-3554-01_S1_L001_R2_001.fastq.gz > MiSeq_run11/a_data/fq/run11_S1_L001_R2_001.fastq &

# run12: both reads are started independently, like for the other runs
# (an "&&" here would only unzip R2 after R1 has succeeded)
gunzip -c MiSeq_run12/a_data/gz/miseq12_S1_L001_R1_001.fastq.gz > MiSeq_run12/a_data/fq/run12_S1_L001_R1_001.fastq &
gunzip -c MiSeq_run12/a_data/gz/miseq12_S1_L001_R2_001.fastq.gz > MiSeq_run12/a_data/fq/run12_S1_L001_R2_001.fastq &

gunzip -c MiSeq_run13/a_data/gz/p2337o4164-S1_S1_L001_R1_001.fastq.gz > MiSeq_run13/a_data/fq/run13_S1_L001_R1_001.fastq &
gunzip -c MiSeq_run13/a_data/gz/p2337o4164-S1_S1_L001_R2_001.fastq.gz > MiSeq_run13/a_data/fq/run13_S1_L001_R2_001.fastq &

# Wait for all background jobs: the next step must not read half-written FASTQ files
wait










## =======================================================================================
## =======================================================================================
## B | Trim low quality ends
## =======================================================================================
## =======================================================================================

# Problem: The merging of reads with low quality endings is difficult. We trim the end off
#          (-20nt) to improve merging success. Alternatively, it would be possible to
#          only trim the reverse primer.
#          We also remove short reads (<100nt) and reads with more than 1 ambiguous nucleotide.
#          Again, we could be more stringent with the filtering and include qf.

## Output folders for the trimmed reads (-p: no error when they already exist)
mkdir -p MiSeq_run02/b_trim/ MiSeq_run09/b_trim/ MiSeq_run11/b_trim/ MiSeq_run12/b_trim/ MiSeq_run13/b_trim/

## One PRINSEQ job per MiSeq run, all started in parallel.
## Input : MiSeq_<run>/a_data/fq/<run>_S1_L001_R[12]_001.fastq
## Output: MiSeq_<run>/b_trim/<run>_trim*, <run>_fail* and <run>.log
for run in run02 run09 run11 run12 run13
do
prinseq-lite.pl -verbose -out_format 3 -ns_max_n 1 -min_len 100 -trim_to_len 280 \
  -fastq "MiSeq_${run}/a_data/fq/${run}_S1_L001_R1_001.fastq" \
  -fastq2 "MiSeq_${run}/a_data/fq/${run}_S1_L001_R2_001.fastq" \
  -out_good "MiSeq_${run}/b_trim/${run}_trim" \
  -out_bad "MiSeq_${run}/b_trim/${run}_fail" \
  -log "MiSeq_${run}/b_trim/${run}.log" &
done

## -verbose       = print status information during the processing
## -out_format 3  = what kind of file for the output; in this case FASTQ
## -ns_max_n 1    = remove sequences with more than 1 N
## -min_len 100   = filter sequences shorter than 100
## -trim_to_len   = trim all sequences from 3' end to result in a sequence length 280

## The PRINSEQ jobs run in the background and read the files in a_data/fq:
## wait until all of them have finished before deleting their input.
wait

## disk space
# Remove the fastq files in a_data/fq - these files are not needed any longer
# (the zipped originals stay in a_data/gz)
for run in run02 run09 run11 run12 run13
do
rm -f "MiSeq_${run}"/a_data/fq/*.fastq
done










## =======================================================================================
## =======================================================================================
## C | Merge overlap reads - FLASH v1.2.9
## =======================================================================================
## =======================================================================================

## Output folders for the merged reads (-p: no error when they already exist)
mkdir -p MiSeq_run02/c_merge/ MiSeq_run09/c_merge/ MiSeq_run11/c_merge/ MiSeq_run12/c_merge/ MiSeq_run13/c_merge/

## One FLASH job per MiSeq run, all started in parallel.
## Input : MiSeq_<run>/b_trim/<run>_trim_[12].fastq
## Output: MiSeq_<run>/c_merge/<run>.*
for run in run02 run09 run11 run12 run13
do
flash "MiSeq_${run}/b_trim/${run}_trim_1.fastq" "MiSeq_${run}/b_trim/${run}_trim_2.fastq" -t 10 -m 15 -M 250 -x 0.25 -d "MiSeq_${run}/c_merge/" -o "${run}" &
done

# -t = number of threads
# -m = minimum required overlap length to provide a confident overlap
# -M = maximum overlap length
# -x = maximum allowed ratio between number of mismatched BPs and the overlap length
# -d = output directory
# -o = prefix of the output files

# The FLASH jobs run in the background and read the files in b_trim/:
# wait until all of them have finished before deleting their input.
wait

# Remove the fastq files in b_trim/ - these files are not needed any longer
for run in run02 run09 run11 run12 run13
do
rm -f "MiSeq_${run}"/b_trim/*.fastq
done



## doubling dataset
## For every run: write the reverse complement of the merged reads to
## <run>.extendedFrags_reversed.fastq and, if that worked, concatenate merged and
## reversed reads into <run>_doubled.fastq. One background job per run.
for run in run02 run09 run11 run12 run13
do
merged="MiSeq_${run}/c_merge/${run}.extendedFrags.fastq"
flipped="MiSeq_${run}/c_merge/${run}.extendedFrags_reversed.fastq"
fastx_reverse_complement -i "$merged" -o "$flipped" -Q33 &&
cat "$merged" "$flipped" > "MiSeq_${run}/c_merge/${run}_doubled.fastq" &
done



## disk space
# Remove the intermediate fastq files in c_merge/ - these files are not needed any longer
### keep c_merge/*_doubled.fastq data!

# remove_merge_intermediates
# Deletes *extended*.fastq and *notCombined*.fastq from the c_merge/ folder of
# every sub-folder of the current directory. Folders without c_merge/ are skipped;
# *_doubled.fastq files match neither pattern and are kept.
remove_merge_intermediates() {
  local d
  for d in */
  do
    [ -d "${d}c_merge" ] || continue
    # -f: no error when a pattern matches nothing
    rm -f "${d}"c_merge/*extended*.fastq "${d}"c_merge/*notCombined*.fastq
  done
}

# The "doubling" jobs run in the background and still read the *extendedFrags* files:
# wait until they have finished before deleting anything.
wait
remove_merge_intermediates










## =======================================================================================
## =======================================================================================
## D | Primer Splitting
## =======================================================================================
## =======================================================================================

## Check if the file permissions of the scripts need to be changed; check if
## "c_merge/run02_doubled.fastq" needs to be unzipped. Make the d_primer directory.
## The loop runs in the foreground: the d_primer/ folders must exist before the
## splitting jobs start to write their log files into them.

for f in */
do
[ -d "$f" ] || continue     # unmatched glob stays the literal "*/" - skip it
# chmod 775 "$f"/_demultiplex_*.sh
# gunzip "$f"/c_merge/run*_doubled.fastq
mkdir -p "${f}d_primer/"    # -p: no error when the folder already exists
done



## =======================================================================================
## splitting first by R-primer: 
## =======================================================================================

# cutadapt -a ADAPTER-SEQUENCE input.fastq > output.fastq
# -------------------------------------------------------
# -g ADAPTER, --front = ADAPTER Sequence of an adapter that was ligated to the 5' end.
# -a ADAPTER, --adapter=ADAPTER Sequence of an adapter that was ligated to the 3' end.
# -b ADAPTER, --anywhere=ADAPTER Sequence of an adapter that was ligated to the 5' or 3' end. 

# split_by_R_primer RUN SCRIPT PRIMERS
# -------------------------------------------------------
# Changes into MiSeq_<RUN>/ and calls SCRIPT once for every line of PRIMERS.
# The first column of a line is read into BCPR_ID, the rest of the line into BCPR;
# both are handed to SCRIPT. The output of each call goes to a fresh
# d_primer/<RUN>_trim_<BCPR_ID>.log.
#   RUN     = run label, e.g. run02
#   SCRIPT  = de-multiplexing script, path relative to MiSeq_<RUN>/
#   PRIMERS = primer list, path relative to MiSeq_<RUN>/
# Everything happens in a subshell, so the working directory of the caller never
# changes - also not when MiSeq_<RUN>/ is missing (the function then returns 1).
split_by_R_primer() {
  (
    cd "MiSeq_$1" || exit 1
    # "|| [ -n ... ]" also processes a last line that has no trailing newline
    while read -r BCPR_ID BCPR || [ -n "$BCPR_ID" ]
    do
      [ -n "$BCPR_ID" ] || continue     # skip empty lines
      # ">" starts a fresh log on every run (same effect as rm + touch + ">>").
      # $BCPR stays unquoted: further columns are passed on as separate arguments.
      "$2" $BCPR_ID $BCPR > "d_primer/$1_trim_${BCPR_ID}.log"
    done < "$3"
  )
}

# MiSeq run02 (script and primer list are kept in d_primer/)
# -------------------------------------------------------
split_by_R_primer run02 ./d_primer/_demultiplex_by_R_primer.sh d_primer/_R_primers.txt &


# MiSeq_run09
# -------------------------------------------------------
split_by_R_primer run09 ./_demultiplex_by_R_primer.sh _R_primers.txt &


# MiSeq_run11
# -------------------------------------------------------
split_by_R_primer run11 ./_demultiplex_by_R_primer.sh _R_primers.txt &


# MiSeq_run12
# -------------------------------------------------------
split_by_R_primer run12 ./_demultiplex_by_R_primer.sh _R_primers.txt &


# MiSeq_run13
# -------------------------------------------------------
split_by_R_primer run13 ./_demultiplex_by_R_primer.sh _R_primers.txt &


## disk space
# compress_doubled_reads
# gzips c_merge/run*_doubled.fastq in every sub-folder of the current directory.
# Folders without such a file are skipped silently.
compress_doubled_reads() {
  local d fq
  for d in */
  do
    for fq in "${d}"c_merge/run*_doubled.fastq
    do
      [ -f "$fq" ] || continue     # unmatched glob stays literal - skip it
      gzip "$fq"
    done
  done
}

# The R-primer splitting jobs run in the background and presumably still read the
# doubled FASTQ files (TODO confirm in _demultiplex_by_R_primer.sh): wait for them
# before the files are replaced by their .gz version.
wait
compress_doubled_reads &




## =======================================================================================
## splitting then by F-primer the separately "R-splitted" files: 
## =======================================================================================

# cutadapt -a ADAPTER-SEQUENCE input.fastq > output.fastq
# -------------------------------------------------------
# -g ADAPTER, --front = ADAPTER Sequence of an adapter that was ligated to the 5' end.
# -a ADAPTER, --adapter=ADAPTER Sequence of an adapter that was ligated to the 3' end.
# -b ADAPTER, --anywhere=ADAPTER Sequence of an adapter that was ligated to the 5' or 3' end. 


# MiSeq_run02
# -------------------------------------------------------

# demultiplex_run02_by_F_primers
# De-multiplex the R3 and R4 subsets of MiSeq run02 using bc and primer sequence.
# For R3 and R4 one background loop reads d_primer/_<R>_samples.txt line by line
# (first column -> BCPF_ID, rest of the line -> BCPF), calls
# d_primer/_demultiplex_<R>_by_F_primers.sh with both values and appends the output
# to d_primer/run02_trim_<R>_1193R_<BCPF_ID>.log.
# Everything runs in a subshell: the working directory of the caller is not changed,
# also not when MiSeq_run02/ is missing (the function then returns 1).
demultiplex_run02_by_F_primers() {
  (
    cd MiSeq_run02 || exit 1
    for R in R3 R4
    do
      # "|| [ -n ... ]" also processes a last line that has no trailing newline
      while read -r BCPF_ID BCPF || [ -n "$BCPF_ID" ]
      do
        [ -n "$BCPF_ID" ] || continue     # skip empty lines
        # ">>" creates the log when needed, no separate touch required.
        # $BCPF stays unquoted: further columns are passed on as separate arguments.
        ./d_primer/_demultiplex_${R}_by_F_primers.sh $BCPF_ID $BCPF >> "d_primer/run02_trim_${R}_1193R_${BCPF_ID}.log"
      done < "d_primer/_${R}_samples.txt" &
    done
    wait     # the function is finished when both loops are finished
  )
}

demultiplex_run02_by_F_primers &


# MiSeq_run09
# -------------------------------------------------------

# demultiplex_run09_by_F_primers
# De-multiplex the R40-R45 and R47-R51 subsets of MiSeq run09 using bc and primer
# sequence. For every R primer one background loop reads _799F_primers.txt line by
# line (first column -> BCPF_ID, rest of the line -> BCPF), calls
# _demultiplex_<R>_by_F_primers.sh with both values and appends the output to
# d_primer/run09_trim_<R>_1193R_<BCPF_ID>.log.
# The log names no longer carry a "_${SMPL}" suffix: SMPL is not set by these loops,
# so the suffix only produced names ending in "_.log", and for R43 the touched file
# and the written file had different names.
# Everything runs in a subshell: the working directory of the caller is not changed,
# also not when MiSeq_run09/ is missing (the function then returns 1).
demultiplex_run09_by_F_primers() {
  (
    cd MiSeq_run09 || exit 1
    for R in R40 R41 R42 R43 R44 R45 R47 R48 R49 R50 R51
    do
      # "|| [ -n ... ]" also processes a last line that has no trailing newline
      while read -r BCPF_ID BCPF || [ -n "$BCPF_ID" ]
      do
        [ -n "$BCPF_ID" ] || continue     # skip empty lines
        # ">>" creates the log when needed, no separate touch required.
        # $BCPF stays unquoted: further columns are passed on as separate arguments.
        ./_demultiplex_${R}_by_F_primers.sh $BCPF_ID $BCPF >> "d_primer/run09_trim_${R}_1193R_${BCPF_ID}.log"
      done < _799F_primers.txt &
    done
    wait     # the function is finished when all loops are finished
  )
}

demultiplex_run09_by_F_primers &


# MiSeq_run11 16S (R40-R49) and ITS (R7-R22)
# -------------------------------------------------------

# demultiplex_run11_by_F_primers
# De-multiplex the samples of MiSeq run11 using bc and primer sequence.
#   16S: R40-R45 and R47-R49, primer list _799F_primers.txt,
#        script _demultiplex_<R>_by_F_primers.sh,
#        log    d_primer/run11_trim_<R>_1193R_<BCPF_ID>.log
#   ITS: R7-R22, primer list _ITS1f_primers.txt,
#        script _demultiplex_<R>_by_ITS1f_primers.sh,
#        log    d_primer/run11_trim_<R>_ITS2_<BCPF_ID>.log
# For every R primer one background loop reads the primer list line by line
# (first column -> BCPF_ID, rest of the line -> BCPF), calls the script with both
# values and appends the output to the log.
# All paths are relative to MiSeq_run11/, so the function changes into that folder
# first. This happens in a subshell: the working directory of the caller is not
# changed, also not when MiSeq_run11/ is missing (the function then returns 1).
demultiplex_run11_by_F_primers() {
  (
    cd MiSeq_run11 || exit 1

    ## 16S
    for R in R40 R41 R42 R43 R44 R45 R47 R48 R49
    do
      # "|| [ -n ... ]" also processes a last line that has no trailing newline
      while read -r BCPF_ID BCPF || [ -n "$BCPF_ID" ]
      do
        [ -n "$BCPF_ID" ] || continue     # skip empty lines
        # ">>" creates the log when needed, no separate touch required.
        # $BCPF stays unquoted: further columns are passed on as separate arguments.
        ./_demultiplex_${R}_by_F_primers.sh $BCPF_ID $BCPF >> "d_primer/run11_trim_${R}_1193R_${BCPF_ID}.log"
      done < _799F_primers.txt &
    done

    ## ITS
    for R in R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R22
    do
      while read -r BCPF_ID BCPF || [ -n "$BCPF_ID" ]
      do
        [ -n "$BCPF_ID" ] || continue     # skip empty lines
        ./_demultiplex_${R}_by_ITS1f_primers.sh $BCPF_ID $BCPF >> "d_primer/run11_trim_${R}_ITS2_${BCPF_ID}.log"
      done < _ITS1f_primers.txt &
    done

    wait     # the function is finished when all loops are finished
  )
}

demultiplex_run11_by_F_primers &


# MiSeq_run12 - 16S
# -------------------------------------------------------
cd MiSeq_run12

## R40-R51: De-multiplex samples using bc and primer sequence
## One background loop per R primer. Each loop reads d_primer/_799F_primers.txt line
## by line, calls d_primer/_demultiplex_<R>_by_F_primers.sh with the two fields of the
## line and appends the output to d_primer/run12_trim_<R>_1193R_<first field>.log.
for R in R40 R41 R42 R43 R44 R45 R46 R47 R48 R49 R50 R51
do
while read BCPF_ID BCPF
do
split_log=d_primer/run12_trim_${R}_1193R_${BCPF_ID}.log
touch $split_log
./d_primer/_demultiplex_${R}_by_F_primers.sh $BCPF_ID $BCPF >> $split_log
done < d_primer/_799F_primers.txt &
done


# MiSeq_run12 - ITS (R7-R24)
# -------------------------------------------------------

## R7-R24: De-multiplex samples using bc and primer sequence
## One background job is started per R set. Each job reads _ITS1f_primers.txt
## line by line (first field = ID, remainder = second argument) and appends the
## output of the R-specific demultiplexing script to one log file per ID.
for R in {7..24}
do
   while read bcpf_id bcpf
   do
      touch d_primer/run12_trim_R${R}_ITS2_${bcpf_id}.log
      ./_demultiplex_R${R}_by_ITS1f_primers.sh $bcpf_id $bcpf >> d_primer/run12_trim_R${R}_ITS2_${bcpf_id}.log
   done < _ITS1f_primers.txt &
done

# back to the parent directory; the background jobs keep their own working directory
cd ..


# MiSeq_run13
# -------------------------------------------------------
cd MiSeq_run13

## ITS R7-R24 (R15-24 for zurich)
## NOTE(review): the heading mentions R7-R24, but the commands recorded here only
## cover R7-R22 - confirm whether R23 and R24 are needed for this run.

## R7-R22: De-multiplex samples using bc and primer sequence
## One background job is started per R set. Each job reads _ITS1f_primers.txt
## line by line (first field = ID, remainder = second argument) and appends the
## output of the R-specific demultiplexing script to one log file per ID.
for R in {7..22}
do
   while read bcpf_id bcpf
   do
      touch d_primer/run13_trim_R${R}_ITS2_${bcpf_id}.log
      ./_demultiplex_R${R}_by_ITS1f_primers.sh $bcpf_id $bcpf >> d_primer/run13_trim_R${R}_ITS2_${bcpf_id}.log
   done < _ITS1f_primers.txt &
done

# back to the parent directory; the background jobs keep their own working directory
cd ..


### sequence counts
# -------------------------------------------------------
# Count, per demultiplexed fastq file, the lines containing the "@M0..." tag
# (presumably the sequencer ID that starts every read header - verify).

# runs 02 and 09 ("@M01106"); the first count runs in the foreground
grep -c "@M01106" MiSeq_run02/d_primer/run02_trim_R*_F*.fastq \
   > MiSeq_run02/d_primer/run02_trim_R_and_F_seq_counts.txt
grep -c "@M01106" MiSeq_run09/d_primer/run09_trim_R*.fastq \
   > MiSeq_run09/d_primer/run09_trim_R_and_F_seq_counts.txt &

# runs 11, 12 and 13 ("@M04679"); one background job per count file
for run in run11 run12 run13
do
   d=MiSeq_${run}/d_primer
   grep -c "@M04679" ${d}/${run}_trim_R*_F*.fastq > ${d}/__${run}_trim_R_and_F_seq_counts.txt &
   grep -c "@M04679" ${d}/${run}_trim_R*ITS2.fastq > ${d}/__${run}_trim_R_ITS_counts.txt &
   # the 1193R (16S) count is only recorded for runs 12 and 13
   if [ "${run}" != run11 ]
   then
      grep -c "@M04679" ${d}/${run}_trim_R*1193R.fastq > ${d}/__${run}_trim_R_16S_counts.txt &
   fi
done









## =======================================================================================
## =======================================================================================
## E | Quality Filtering - PRINSEQ-lite 0.20.4
## =======================================================================================
## =======================================================================================

# output directories for the quality-filtered reads (run11 holds 16S and ITS samples)
mkdir \
   MiSeq_run02/e_qf/ \
   MiSeq_run09/e_qf/ \
   MiSeq_run11/e_qf_16S/ \
   MiSeq_run11/e_qf_ITS/ \
   MiSeq_run12/e_qf/ \
   MiSeq_run13/e_qf/

## Run PRINSEQ-lite on every sample of one MiSeq run.
##   $1  run label, e.g. run02 (input and output live below MiSeq_<run>/)
##   $2  name of the output directory inside MiSeq_<run>/ (e_qf, e_qf_16S, e_qf_ITS)
##   $3  sample list (created from Database S1 of the MS), one sample ID per line
##   $4  PRINSEQ parameter file
## For each sample <S> the input is MiSeq_<run>/d_primer/<run>_trim_<S>.fastq and the
## outputs are <run>_trim_<S>_good, <run>_trim_<S>_bad and <run>_trim_<S>_qc.log in
## the output directory.
qf_run() {
   local run=$1 qf_dir=$2 sample_list=$3 params=$4
   local SMPL
   local in_stem=MiSeq_${run}/d_primer/${run}_trim_
   local out_stem=MiSeq_${run}/${qf_dir}/${run}_trim_

   # word splitting of the list content is intended here (one ID per word)
   for SMPL in $(cat "$sample_list")
   do
      prinseq-lite.pl -params "$params" -fastq "${in_stem}${SMPL}.fastq" -out_good "${out_stem}${SMPL}_good" -out_bad "${out_stem}${SMPL}_bad" -log "${out_stem}${SMPL}_qc.log"
   done
}

# One background job per run / locus.
# NOTE(review): run02 reads MiSeq_run02/prinseq.par, all other runs read prinseq.par
# from the working directory - kept as recorded, confirm that both files are identical.
qf_run run02 e_qf     MiSeq_run02/_run02_sample_list.txt MiSeq_run02/prinseq.par &
qf_run run09 e_qf     MiSeq_run09/_run09_zurich_16S_sample_list.txt prinseq.par &
qf_run run11 e_qf_16S MiSeq_run11/_run11_ithaca_16S_sample_list_without_outliers.txt prinseq.par &
qf_run run11 e_qf_ITS MiSeq_run11/_run11_ITS_sample_list.txt prinseq.par &
qf_run run12 e_qf     MiSeq_run12/_run12_ithaca_ITS_sample_list.txt prinseq.par &
qf_run run13 e_qf     MiSeq_run13/_run13_zurich_ITS_sample_list.txt prinseq.par &

## The parameter file contains:
# out_format 1
# # range_len 350-400, not done
# range_gc 30-70
# min_qual_mean 20
# ns_max_n 0
# noniupac
# lc_method dust
# lc_threshold 15
# 22.07.19: added in prinseq.par file: min_len 100

### sequence counts
# -------------------------------------------------------
# Per quality-filter directory: count the lines containing ">" in every *_good.fasta
# file and store the counts next to the files (one background job per directory).
for qf_dir in MiSeq_run02/e_qf MiSeq_run09/e_qf MiSeq_run11/e_qf_ITS MiSeq_run11/e_qf_16S MiSeq_run12/e_qf MiSeq_run13/e_qf
do
   run=${qf_dir#MiSeq_}    # e.g. run02/e_qf
   run=${run%%/*}          # e.g. run02
   grep -c ">" ${qf_dir}/${run}_trim_*_good.fasta > ${qf_dir}/${run}_trimmed_and_qc_seq_counts.txt &
done



## Add barcode label to reads
# -------------------------------------------------------
# For every sample of a list, <run>_trim_<sample>_good.fasta is rewritten to
# <run>_trim_<sample>_good_renamed.fasta: lines whose first field contains ">" get
# ";barcodelabel=<sample>;" appended to that field, all other lines are reduced to
# their first field. One background job per run / locus.
# NOTE(review): the run11 16S list used here (_run11_ithaca_16S_sample_list.txt) is
# not the list used for quality filtering (..._without_outliers.txt) - confirm.
for job in \
   "run02 e_qf MiSeq_run02/_run02_sample_list.txt" \
   "run09 e_qf MiSeq_run09/_run09_zurich_16S_sample_list.txt" \
   "run11 e_qf_16S MiSeq_run11/_run11_ithaca_16S_sample_list.txt" \
   "run11 e_qf_ITS MiSeq_run11/_run11_ITS_sample_list.txt" \
   "run12 e_qf MiSeq_run12/_run12_ithaca_ITS_sample_list.txt" \
   "run13 e_qf MiSeq_run13/_run13_zurich_ITS_sample_list.txt"
do
   read -r run qf_dir list <<< "$job"
   for SMPL in $(cat $list)
   do
      stem=MiSeq_${run}/${qf_dir}/${run}_trim_${SMPL}
      awk -v SMPL=${SMPL} '$1 ~ ">" { print $1 ";barcodelabel=" SMPL ";"; next } { print $1 }' ${stem}_good.fasta > ${stem}_good_renamed.fasta
   done &
done










## =======================================================================================
## =======================================================================================
## F | Move and combine samples per experiments
## =======================================================================================
## =======================================================================================

# create the 16S and ITS output directories in every directory of the working
# directory (runs as one background job)
for run_dir in */
do
   mkdir "$run_dir"/f_otu_16S "$run_dir"/f_otu_ITS
done &



## Copy the *_renamed.fasta files of the first N samples of a sample list.
##   $1  sample list, one sample ID per line
##   $2  N, number of lines of the sample list to use
##   $3  directory holding the renamed FASTA files
##   $4  destination directory (created if it does not exist)
## Returns non-zero if the sample list is unreadable or if any copy fails.
collect_samples() {
   local sample_list=$1 n_samples=$2 src_dir=$3 dest_dir=$4
   local sample rc=0

   # without this check a missing list would yield empty IDs, see below
   [ -r "$sample_list" ] || { echo "collect_samples: cannot read $sample_list" >&2; return 1; }
   mkdir -p "$dest_dir" || return

   while read -r sample _ || [ -n "$sample" ]
   do
      # an empty ID would turn the glob into "**" and copy every file
      [ -n "$sample" ] || continue
      # NOTE(review): substring match as in the original command; an ID that is
      # contained in another ID also copies the other sample's file.
      cp "$src_dir"/*"$sample"*_renamed.fasta "$dest_dir"/ || rc=1
   done < <(head -n "$n_samples" "$sample_list")

   return "$rc"
}

# The renaming and mkdir steps above were started in the background; they must be
# finished before their output is copied.
wait

# Each run is processed in the foreground (copy, then concatenate, then count), so
# that the count is taken from the finished file.

## MiSeq_run02: 
# -------------------------------------------------------
# all maize samples according to _run02_sample_list.txt (first 44 lines)

collect_samples MiSeq_run02/_run02_sample_list.txt 44 MiSeq_run02/e_qf MiSeq_run02/e_qf_field &&
cat MiSeq_run02/e_qf_field/*_renamed.fasta > MiSeq_run02/f_otu_16S/run02_changins_16S_field_trimmed_qfiltered_renamed.fasta &&
grep -c ">" MiSeq_run02/f_otu_16S/run02_changins_16S_field_trimmed_qfiltered_renamed.fasta



## MiSeq_run09: 
# -------------------------------------------------------
# all maize samples according to _run09_zurich_16S_sample_list.txt (first 123 lines)
# NOTE(review): unlike the other runs, the concatenation reads every *_renamed.fasta
# of e_qf/ and not the copies in f_otu_16S/ - kept as recorded.

collect_samples MiSeq_run09/_run09_zurich_16S_sample_list.txt 123 MiSeq_run09/e_qf MiSeq_run09/f_otu_16S &&
cat MiSeq_run09/e_qf/*_renamed.fasta > MiSeq_run09/f_otu_16S/run09_zurich_16S_trimmed_qfiltered_renamed.fasta &&
grep -c ">" MiSeq_run09/f_otu_16S/run09_zurich_16S_trimmed_qfiltered_renamed.fasta



## MiSeq_run11 - 16S: 
# -------------------------------------------------------
# all maize samples according to _run11_ithaca_16S_sample_list.txt (first 91 lines)

collect_samples MiSeq_run11/_run11_ithaca_16S_sample_list.txt 91 MiSeq_run11/e_qf_16S MiSeq_run11/f_otu_16S &&
cat MiSeq_run11/f_otu_16S/*F_good_renamed.fasta > MiSeq_run11/f_otu_16S/run11_ithaca_16S_trimmed_qfiltered_renamed.fasta &&
grep -c ">" MiSeq_run11/f_otu_16S/run11_ithaca_16S_trimmed_qfiltered_renamed.fasta



## MiSeq_run11 - ITS: 
# -------------------------------------------------------
# all maize samples according to _run11_ITS_sample_list.txt (first 84 lines)

collect_samples MiSeq_run11/_run11_ITS_sample_list.txt 84 MiSeq_run11/e_qf_ITS MiSeq_run11/f_otu_ITS &&
cat MiSeq_run11/f_otu_ITS/*F_good_renamed.fasta > MiSeq_run11/f_otu_ITS/run11_changins_ITS_trimmed_qfiltered_renamed.fasta &&
grep -c ">" MiSeq_run11/f_otu_ITS/run11_changins_ITS_trimmed_qfiltered_renamed.fasta



## MiSeq_run12: 
# -------------------------------------------------------
# all maize samples according to _run12_ithaca_ITS_sample_list.txt (first 90 lines)

collect_samples MiSeq_run12/_run12_ithaca_ITS_sample_list.txt 90 MiSeq_run12/e_qf MiSeq_run12/f_otu_ITS &&
cat MiSeq_run12/f_otu_ITS/*F_good_renamed.fasta > MiSeq_run12/f_otu_ITS/run12_ithaca_ITS_trimmed_qfiltered_renamed.fasta &&
grep -c ">" MiSeq_run12/f_otu_ITS/run12_ithaca_ITS_trimmed_qfiltered_renamed.fasta



## MiSeq_run13: 
# -------------------------------------------------------
# all maize samples according to _run13_zurich_ITS_sample_list.txt (first 120 lines)

collect_samples MiSeq_run13/_run13_zurich_ITS_sample_list.txt 120 MiSeq_run13/e_qf MiSeq_run13/f_otu_ITS &&
cat MiSeq_run13/f_otu_ITS/*F_good_renamed.fasta > MiSeq_run13/f_otu_ITS/run13_zurich_ITS_trimmed_qfiltered_renamed.fasta &&
grep -c ">" MiSeq_run13/f_otu_ITS/run13_zurich_ITS_trimmed_qfiltered_renamed.fasta



### check and sort sequences length
# -------------------------------------------------------
# For every combined FASTA file three files are written to the working directory:
#   <tag>_seqlength.txt              header lines followed by the summed sequence length
#   <tag>_only_seqlength.txt         every second line of the file above
#   <tag>_only_seqlength_sorted.txt  the same values sorted numerically
# Each chain runs as one background job and stops at the first failing step.
seqlen_awk='/^>/ {if (seqlen){print seqlen}; print ;seqlen=0;next; } { seqlen += length($0)}END{print seqlen}'

for job in \
   "run02_16S MiSeq_run02/f_otu_16S/run02_changins_16S_field_trimmed_qfiltered_renamed.fasta" \
   "run09_16S MiSeq_run09/f_otu_16S/run09_zurich_16S_trimmed_qfiltered_renamed.fasta" \
   "run11_16S MiSeq_run11/f_otu_16S/run11_ithaca_16S_trimmed_qfiltered_renamed.fasta" \
   "run11_ITS MiSeq_run11/f_otu_ITS/run11_changins_ITS_trimmed_qfiltered_renamed.fasta" \
   "run12_ITS MiSeq_run12/f_otu_ITS/run12_ithaca_ITS_trimmed_qfiltered_renamed.fasta" \
   "run13_ITS MiSeq_run13/f_otu_ITS/run13_zurich_ITS_trimmed_qfiltered_renamed.fasta"
do
   read -r tag fasta <<< "$job"
   {
      awk "$seqlen_awk" "$fasta" > "${tag}_seqlength.txt" &&
      sed -n '2~2p' "${tag}_seqlength.txt" > "${tag}_only_seqlength.txt" &&
      sort -n "${tag}_only_seqlength.txt" > "${tag}_only_seqlength_sorted.txt"
   } &
done



### disk space
# -------------------------------------------------------
# Remove the *.info and *trim* fastq files in d_primer/ - these files are not needed any longer
# Remove the *bad.fasta and *good.fasta files in e_qf*/ - these files are not needed any longer
# Compress the demultiplexed, quality filtered and renamed sequences in e_qf*/
#
# The loop visits every directory of the working directory. Each rm / gzip is started
# as its own background job and the loop itself runs in the background as well, so
# the prompt returns before the files are gone.
# NOTE: the removal cannot be undone - run this only after the steps that read the
# fastq and *good.fasta files have finished.

for f in */
do
rm "$f"/d_primer/run*_trim*.info &
rm "$f"/d_primer/run*_trim*.fastq &
rm "$f"/e_qf*/run*_trim*bad.fasta &
rm "$f"/e_qf*/run*_trim*good.fasta &
gzip "$f"/e_qf*/run*_trim_*renamed.fasta &
done &



### concatenate all locations together
## =======================================================================================



## Write the sequence length files for one FASTA file:
##   <prefix>_seqlength.txt              header lines, each followed by the summed
##                                       length of its sequence lines
##   <prefix>_only_seqlength.txt         every second line of the file above (the lengths)
##   <prefix>_only_seqlength_sorted.txt  the lengths sorted numerically
##   $1  FASTA file
##   $2  prefix (may include a path) of the three output files
## Stops at the first failing step and returns its status.
seq_length_report() {
   local fasta=$1 prefix=$2

   awk '/^>/ {if (seqlen){print seqlen}; print ;seqlen=0;next; } { seqlen += length($0)}END{print seqlen}' "$fasta" > "${prefix}_seqlength.txt" &&
   # even lines only; same selection as GNU sed -n '2~2p', but not GNU specific
   awk 'NR % 2 == 0' "${prefix}_seqlength.txt" > "${prefix}_only_seqlength.txt" &&
   sort -n "${prefix}_only_seqlength.txt" > "${prefix}_only_seqlength_sorted.txt"
}

# Earlier steps were started in the background; their files must be complete
# before they are concatenated.
wait

# The steps below run one after the other: every command reads the file written by
# the command before it, and the combined FASTA is compressed only after the counts
# and the sequence lengths have been taken from it.

### 16S
# -------------------------------------------------------
mkdir -p zh_ch_us_combined

# find and replace barcode by barcode.ithaca for ithaca (same combinations as zurich): 
perl -pe 's/799F/799F.ithaca/g' MiSeq_run11/f_otu_16S/run11_ithaca_16S_trimmed_qfiltered_renamed.fasta > MiSeq_run11/f_otu_16S/run11_ithaca_16S_trimmed_qfiltered_renamed_renamed.fasta

# pull everything together
cat MiSeq_run02/f_otu_16S/run02_changins_16S_field_trimmed_qfiltered_renamed.fasta MiSeq_run09/f_otu_16S/run09_zurich_16S_trimmed_qfiltered_renamed.fasta MiSeq_run11/f_otu_16S/run11_ithaca_16S_trimmed_qfiltered_renamed_renamed.fasta > zh_ch_us_combined/zh_ch_us_trimmed_qfiltered_renamed.fasta
grep -c ">" zh_ch_us_combined/zh_ch_us_trimmed_qfiltered_renamed.fasta

# sorted file with sequence length
seq_length_report zh_ch_us_combined/zh_ch_us_trimmed_qfiltered_renamed.fasta zh_ch_us_16S

# compress last; gzip replaces the .fasta file by .fasta.gz
gzip zh_ch_us_combined/zh_ch_us_trimmed_qfiltered_renamed.fasta &



### ITS
# -------------------------------------------------------
mkdir -p zh_ch_us_ITS_combined

# rename barcode label
perl -pe 's/ITS1F/ITS1F_ithaca/g' MiSeq_run12/f_otu_ITS/run12_ithaca_ITS_trimmed_qfiltered_renamed.fasta > MiSeq_run12/f_otu_ITS/run12_ithaca_ITS_trimmed_qfiltered_renamed_renamed.fasta
perl -pe 's/ITS1F/ITS1F_zurich/g' MiSeq_run13/f_otu_ITS/run13_zurich_ITS_trimmed_qfiltered_renamed.fasta > MiSeq_run13/f_otu_ITS/run13_zurich_ITS_trimmed_qfiltered_renamed_renamed.fasta
perl -pe 's/ITS1F/ITS1F_changins/g' MiSeq_run11/f_otu_ITS/run11_changins_ITS_trimmed_qfiltered_renamed.fasta > MiSeq_run11/f_otu_ITS/run11_changins_ITS_trimmed_qfiltered_renamed_renamed.fasta

# pull everything together
cat MiSeq_run13/f_otu_ITS/run13_zurich_ITS_trimmed_qfiltered_renamed_renamed.fasta MiSeq_run12/f_otu_ITS/run12_ithaca_ITS_trimmed_qfiltered_renamed_renamed.fasta MiSeq_run11/f_otu_ITS/run11_changins_ITS_trimmed_qfiltered_renamed_renamed.fasta > zh_ch_us_ITS_combined/zh_ch_us_ITS_trimmed_qfiltered_renamed.fasta
grep -c ">" zh_ch_us_ITS_combined/zh_ch_us_ITS_trimmed_qfiltered_renamed.fasta

# sorted file with sequence length
seq_length_report zh_ch_us_ITS_combined/zh_ch_us_ITS_trimmed_qfiltered_renamed.fasta zh_ch_us_ITS

# compress last; gzip replaces the .fasta file by .fasta.gz
gzip zh_ch_us_ITS_combined/zh_ch_us_ITS_trimmed_qfiltered_renamed.fasta &










## =======================================================================================
## =======================================================================================
## G | zOTU clustering (by Jean-Claude Walser)
## =======================================================================================
## =======================================================================================



### 16S

================================================================================
Amplicon Size Variant With Additional Clustering
--------------------------------------------------------------------------------
Project: p327 newrequest_190730
Sample: zh_ch_us_trimmed_qfiltered_renamed
Locus: 16S
--------------------------------------------------------------------------------
UPARSE & UNOISE3
usearch v11.0.667_i86linux64
--------------------------------------------------------------------------------
START: 09:21:50 05/08/2019
--------------------------------------------------------------------------------
▶ Deduplicate Amplicons
De-replicates amplicons to obtain unique amplicons.
Determine error rates of amplicon reads.
--------------------------------------------------------------------------------
▶ UPARSE (min abundance = 2)
Clusters OTU at 97% using the UPARSE-OTU algorithm.
Number of OTUs: 11853
--------------------------------------------------------------------------------
▶ UNOISE3 (zero-radius OTUs, min abundance = 8)
Uses the UNOISE algorithm to perform denoising (error-correction) of amplicon reads.
Number of ZOTUs: 17653
--------------------------------------------------------------------------------
▶ Additional Clustering (-id 0.99, -centroids)
Clusters ZOTUs at different identity levels (i.e. 97%,98% and 99%).
Number of ZOTUs with additional clustering at 99%: 10709
Number of ZOTUs with additional clustering at 98%:  6676
Number of ZOTUs with additional clustering at 97%:  4669
--------------------------------------------------------------------------------
▶ Count Table (-strand plus, -id 0.97)
Generates OTU count tables by mapping reads to OTUs.
--------------------------------------------------------------------------------
▶ ZOTU Table Report
Creates a report/summary from an OTU table.
--------------------------------------------------------------------------------
▶ Octave plots
Octave plots with low-abundance (Z)OTUs and cross-talk information.
--------------------------------------------------------------------------------
▶ Alignment and Tree (MSA: muscle/3.8.1551; CLU: Clustering Method)
Creates multiple sequences alignment and clustered tree files.
The trees will be very approximate in both cases.
--------------------------------------------------------------------------------
▶ Uncross
Detects and filters cross-talk (sample mis-assignment) in an OTU table using the UNCROSS algorithm.
--------------------------------------------------------------------------------
END: 02:15:16 06/08/2019
================================================================================



### ITS

================================================================================
Amplicon Size Variant With Additional Clustering
--------------------------------------------------------------------------------
Project: p327 newrequest_190730
Sample: zh_ch_us_ITS_trimmed_qfiltered_renamed
Locus: ITS
--------------------------------------------------------------------------------
UPARSE & UNOISE3
usearch v11.0.667_i86linux64
--------------------------------------------------------------------------------
START: 09:11:05 08/08/2019
--------------------------------------------------------------------------------
▶ Deduplicate Amplicons
Dereplicates amplicons to obtain unique amplicons.
Determine error rates of amplicon reads.
--------------------------------------------------------------------------------
▶ UPARSE (min abundance = 2)
Clusters OTU at 97% using the UPARSE-OTU algorithm.
Number of OTUs: 7727
--------------------------------------------------------------------------------
▶ UNOISE3 (zero-radius OTUs, min abundance = 8)
Uses the UNOISE algorithm to perform denoising (error-correction) of amplicon reads.
Number of ZOTUs: 5198
--------------------------------------------------------------------------------
▶ Additional Clustering (-id 0.99, -centroids)
Clusters ZOTUs at different identity levels (i.e. 97%,98% and 99%).
Number of ZOTUs with additional clustering at 99%: 4246
Number of ZOTUs with additional clustering at 98%: 3737
Number of ZOTUs with additional clustering at 97%: 3414
--------------------------------------------------------------------------------
▶ Count Table (-strand plus, -id 0.97)
Generates OTU count tables by mapping reads to OTUs.
--------------------------------------------------------------------------------
▶ ZOTU Table Report
Creates a report/summary from an OTU table.
--------------------------------------------------------------------------------
▶ Octave plots
Octave plots with low-abundance (Z)OTUs and cross-talk information.
--------------------------------------------------------------------------------
▶ Alignment and Tree (MSA: muscle/3.8.1551; CLU: Clustering Method)
Creates multiple sequences alignment and clustered tree files.
The trees will be very approximate in both cases.
--------------------------------------------------------------------------------
▶ Uncross
Detects and filters cross-talk (sample mis-assignment) in an OTU table using the UNCROSS algorithm.
--------------------------------------------------------------------------------
END_UNOISE: 08:54:26 09/08/2019
================================================================================










## =======================================================================================
## =======================================================================================
## H | Taxonomy (by Jean-Claude Walser)
## =======================================================================================
## =======================================================================================

### 16S

==========================================================================================
Taxonomic Assignment Predictions with SINTAX
------------------------------------------------------------------------------------------
Project: p327 newrequest_190730
Sample: zh_ch_us_trimmed_qfiltered_renamed
Locus: 16S
==========================================================================================
SINTAXv11.0.667_i86linux64
Database: 16S/SILVA_128_16S_utax.fa
Tax filter: 0.85
Workflow Summary:
 (a) Adjust DB according to amplicons (usearch_global; strand both; id 0.7)
 (b) Assign taxa (sintax; strand both; sintax_cutoff 0.85)
 (c) Reformat tax information for phyloseq import
 (d) Combine count table and taxa
START_Sintax: 10:50:57 10/08/2019
 Start_F1_trimDB: 10:50:57 10/08/2019
 End_F1_trimDB: 11:52:49 10/08/2019
 Start_F1_Unique_records: 11:52:49 10/08/2019
 End_F1_Unique_records: 11:53:13 10/08/2019
 Start_F1_FilterG: 11:53:13 10/08/2019
 End_F1_FilterG: 11:53:23 10/08/2019
 Start_F1_Build_UPD: 11:53:23 10/08/2019
 End_F1_Build_UPD: 11:54:15 10/08/2019
 Start_F2_for_OTU: 11:54:15 10/08/2019
 End_F2_for_OTU: 13:00:30 10/08/2019
 Start_F2_for_ZOTU: 13:00:30 10/08/2019
 End_F2_for_ZOTU: 16:25:13 10/08/2019
 Start_TaxSummary: 16:25:13 10/08/2019
 End_TaxSummary: 16:25:13 10/08/2019
 Start_ChimeraCheck: 16:25:13 10/08/2019
 End_ChimeraCheck: 16:52:59 10/08/2019
END_Sintax: 16:53:01 10/08/2019
==========================================================================================



### ITS

==========================================================================================
Taxonomic Assignment Predictions with SINTAX
------------------------------------------------------------------------------------------
Project: p327 newrequest_190730
Sample: zh_ch_us_ITS_trimmed_qfiltered_renamed
Locus: ITS
==========================================================================================
SINTAXv11.0.667_i86linux64
Database: ITS/UNITE_UTAX_V7.2_10.10.2017.fasta
Tax filter: 0.85
Workflow Summary:
 (a) Adjust DB according to amplicons (usearch_global; strand both; id 0.7)
 (b) Assign taxa (sintax; strand both; sintax_cutoff 0.85)
 (c) Reformat tax information for phyloseq import
 (d) Combine count table and taxa
START_Sintax: 10:12:45 19/08/2019
 Start_F1_trimDB: 10:12:45 19/08/2019
 End_F1_trimDB: 10:25:15 19/08/2019
 Start_F1_Unique_records: 10:25:15 19/08/2019
 End_F1_Unique_records: 10:25:16 19/08/2019
 Start_F1_Build_UPD: 10:25:16 19/08/2019
 End_F1_Build_UPD: 10:25:18 19/08/2019
 Start_F2_for_OTU: 10:25:18 19/08/2019
 End_F2_for_OTU: 10:25:18 19/08/2019
 Start_F2_for_ZOTU: 10:25:18 19/08/2019
 End_F2_for_ZOTU: 10:25:18 19/08/2019
 Start_TaxSummary: 10:25:18 19/08/2019
 End_TaxSummary: 10:25:19 19/08/2019
 Start_ChimeraCheck: 10:25:19 19/08/2019
 End_ChimeraCheck: 10:30:53 19/08/2019
END_Sintax: 10:30:54 19/08/2019
==========================================================================================