MIDORI - MiFish-U
In-silico PCR
- Primer: MiFish-U
- Reference: MIDORI-based, fish-related fragments
# In-silico PCR of the MiFish primers against the MIDORI fish fragments,
# repeated once per allowed number of primer differences (E = 0..3).
# Expected amplicon size is 172 nt without primers; amplicons are accepted
# within the 75-275 window given by -minamp/-maxamp.
# NOTE(review): ${u} is not defined in this section - presumably it holds the
# USEARCH executable; it is left unquoted exactly as in the original call.
for E in 0 1 2 3; do
  ${u} -search_pcr MIDORI_FishHits.fa -db Primer_MiFish.fa -strand both \
    -maxdiffs "${E}" -minamp 75 -maxamp 275 \
    -pcrout "MIDORI_MiFish_e${E}.hits" \
    -ampout "MIDORI_MiFish_e${E}.fa"
done
PCR Fragment-Length Distribution:
# N(error0) = 248
# 210 ************************************************************ 178
# 220 ************************ 70
# N(error1) = 4,836
# 210 ************************************************************ 2,633
# 220 ************************************************* 2,163
# 230 * 38
# 240 0
# 250 1
# 260 1
# N(error2) = 5,759
# 210 ************************************************************ 3,162
# 220 *********************************************** 2,462
# 230 *** 133
# 240 0
# 250 1
# 260 1
# N(error3) = 6,349
# 210 ************************************************************ 3,347
# 220 ************************************************** 2,801
# 230 **** 199
# 240 0
# 250 1
# 260 1
# Note: error == mismatch
Problematic Hits
We remove PCR hits that have primer mismatches within the last 2 positions of the 3'-end. This choice was made for reproducibility with Thang et al. (2020).
# Split the PCR hits of every mismatch level into kept and rejected amplicons.
# A hit is only considered when field 5 is "MiFish-U-F" and field 8 is
# "MiFish-U-R". It is rejected when the last two characters of field 7 or of
# field 10 contain an upper-case A, T, C or G, and kept otherwise.
# NOTE(review): this relies on the hits file marking primer mismatches with
# upper-case bases in fields 7 and 10 - confirm against the -pcrout format.
for E in 0 1 2 3; do
  echo "Mis-Match: ${E}"

  # Kept hits -> *_clean.fa (FASTA: field 1 as header, field 12 as sequence).
  awk -F"\t" '
    function tail2(s) { return substr(s, length(s) - 1, length(s)) }
    $5 == "MiFish-U-F" && $8 == "MiFish-U-R" && !(tail2($7) ~ /[ATCG]/ || tail2($10) ~ /[ATCG]/) {
      print ">" $1 "\n" $12
    }
  ' "MIDORI_MiFish_e${E}.hits" > "MIDORI_MiFish_e${E}_clean.fa"

  # Rejected hits -> *_out.fa (same FASTA layout). The shell redirection
  # creates the file even when no hit is rejected.
  awk -F"\t" '
    function tail2(s) { return substr(s, length(s) - 1, length(s)) }
    $5 == "MiFish-U-F" && $8 == "MiFish-U-R" && (tail2($7) ~ /[ATCG]/ || tail2($10) ~ /[ATCG]/) {
      print ">" $1 "\n" $12
    }
  ' "MIDORI_MiFish_e${E}.hits" > "MIDORI_MiFish_e${E}_out.fa"
done
Results (sequence counts):
## Clean Hits
# NOTE(review): `cfa` is not defined in this section - presumably a helper that
# prints the number of FASTA records per file; confirm where it comes from.
# The commented lines below each call are the recorded output of that call.
cfa MIDORI_MiFish_e*_clean.fa
# MIDORI_MiFish_e0_clean.fa: 248
# MIDORI_MiFish_e1_clean.fa: 4,833
# MIDORI_MiFish_e2_clean.fa: 5,754
# MIDORI_MiFish_e3_clean.fa: 6,340
## Remove Species
# Same count for the *_out.fa files, one per mismatch level.
cfa MIDORI_MiFish_e*_out.fa
# MIDORI_MiFish_e0_out.fa: 0
# MIDORI_MiFish_e1_out.fa: 3
# MIDORI_MiFish_e2_out.fa: 5
# MIDORI_MiFish_e3_out.fa: 9
## Removed Records (mismatches at the 3'-end):
* Species: Acrocheilus alutaceus (ID:67537)
* Species: Fonchiiloricaria nanodon (ID:912662)
* Species: Gobiomorus dormitor (ID:308076)
* Species: Gymnocypris potanini (ID:263516)
* Species: Harttiella lucifer (ID:1137733)
* Species: Kyphosus bigibbus (ID:990599)
* Species: Siniperca knerii (ID:214812)
* Species: Siniperca scherzeri (ID:228252)
* Species: Tetraodon lineatus (ID:1220758)
Summary
Step-by-step
# Write the sorted, de-duplicated "<Name> (TaxID:<id>)" entries of one
# taxonomic rank found in the headers of a FASTA file.
#   $1 - rank tag used in the header (c, o, f or g)
#   $2 - input FASTA file
#   $3 - output text file (always created, possibly empty)
# Stage 1 keeps, for every line whose first ",<tag>:"-separated field contains
# ">", the text following the first ",<tag>:". Stage 2 splits that text on ","
# or "_" and prints the first two pieces; lines with an empty first piece
# (e.g. headers without the tag) are dropped.
_rank_list() {
  awk -F",$1:" '{if($1 ~ />/) print $2}' "$2" |
    awk -F",|_" '{if(length($1)>0) print $1" (TaxID:"$2")"}' |
    sort -u > "$3"
}

# For every mismatch level, list the classes, orders, families and genera
# present among the clean amplicons. Each item is "<header tag>:<file suffix>".
for E in {0..3}
do
  for rank in c:Class o:Order f:Family g:Genus
  do
    _rank_list "${rank%%:*}" \
      "MIDORI_MiFish_e${E}_clean.fa" \
      "MIDORI_MiFish_e${E}_${rank#*:}.txt"
  done
done
# Final step: call the project-local PrintSummary.sh with the file-name prefix
# "MIDORI_MiFish".
# NOTE(review): the script is not shown here - presumably it locates its input
# files via this prefix; confirm in PrintSummary.sh.
./PrintSummary.sh MIDORI_MiFish # Create Simple Summary Report including WebLogo