#!/bin/perl
use warnings;
use strict;

unless (@ARGV == 4) {
  print "usage perl GBS_fastq_Demultiplexer_vX.pl barcodes.txt R1.fastq R2.fastq thenameIwantStuckonEveryfile\n";
  die;
}

my $bar = $ARGV[0]; #All barcodes, GBS_2enzyme_barcodes.txt
my $fastq_1 = $ARGV[1];
my $fastq_2 = $ARGV[2];
my $out = $ARGV[3];
my %ph;
#make true to print R1 and R2 to separate files
my $print_mates = 1;
my $read1_enzyme = "TGCAG"; #Enzyme 1 cut site after cutting
my $read2_enzyme = "CGG"; #Enzyme 2 cut site after cutting
#my $print_mates;
# get the barcodes

my %bar;
my %bar_permutations;
my @bases = qw(A T C G N);
open IN, $bar;
while (<IN>){
  chomp;
  my @a = split (/\t/,$_);
  my $direction = $a[0];
  my $n = $a[1];
  my $barcode = $a[2];
  $bar{$direction}{$n}=$barcode;
  #Make all possible permutations of barcodes:
  foreach my $i (0..length($barcode)-1){
    my @parts = split(//,$barcode);
    foreach my $base (@bases){
      my $permutation;
      foreach my $j (0..length($barcode)-1){
        if ($j eq $i){
          $permutation .= $base;
        }else{
          $permutation .= $parts[$j];
        }
      }
      $bar_permutations{$direction}{$n}{$permutation} = $n;
    }
  }
  my $filenameR1 = "$out"."_R1.fastq";
  my $filenameR2 = "$out"."_R2.fastq";
  if (-e $filenameR1) {
    system ("rm $filenameR1");
  }
  if (-e $filenameR2) {
    system ("rm $filenameR2");
  }
}
close IN;
unless($fastq_1 =~ /\.gz$/){
  print "Input fastq files should be gzipped\n";
  exit;
}
open (FAST1, "gunzip -c $fastq_1 |");
open (FAST2, "gunzip -c $fastq_2 |");
my $reads_removed = 0;
my $total_reads = 0;
my $c;
my $line_tracker;
my %read_info;
while (my $seq1 = <FAST1>){
  my $seq2 = (<FAST2>);
  chomp $seq1;
  chomp $seq2;
  $c++;
  $line_tracker++;
  if ($line_tracker == 1){
    $read_info{"R1"}{"header"} = $seq1;
    $read_info{"R2"}{"header"} = $seq2;
  }
  if ($line_tracker == 2){
    $read_info{"R1"}{"seq"} = $seq1;
    $read_info{"R2"}{"seq"} = $seq2;
  }
  if ($line_tracker == 4){
    $line_tracker = '';
    $total_reads++;
    $read_info{"R1"}{"qual"} = $seq1;
    $read_info{"R2"}{"qual"} = $seq2;
    
    my $read1 = $read_info{"R1"}{"seq"};
    my $qual1 = $read_info{"R1"}{"qual"};
    my $read2 = $read_info{"R2"}{"seq"} ;
    my $qual2 = $read_info{"R2"}{"qual"};
    my %bar1_number;
    my %bar2_number;
    my $bar_number;
    my $bc1seq;
    my $bc2seq;
    my $bc1seq_real; #The observed barcode sequenced include possible errors
    my $bc2seq_real;
    my $singlebarcode;
    my $found_barcode;
    foreach my $i (4..9){ 
      my $re_site1 = substr($read1,($i),length($read1_enzyme));
      if ($re_site1 eq $read1_enzyme){
        my $bc1 = substr($read1,0,$i);
        foreach my $n (1..192){
          if ($bar_permutations{"F"}{$n}){
            if ($bar_permutations{"F"}{$n}{$bc1}){
              $found_barcode++;
	      $reads_removed++;
	      my $preview = substr($read1,0,15);
#	      print STDERR "Found F$n:$bc1 in $preview\n";
              goto MOVEON;
            }
          }
        }
      }
      my $re_site2 = substr($read2,($i),length($read2_enzyme));
      if ($re_site2 eq $read2_enzyme){
        my $bc2 = substr($read2,0,$i);
        foreach my $n (1..192){
          if ($bar_permutations{"R"}{$n}){
            if ($bar_permutations{"R"}{$n}{$bc2}){
              $found_barcode++;
	      $reads_removed++;
	      my $preview = substr($read2,0,15);
#	      print STDERR "Found R$n:$bc2 in $preview\n";
              goto MOVEON;
            }
          }
        }
      }
    }
    my $tmp = $read_info{"R1"}{"header"}."\n$read1\n+\n$qual1\n";
    push(@{$ph{$out."_R1.fastq"}},$tmp);
    $tmp = $read_info{"R2"}{"header"}."\n$read2\n+\n$qual2\n";
    push(@{$ph{$out."_R2.fastq"}},$tmp);
    MOVEON:
    if ($c==1000000) {
      foreach my $file (keys %ph){
        open OUT, ">>$file";
        foreach my $read (@{$ph{$file}}){
          print OUT "$read";
        }
      }
      %ph = ();
      $c = 0;
    }
  }
}
foreach my $file (keys %ph){
    open OUT, ">>$file";
    foreach my $read (@{$ph{$file}}){
        print OUT "$read";
    }
}
    
my $percent_removed = $reads_removed/$total_reads;
my $printed_reads = $total_reads - $reads_removed;
print STDERR "$out\t$total_reads total reads\n";
print STDERR "$out\t$printed_reads kept reads\n";
print STDERR "$out\t$reads_removed reads removed\n". 
print STDERR "$out\t$percent_removed% removed\n";
