From 25d730aa80ee3ba8b44e4b05a62ac387102cfcb4 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Tue, 10 Aug 2021 15:47:22 +0200
Subject: [PATCH 01/51] Add script for the preprocessing step

---
 bin/extractReads.pl | 487 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 487 insertions(+)
 create mode 100644 bin/extractReads.pl

diff --git a/bin/extractReads.pl b/bin/extractReads.pl
new file mode 100644
index 0000000..2328434
--- /dev/null
+++ b/bin/extractReads.pl
@@ -0,0 +1,487 @@
+#!/usr/bin/perl -w
+binmode STDIN,  ':encoding(UTF-8)';
+binmode STDOUT, ':encoding(UTF-8)';
+binmode STDERR, ':encoding(UTF-8)';
+
+=head1 NAME
+
+ extractReads.pl
+ 
+=head1 DESCRIPTION
+
+ Initialisation du pipeline wf-Illumina-nf
+ Decoupage de la samplesheet
+ Creation du run dans NGL-Bi
+ Parametrage et lancement des analyses qualite via wf-Illumina-nf/main.nf
+ 
+=head1 SYNOPSIS
+
+ extractReads.pl -h | [-sequencer|s type_sequencer] 2>> /work/sbsuser/Logs/cronMACHINE.txt
+
+=head1 OPTIONS
+
+ -sequencer|s : Type de sequenceur (MiSeq ou NovaSeq) -> Obligatoire
+ -isTest|t : Activer le mode test -> Facultatif
+ -mailTesteur|m : Preciser l'adresse mail a laquelle envoyer les messages de log -> obligatoire si test
+ -samplesheetDemux|i : i comme IEM pour préciser la samplesheet à prendre en compte -> Facultatif
+ -jFlow|j : pour préciser la feuille jflow à prendre en compte  -> Facultatif
+ 
+=head1 EXEMPLES
+
+ perl extractReads.pl -s MiSeq
+ perl extractReads.pl -s MiSeq -t -m hermione.granger@poudlard.uk
+
+ 
+=head1 DEPENDENCIES
+
+ - Web service permettant la recuperation des adresses mails a partir de l'id 
+
+=head1 AUTHOR
+ Jules Sabban pour Plateforme genomique Toulouse (get-plage.bioinfo@genotoul.fr)
+ 
+=cut
+
+###################################################################
+#
+#						LIBRAIRIES
+#
+###################################################################
+use strict;
+use Getopt::Long;
+use utf8;
+use Log::Log4perl ();
+use Log::Log4perl qw(:easy);#FATAL ERROR WARN INFO DEBUG TRACE
+#use File::Util;
+use File::chdir;
+use File::Copy "cp";
+use File::Copy "move";
+use Cwd 'abs_path';
+use Pod::Usage;
+
+
+
+###################################################################
+#
+#						MAIN
+#
+###################################################################
+MAIN:
+{
+	###############################################################
+	#						INITIALISATION
+	###############################################################	
+	
+	# Initialisation du log
+	Log::Log4perl -> easy_init( { 	level    => $TRACE,
+                            		utf8     => 1,
+                            		layout   => '[%d][%p> extractReads.pl:L%L %M] %m%n' } );
+	my $logger = Log::Log4perl -> get_logger();
+
+	# Command-line options (the defaults are "" rather than undef: the checks below compare against "")
+	my $help = 0 ;
+	my $sequencer = "";
+	my $demuxType_int;
+	my $demuxType;
+	my $file_samplesheet = "";
+	my $file_jflow = "";
+	my $arg_timestamp = ""; # to be removed
+	my $arg_jobid = "";	# to be removed
+	my $mailTEST = "";
+	my $checkTest = "";
+	# An unknown or malformed option is fatal: ignoring e.g. a mistyped test flag would run the script in production mode
+	GetOptions ('help|h' => \$help, 
+				'sequencer|s=s' => \$sequencer,
+				'samplesheetDemux|i:s'=> \$file_samplesheet, # i for IEM...
+				'jFlow|j:s'=> \$file_jflow,
+				'timestamp:i'=>\$arg_timestamp,
+				'demuxJobid:s'=>\$arg_jobid,
+				'mailTesteur|m:s' => \$mailTEST,
+				'isTest|test|t' => \$checkTest, # --test is accepted as an alias of --isTest
+	) or $logger -> logdie("Options invalides : voir extractReads.pl -h");
+	
+	if($help){
+		pod2usage(-verbose => 1 );
+	}
+
+	print STDERR "\n";
+	print STDERR "#    #    #    #    #    #    #    #    #    #\n";
+	print STDERR "#    #    extractReads.pl is happening  #    #\n";
+	print STDERR "#    #    #    #    #    #    #    #    #    #\n";
+	print STDERR "\n";
+		
+	$logger -> info("VÃ©rification des arguments");
+
+	# Verification du sÃ©quenceur
+	$sequencer ne ""? $logger -> info("\tSequenceur = " . $sequencer) : $logger -> logdie("\tPas de sÃ©quenceur prÃ©cisÃ©...");
+	unless ($sequencer eq "MiSeq" or $sequencer eq "NovaSeq"){
+		$logger -> logdie("Erreur dans le nom du sequenceur : ".$sequencer." n'existe pas");
+	}
+
+	# vÃ©rification de la SS
+	$file_samplesheet ne "" ? $logger -> info("\tSamplesheet fournie = " . $file_samplesheet ." !") : $logger -> info("\tPas de samplesheet fournie!");
+	
+	# Gestion du test et/ou des mails
+	$mailTEST ne ""? $logger -> info("\tmailTEST = " . $mailTEST) : $logger -> info("\tPas de mailTEST!");
+	$checkTest ne ""? $logger -> info("\tcheckTEST = " . $checkTest) : $logger -> info("\tPas en mode test!");
+	$checkTest = $checkTest ne ""? 1 : 0;
+	# Si on est en test, on veut une adresse mail!
+	$logger -> logdie("MODE TEST ACTIVE, MERCI DE DONNER UN MAIL AVEC L'OPTION -m MONMAIL\@MONSERVEUR") if( ($checkTest) && ($mailTEST eq "") );
+	my $raw_data="";
+	my $path_to_scripts="";
+	if ($checkTest) {
+		$raw_data = $sequencer eq "MiSeq"? "/home/sbsuser/work/Nextflow/wf-illumina-nf/data_test/MiSeq" : "/home/sbsuser/work/Nextflow/wf-illumina-nf/data_test/NovaSeq";
+		$path_to_scripts=abs_path($0);
+	} else {
+		$raw_data="/$sequencer";
+		$path_to_scripts=abs_path($0);
+	}
+	$logger -> info("\tLes donnÃ©es brutes sont ici : $raw_data");
+	
+	# Configuration API NGL-Bi
+	my $ngl_api_base_prod = "/save/sbsuser/scripts-ngs/NGL-Bi_client_Current/IG/SystemeInteractionNGL-Bi/";
+	my $ngl_api_base_test = "/save/devcrgs/src/NGL_REST_Client/ngl-bi_client/IG/SystemeInteractionNGL-Bi/";
+	my $ngl_api_base = $checkTest? $ngl_api_base_test : $ngl_api_base_prod;
+	my $ngl_bi_scripts="/save/sbsuser/scripts-ngs/NGL-Bi_client_Current/GeT/perl";
+	$ENV{'APIPERL'}=$ngl_api_base;
+	$ENV{'CONFFILE'}=$ngl_api_base."conf/prod_illumina_qc.conf";
+	loadConfFile();
+	unshift @INC,  $ngl_api_base."Common_tools/src/perl/lib/";
+	unshift @INC,  $ngl_api_base."DB_tools/src/perl/lib/";
+	require illumina;
+	require json;
+	$logger -> info("Variables d'environnement pour NGL-Bi chargÃ©es depuis : ".$ngl_api_base);
+	# Initialisation des variables
+	my $runExistsInNGL = 0;
+	my $NGLBiRunCreatedFile = 'RunNGL-Bi.created';
+	my $NGLBiRunName = "";
+	my $NGLSQExperimentCode;
+	
+	# ParamÃ©trage gÃ©nÃ©ral
+	my $prefixLogFolder = "PipelineLogs_Lane";
+
+	
+	###############################################################
+	#					RECHERCHE SAMPLESHEET
+	###############################################################
+	## Recherche SS
+	### parcours des sous rÃ©pertoires de /$sequencer
+	my $regexpPSS = '^[0-9]{8}_.*_BULKDEMUX_.*csv$';
+	#my @run_directories = $f -> list_dir('/'.$sequencer => {dirs_only = 1, no_fsdots = 1}=; # ls 
+	my @run_directories = `ls $raw_data`; $? and $logger -> logdie("[Erreur] Impossible de rÃ©cupÃ©er la liste des dossiers de $raw_data}");
+	foreach my $dir (@run_directories){
+		chomp($dir);
+	    #my @RunInfo = ();
+	    my @RunInfo = split("_", $dir); # [$#dir]
+	    # Extraction des infos contenues dans le nom du rÃ©pertoire
+		my $runDate = $RunInfo[0];
+		my ($annee, $mois, $jour) = ($runDate =~ m/([0-9]{2})([0-9]{2})([0-9]{2})/);
+		my $sequencerID = $RunInfo[1];
+		my $barcodeFlowcell; # Sert Ã© l'unicitÃ© des noms des .fastq.gz
+		if ($RunInfo[3] =~ m/000000000-/){
+			my @FCBarcode = split('-', $RunInfo[3]);
+			$barcodeFlowcell = $FCBarcode[$#FCBarcode];
+		} else {
+			$barcodeFlowcell = $RunInfo[3];
+		}
+
+		# Recherche de la SS
+		$logger -> info("Recherche de SampleSheet dans $raw_data/$dir");
+		chdir "$raw_data/$dir" or $logger -> logdie("[Erreur] Impossible de se dÃ©placer dans $raw_data/$dir");
+		#$CWD = "$raw_data/$dir" or $logger -> logdie("[Erreur] Impossible de se dÃ©placer dans $raw_data/$dir");
+		my $preSampleSheet = "PreSampleSheet.csv";
+		my $lastPSS = `ls -t | egrep $regexpPSS | head -1`; $? and $logger -> logdie("[Erreur] Recup de la derniere BulkSS");
+		chomp($lastPSS);
+		if( $lastPSS ne ""){
+			$logger -> info("Check de PSS ".$lastPSS);
+			my $checkPSS = check_my_samplesheet($lastPSS, $preSampleSheet);
+			
+			###############################################################
+			#					INTEGRATION NGL-Bi
+			###############################################################
+			$NGLSQExperimentCode = getNGLSeqExperimentCode($preSampleSheet);
+			$runExistsInNGL = ($NGLSQExperimentCode =~ m/^\s*-\s*$/) ? 0 : 1; # recomputed for each run directory; a lone "-" (blanks allowed) marks a run unknown to NGL-SQ
+			if ($runExistsInNGL){
+				if (! -e $NGLBiRunCreatedFile){
+					# INTEGRATION DU RUN A NGL-BI	#   #   #   #   #   #   #   #   #   #   #
+					$logger -> info("Pas de fichier $NGLBiRunCreatedFile dans $raw_data/$dir -> Le run NGL-Bi semble ne pas exister ");
+					my $commandNGLBiRun = "perl $ngl_bi_scripts/createNGL-BiRun.pl --sequencer $sequencer --NGLSqExperimentCode $NGLSQExperimentCode";
+					$logger -> info("\tCreation du run avec : ".$commandNGLBiRun);
+					my $result_commandNGLBiRun = `$commandNGLBiRun 2>&1`; 
+						$? and $logger -> logdie("[Erreur]Lancement de createNGL-BiRun.pl\n".$result_commandNGLBiRun);
+					$logger -> info("\n".$result_commandNGLBiRun);
+				}else{
+					$logger -> info("Le run existe dÃ©jÃ  dans NGL-Bi");
+				}
+			}else{
+				$logger -> info("\tRun en autonomie : n'existe pas dans NGL-SQ");
+				`touch $NGLBiRunCreatedFile`; $? and $logger -> logdie("[Erreur] Impossible de crÃ©er le fichier");
+			}
+		} else {
+			$logger -> logdie("Aucune SampleSheet trouvÃ©e dans $raw_data/$dir");
+		}
+	
+		# Recherche du fichier de fin de run
+		my $file2checkForEndOfRun = $sequencerID eq "M07093" ? "RTAComplete.txt" : "CopyComplete.txt";
+		if (! -e $file2checkForEndOfRun){
+			$logger -> info("Pas de fichier de fin de run -> sortie du script!");
+			exit;
+		} else {
+			# DÃ©tection du nombre de lane
+			$logger -> info("DÃ©tection du nombre de headers") ;
+			my $nbHeader = `grep "Header" $preSampleSheet | wc -l` ;  $? and $logger -> logdie("Comptage de [Header] en echec");
+			chomp($nbHeader);
+			$logger -> info("\t$preSampleSheet -> Nb de [header] = ".$nbHeader );
+			
+			# CrÃ©ation des rÃ©pertoires de logs par lane
+			$logger -> info("DÃ©tection des rÃ©pertoires de log");
+			foreach my $count (1..$nbHeader){
+				my $logFolder = $prefixLogFolder.$count;
+				if (! -d "$raw_data/$dir/$logFolder"){ # Si le rep n'existe pas, alors on le crÃ©e
+					$logger -> info("\tCrÃ©ation du rÃ©pertoire".$logFolder." + chmod 770" );
+					mkdir "$raw_data/$dir/$logFolder" or $logger -> logdie("Impossible de crÃ©er le rÃ©pertoire ".$logFolder );
+					chmod 0770, "$raw_data/$dir/$logFolder" or $logger -> logdie($!);
+				} else {
+					$logger -> info("\tLe rÃ©pertoire ".$logFolder." existe dÃ©jÃ©");
+				}
+			}
+			
+			###############################################################
+			#					DECOUPAGE SAMPLESHEET
+			###############################################################
+			# Split the bulk sample sheet: each "[Header]" line starts a new IEM sample sheet, written
+			# into the log folder of the lane read from its data rows (always lane 1 on a MiSeq).
+			$logger -> info("DÃ©coupe de ".$preSampleSheet) ;
+			my $laneExtraite = '';		# lane of the section being read
+			my $counterIEMFiles = 0;	# number of "[Header]" lines met so far
+			my $IEMFileContent = '';	# text of the section being read
+			my $IEMFilePrefixe = $preSampleSheet;
+			$IEMFilePrefixe =~ s/BULKDEMUX/IEM/g; # Replace Bulk by IEM
+			$IEMFilePrefixe =~ s/\.csv$//; # drop the extension (literal dot, end of the name only) before the lane suffix
+			$IEMFilePrefixe .= '_Lane';
+
+			open(my $handle, '<', $preSampleSheet) or $logger -> logdie("Impossible d'ouvrir ".$preSampleSheet." : ".$!);
+			chomp(my @lines = <$handle>);
+			close $handle;
+			foreach my $line (@lines) {
+				if ($line eq '[Header]'){
+					if($counterIEMFiles > 0){ # the previous section is complete: write it
+						my $subSampleSheet = "$raw_data/$dir/${prefixLogFolder}${laneExtraite}/${IEMFilePrefixe}_IEM_Lane${laneExtraite}.csv";
+						print2file($IEMFileContent, $subSampleSheet);
+					}
+					$IEMFileContent = '';
+					$counterIEMFiles++;
+				}
+				$IEMFileContent .= $line."\n";
+				# Only a data row ("<digit>,...") updates the lane: other lines (blank, "[Header]", ...) must not erase it
+				if ($sequencer eq 'MiSeq'){ $laneExtraite = '1'; }
+				elsif ($line =~ m/^(\d),/){ $laneExtraite = $1; }
+			}
+			my $subSampleSheet = "$raw_data/$dir/${prefixLogFolder}${laneExtraite}/${IEMFilePrefixe}_IEM_Lane${laneExtraite}.csv"; # last section
+			print2file($IEMFileContent, $subSampleSheet);
+		
+			# DÃ©sactivation de la SampleSheet
+			$logger -> info("DÃ©sactivation de la SampleSheet.");
+			move($lastPSS, $lastPSS.".old") or $logger -> logdie("Le renommage de ".$lastPSS." en .old est en erreur ".$!);
+			
+			###############################################################
+			#					INTEROP DANS NEXTCLOUD
+			###############################################################
+			if (!$checkTest){
+				# RÃ©cupÃ©ration de l'annÃ©e pour le rÃ©pertoire de destination
+				my $year = "20".$annee;
+				
+				# Ecriture de la commande de synchronisation
+				my $aws_source = "$raw_data/$dir/";
+				my $aws_target = "s3://partage/externes/Illumina-SAV/$sequencer/$year/$dir"; #X:\partage\externes\Illumina-SAV\NovaSeq		[$#dir]
+				my $aws_prefixcmd = "aws s3 --endpoint-url https://s3r-tls.stockage.inra.fr";
+				
+				# Ecriture du script de lancement de synchronisation
+				my $aws_script_file = "scriptAWS_$sequencerID.sbatch";
+				my $aws_script = "#!/bin/sh \n";
+				$aws_script .= "#SBATCH -p wflowq\n#SBATCH -t 20\n#SBATCH --mem-per-cpu=200M\n";
+				$aws_script .= "#SBATCH -J $aws_script_file\n#SBATCH -e %x.e%j\n#SBATCH -o %x.o%j\n\n";
+				$aws_script .= "module load system/Python-3.6.7_shared\n";
+				$aws_script .= "$aws_prefixcmd sync $aws_source $aws_target ";
+				$aws_script .= "--exclude \"*\" --include \"[Rr]un[A-Za-z]*.xml\" --include \"InterOp/[A-Za-z]*.bin\" ";
+				$aws_script .= "--exclude \"InterOp/C[0-9]*.1*\"\n";
+				print2file($aws_script, "$aws_source/$aws_script_file");
+				
+				
+				# Lancement du script
+				my $sleepLastingForAWS = 300;
+				my $aws_launchcmd = "sbatch $aws_script_file";
+				my $aws_joboutput = `$aws_launchcmd`; $? and $logger -> logdie("Commande $aws_launchcmd impossible : ".$!);
+				my ($aws_jobID) = $aws_joboutput =~ m/Submitted batch job (\d+)/;
+				chomp($aws_jobID);
+				$logger -> info("\tDossier " . $aws_source." -> JobID : ".$aws_jobID."\nCommande exÃ©cutÃ©e : " . $aws_launchcmd );
+				
+				# Attente de la fin du job
+				my $boolOver = is_my_jobID_over($aws_jobID);
+				while (!$boolOver){
+					$boolOver = is_my_jobID_over($aws_jobID);
+					if (!$boolOver){
+						$logger -> info("\tEn attente de la fin de $aws_jobID, Ã© dans ".($sleepLastingForAWS/60)." minutes!");
+						sleep($sleepLastingForAWS); # toutes les 5 minutes (*60 = 300)
+					}
+				}
+				
+				# Check the stderr file of the sync job: if it is not empty, log an error and send a warning mail
+				if (-e $aws_script_file.".e".$aws_jobID){
+					$logger -> info("\tLe fichier d'erreur pour AWS existe bien!");
+					if (! -z $aws_script_file.".e".$aws_jobID){
+						my $testObjectPrefixe = $checkTest? "[TEST]" : "";
+						$logger -> error("\tLe fichier d'erreur pour AWS n'est pas vide, il a dÃ© se passer quelque chose de louche, Ã© investiguer!" );
+						my $mailRecipients = $checkTest? $mailTEST :'get-plage.bioinfo@genotoul.fr';
+						my $mailContent = "Une erreur est survenue lors de la copie des fichiers SAV vers CEPH avec la commande contenue dans\n${aws_source}${aws_script_file}.\n\n";
+						$mailContent .= "Le fichier d'erreur contient \n".`cat $aws_script_file.e$aws_jobID`;
+						send_and_check_my_email($mailContent, $testObjectPrefixe."Erreur sauvegarde SAV sur CEPH", $mailRecipients, $mailRecipients); # plain concatenation: "${$var}" was a symbolic dereference, fatal under strict refs
+					}else{
+						$logger -> info("\tLe fichier d'erreur pour AWS est vide, j'aime quand un plan se dÃ©roule sans accroc!");
+					}
+				}
+			} else { $logger -> info("Nous sommes en mode test : pas besoin de sauvegarder InterOp"); }
+
+			###############################################################
+			#					LANCEMENT DE NEXTFLOW
+			###############################################################
+			# crÃ©ation du dossier dans /work, se dÃ©placer dedans et lancer nextflow
+					
+		} # Fichier de fin de run trouvÃ©
+	} # fin parcours des rÃ©pertoires
+}
+
+###################################################################
+#
+#						FONCTIONS
+#
+###################################################################
+
+sub print2file {	# Write $content into $file2write (overwriting it); any I/O failure is fatal and logged
+	my ($content, $file2write) = @_;
+	my $logger = Log::Log4perl -> get_logger('print2file');
+	$logger -> info("\tEcriture du fichier $file2write");
+	# logdie instead of a bare "exit 1": the failing path and the system error reach the log
+	open(my $fh, '>', $file2write) or $logger -> logdie("Impossible d'ouvrir $file2write en ecriture : $!");
+	(print {$fh} $content and close $fh) or $logger -> logdie("Impossible d'ecrire dans $file2write : $!");
+}
+
+sub check_my_samplesheet{
+	# Normalise the sample sheet $file2check to Unix line endings, then install it as
+	# $file2write; a previous non-empty $file2write is first saved as OLD_$file2write.
+	# Returns 1 when $file2check exists and is not empty, 0 otherwise.
+	# Dies through the logger when the line-ending conversion or a copy fails.
+	my ($file2check, $file2write) = @_;
+	my $logger = Log::Log4perl -> get_logger('check_my_samplesheet');
+
+	# Strip a trailing newline BEFORE the name is used (it used to be done after the size test,
+	# the `file` call and the conversion, i.e. too late to help any of them).
+	chomp($file2check);
+	$logger -> info("Etude de $file2check");
+	unless (-s $file2check){ # missing or empty file: nothing to install
+		$logger -> info("Il n'y a pas de SampleSheet ".$file2check);
+		return 0;
+	}
+
+	$logger -> info("VÃ©rification des fins de ligne");
+	my $isfile2checkwindows = is_my_file_Windows($file2check);
+	$logger -> info("Sortie de is_my_file_Windows : " . $isfile2checkwindows);
+	if ($isfile2checkwindows){
+		$logger -> warn($file2check." a des fins de ligne Windows : on le convertit!");
+		convert_file_2_linux($file2check); # NOTE(review): not defined in this file - presumably provided by a required module, confirm
+		if (is_my_file_Windows($file2check)){ # the conversion must be visible to a second check
+			$logger -> logdie("La conversion dos2linux n'a pas fonctionnÃ©!");
+		} else {
+			$logger -> info("La conversion dos2linux a fonctionnÃ©!");
+		}
+	}else {
+		$logger -> info("Donc fins de ligne de " . $file2check . " : Linux");
+	}
+
+	$logger -> info("Etude de $file2write");
+	# NOTE(review): this compares the file NAMES, not their contents - confirm that is intended
+	if (-s $file2write and $file2write eq $file2check){
+		$logger -> info($file2write." est dÃ©jÃ© l'Ã©quivalent de ".$file2check.", on garde!");
+		return 1;
+	}
+	if (-s $file2write){ # keep the previous non-empty version before overwriting it
+		$logger -> info("Copie de ".$file2write." en OLD_$file2write");
+		cp($file2write,"OLD_$file2write") or $logger -> logdie("Impossible de copier le fichier ".$file2write);
+	}
+	# Install the checked sheet under its working name
+	$logger -> info("Copie de ".$file2check." en ".$file2write);
+	cp($file2check,$file2write)or $logger -> logdie("Impossible de copier le fichier ".$file2check);
+	return 1;
+}
+
+# Return the NGL-SQ experiment code of $samplesheet: the text following "Experiment Name," on the first such line. Dies through the logger when it cannot be read.
+sub getNGLSeqExperimentCode{
+	my ($samplesheet) = @_;
+	my $logger = Log::Log4perl -> get_logger('getNGLSeqExperimentCode');
+	open(my $fh, '<', $samplesheet) or $logger -> logdie("RÃ©cupÃ©ration de 'Experiment Name' dans '".$samplesheet."' en echec" );
+	my ($experimentName_ligne) = grep { m/Experiment Name/ } <$fh>; close $fh;	# first matching line, read in Perl: no shell, no unquoted file name
+	my ($NGLSQExperimentCode) = defined($experimentName_ligne) ? ($experimentName_ligne =~ m/Experiment Name,(.+)$/) : ();
+	$logger -> logdie("Echec de la rÃ©cup du code d'expÃ©rience") if( !defined($NGLSQExperimentCode) ); # checked before the first use of the value
+	$logger -> info("NGLSQExperimentCode : ".$NGLSQExperimentCode);
+	$logger -> info("L'expÃ©rience ne sera pas rentrÃ©e dans NGL-Bi car pas de correspondance dans NGL-SQ") if($NGLSQExperimentCode eq '-');
+	return $NGLSQExperimentCode;
+}
+
+# Load the NGL configuration file named by $ENV{CONFFILE} into %ENV.
+# Useful lines look like "KEY = VALUE" (surrounding blanks are trimmed); "#" starts a comment.
+# Dies through the logger if CONFFILE is unset, the file is missing or unreadable, or a line has no "=".
+sub loadConfFile{
+	my $logger = Log::Log4perl -> get_logger('loadConfFile');
+	unless ($ENV{CONFFILE}) {
+		$logger -> logdie("$0: Database configuration file not defined ! Initialize 'CONFFILE' with configuration file path in your environment");
+	};
+	my $dbconf_file = $ENV{CONFFILE};
+	unless (-f $dbconf_file) {
+		$logger -> logdie("$0: Database configuration file not exist: $dbconf_file. It's necessary for continue");
+	};
+	open(my $handle, '<', $dbconf_file) or $logger -> logdie("$0: Can't open database configuration file $dbconf_file: $!");
+	chomp( my @lines = <$handle> );
+	close $handle;
+	foreach my $line (@lines) {
+		$line =~ s/#.*//;
+		next unless ($line and $line =~ /\S/); # skip empty lines, and lines holding only blanks once the comment is gone
+		if ($line =~ /^([^=]*)=(.*)$/) { # split on the FIRST "=" so that a value may contain "="
+			my $key = $1;
+			my $value = $2;
+			s/^\s+|\s+$//g foreach ($key, $value); # trim both ends
+			$ENV{$key} = $value;
+		}else {
+			# Report the offending line itself: $_ is not the loop variable here
+			$logger -> logdie("$0: Can't load variable to database configuration file $dbconf_file in line: '$line'");
+		}
+	}
+}
+
+=head2 function is_my_file_Windows
+
+	Title		 : is_my_file_Windows
+	Usage		 : $boolean = is_my_file_Windows($file);
+	Prerequisite : None
+	Function	 : Retourne 0 si les fins de ligne du fichier sont linux, 1 si Windows
+	Returns	     : Nombre
+	Args		 : $file, string
+	Globals	     : none
+
+=cut
+
+sub is_my_file_Windows {
+	my ($file) = @_ ;
+	my $logger = Log::Log4perl -> get_logger('is_my_file_Windows');
+	$logger -> info("Fichier en entrÃ©e : " . $file);
+
+	# List-form pipe open: file(1) gets $file as one argument and no shell is involved, so a
+	# blank or a shell metacharacter in the name can no longer split or alter the command.
+	open(my $pipe, '-|', 'file', $file) or $logger -> logdie("[Erreur]Lancement de file");
+	my $fileOutput = join('', <$pipe>);
+	close($pipe) or $logger -> logdie("[Erreur]Lancement de file"); # false when file(1) exits with a non-zero status
+	chomp($fileOutput);
+	$logger -> info("Message de sortie : " . $fileOutput);
+	my $ismyfileWindows = ($fileOutput =~ /with CRLF.* line terminators/) ? 1 : 0;
+	$logger -> info("Le fichier est Windows") if ($ismyfileWindows);
+	return $ismyfileWindows;
+}
+
-- 
GitLab


From c6c4ab8a8fac90cc0c9e2b004865eb64a85a8c89 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Mon, 30 Aug 2021 16:53:22 +0200
Subject: [PATCH 02/51] Remove readsets creation #4

---
 bin/extractInfo.pl | 396 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 396 insertions(+)
 create mode 100644 bin/extractInfo.pl

diff --git a/bin/extractInfo.pl b/bin/extractInfo.pl
new file mode 100644
index 0000000..bedf21b
--- /dev/null
+++ b/bin/extractInfo.pl
@@ -0,0 +1,396 @@
+#!/usr/bin/perl -w
+ 
+=head1 NAME
+
+ extractInfo.pl
+ 
+=head1 DESCRIPTION
+
+ Récupère les informations de la SampleSheet et du RunInfo.xml pour écrire le masque récupéré par extractReads.pl
+ 
+=head1 SYNOPSIS
+
+ extractInfo.pl -h | -s SampleSheet.csv -r RunInfo.xml
+
+=head1 OPTIONS
+
+ -s : fichier SampleSheet.csv - input
+ -r : fichier RunInfo.xml - input
+
+=head1 VERSION
+ 
+=head1 DEPENDENCIES
+
+=head1 AUTHOR
+
+ Plateforme genomique Toulouse (get-plage.ngs@genotoul.fr)
+
+=cut
+#############################################################################################################################
+#
+#               LIBRAIRIES
+#
+#############################################################################################################################
+use strict;
+use Getopt::Long;
+use File::Copy "cp";
+use File::Basename;
+use SOAP::Lite;
+use List::MoreUtils qw(indexes);
+use Log::Log4perl ();
+use Log::Log4perl qw(:easy);#FATAL ERROR WARN INFO DEBUG TRACE
+use Pod::Usage;
+use Switch;
+use utf8;
+#local $/ = "\r\n";
+
+#############################################################################################################################
+#
+#               EXEMPLE DE RUNINFO.XML
+#
+#############################################################################################################################
+
+#MiSeq
+#    <Reads>
+#      <Read NumCycles="151" Number="1" IsIndexedRead="N" />
+#      <Read NumCycles="6" Number="2" IsIndexedRead="Y" />
+#      <Read NumCycles="151" Number="3" IsIndexedRead="N" />
+#    </Reads>
+
+#HiSeq3000 Run Simple + Dual index
+#    <Reads>
+#      <Read Number="1" NumCycles="151" IsIndexedRead="N" />
+#      <Read Number="2" NumCycles="8" IsIndexedRead="Y" />
+#      <Read Number="3" NumCycles="8" IsIndexedRead="Y" />
+#      <Read Number="4" NumCycles="151" IsIndexedRead="N" />
+#    </Reads>
+
+
+
+#############################################################################################################################
+#
+#               MAIN
+#
+#############################################################################################################################
+MAIN:
+{
+	# Initialisation du log
+	Log::Log4perl -> easy_init( { 	level    => $TRACE,
+                            		utf8     => 1,
+                            		layout   => '[%d][%p> extractInfo.pl:L%L %M] %m%n' } );
+	my $logger = Log::Log4perl -> get_logger();
+	$logger -> info("EntrÃ©e dans le programme");
+
+	# Parametre du programme
+	my $help = 0 ;
+	my $RunInfo;
+	my $SampleSheet;
+	
+	# Recuperation des options
+	GetOptions (    'help|h' => \$help,
+					'r=s' => \$RunInfo, #string
+					's:s' => \$SampleSheet); #string
+	if($help){
+		pod2usage(
+			-verbose => 99
+		);
+	}
+	
+	##################
+	# Programme 
+	##################
+	
+	my $SSformat;
+	my $checkIEM;
+	my $check10x;
+	my $config_file = "Run.conf";		# fichier d'output qui va etre pris comme input pour GenerateCasavaDir.pl pour les analyses standard.
+	#my $config10X_file = "Run_10X.conf";	# fichier d'output qui va etre pris comme input pour GenerateCasavaDir.pl pour les analyses 10X.
+	
+	if (-s $SampleSheet) {
+		$SSformat = check_my_SSFormat($SampleSheet);
+	}
+	$check10x = ($SSformat eq '10X') ? 1 : 0;
+	$checkIEM = ($SSformat eq 'IEM') ? 1 : 0;
+	
+	if( $checkIEM && $check10x){
+		$logger -> logdie("[Error] Le programme ne fonctionne pas quand on lui donne Illumina ET 10x.");
+	}
+	if( !$checkIEM && !$check10x){
+		$logger -> logdie("[Error] Le programme ne fonctionne pas sans samplesheet.");
+	}
+	$logger -> info("\tcheckIEM : ".$checkIEM." | check10x : ".$check10x);
+	
+	#  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #
+	#  Parsing du fichier RunInfo.xml
+	#  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #
+	$logger -> info("Analyse du fichier RunInfo.xml");
+	
+	# RÃ©cupÃ©ration de la taille des reads et d'index par le nombre de cycles
+	my $runInfo_lengthR1 = 0;
+	my $runInfo_lengthR2 = "";	
+	my $runInfo_lengthI1 = 0;
+	my $runInfo_lengthI2 = "";
+	
+	# Informations recuperees par capture de regex
+	my $versionRunInfo;
+	my $number = "";
+	my $numCycle = "";
+	my $isIndexed = "";
+	
+	# Configuration du run
+	my $runInfo_config = "single";			#dual|single|noindex
+	
+	open(F,"$RunInfo") or $logger -> logdie("[Erreur] Impossible d'ouvrir le fichier RunInfo.xml");
+	while(my $ligne =<F>){
+		chomp($ligne);		
+		# Recuperation de la version de RunInfo
+		#<RunInfo xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" Version="2"> -> MiSeq
+		#<RunInfo Version="5"> -> Nova		
+		if( $ligne =~ /\<RunInfo / ){
+			($versionRunInfo) = $ligne =~ m/<RunInfo.* Version="(\d)">/;
+			$logger -> info("\tVersion du RunInfo : ".$versionRunInfo);
+			next;
+		}
+		
+		next if( $ligne !~ /\s*<Read /); # Analyse uniquement sur les lignes de read
+		if( $versionRunInfo eq "2"){
+			($numCycle, $number, $isIndexed) = $ligne =~ m/<Read NumCycles="(\d+)" Number="(\d)" IsIndexedRead="(Y|N)" \/\>/;
+		} elsif( $versionRunInfo eq "5"){
+			($number, $numCycle, $isIndexed) = $ligne =~ m/<Read Number="(\d)" NumCycles="(\d+)" IsIndexedRead="(Y|N)"\/\>/;
+		} else {
+			$logger -> logdie("[Erreur] Le numero de version de RunInfo.xml ne correspond Ã  rien de connu" );
+		}
+		$logger -> info("\t\tRÃ©sultat des captures : NumCycle ".$numCycle." | number ".$number." | IsIndexed ".$isIndexed);
+		
+		# Interpretation pour connaitre les longueurs des cycles
+		if ($isIndexed eq "N" && $number eq 1){ # Read 1
+			$runInfo_lengthR1 = $numCycle;
+		}
+		if ($isIndexed eq "N" && $number ne 1){ #Read 2
+			$runInfo_lengthR2 = $numCycle;
+		}
+		if ($isIndexed eq "Y" && $runInfo_lengthI1 eq 0){ #Index 1
+			$runInfo_lengthI1 = $numCycle;
+		}
+		elsif ($isIndexed eq "Y" && $runInfo_lengthI1 ne 0){ #Index 2
+			$runInfo_lengthI2 = $numCycle;
+			$runInfo_config = "dual";
+		}
+	}
+	close F;
+	
+	$logger -> logdie("Impossible de capter les infos de numCycle, number, isIndexed" ) if (($numCycle eq "") ||  ($number eq "") || ($isIndexed eq ""));
+	$runInfo_config = "noindex" if($runInfo_lengthI1 eq 0);
+	$logger -> info("\tConfig : ".$runInfo_config.
+					" | R1 = ". $runInfo_lengthR1 ." | R2 = ". $runInfo_lengthR2.
+					" | I1 = ". $runInfo_lengthI1 ." | I2 = ". $runInfo_lengthI2);
+	
+	#  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #
+	# Traitement de la samplesheet
+	#  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #
+	
+	# Parametrage  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #
+	
+	my $lane_10x = "";
+	my $cmdOptions_10x = "";
+	
+	my $mask;
+	my $index1; my $index2; # Variables temporaires stockant l'info des colonnes index et index2 pour une lane donnÃ©e
+	my $lane; # Variable temporaire stockant le numÃ©ro de la lane Ã©tudiÃ©e
+	my %info_lane; #Tableau regroupant l'information de configuration des index par lane
+		
+	# Construction du dico %line_interpreter qui rassemble les diffÃ©rents formats de SS IEM possibles
+	my %line_interpreter; # MNL = mono lane | MTL = multi lane | SI = Single index | DI = Dual index
+	$line_interpreter{"MonoLane-SingleIndex"} = "Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,Sample_Project,Description";
+	$line_interpreter{"MonoLane-DualIndex"} = "Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,I5_Index_ID,index2,Sample_Project,Description";
+	$line_interpreter{"MultiLane-SingleIndex"} = "Lane,Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,Sample_Project,Description";
+	$line_interpreter{"MultiLane-DualIndex"} = "Lane,Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,I5_Index_ID,index2,Sample_Project,Description";
+	my $samplesheet_config = ""; # La config de la SS
+	my %indexHeaderSS_dict = (); # Dico qui associe clÃ©-colonne : valeur-index
+	
+	# Construction du dico %length_index qui associe la longueur d'un index 10X Ã  son prÃ©fixe
+	my %length_index;
+	$length_index{"SI-GA"}{"idx1"}=8; $length_index{"SI-GA"}{"idx2"}=0;
+	$length_index{"SI-NA"}{"idx1"}=8; $length_index{"SI-NA"}{"idx2"}=0;
+	$length_index{"SI-TT"}{"idx1"}=10; $length_index{"SI-TT"}{"idx2"}=10;
+	
+	# Parcours de la Samplesheet  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #
+	$logger -> info("Analyse du fichier ".$SampleSheet);
+	my $headerline_present = 0;
+	open(S,"$SampleSheet") or $logger -> logdie("[Erreur] Impossible d'ouvrir la SampleSheet $SampleSheet");
+	LINE: while(my $ligne = <S>){
+		chomp($ligne);
+		next LINE if not ($ligne =~ /.*,.*,.*/); # Sauter les lignes du dÃ©but qui ont 0 ou 1 virgule
+		if($ligne =~ /.*Sample_ID,.*/){
+			$headerline_present = 1;
+			# DÃ©termination du mode de la Samplesheet
+			# (Tout est dans ce bloc pour Ãªtre exÃ©cutÃ© une seule fois dans la boucle)
+			foreach my $SS_config (keys %line_interpreter){
+				$samplesheet_config = $SS_config if ($line_interpreter{$SS_config} eq $ligne);
+			}
+			$logger -> logdie("[Erreur] Aucune config ne correspond Ã  la SS :(") if( $samplesheet_config eq "" );
+			$logger -> info("\tSS en config $samplesheet_config");
+			
+			# Construction d'un tableau permettant de construire le dico qui associe le numÃ©ro de la colonne au nom de la colonne
+			my @headerSS_tab = split(/,/, $line_interpreter{$samplesheet_config});
+			foreach my $column_name (@headerSS_tab){
+				$indexHeaderSS_dict{$column_name} = indexes { $_ eq $column_name } @headerSS_tab;
+			}			
+			next LINE;
+		}
+		$logger -> logdie("[Erreur] La samplesheet $SampleSheet ne contient pas de header") if( !$headerline_present);
+		next LINE if($info_lane{$lane});  # On considÃ¨re que tous les Ã©chantillons d'une mÃªme lane sont indexÃ©s pareils
+		
+		my @list = split(/,/,$ligne);		
+		$index1 = $list[$indexHeaderSS_dict{'index'}]; # enregistre la sÃ©quence de l'index1 ou SI-GA...
+		$index2 = ($samplesheet_config =~ /DualIndex/) ? $list[$indexHeaderSS_dict{'index2'}] : "" ;
+		$lane = ($samplesheet_config =~ /MultiLane/) ? $list[$indexHeaderSS_dict{'Lane'}] : '1' ;
+		
+		# Contrairement Ã  illumina qui ont la sÃ©quence notÃ©e, les index 10X ont le nom de l'index (sauf les customs!!)
+		if($check10x){
+			$logger -> info("Gestion du 10X");
+			$lane_10x .= $lane.",";
+			my $prefixe_index = substr($index1, 0, 5);
+			if($list[$indexHeaderSS_dict{'I7_Index_ID'}] !~ "Custom_"){
+				$index1 = ("X"x$length_index{$prefixe_index}{idx1}); # dico contenant les longueurs des index 10x pour filouter
+				$index2 = ("X"x$length_index{$prefixe_index}{idx2}) if($samplesheet_config =~ /DualIndex/);
+			}
+		}			
+		# Bilan pour la lane Ã©tudiÃ©e
+		$logger -> info("\tSur la lane ".$lane." -> Index1 : ".$index1. " | Index2 : ".$index2);
+		
+		# Remplissage du dico info_lane : infolane{#1}=8,8 par exemple
+		$info_lane{$lane} = length($index1);
+		$info_lane{$lane} .= ",".length($index2) if($runInfo_config eq "dual") ;
+		$logger -> info("\tLane ".$lane. " : ".$info_lane{$lane});
+	}
+	close S;
+	
+	# Ecriture des options 10X
+	if($check10x){
+		chop $lane_10x;
+		$cmdOptions_10x = "--lanes=".$lane_10x;
+		$cmdOptions_10x .= " --filter-single-index " if(($runInfo_config eq "dual") and ($samplesheet_config =~ /SingleIndex/));
+		$cmdOptions_10x .= " --filter-dual-index " if(($runInfo_config eq "dual") and ($samplesheet_config =~ /DualIndex/));
+	}
+	
+	# Rechercher si bool_change_config ?
+	#my $bool_change_config;
+	#Ecriture du masque  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #
+	$logger -> info("Ecriture du masque");
+	my $masque_read1 = "Y".($runInfo_lengthR1-1)."n";
+	my $masque_read2 = ($runInfo_lengthR2 eq "") ? " ": ",Y".($runInfo_lengthR2-1)."n";
+	$logger -> info("masqueR1 : ".$masque_read1." | masqueR2 :".$masque_read2);
+	
+# 	if( $samplesheet_config =~ /MonoLane/){
+# 		$logger -> info("\tEn mono-lane");
+# 		$mask = " --use-bases-mask ".$masque_read1;
+# 		$logger -> info("masque : ".$mask);
+# 		$mask .= ",I$runInfo_lengthI1" if($runInfo_config eq "single");
+# 		$logger -> info("masque : ".$mask);
+# 		$mask .= ",I$runInfo_lengthI1,I$runInfo_lengthI2" if($runInfo_config eq "dual");
+# 		$logger -> info("masque : ".$mask);
+# 		$mask .= "$masque_read2";
+# 		$logger -> info("masque : ".$mask);
+# 		$logger -> info("masqueR1 : ".$masque_read1." | masqueR2 :".$masque_read2);
+# 
+# 	}else{ # Multilane
+# 		$logger -> info("\tEn multi-lane");
+# 		my $nb_n_idx1; # Nombre de n Ã  la fin de l'index 1
+# 		my $nb_n_idx2; # Nombre de n Ã  la fin de l'index 2
+# 		my @idx = keys(%info_lane);
+# 
+# 		foreach my $k (keys(%info_lane)) {
+# 			$mask .= " --use-bases-mask ".$k.":".$masque_read1;
+# 			
+# 			if($runInfo_config eq "single"){
+# 				$mask .= ",n*" if($info_lane{$k} eq "0"); #si la lane est NoIndex, n'a pas d'index 1
+# 				$mask .= ",I".$info_lane{$k}.("n" x ($runInfo_lengthI1-$info_lane{$k})) if($info_lane{$k} ne "0"); #si la lane a 1 index
+# 				
+# 			}elsif($runInfo_config eq "dual"){
+# 				my @list = split(/,/,$info_lane{$k});
+# 				$nb_n_idx1 = $runInfo_lengthI1-$list[0];
+# 				#si la lane est NoIndex ; n'a pas d'index 1 et 2
+# 				if($list[0] eq "0"){
+# 					$mask .= ",n*,n*";
+# 				#si la lane est single index ; l'index 2 est vide
+# 				}elsif($list[1] eq "0"){
+# 					$mask .= ",I".$list[0].("n"x$nb_n_idx1).",n*";
+# 				#si la lane a 2 index
+# 				}else{
+# 					$nb_n_idx2 = $runInfo_lengthI2-$list[1];
+# 					$mask .= ",I".$list[0].("n"x$nb_n_idx1).",I".$list[1].("n"x$nb_n_idx2);
+# 				}
+# 			}
+# 			$mask .= "$masque_read2";
+# 		}
+# 	}
+	my $nb_n_idx1; # Nombre de n Ã  la fin de l'index 1
+	my $nb_n_idx2; # Nombre de n Ã  la fin de l'index 2
+	my @idx = keys(%info_lane);
+
+	foreach my $k (keys(%info_lane)) {
+		$mask .= " --use-bases-mask ".$k.":".$masque_read1;
+		
+		if($runInfo_config eq "single"){
+			$mask .= ",n*" if($info_lane{$k} eq "0"); #si la lane est NoIndex, n'a pas d'index 1
+			$mask .= ",I".$info_lane{$k}.("n" x ($runInfo_lengthI1-$info_lane{$k})) if($info_lane{$k} ne "0"); #si la lane a 1 index
+			
+		}elsif($runInfo_config eq "dual"){
+			my @list = split(/,/,$info_lane{$k});
+			$nb_n_idx1 = $runInfo_lengthI1-$list[0];
+			#si la lane est NoIndex ; n'a pas d'index 1 et 2
+			if($list[0] eq "0"){
+				$mask .= ",n*,n*";
+			#si la lane est single index ; l'index 2 est vide
+			}elsif($list[1] eq "0"){
+				$mask .= ",I".$list[0].("n"x$nb_n_idx1).",n*";
+			#si la lane a 2 index
+			}else{
+				$nb_n_idx2 = $runInfo_lengthI2-$list[1];
+				$mask .= ",I".$list[0].("n"x$nb_n_idx1).",I".$list[1].("n"x$nb_n_idx2);
+			}
+		}
+		$mask .= "$masque_read2";
+	}
+	$logger -> info("\t\tConfig de la Samplesheet : ".$samplesheet_config. " | Masque : " . $mask);
+	
+	#Ecriture du fichier Run.conf pour la samplesheet IEM #  #  #  #  #
+	open(O, ">$config_file") or $logger -> logdie("Error in opening config file $config_file");
+	print O "SAMPLESHEET=$SampleSheet\n";
+	print O "RUNCONFIG=$runInfo_config\n";
+	print O "MASQUE=$mask\n";
+	print O "OPTIONS=$cmdOptions_10x\n" if($check10x);
+	print O "DEMUX=$SSformat\n";
+	close O;
+}
+
+=head2 function check_my_SSFormat
+
+	Title		 : check_my_SSFormat
+	Usage		 : $format = check_my_SSFormat( $samplesheet );
+	Prerequisite : 'grep' available in the PATH
+	Function	 : Read the 'Chemistry' line of a samplesheet and deduce the samplesheet format
+	Returns	     : '10X' (Chemistry is 10X) or 'IEM' (Chemistry is Default or amplicon) ; dies otherwise
+	Args		 : $samplesheet_to_test : string, path to the samplesheet
+	Globals	     : none
+
+=cut
+
+sub check_my_SSFormat {
+	my ($samplesheet_to_test) = @_;
+	my $logger = Log::Log4perl -> get_logger('check_my_SSFormat');
+	
+	my $chemistrySS = `grep Chemistry $samplesheet_to_test`; $? and $logger -> logdie("RÃ©cupÃ©ration de 'Chemistry' en echec" );
+	my ($chemistry) = $chemistrySS =~ m/^Chemistry,(\w+)\s*$/m; # \s* and /m : accept CRLF endings, trailing blanks, several grep hits
+	$chemistry = '' if( !defined($chemistry) ); # unparsable line : reach the logdie below without 'uninitialized' warnings
+	if ($chemistry eq '10X'){
+		$logger -> info("$samplesheet_to_test au format 10X");
+		return '10X';
+	}elsif($chemistry eq 'Default' or $chemistry eq 'amplicon' ){
+		$logger -> info("$samplesheet_to_test au format 'IEM'");
+		return 'IEM';
+	}else{
+		$logger -> logdie("[Erreur] On aurait du rentrer dans le cas IEM ou 10X" );
+	}
+}
-- 
GitLab


From 18f0ad3ffed4ee7dcc8967d911d66c1cab508f92 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Mon, 30 Aug 2021 16:56:52 +0200
Subject: [PATCH 03/51] Scripts for readsets creation #4

---
 bin/checkErrorNGLScripts.pl   |  80 +++++++++++++++++++++
 bin/createNGLBiReadSets.pl    | 127 ++++++++++++++++++++++++++++++++++
 bin/extractInfoForReadSets.pl | 105 ++++++++++++++++++++++++++++
 3 files changed, 312 insertions(+)
 create mode 100644 bin/checkErrorNGLScripts.pl
 create mode 100644 bin/createNGLBiReadSets.pl
 create mode 100644 bin/extractInfoForReadSets.pl

diff --git a/bin/checkErrorNGLScripts.pl b/bin/checkErrorNGLScripts.pl
new file mode 100644
index 0000000..c8a2d87
--- /dev/null
+++ b/bin/checkErrorNGLScripts.pl
@@ -0,0 +1,80 @@
+#!/usr/bin/perl -w
+binmode STDIN,  ':encoding(UTF-8)';
+binmode STDOUT, ':encoding(UTF-8)';
+binmode STDERR, ':encoding(UTF-8)';
+
+=head1 NAME
+
+ checkErrorNGLScripts.pl
+ 
+=head1 DESCRIPTION
+
+ Read log from NGL scripts and search any errors
+ 
+=head1 SYNOPSIS
+
+ checkErrorNGLScripts.pl --file <path>
+
+=head1 OPTIONS
+
+ --file=s : path to a log file
+ 
+=head1 EXEMPLES
+
+ perl checkErrorNGLScripts.pl --file <path>
+
+=head1 AUTHOR
+
+ Jules Sabban pour Plateforme genomique Toulouse (get-plage.bioinfo@genotoul.fr)
+ 
+=cut
+
+###################################################################
+#
+#						LIBRAIRIES
+#
+###################################################################
+use strict;
+use Getopt::Long;
+
+##################################################################
+#
+#						INITIALISATION
+#
+##################################################################
+# Command-line parsing : the only option is the path of the log file to scan
+my $file = '';
+
+GetOptions('file=s' => \$file);
+
+# Without a log file there is nothing to do : print the usage and fail
+unless (length $file) {
+	print STDERR "USAGE : checkErrorNGLScripts.pl --file <LOG_FILE>\n";
+	exit 1;
+}
+
+##################################################################
+#
+#							MAIN
+#
+##################################################################
+# Decode the log as UTF-8 : STDOUT/STDERR re-encode on output, raw bytes would be double-encoded
+open my $handle, '<:encoding(UTF-8)', $file or die "Lecture du fichier $file impossible : $!\n";
+chomp( my @lines = <$handle> );
+close $handle;
+
+# The log is in error as soon as one line contains one of these (case-sensitive) markers
+my $ErrorExists = 0;
+foreach my $line (@lines) {
+	if ($line =~ /Erreur|ERROR|error/) {
+		$ErrorExists = 1;
+		last;
+	}
+}
+
+# Replay the whole log : on STDERR when an error was found, on STDOUT otherwise
+# (the exit status stays 0 in both cases)
+my $output = $ErrorExists ? \*STDERR : \*STDOUT;
+foreach my $line (@lines) {
+	print {$output} "$line\n";
+}
\ No newline at end of file
diff --git a/bin/createNGLBiReadSets.pl b/bin/createNGLBiReadSets.pl
new file mode 100644
index 0000000..fbfe6fd
--- /dev/null
+++ b/bin/createNGLBiReadSets.pl
@@ -0,0 +1,127 @@
+#!/usr/bin/perl -w
+binmode STDIN,  ':encoding(UTF-8)';
+binmode STDOUT, ':encoding(UTF-8)';
+binmode STDERR, ':encoding(UTF-8)';
+
+=head1 NAME
+
+ createNGLBiReadSets.pl
+ 
+=head1 DESCRIPTION
+
+ Perform readSets creation on NGL-Bi
+ 
+=head1 SYNOPSIS
+
+ createNGLBiReadSets.pl --infoFile <path> --env_ngl_bi <ENV>
+
+=head1 OPTIONS
+
+ --infoFile=s : path to the info file
+ --env_ngl_bi=s : root path of the NGL-Bi environment (used as a prefix, so it must end with '/')
+ 
+=head1 EXEMPLES
+
+ perl createNGLBiReadSets.pl --infoFile <path> --env_ngl_bi <ENV>
+
+=head1 AUTHOR
+
+ Jules Sabban pour Plateforme genomique Toulouse (get-plage.bioinfo@genotoul.fr)
+ 
+=cut
+
+###################################################################
+#
+#						LIBRAIRIES
+#
+###################################################################
+use strict;
+use Getopt::Long;
+use Log::Log4perl  qw(:easy);;
+
+##################################################################
+#
+#						INITIALISATION
+#
+##################################################################
+# Logger : every level from TRACE, UTF-8 output, messages tagged with the script name and line
+my %log_settings = ( level  => $TRACE, utf8 => 1,
+                     layout => '[%d][%p>createNGLBiReadSets.pl:L%L] %m%n' );
+Log::Log4perl -> easy_init( \%log_settings );
+my $logger = Log::Log4perl -> get_logger();
+
+# Command line : both options are mandatory
+my ($infoFile, $env_ngl_bi) = ("", "");
+GetOptions (
+	'infoFile=s'   => \$infoFile,		# file holding the experiment name, run name and lane number
+	'env_ngl_bi=s' => \$env_ngl_bi,	# environment path of NGL-Bi
+);
+
+unless ( length($env_ngl_bi) && length($infoFile) ) {
+	$logger -> logdie("USAGE : createNGLBiReadSets.pl --infoFile <File> --env_ngl_bi <ENV>\n");
+}
+
+# Values filled later from the info file,
+# and directory of the NGL API scripts
+my ($experimentName, $runName, $laneNumber) = ("", "", "");
+my $script_path = "/save/sbsuser/scripts-ngs/NGL-Bi_client_Current/GeT/perl";
+
+##################################################################
+#
+#						NGL-Bi ENVIRONMENT
+#
+##################################################################
+
+$ENV{APIPERL}=$env_ngl_bi; # $env_ngl_bi is used as a path prefix below : it must end with '/'
+$ENV{CONFFILE}=$env_ngl_bi."conf/prod_illumina_qc.conf";
+$logger = Log::Log4perl -> get_logger('loadConfFile');
+unless ($ENV{CONFFILE}) {
+	$logger -> logdie("$0 : Database configuration file not defined ! Initialize 'CONFFILE' with configuration file path in your environment");
+}
+my $dbconf_file = $ENV{CONFFILE};
+unless (-f $dbconf_file) {
+	$logger -> logdie("$0 : Database configuration file does not exist : $dbconf_file. It's necessary for continue.");
+}
+open my $handle, '<', $dbconf_file or $logger -> logdie("$0 : Can't open database configuration file $dbconf_file : $!");
+chomp ( my @lines = <$handle> );
+close $handle;
+foreach my $line (@lines) { # every useful line is 'KEY = value' and is exported into %ENV
+	$line =~ s/#.*//o; # drop comments
+	next unless ($line =~ /\S/); # skip empty and whitespace-only lines (e.g. an indented comment)
+	if ($line =~ /(.*)=(.*)/o) { # greedy : the key is everything before the LAST '='
+		my $key = $1;
+		my $value = $2;
+		$key =~ s/^\s*//o;
+		$key =~ s/\s*$//o;
+		$value =~ s/^\s*//o;
+		$value =~ s/\s*$//o; # trailing blanks (or a CR) must not end up in the value
+		$ENV{$key} = $value;
+	} else {
+		$logger -> logdie("$0 : Can't load variable to dababase configration file $dbconf_file in line : '$line'");
+	}
+}
+
+unshift @INC, $env_ngl_bi."Common_tools/src/perl/lib";
+unshift @INC, $env_ngl_bi."DB_tools/src/perl/lib";
+
+require illumina;
+require json;
+$logger -> info("\tVariables d'environnement pour NGL-Bi charÃ©es.");
+
+##################################################################
+#
+#						INFO FILE READING
+#
+##################################################################
+$experimentName=`grep "ExperimentName" $infoFile | cut -d';' -f2` or $logger -> logdie("[Erreur] grep ExperimentName impossible : $!");
+$runName=`grep "NGLBiRunName" $infoFile | cut -d';' -f2` or $logger -> logdie("[Erreur] grep NGLBiRunName impossible : $!");
+$laneNumber=`grep "LaneNumber" $infoFile | cut -d';' -f2` or $logger -> logdie("[Erreur] grep LaneNumber impossible : $!");
+# Each value above is the 2nd ';'-separated field of the matching info-file line(s) ; an empty output aborts the script
+chomp($experimentName);
+chomp($runName);
+chomp($laneNumber);
+
+# Run the NGL-Bi client script ; its output (stdout + stderr) is only reported when it exits with a non-zero status
+my $commandNGLBiReadSets = "perl $script_path/createNGL-BiReadSets.pl --NGLBiRunCode $runName --NGLSqExperimentCode $experimentName --laneNumberToWorkOn $laneNumber";
+$logger -> info("\tCreation des readSets dans NGL-Bi : ".$commandNGLBiReadSets);
+my $result_commandNGLBiReadSets = `$commandNGLBiReadSets 2>&1`; $? and $logger -> logdie("[Erreur]Lancement de createNGL-BiReadSets.pl\n".$result_commandNGLBiReadSets);
\ No newline at end of file
diff --git a/bin/extractInfoForReadSets.pl b/bin/extractInfoForReadSets.pl
new file mode 100644
index 0000000..36bdf05
--- /dev/null
+++ b/bin/extractInfoForReadSets.pl
@@ -0,0 +1,105 @@
+#!/usr/bin/perl -w
+binmode STDIN,  ':encoding(UTF-8)';
+binmode STDOUT, ':encoding(UTF-8)';
+binmode STDERR, ':encoding(UTF-8)';
+
+=head1 NAME
+
+ extractInfoForReadSets.pl
+ 
+=head1 DESCRIPTION
+
+ Extract (from the samplesheet and RunNGL-Bi.created) and emit the information needed for readSets creation
+ 
+=head1 SYNOPSIS
+
+ extractInfoForReadSets.pl --sampleSheet <File> --runNGLBi <File>
+
+=head1 OPTIONS
+
+ --sampleSheet=s : the samplesheet file
+ --runNGLBi=s : the RunNGL-Bi.created file
+ 
+=head1 EXEMPLES
+
+ perl extractInfoForReadSets.pl --sampleSheet 20210607_NOVASEQ6000_BULKDEMUX_HFMH7DRXY.csv --runNGLBi RunNGL-Bi.created
+
+=head1 AUTHOR
+
+ Jules Sabban pour Plateforme genomique Toulouse (get-plage.bioinfo@genotoul.fr)
+ 
+=cut
+
+###################################################################
+#
+#						LIBRAIRIES
+#
+###################################################################
+use strict;
+use Getopt::Long;
+use utf8;
+
+###################################################################
+#
+#						INITIALISATION
+#
+###################################################################
+my $sampleSheet="";	# path to the samplesheet
+my $runNGLBiFile="";	# path to the RunNGL-Bi.created file
+
+GetOptions ('samplesheet=s' => \$sampleSheet,	# Getopt::Long ignores case by default : --sampleSheet is accepted
+			'runNGLBi=s'=> \$runNGLBiFile,
+);
+
+if ($sampleSheet eq "" || $runNGLBiFile eq "") {
+	print STDERR ("At least one argument is missing !\n");
+	print STDERR ("USAGE : extractInfoForReadSets.pl --sampleSheet <File> --runNGLBi <File>\n");
+	exit 1;	# a usage error must not look like a success to the caller
+}
+
+my $laneNumber;		# lane to work on
+my $experimentName;	# value of the 'Experiment Name' samplesheet line
+my $runName;		# content of the RunNGL-Bi.created file
+my $content="";		# text written to the output file
+my $file2write="readSetCreation.info";
+
+###################################################################
+#
+#						MAIN
+#
+###################################################################
+## Extract information from the input files
+### SampleSheet
+#### ExperimentName
+my $experimentName_ligne = `grep "Experiment Name" $sampleSheet | head -1`;
+($experimentName) = $experimentName_ligne =~ m/Experiment Name,(.+?)\s*$/; # \s* : no trailing blank or CR (CRLF file) in the name
+if (!defined $experimentName) { print STDERR "[Warning] 'Experiment Name' not found in $sampleSheet\n"; $experimentName = ""; }
+#### LaneNumber : always 1 for a MiSeq samplesheet, else the leading digit of the first line starting with '<digit>,'
+
+if ($sampleSheet =~ "_MISEQ_") {
+	$laneNumber = "1";
+} else {
+	open (my $handle, '<', $sampleSheet) or do { print STDERR "Cannot read $sampleSheet : $!\n"; exit 1; };
+	chomp(my @lines = <$handle>);
+	close $handle;
+	
+	foreach my $line (@lines) {
+		next unless ($line =~ m/^(\d),/);
+		$laneNumber = $1;
+		last;
+	}
+	if (!defined $laneNumber) { print STDERR "[Warning] no lane number found in $sampleSheet\n"; $laneNumber = ""; }
+}
+### RunNGL-Bi.created
+$runName = `cat $runNGLBiFile`;
+chomp($runName);
+
+## Write the output file : one 'Key;value' line per information
+$content.="ExperimentName;$experimentName\n";
+$content.="NGLBiRunName;$runName\n";
+$content.="LaneNumber;$laneNumber\n";
+
+open(my $fh, '>', $file2write) or do { print STDERR "Cannot write $file2write : $!\n"; exit 1; };
+print $fh $content;
+close $fh or do { print STDERR "Cannot write $file2write : $!\n"; exit 1; };
+
-- 
GitLab


From c107e1f9cce101caa7d00e0d55bd43633fe4ebf4 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Mon, 30 Aug 2021 16:57:34 +0200
Subject: [PATCH 04/51] Remove useless files from template

---
 data/MT_rep1_1_Ch6.fastq.gz | Bin 20068 -> 0 bytes
 data/MT_rep1_2_Ch6.fastq.gz | Bin 20037 -> 0 bytes
 data/samples.csv            |   1 -
 3 files changed, 1 deletion(-)
 delete mode 100644 data/MT_rep1_1_Ch6.fastq.gz
 delete mode 100644 data/MT_rep1_2_Ch6.fastq.gz
 delete mode 100644 data/samples.csv

diff --git a/data/MT_rep1_1_Ch6.fastq.gz b/data/MT_rep1_1_Ch6.fastq.gz
deleted file mode 100644
index e2975f131f94a08f60b1a4d94a51d5a2cf425edd..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 20068
zcmV(_K-9k<iwFP!000001GT-|mZM10C4B$A#;ob?s;th^2?-Da{DiBohqeC8|NY-$
z+-z?{pok!{x_gun3JD>l40pG~wr%F)^Z)zTKmX5sK3@L+!=bL)TYEenj_vVKZtd-G
zJ~#g>yE(Z0=kb4XE`ME^=kg%9InVmI`~>IZw>-~!PB|=x$_t@9kdx$nt{k01z<qf=
zl*97Z$p^^4Tps8D`SCx`{IuJj_8_NnbSxiVUYp9Zq>swuoToVXsocsFAE(JpVJiRe
z;A4z(R*uhSz$cbdp7J?94#WS9UrS!@uebVkeu-Yh3|YRi2<CDTO`ipk0armp$|B1=
zi$FoHMmcMqXT1;QY5C21zJ7w!50@A9k_CwVc1t4q`@tf}gkKZt`HXsWO8UG!@F`E_
z-}C8~w~TiFzUb%M^>Tbk9z=lh&E=cQ5M~uf9?DT2x><+0oFN0PBV2^XwUNsb^u12s
zCf84|7LGUwKNrcf-hBBfLpo*`UAzrb9OKwl?O0!`dsBBq(^vPgZ~A^1?tUo$XV<#>
zeUMv-p+7fmIj;=-5L;J{jWNX3SN(nbUwYe@yiE6cJ?1hdv&bL#wR6xfsUtsQDvR(s
zytBzv=1Ior>y(#IlbN7znRR~X5uNSM70IX9E$O{0!^bcDU%Tx~-kfh$wH|XFOc}*;
zOdsQ!&Rrd8oyb{NHD<la%3nxsPeo<OWEZ_4Z*jOLuh}jWT;{4y^z74=Q;s>7wOp3D
zJe$&x%Hb*XvbOtFp2Vq4<gUzWKb4tM&Mhy;GB$P2a-wR4T#m-PC;6#8Uasd;{SsYP
zw=7isBU4x&>NhW6U57FAMI6su8#0`_h;inu3K7>Q=v>IEK`>p`3w_$G>-8U5y(w)@
zQPHO8r=m=vFEjoq8Z^qkG#2d_t1zVFF#f`HFQV7Y@pM^<uDVM8WnGxMnlbDY7c*BU
zz0PCNX{z)@R>why&Q~i@t_nXymW@u5cO92s>MfSh%*%u*QirJsyy!V<ytvhQ&zPT6
z@<nl!sZq`!UFy@ArajTmZCzE>X)U?R<pS2HeA}7&N|&mBb0^0{3a92u*L!)O3Q%rP
z`5Bi03R3JqP&-#f{uAw9ZUM4v2A)H`J<*=NE=M^uVd;dX8U4f6_pa}DCBKyW<K?tj
z>T07hwLFv^>{rm~E#sFt8`Po&HFmIqm<uojdJ!Eb2DCfY%U7bWZc%<h8Nj{_q|SK}
zu8gA}Qffmpmg{&A-q+Vh|A=LtrzA?C=!K~~9^_f;AK!_7xfL_AwQK=`1x$K%RMcIO
z`7A3MhcYyP4zIq1^`ROtKpO_io8kKCdt_pKE4to3G4Z%8)3cW=QT%|S@nnAE?38t_
z<f&J`Cr)8s^s21)m6B6mMQ5x^swP}Lu$lHwm#mn#oXs(FwlL&f^lQ|xtD=W*0IM%k
zexu+2g8U*)(zz~D7tb!jXQPqG`towRJoS5$SLI<-ue})^R-LYpHe;N#PGI<Is<pZ7
zbt&jok*kp5$vV9PA}c;uoE)7=G?$-C?p2HkY2-P4R*1bEebSAapaDePCp*QiRQZgt
zynWY?tJ>4y)*Lpo9gEh%snLmwQB#|gMVJiVPFFcRd-d{F+c}*h;;v~7Fsot9@MmTQ
z1YeK&GQ!1epPqSr%*UL{#Rxv7$24_iEf0Pu#;yEc#iExh&`(|8hf!Af`Bq%J822T=
zo{IavG3h#<@M^HIRo(0K#RZ|O0?$sbiLQNI9%=)<K9~xD{*YQX(F-p>F42jb23LfN
z$03G$_jt6A->PP~j$_vs751n_j3%Wxrgm`spl|X;t%bUN{C>U`|F%u4<<#PrpCOGt
zxwI$wwLPBd^V+bf&#rS*Rg+$WnQ}k@KxM1?3xNwj2V7Jbr;IL=Fw^cKV9{%;?|M1!
z+gtHrb8h-u-xVi329d@2(fRA>gZ$i`%2)iBBB)%jejI&qZG4e-EZ$8l(v}NUw7zfd
zWBLES5xtE1?esqCI*y#?x{zh^szsBfpbi`7wnno$k!M7-s>d8HM21rrZ2qX5x2*Kf
z)wizL5`D_(J)hLwvYbV&JJoYTQQi7lEZ2OJXjS4_FMm`AXlPqJ+HXW}s>|iPUFy10
z9mG>5O;2owKcc}W!Zp=`h*MPg>nvF;p?a;RFQ5t46oxN-*k?wZEuI^GpVV%q*yW1o
z8D-kb%9raCq44!}@q34-SiIux?~7iYkIik3P>4fjOBe&DYgu%2pxy%DppKqua8dV+
z-4M8{*E=J~QEMTWD=>)v#<0EI(=Tkgp%Ck<mY<nt^SC_Q^=69Xm-?{AW}F&bow`{q
zST=_piY;fe0OHLv_*Ts)>p)F;Fy}@;Mdtur1o|}Vm9CRDPJ*up8WUvZ>hEj)%PcNO
zLJ3&PNz+iGHZSAtCvo-U1IpwszI!>T&l0cc`^(DzM)LZ0yKE$ff}hoeSzI>=C8Me4
z-Qfn!g=sNof)z8sEX27p1AhIX4fqV2`75`zkN<W{i!&p4dy<!j^{qay7P?zx*O_d-
zr$cBWN7W6T9eo`6Rf~rZ58j_M$>9siBzLAH<a53e-M;=hf>}#C!XOq^)|C~XGS!2f
z9rPo_v)vcHIvp<8t%_6Is}^WR+@VgW3Xu75R=o~CMnrQs6)vtSgACjz>$?Ip{0xZm
z6L03rHT~IH>#L9yoX9vRf6DBazv97uGwAiHh`#pSb)05~){HL$VoiWiin!2_nF|Z4
z0s>t)bGr6%9Rjq1Tqni!J|FYf$eA8rJ%yL*S3~>UIIVn?*H>lNcdy&axvJJ?O=mSC
za*U*-B^P5Q1DlCHWunT9tETD>aE$su0T+jFmIDfHyU)h_ZD8MR-50mbIxuK!b!cDm
z+wD|ud>O#!8Ik3T)v0LvtiZFXK8@<Kqp8ja=b^kY_j4c=qp>S&y!pP{G0!VG(?-r|
zkqvH5@!fFx6vUnvgYH5fLnzUCpW5O82Z>MRGm7FLr$=h`ByUR8ay+gKI@}nzDk`}S
z`hqyYF;jRgFkYL1N^|OSt_PxeD!5!5gmCa^-o`wyrTnE^nqI46it7m=_u-k#r3v5o
z?oE4XE*lL;z%4+DtL294K3jxM2vT)F0SClqirOfqQC<p&aRF?}yJ!dGbBpAD99>sJ
zs!)9I@)O76aQAoT#^Rm3<jY|J9#U$=TNkAD@n_%n8jSr{(C6c!y`7ueR=2APQ_n;_
zHHT;w=>~@klOj}sPl>S4VZo=FWSN<5ae!kG{C?uZZPu~4E!5L%U7pmt!?=dx{|x0K
z<Uw*wda`(}IZcc2SFU4mXZ9p7_ow6WTE7iCbsTdpI>iyHYUF~*4e*0z6}S)-jD+-o
zo)9rfT`zEQ^{0qlzG#Uoi|ZA{=T^x4QGR`x#)xpU9>;2YDEjUsBO&Re6#Fud>*hXq
z=4!I<%LhG@=HW&`G)m;=;-hVUj^j6?*T?F#((V~K1@*`>#c{Lc(-4VUM8dc|(AEQm
z%8(Z+HUPAidcq`dUjmW&lS%Me!<*+Nh7n~Zm(Pn2JmpUm#W$i~&*$bX!v-0VSpv#R
zM|~!q$|T!NCkI{(Pjg`j;t{<Nz`;roTkyfI<@X04WPjmN&%8cTTz(HX-(Cd<jnHRL
z#~n}QzG+U)T6F!Q{O%e0izZ29O<UfYdJDwl^@)YAI;|PrGDn#Wj)tjwAM}@1UPoXb
zZ%N_Et}KY6`Xm`8&Z;~v>%L6;62in1)jW#jD=%K-<r~q@r_1TMneV~84##hfq?6@G
zb>imCoVg<!EpWTegx7*5E}sq6a)HQR*yh+ijx4W7co)6MJ&HwhYTJsY(D@!E!&F}F
zWVk1Q7%opPemCm3x;nh)*c^bw0Hhf<4z8J|k8JAWA=5Fs&H+4GG7Z2Kf<Ay;8Q|<@
z)YDqp<zU*pu21ox4De}ujP1`qZpXvpdb~D6({{ageu{s^w4K_^;dni6+?h;ZfjM!0
zWu{nYort<3cgRF5y&47QEcvcaZ6zRP$!gRkkk-g*GuETvDY;D8qV{z{Pe`)>rzz81
z@{6I2r(kd?Dju24;1Sf2Pw0vt(579<&+V;hs%9g(TM|l+nRCsh&Hx&Lq#DRV*n-K_
zfMlbpf1n(IEex#B1ZKZ--l3wtQtw+&7YEV=r=4mpdr34-dCaQx_awix$LsZUTjS3<
zOw5Nmz>I<D`i#`Eq4Gp92*(h7aL7wKGX!ebVcQY=I!t$<E|ccY^`HENu^8#_C^ot-
zgLxn8)<?xzlpj)@m&Z7cZ86$@3S!Is(09eZlRHVi%V)%($c`5PdXF}9tW>{WSEJ71
zVY*2JH{=n5CI(IEDMn$SvTCo)$kkwKdUyagUa2cJ?mFu0!Cf1-{K;Ve4o}W&Xg>gN
z`Gaz_`H}ZcTM_-%)Yo-(F(aK|jw~i}Si&{{sWa#Vfrt%2x`3-O!?Z1cv{N`P1ki63
zz<G<=EX+(~RA{c{6{N@Qgij)RBx|S~mFti!{|1Lw?uThNS}meixB9lpE;_=_!I&dZ
z*#@p<W?kXF%s|<~!nCj;M}t$v=ym1Ki(e@7;y=o(zKyM@wUjqb4QE8!mHb+adVO6l
z^)P3`GZ};*#AJ|`=_GI}9L5syJRKJ1(V+}xTnfat&SEyX{}g#QK_tx(P4>JU?Q4dr
z$H)#E!;iYQ^?cEJISOq?7&qnr*%7^NueU?Jk$iT5ARH+%A<vdW&uYvZ_-<fG!9jOW
zhs-Ht1n8OhMu&N#YXC6LNAd{QZJDDIowh#2F+P!W5EB=a6XJzb6eQmkC61Y<PH~Cy
zZeRSjYfQe}Qr+HNUrAnW*RnO`Vci^7@oEN)0tz79cHl5GDPKr8@C>y8!7Q7lrx2nU
zcERL4%s%oGi|A=B{Zn=Pl1>G{IBl%H=3!(`z@43@Aukxk>k}u_4f~>3hvRysBQzxi
zlnK>2!%)vg5Ji@lbSOO^nvZdss&lyinN+e2N(m(YMFz#{HogBf9hk(oD@#2U2gXa9
zv3NI{dul@1Wekbrm*%#~rbBfFM3!)Abl?HLx)t!Q4honxa8?2^IV6_gs=p|LmZhEZ
zexhl?mp0*NTAnvgiQOei5fOz+_nHuPS@h(g+n4;>oHw`>Vr)l_NSKWpninI*!~xwr
zLm-)hpN%Y>nV|$;r6E3XclVA(Zf=5E$Tydz9Ti!G1>a!PL3DsXl0^$dzBGs*Cs0;-
ztE8dIGT4{AzTB>-?K+2Gj8Q>tBq~dt1rwf`dddb!=o*>9qQSBUDGV+h@#=R5Mczi~
z;@PD<=p*s$RVwg)#fuTL92i1ncBeSFA&w%X*mnufbNQpwp@aduF6`;hp6lCfi_HWu
zo*jn~ZpeR?IA)Ts-L%(`W{^hQ^TRDpWAMTvNU2TAnFy(BUE2?B_s6BZJ(~J*{pb03
zI`{WS>-WsssoWorr_DlFZVcQMod6Ex#zkzy(2+Q0mIbB_<{xpsCR-T6A@B|0!OJVl
zpl7f?A-bos9|dj{kZ}UaVkR7bi)wu37NzMiEh+XAs!Z7u>T<Lx;rlnDpTw|lM%^?N
zop=slezBToXh9?!z?U#IPMv({ae`WbiOJa+JzD4c1p0?$56Z0>T?w>Z9Q!!>F^1F@
z+x%!B?fswUQ&(cE;c|;Hj?TweLiFah_K$!5alD@TGz<^lcYe5!-nDn340cslJ<7@Z
zVLC*=9yY#v0Dz9kK7$Y%NQrQidgdaqWexacCJ193U}WlM!n9!4$l!mW-9Mr;n<qM;
ziwc*2SmDbXNx0zMW7ndK<mb!rd|U^sn821ppR<`Tps8dEA~XvI%)+7}lSI;#0zCAA
zGUvcdtWO4gMI_*3pM0Id`H%K4+~i_+%1A!${aAHFKVHvW({xQS)#Lam^P>u}k8N9z
zk2Z!7gVGBA=tp@^U5Pg6)2@;e>UX`#FV4<tR2%{{O{SV;3wvZn0v-7WWRc99b1Vn|
zevuc;9_SPqAM~@ZMA_c?`J#Ym1V&6MV`fY+bz<~n&6uXZY@oH^pKnBOu2pl{xHG{Z
z3Zl(G91zGjH=84Gj0!v^%?LDS7zgM%&_uyA)io5{o+B5QxnHbx@zq6-F{vh~tJ4gW
zFZLW0Tufaoamt8isD>$)USHi6{ai-9YMPHeY{*N(aOPs4$!!E9KGT+w7!IE{J9A$F
z+#q2EV}OJ!a?XqB+gyS8W<qeBP*-wnhyQm~9;Zuttgff?CdFtGCP+>o{u%joDRpvK
z-EM)bvgMi7wt;y92gYD~{8L`6WSf%m|Gd!oQdTZdj`B>|KblD@SE9pEmWW2t_VVZ+
zm1Y!sFpGrbUqZDKx#@HeJnP(zZj@>ew}-x;1em&BPS@>(CkzKO&U71)btYJmT}BQm
zSba|jBuoU$O5(lH6T#B6GkCxsNRh<{8}Fen(dc!21V4_xZTt2%X?e>~<nH}{b=0f&
zT-BE~C3MM@W%o)9BlyB>4@Rfb^b!0Ul|3k{fHV<(2p$olRPfH+Cy~om-F<?vElUG5
zK{aNh_$}hBr>PZ&=)=b}hN<(vD*3hC-<tZp%mZIA82?R`n5M+}6D-eWpoDq-b2iTZ
zlCjAQ_GhRMoi1Nu&=Gft&{Ebqr%A*2;=&Fk94+2k3T0h{pM7_0e;Xcm-}@mwQgD9^
z*Y4bPRS4ryoceTooW2qLe7+toTha598^nOYhk?DD;qid>sF;jS*FfTICUUFq4VHdm
znFpA{Aj!SYcTFG{5m2u&guB1r`>wwU=Wh}c!PMPvhrhYO4}wcfKSit5dGdpIG5m;n
zlUV?A;B+p^;kqyS^>}GEgFeq>2?1k6po&V>;4GU2wa2*+m@+UAp}rjonv6Y=F;N^n
z!f!B&kC}(BUjUULXAr4U$mZ+S`RVR^lFP%}`LHFvXX<!WPa1Ir=GkD(fUX()TYmxN
zWJ6-12sANAehF7Sm7?f(DYN^i==fKhdt?w))9b@Twr4Q{u`h=GuDsAmPbm{d$m0Fz
z!!SiR2}?evUHx7jR=2v@P@plh18Ja)x0xCRQ;yh+TR4<?IfOVZQ!l{z3)x83nW+A+
zBU4{;iM+i!eRfI$3uRyQ>b$1C2d2rEq;Y^}lWD}mQpQ8}q*c9uQUxe)Hnu2njm%sH
z*(RS5rsP)E>86TBe^lL-0l!JvMtTshkPM^zDUN#_Wr;Uhttsyq6p74(kc#&OZ!V`T
zRm^yCnTi{v7y36KfIIWuV51S}<Yh6Kqr)1X;K&;!GkQn6e?Gi%HL?p}Cs#>J*2tsF
zAdP`{rabtxM~oE7ua~t8J0pLN2#eod)2RW~Spk^jOehY27Gf-&IrVxnl^-sSKA)MQ
zFy$U&PU66&tc$*cT@U}`_K)*nxD55_=dnKgIQ-*$zsKkv@sHns9{%zB<MjK_fAojb
z@8_TYy#MXz-wr<-?{8!O$Nl)nsdo=o!hH8<44wPNq}NijdfO04;BsVgXt0<;jU_7p
zI4ep~-6AS=fii9Y$Yv7eXSl9&AZ6?u9Co#AzfU%a&cl+%KU*z)=bq?|1TOV?v(i~?
zZ4tVq(lS{TvsEty#v?C;hANKUEtoQHHl<1DAbdxM{g^@j?51nLs~Pxs>HtJzccmB6
zs}ips)`pE_p_%Me#N8PsJxrTgZ8BNV)QX75Gl@2VxfVqCFR=d0%FP9!`^B4o61ID3
zNp<+dFDhRm6;EeJ^z)%TmBWXP_l}eqBR0UTsBnR8;(DHq5K08gup1;4XTxm39uCB9
zs0Q{2JtFDws$fBaVFI)cwI+X5&&Oy9Wt^(lkV%VvU-GJ6FLYKYuw-*dBF#&c77H~9
z&4C^1NFty?Tn;kNKnQO}0s^Tfb{~-8$?1_%!ZOjLiL82&_!Qo)a4CwE>b#`UkKsQJ
z*oxqFb6Sr${F51OOIWZe)S6}{hh1i|HO=)*7R9`j1I+Z`WzB1~XGUDFz~tv6{vlHP
zu+DA$N2%K^*X611-qY}Fd8kxa=Bf6u_ab<6J0IUDLvHa>128sJC!ua9GxIo`+@!xd
znsKTFXh>b_A=@Y5U0?GE{6g8!!l(Gi83gPURtfm&X?fM8va9ggw`QL~ah9yZp<0)$
zqVvQY$aNtNY$nGUh%;9~;Ps#b3s?q3Ddb*BA}4_$2jgFSUzPurV$Z*D&g*jRWgUv>
zQo(mwPj{?fIO(WNMneuonK*u+`N3L;u%LGK8tbrGvxs%?=oc@Hq0hc9an><v@-UGH
z{;KKIVtXmKS92zWXX!+eG#4pZn4%VkNx0!cL>PA!+_Aki$9jXo&M$L?lqf6nfjVne
zjgvXEbd@X{Tlf~{TCn358nE?a5HGIiu(U*H4t#HwKIMBT5!XniCv|(ycPTj+%dGOQ
zT$8&NI1iy8Vha8~IzOblkPx0v5Tk>bgMAIBO;_FuRzVv<iw)InArOFdMtoLhlq%4O
z-ZOJDf$$wDVzU)4v&I84&%a;7w;~cerH)0i>Q9n|78Z20T>QT1=UcPJA;DIi0m0<J
zq)9gRm%+$0$+C`Kohe3SU=6enR=f!;JFr&dtA$QlM2uS2FiD+<R3qo7Q&vQIu~W=e
z_%dGGZTNl&G56UETv?tbAN?<ywNrb!R-1kVq&D))gT-nYRX|WAx57+WDGvxk+3`JU
z&aCXya!|%8|Lfe5oe6*;&r@0-<ZVy#v*0q#M!Taw6j**5nZRlRI?rf5fvb|4H4E@j
zjdB5x#W6Gk8q1X3%+jXMME?jf<c()$k2b8b2_9ZHQ<uDGI>}v>TizY?BKo!2jQXOq
zXTpa8>{bIGEH>ewB1{Kf!vQC0+8Kq=cy&bRMMeB#-DdXdXR8aK4Og(%p9H1kb-yZe
z(E9vuM6b*3_^#v@(r&P@)!1L;#h~iNCOqP?OtyHiQhkMC^)yA|aHa#3K;3Y^RAXz*
zIVljkdWLdTRrJ#!_u|ru=?YTNF!`9{BZgG8yyjq}DG$y;rhcy%LlM2coR8;?BZr<L
z)>A8(e-l(9^<`%Gs5ykC*OoI+h5GMR2xV_!gujQ(@Mh%x%}CQ>3#)#5y)fX%q%nYi
z8w$?nAfy=uZg#`9b1PhvjbUTeiUZz~7fLK5a3tECu5IvzT`*#q^(O%rJ}+iH8%b`a
zi@#sg^v)4e#h%|^0p;BS^l9}WX$kW7B)_z$>bS1T55$iH!BfSOjL5C=2}XQoeP0MP
zU2HD{h#9j5X)56G8Sx7`s%fP|0_i<#Ee@hGiOcjz28Z1+X<Y}sFL(YSsYHOy)8y{K
zx4!TDpUp+M@594APF3Bub-3SreC$hJUAG;z?CY{Z8xm_3rcw8oEQ|qg$flw53Pv4i
z^Sp4!NG&8Vi7uIO%8W>IW^<9PLNi_{7~MffSjb?bkKv-Uk;z}e;9`jP&<)Z>&X*PM
z-ROV#d)x0)1J9MHxa$UzLTVT;DPYDK#*FM2<`f+&lV?yRKr@0lgW0#9gU*Z~B0zMa
z`!Ct__f_(KV2|#9mfT`<IaY7pyZI`BRKOEMCcy<G@CP=-vxL*D*9a~Tc(1DQ8M6Yp
zop(s@aNl24kkvFu(toA|=wTJAmWQMYz9M`H(Ip2{R#9rd5&cwEn|fU%J3G#B7&rv&
zta>2REn6I7i6^2?#*AV!2Uf!b)H~_wfavQ+0?T+YXxoBJ+Y7PrZg9F13Im@kpS7#x
zu5EKTT-L2eK4&b?fyFwkSfN*Lf)b&g7n~X?XAV0VlFs}Bun1B{!fZS4Q8|9b@(0ei
z71c|)B2lU|jg*Fw`caq*S{<TZO^{-dJ;}?%>v03T&(2y4z@9M&0ma2b;8Hjy&<MOc
zqlvxHdM4pm)CWL%(?afJ7u+I-G!Ig(9y4l@Q)11NT&E;{stjn6vcxH%R>dh>lO<eF
zZ7-U42mRKb&(}3g%(BDCl!O2CYJ<k1&F0f`tOH*o5)&*H0q4ya<9apq<zH#K^3j?9
z>{N_&%o`Wr8HZ^|G46UZnsT@vj~iGjud@CYkp*aZH(S#c!Z)PGu;dHP795xcS@W_X
zEPk*N=66JYA?VAYl&19}2Om-&Ps<^^A=$AdeEUZ9sto&EPc7nTAuOGemI{xVDsG{%
z2R2$@*4*55a)gjZ$K(mH4>AY9i=5!4Ym!aMjnnV9_TFAkr_;5q4}Ebr&ehNB>DHZ_
z`q30GrTybLwnOWrhlBKQxZRa+QT%mj=pc7V`!%FTijNT8Zp>Chudj!Cqu+zk*v*9_
zrf7568DlCF!UFXViXW5<T#^|@E_vz9g@qC5zDMZ~lOMXUIgHXoDuMN__17MQ9GM$0
zzFR2sSaVE*%#|NMJc4k*m13jZ0eKG~dcP<7<<M5g^CoUt!t|N_G2z=7*AH3>xkok|
za8?B5tSkaVG0y#L31~;fFFH<1E=5Ttrxgp*)l}O7hB1UPo~bXxCW4GZ)4R4E|2ji#
zADp=e4Y;Yc3}{=vBovVK4oWNB^Cd4qKh}I4a4b0NV9>F$%kmcBCVUsNvBCRpauEr0
z%P?6^N?Yeiy}CFxk3A)K5iacQa^BQvJM>Y25emixL;aRf;1QNmwKK1l9i4nQZ%aeA
zu<+;*4>^t5`s2?({`~RtXMd8WD#P8?uCC6*e@X%U^-({yr{chGEgPmeU`PZqR@}CP
zp0l+;Bq<mM8%*$E92>CY$qo&MXNGlhac!-<x-!xu$CuE}OQE$W!6yhZ6RCl~YrJ)u
zQq@1@V%r0n;byIYP#k;MbKftQ_IlnFr-Q5t0!emOyh#Qay^(pgJStW`G7oT6snLu=
zI6EU9p*fR=z<aY@@HORgzwv|ZkCZoh9OzRT5-WDKX}XXI??EEB7&{mCDzq;pWV>9d
z^D1t^_g0^EgE<8#`Pqy&bEU6?vz>r<kSZO}Z6r{!!wcN6%jd#<BL^&gy7@Y5gRU#w
zsvNV<h(Q|ed?UJ4FK($)^vkjtD0r;QB_s06hQuRWXKD#{aAs2mXUr%ja8NU(3KR5a
zpU7=i?(>p<Aw?3-=zyFvd|AMEWpYO}Oq^1P*^<tBuYEy<Og;L3+-;dyL_Z%6)td9Z
zpyk03h(N>%G9cD=vvI#cibi)l8Uka?Iwo2SvNuY}SNw^*4DSYIem&+hPfsm9g*X<}
zzke<$m3)#kW+@kSU-IU5*km0RqPvx%IdFiC3k>?7GlR!os39=lq8?C2J9ydw;C7?|
zyQRkC-@HM%_%xCQuZB(>MRBL6kA0BN*t;rD!t~pDoppf928O~)G)RRwGZ0D#I}q6X
zyzqZ*V5#0v^Tm!7nO$1kXAPFNE%TD*P+sBOni#W^KTlTvvAHU8C~Dcj9`t@G4-dkX
zS}QqY`?F#vFaDdJg4Y-3j}cu#4-N#g*s#LcEHVB!vbCQkmA3?nyy-Av7iW1it;Bq8
z+Bs+ko(a+FV|Ucc!&@<L>s~u7yhZr!2s*J^g>6<KSLVy8`$Fags68h594u0w3Ck_(
zf7!D~;_Xdvsx|2Ww0=b^v$TqR8s{~7`0>$(TXN}~BGWTSe<kS*F1^9WwA<vqh+fyn
zs(xSUnN3v?lxLFMY3<2{Gw_*!#ln+!3$G}U6>mfXRzrS8oOws#^6L2XY__NAz&n;P
ztye>fi&FkQc8z*nmiqOw=|})*4cR~%V$fcV>55>RCCC|kScG`U!)N3AYiydCZ*`;q
z>)XE6a_PD0FEG7y`ZE6$j!J5|FA?0-1}zbavZGHKajE!>UGIF`H+!5R5&c#j4u_3D
z<Cq>m$B}HUK1nBF|5VZjomKZPy<-iKg}-N^F1(wJ3g6HAP`r)l=Qm@`u`f1<cjeT0
z^l;Dx;`bH2LKKI~7JY}w4h+@Ee{&X!WaiLWl$fCj!3)k-qv=@QWAJ-m*^^9%@ai+P
z_#sl5<)<h$xd}I^_{i$+fK%X0AU2lamD?1C;N37Z&AsnCsZW<|+dZYbkB?sKL8j#U
zVb`jQ=-1}9ZKehf)iC5hIt*Jr>MRIG_agvnaSi7`>XD5U_o{(J#OA&$^7uH|FQYX@
z4O7!X_n$uO?xG1A{M{G*blbGx2v(9q6pEHDu;;H;Fu!g&v&R~BpeaeqR9S<yyzr+#
z&iD73{|-E|Q(*ZGp#6HPKEXDGNU?(9r7!H$>5TbC^6GM4yK*ea3Z%{;JY@?plqxZ!
z4i{FFYJek^IEm;DA7VXt%$=DDLVxDUDLSX^7p5oEZ(BbF<IGJv!qh*NDwx!?No%>g
z^V<GgwziQ+;^6N|ULCY%b)8~lXGo+S0_j7>6miBWU|IpPV-E)hh=L{_@&ceirpDHh
z`-ME)*0bBrW&HEcK}vbk&3PmirN>+jeZ0oob?ExXz4pVNey_@XRb6f?FP7f8TJ68y
zV}d4uNhoj(s4fE>8~vOj`Y?|7>oAP{&~?LoXb$bIx&OGgm#%Ne-nr4=e{1_6*U`EA
zzYW=ZBYIt*kJq&;_x~K0`q=1x<;-mc{6nH~dixWS=DdYFqd|NyPauuoD7os?UXQ0s
z^9ICb)^n*72N%>4bh3;ZmHfh+3EU0>PJYfLfocLGINErt{`;y;=yOG2|AP9zja9^^
z@ojp$uP^2<-@iJy=gW275ctJ>V`4S1ZxS)(jLK7nGwiZt&YK-%sOq?xQ|3IG@w!T$
zS%IJ9;*KD?mMv-~jkII$!w4EhEMbW7fv$pF#y0q%r%FaCC1jBwTKebH&tmJx;Zc^m
z-*exK<d^z-eRJfHSFl_ut4wDLJmA%8mfQjtKzPiY1IP{JTg;@B=e$Hi-$v@&eCtNp
ztzl_U-u*H|@Y+HK54%jUs%nqTp<WZ;Kd`z`tnF;Xz??!d1#BQPO_qzZbf;kHE9$(1
zr6lM4zWw6s0jVU)mI>s$y()LW0xJVvY_>Fh74;sU0ijEjRUX6CmGvc+EmJ=Yd%#o?
zy=jieZLu!-uqfP|NqYlvU)M0}v=DpGbk+m5XW`+QWq_7ZNA>Qql0F-BM6KDoVw~I4
z{oZxgI1G24>e0of9v)ry&qwFZLnjS0#hA&~M7l=&G}ia78b|l&wFOc6pQo@Z`Bl2e
zt*J9$x}gY<R+)M91$vp`p;8-|I!78cFJ!sdYBk9=%*?vY?qmPF&8*%pYtoz>QY~<3
zr}TD4S|EU36(>72HMhfdUx#31K4LTqOB!JejlNNf2hJE{Hu6e1#Tn&gwiYBlpIIKX
z<HG9LrxZ-O<+_kql30j~-dX}c2DE~H%^cKn#(maM5&e8S-L|A5`kBETvH8EAi};nW
zMdMM+sOUg<ZUN=xxnxgW(PU0B>~aB$ns7<V1!QMdfhBhbUE}mi^;W&e39sPkOctPq
zubEDrfp*9+upvXpOrl%KvSZ917LX6UcEaZg#=OcqI7t*^tTW<rbiXgY>nLq@7_cYV
z+roKGp)11#1#i0ZbeQrtqF?IUx}zAJn3)?@E#V4*_Ib8ki?Vwi=Y`^}OiO?;0qR^Z
z0wqG*z%1(*gKlwGu#F5ne_`^2R%VOT;=o9QEf-{S&G>}CMS7o5!74o&qzU?-<V|}n
z>V3@#wX8Tfe=I#Dw}9a%@+!8CINV$KcO=<3%mK3o&Ne6`eQtlcuY)!7d5WazA-Q(B
zMc2kQ76-qVCqq%`{&V+!NAl}I_ZnSqOhzXJ5k{atYpg%?QDeh%q{!jJ0FZV>>qw}`
z?7wHdlwWc07wX+>zG<@Exwc)AF%e`RuL<;aCudBXop|v`iBw{hzY)EzE}LQp2a>+!
zhski`!q35QHui<f%Qcx<+<<6@AQlTIfY@7?$zP*O?aYZcq~1<1>mj2r>ICR!Z=qk0
z?Ww5_r)|8Z1{%8Ap_MUGN+{pJ!sYxn({8~yb8(Q}>bvv`Ff*anZY6pC@PN`y)>Lpp
z$wOKiZi#i9;OZ}z0&hpn8*&d)yuB}ZeLB_EX3Pm90b*jSAnt9z&=fHZaTumq9bFe?
zLud2m3EF|_KRcd=HuqAkD|{hAjYS`)zORO{8QZ>fE)Ma>&+GNv{A~UxvC0s930H>Z
z*0z8Cal1EN;~tMc?&JOUd-J%L!2VJ7f82t!0CqpdUs>p<_E^=MB0k0z`b7hpX$7{g
zSFM)Wj^)*O0bIaf%yvsFkv5YV^8%z&TETbSX7(pLdj{&Zi?#24(Q~6rw?VYSkY%IM
z=`6G{w->b($*bdKt=z3_kKIL_O;zal%+v`zHcVzj@Wh=Fmu2FS3htPGq{ho1(TTLt
zP#@qZv>uR0eu>c|rgnPOaV=~A)fahBa;faOROjv1x=>xOSspUOtPrdPi$>p=78a^f
zwPi~_X2<RenJHH!Fw}kLt`35`YtJ;2U(5d!W3TgHDl<b$_cmc4+8}L|q8AJ9iscTi
zcU6p`zx%#S@=&4|*<!YC+V&=v?{pRscU7Fg6Nl3}r)*^?bl?d^yjZR56BQ8Bu$zoE
zB_bq`ECVnt_4VPsIXHd$ZFZ6-PV>Ju3VeF|%O}(N*n9MnzZ>*o)wcVlBReo7VFK41
zj6kS&pv=xPPz=f4Y<L98F?55Gh;j4?83ugUVCd8CQPT&x?SS%2G+L33SEtyOH7+h~
z*sD{%%6=A?%_db#FdEqP5IQWwF^3Mb+%l00beuvZUmyaDEW4E~v!$9NP1GrGMw^C3
zL6zh#w(aOnhq^ocem-~AaQyS~=b!gO*ZStxja7HQH)B_iw|4w{6XUh0|9k&q>>f^9
zz>aa~N+how+26+Na2>zb?w9sf!j`w4$sC<@t2Yd!HZvpz6Ot=C7)uygn7|X^;W>WJ
z%>Hs<h%Ua&7ixUPeSTq0Q@5Vix-H)pj@9=Cc25?SvA;9fwQ8?TbKAt|nUGrs=m7sa
zESxCnZz76o!|yo5HCcg<Q-Sr{?$vrvK6X3ZQP_~4uie5&-GxjZ-_i3H6-?Mp)@v1`
z>#Dvg<J>txXtd*wl!a+Me=M<DQzpXhm`m%t!)9MH3of$d))=T-%aqKrO>#qYXbomF
zx5ZgSm}9Pq{v41?wQFK>^%lA@jAh#PeRGz5Sp4XH02b@HA9w_vd*u2^;Sq5OgefIi
zC1Hfcc#E}@{+LMubq#C3k-WKHwi|hXq`!2>njJYpr2k~%?Cdf^&Kvqf;(R!E7RbPB
zCAlyI=G@V7n`)nIFnqSq6#)y49Rm5p00*leKl`%qeUMfLyQ1F&g1v6KuH?Kx3v6@g
zAjwSb4B|Er1fb5)?b>RUX$4dbWHSxutT?k%!G;R^D#`rdzkm94oqCbkk+jzFW#^{n
zfHcHz+wW^#)O&s2IPXi}%NJ(O(g%fP;mkU^>V%|m5s*a6)_I3S5*&0$*zsC@Z`q~~
z-rQ%mq}-u@!LqFa+n9>;Ke%dN^wafv+B7`RWB@qihM}HknkN!Msq2{~#!t3w>|Egb
zpL4dj#j$=D7|r)^+-msWyp$J}2E38<(M%|fST4rC<jtwtaHaCH-An*UYGHSwvV#d^
zXFVa8A_pdunM33*bdJm_Oq|SMRwYmQMS~O1T&8R)y4z<T?=gBo+lRgwpj+qMApg-N
z+`7l3bxq&5_pZJE{nFK4--+(~k;Zb8aEFFT{28CRD($72u1$MApN^Xz$QWF5VOf^S
zMmj{6GxG?_e-74IDw{)#<N}GrT=tj+Gd2Bx(kL>5IhVXZZfma4_H4y@b|k+Y%EME&
z&Z;6Lp$%glA0WnTWE!(mMxZjh5s9WiOFrjy^@sI>hQq%R{fia-D+_&nwgU1cqS2-l
z<+>CFzh~2L$F`~t+ig~XP%vW}pu5>g%jjc{ZBUIu!lqL*!xi!cq0nvPHiDUIEh)L>
z5eU0UV`Dr|BzgmU=6N4U6;kRXh-^q{+3Q66O9}s3YNGZfzn!b~282-SGZ`R`gb-GA
zV5I_2WJMYsJd#B8G6l9pMS(h3x*^r72P9OrMV_+O+Ad`GpRvTCv5!Ia7EYJ)zvvdS
zZSwc<kUfa+)$s8C{AeG;X?RFJPD0i3=g<xQ=88Wf-9r0*boE|0+}o)<tg7lQN{8q(
zHmsPrv0z^E!kMU=V_9ytqIB>|(5nJg47)04=GOvf^V)hunVhNZL+{&%e>~jKUhbnS
zvV~y`XE#0`<=a2TD)^x-qDdewt(Jxm#`gB}@Q><NUFvIJ`SCUk-S}70XA!-+9S-$-
z++snC114!pcV-4@f##4=HNT!KMR`&*b4ambKFqW2Rj2d)Mf=rzm&lPyl0Fjm^`ZNG
zj0#7iy^3r|)e29n?`tQs<KHLE+|KRs)Lg2!INh0G0VXa7tEo3_y+}mcZ#&V><aTo-
z1V`dOkPVc=g|NvvC^t-Gj>A)XCKa`<?z;bT2=UhH*h`0s61T_#j$Sq@^V6v>|FdA1
zb@%9GmY3^ue<bJ9P);0+g&W(wjIxM+xo&z(!R|VAmO!$InF4@rIV^NQV~+rWVj9j_
zb0Kyk$V*=f2g1C(W50jiT9tj|`r8Y-+pKm}%SN7<q&rW!lpQVJUQ2=^`t{Hpw%b|c
zdC}V7K)IQTWHofiD05BFS#g#;ok@Up);}1*3({6d(PZfN{JBG}PxA_jiS|17+SM1d
zI#F;9fv%B!#hD=uy5WU(<C^xU-?vM7cs{JRog}9>2c!388td6Q>*@BB?6d{AGFz)u
z+vbAIZ>URL6?PAX5(f!*-Sb=HY@doQOJ2IYN0(&FhfzC)$+itrzHkxduW$Rm9nL=v
zZS(ib<<I~9x9h2G`uq6XkLFf2x4t_CA%b_wce_2mi|E(eZEf19o)%zbW^z4a)z3s3
zXLC+j-r>xPb4y_dxCUVXu$(m@`715IH}n3}qpz*QnRd~gF*F^whea02>%;Z9X(DIF
zD6kJ0*fVI4Yg=c-jWva@&b*#NsGg*ujN*J}`~hpU^kptxZf)w4?q+}uCQ+W9`{Loa
z;+5BK?1yV;Z%tj-fBg3M>!oYQaqNCC7VkG#?D-|0y6|w{tM^OW9FEO;wllO0WWy{W
zi7E3H<;y^rU{WnSGNc+gOETLZ2lB_iklcSf^tBsHMc}1`2bb;Dib^{e2%}&xR=r8#
zW^=v06DU}}LzXW&FphTs92}iIXO)gtxU}%=5YmHm!JrSBo1MUAnYa;jik#mtjK~@;
z-Q&h9i$;`SM8}PQWKAk@n-uu(E_5LbRp-;TUF4Gb2Aw;jP>XnLAT~X}#?b;4gl+*H
za7})Nw5%0ACWEbX4%bqx2J4=c1cN<kND|UE&q|X7P$hd_jG_kxYL%VM1env}HHlRd
znQXb=%eNHK>+`jG+gjH$1dOtvp<af}k=7gdB2tbd#gRDfj3;Bc89>?8_t5T3iE83~
zd}P@-d=jyQyFLm3Pk=YC8@EXjhd_|myT9uOZn4Nd%3mK{PD&Zd`B99<sMq3l*ppmp
z_^yY|zLR7F%w+iiUqR}eD>*N`KXX<x1D9+b8+1fi@M?k=nT4Pq=KH&KUrf48=%s{b
zdeW?eKoqq~F&V+82~!`NauG_vTH=7)W48gvt!XdE?bhk!@41&@qe>&Ckxg|1M;XL!
z`aJM!m|B=$fTcPy=<5;K=p7Zu5H4$<(@COLNuibjRKWQu#HhQISv{36S4Or-JA`g(
zV@yLCYLT%E+IM3pV}Fg#$07P|l&yQF7<M6wrj<?%n~;r(Qut?poj?<U)2?nFlPKz<
zX?}vlSukW4&7Vm-Kobuoea~OfLBCXZ-y92TDA)c{*Y8MP%MOI+!+8zPP+es<OH*jd
z$YG}JgAtxd1>+d2lc&1PGLVj3QvD!HQ*ABVCs^W}m><PWt4Ap9T|Oc${o%_D*%Q4)
ztEc0&-Y~yaf59GQR4dPBsff5@^c}r9kcz@m2EfRLNoJD7-BQvX-s>DxAAb~?_fvX1
z{uJ}()z?X>ixWG<bI`LadfA~>31<7EUz+23!yce3WQ3^IVqF^H=xjkZRiHCqMlS)2
z*+}l6iEm}&QNipZsr*)$tjz?L!riy&s|>N61~Yn6-}fBUu7{<JO<*2BMc>GlE-798
zIPOV)Zi}yfxoyLAL!Y3a*jWY>o5hE$S^$YATZIcd>k!XDK|bvQh;|M{nlGXEBwf-p
z+EQUX7>~L)^uz*Xsm+U%=HH6>E3x|HK7`SY+SFl`<fCjb9)?>vwil}w$*)4RcmpWh
zY<Rh)3jj3+q>})t2GS(vZ0*$8kpXp3REW+h0zewo3-yboi`$gTq)j!jfsg*gBD(-N
z>^}i|SbVPH+&yQeJdypW)_caQS+hnAR_tNr4QLRCbxm)b2o@mAnRV^K>XidHGv@p2
zTCzUKOCf!^UGt)-Bw@xrGb2#g^h%x!KH~=|aghwGpSlvOyRrHlkK2t~MD$C2xV`sR
zf>meuLch#V<dCg;nPljIH!MCbvy+a6AMAP$_nlQDzZk4epFaHb*lW{|MJ?_ZFV<S<
zaIDtg3`Ti=?I~yVD>HUiLoS)VT`;N~JYTZ@S@Ac63XQqW$9VN^E4nGO?Vqp&)dko6
zC~eCz-IG!`{QzM=H=~+@#Iu@tXJNBccBns`#(YDi^EaZO>+5CHKhJWY1fNtdv9d$Q
zI0x=h#g#EzXIzkf9ZB9u$HA^M@Bu?UC@y7C+p>3w>}8@&#{9!a5x4FxgLl5akK?WH
zy74jgUH|8$tNz*l93Nfzz%iD&FP&k-1H|`mAEnUCg<qKP*Y<MVFlWdUu!ITAR~`5O
z#+l>1u-sdKv4=eXX2%ALP;^VCjd0ChDbQX`>Lrsl@fkNNn(!zH1~*}Y7rtvxatU0{
zr^9jUzC%|Tk9lU10#i`vQ89Ca<(yvxQizMjdf?c=C~(FvRbPK4!??a>E7kRj!=o!4
zO+r6hvh=O!$IJD4-OTqDHiJ|`X3*)Dd7GUfw3aPrHbT|ArXiOS2at-5XApkfw*7~<
zZ?xR4E$(}@={VzPodReuK$VHg=xb~+@@jVsriH>)1m$vmwWiTpT6<MhrL6R^=~`(D
zSZf0jFKM;z(BHex4Z|Q+nNpe=x~@1ULo<Z3meo!1{x+1w?EIq%_vSt{)wv#>zd7&1
zp}g*fbKO_tsXS;ddkA!0tA1azhOBliXW~+3l_<vxj&ty5NdQJ5M-+?U4jo*ZM}&0q
zXPDg2#02TbdArG$V`pjYI>3Eh79YTpSKb_<Vb`INzALhu_NIK1U)zcth|fW5%`?gS
z^j_Il3`@bWCfyOizzkw~nu<chxeYMITl$LhJ#AvN=X2RsTeO*!XHJ@hmVhSm6MfcB
z!h5AXBKqZUIc&O~ljCAUf`uLAKqLTgqn-`xuATJ=2L@x<6@jaaCJi%lY&vhgTk7ku
z?fHCuHXYKGR+QKeZ-?gpM)ccxQ|AHSgXp2b6$nzTl?+h_NA$&tR%eVl6Oo3%h0m%2
zKM{SO=>J!5OhE06embuAQ_ieo3C4<{^BRoa511C<>P!pf7{bp>E)n(j7V0<_Fp^3t
zf;W%H<@Ru`@1&n?Jr<`VcFm#d1!VQrIG*p~++4@<xRn><*cS)pJcNhtvw3UpL;pi|
znhgJ}Zil}=?uT<N?|2lCrks2GG4|g~_geDGn=TZ_f|{YE$p|A841pPoun~zPZkw|)
zN0^%fA)brffH~wD7b5T)Q(B=*pG|KI#mE#BRC<tYJ0jdc#JQ^Otl!k`E~~ZhrPhsh
z{`<LeCfZOr0SE9^>X3muPQrNgk9KgNyy9106?$i=M?YRhnI9pX$Kur9+v3d*LoZ&w
z^c{+ls*HE?XQ%skfB2!QcLZ+^MY-3_niokj6uW~VcMgH+%UT3OuBdTHGPCeAJ4zDF
zv;8f2p^$<7w>PO;B@$QDqJd5P=*#Ub^G2%|Pg{s$wj?<sEoTRuc{H5uuexMpC5%#M
z8iD<lw7LONJU99HzU}uHBLBj*(WM|hd>+EyIv2@r=MuE7MR!JRgn>5ByK;b7gRI7j
zGaGaY(z>a}z;q`t6^sr`PYzX0vKmo;s)e=+t_4A$O3XPWw(qu52tVG+jE{a4miOp=
zSN?iGT!!ZFk7GU7uCJTU`JnIV`-i_aWAX6z$~;8$rn#)qnP90DGAJ0m1Sm7IKrx!o
zVGAmuK2K&!o%u8_`HUhlKsRm;!~Uu@+ct)LVhhH}nr$t;=QaUe+M|j!a^KY3y+)Zl
zK!r;n#|!lyEKX&8lUp>Q16cxC2KQmji19&~M_PiNM?YEVYaev&!=M*pqT~j_NT%sg
zuH-{@qf;b9I*3oX6JB9abf@I{aO(G*cai*DU7FL{c}M*XX)sJGp!wrs!5twv2Jy(R
zp^(kO7P>Pi_{{9Wz$8ZAfHMLoCfy2L)$`b2hjARPV|zZ=-KiV9PVN1lZ8JQYoA;yt
zt-W@CoG-`z_sc(z=SvB;|7?BmH$T+%^*;U*BqyR@ugB|VYcen<m@mube9o_2dLUe*
z!dQ)q+6D5Vpw`?n8`x$H&<z{T^h5VJ|9LCn;^3O065ypNVkLj5`|ClF7nh_?HI<|H
z{^suO7zL%dxs&n%`}5^-y@sage?0n2=VJe3bm2b!qrLZkukXjU@5j9zi$wIgxvi^K
z8KVPGCmq^qb*)A0OwSeLJqOD}TZ!kZyxCet&gR}g`$<KY*67$42TU$&Yg4Un$t#{H
z=Z2HA7*GZjz|;Lp%GUuTC;5mR`zZEZdnHSu<G$$i@vzy}0f86mOamP;t1SO7=|79u
z@@i?vwL|w_xo!NAyg7>P=-~XUG=)dk)#BECCwaAorCzPlNP=Q`;>hv0_!Va`S4!r@
zY<#JV#3K}3pn^l>uMppUanG36&o)XudfRA`e(b6fvK6bpI&19(Y>jL;TA!LthkwnH
z5K^|POK2x&l<(P^M*^uP0OttKYSkWMz88OjnQ3@+-%)2i9$#|}B+aDT_DJo0Sp(C*
z(x|VG%9)vnPEKi8zn`S-%K3cVI<X*dW@p__S*8ad8jcN1-ZYjAR0tpnu-+k9T8<n;
zG7a(@7&*Pg;QFn@q?~rGR!d;U=!{lbh0`AOSs1l}SkykEy2GyWOT<HRDR}30NB!KM
zs_L}mK*NmBoU!m<S51Pf_C6a=irpoUQw4F}7!-j8+*T&00pL#0#4o*@>A7wNip0~q
z53t;ZC;ATUg=`{uz1haZ8R-LURxJ8fN5n|(=BL4nWfAV2Sqg--9Q}KnLUUfRal$Lj
zwib9fFm0mPrAljiDG<4)(-3s`8lf8vQ|J5XKFGc{DHZFtFZs0`+U%V^vuy|>kcASs
z)S>59dJ%n{O{*aXIWvF@Oi6$&xA^k%vf4H6(LYHr8i7R=6aCwP5$B`K@$<d@t^JSo
z_Sl!aIIvqxifG)-%^MU^#K<kF1hi*lqX7&kkPK@nV~CCQ(p&Nt$hYnWt6|N3)*A|z
z*-A?zGO67QeOc`;OxUwUGs=>qlXSY5J&?NHO{yAUK;JgCY|b(Uh&Wzf*O574p6E;-
zJFD2$cLyoZSwIis5eWo9X5D@j-xs*zYJ0lpHh`HH%f@9fkPWPzD!viDDsJR9p$@Nv
zxi(9h815WM83I_xjQ||!Bnw%{Xj8;=0*gISAN4ES#{Y-FWY%)tT^)C6Pv^_wtv(&?
zL6ISW3xhZY-Dd*JTNs_V6;i>u!jc$POE_j*2u{r(x{v6lBd@Itth0CKmP9g&9ZQoZ
zG6Mm+={CASH?}go#!FARCX|rPx!n%`Me@t3KE3T?GFu*Huko|hdKi8+|3d3$O!<$~
z{Zj!s4#x3H7~R{cnVLVkzU%It8;V98dr3Zx<#E-Q5a;n2qbvG+aBlqB^}{dgxFY%a
zblE~PP}cbVP*hAMQqyPV*-T0%l98DMTgq&Dc)7vWS=W7RIP)Lh^5;+X1Q+5=by_!^
z$;%3dFUIf7o?_@KK{lBAreIG%uj^80?LJ8*b1*IeqkykP7ru{1?Iyvrk!5Z8xeT&p
z=JM*rY1AD}hFu4?YO8az>6qwd3%^;POQKXD^Tk=LiUCC*0tda97t$}uH%lC2m>de?
zzT}CO-=4cmKI2-SKVK<EZxM^hZ@X7LV+7_d9f{Q1?MZ%Z&sDwF?F){KDQY4Gw{$)x
zEy$s2k>OoN{l=W#;>M%<e_*jX>-3STA`4hcl7e*XmR2#^!oT;#-Ag=v@4CJpI(P1`
z=f|lU$`e2I_uDWO|EDTn@^QLVr|MEa+UvOy3V7Y!`}4iKHf=qOo!j-|Zf$+qY)ghp
zG<Jmx$e3GyKx2<(@H$Z$0EdpRW_#7~)oiaCnM&ywUygau{)jB#O2Th`vPGmS`l87n
zsd#<DBJ`<G!_kraJ`V9m<J)2E-F>`_?M-Iqu5Pc({i*z24>}+wvuAmQG|T|2z=_NT
zQ(N05ED``Sq>C<OJC=7E<qqY#Z&@a+^AW@;5gt2I@f>X$hvZC9e|H-5AUvTc31!K)
z&?Q#%+9}YpJKc2;lhb*VZXsU$%8oiV>?b+M(Xr7XUo1b!a-jX99~Vn@kwixQIuzzk
zv&j4jA9}JQUIX-akiv7>2w)xKI%&j#cTdysd`z-A-5vqDX@oIUUpM#!He*9C0#@8F
z&O4b@&XR0c>P(%_bYAc-=m=205zUiw@&#kaAQr0a*Td)f1-%HKv3;DTH*a79hd8sn
z=69mko6U8REyk=hyd`iQGn%0~H$A8Wxs-3~(gqbWvk3{q8P=}574*Ix-=%m-*ZNyf
z9v;Xei4HHJjh|9%QyLY9)Mk1(6Ws`>8xZZUMK8h&^xmxf8G=i}j2V!0^aON^nMTN}
zu?zz<lt5qut{JI#XSNJbJ-pb@3tT8V%vH7s1m)QPqMT)tBb)8Wg$N?1r~_}8E~`mQ
zt<Wo{om7*Ge!HA*+j?C^;Z{lWJhN$iAWaCFHnQHV`WV_R&~_t50s1=tEV@bh(szwv
zKOE7XQf<-dDWpe|T#HM+OX1JDzVva7C2~dcV;{TYslNB8d)2l(woUKr!-fML7MiDH
zT?<!u25%4`Jea`2STjVHGkE`Wa~5a|#Q2#I?3_P@Y#(I(Bw$<bRAv-G==9~3r3BJE
z!5!l!_eH^PinM_`1r&LZBovG`<IKCk5+|dd@rka7z$6PnIwck^PM*WZ{+qnW_lxk9
z!o{@NiT1V4**CNqxj#3T_0Bb<K`b;eVhkh@fk34uZf4!n%g@tkKeO&3u-uo$Pk|{a
z*gQ!p+pd$k!=xsaNnkG8DR%w6>AfV8^=FhmV@NKDtLMW#^yQ$xKOXm?@nam7lCQLj
z$PA7Yh3rdyX}0?`F^y;`CD?NoDnZ?&@hd<&mH;h)l`1n7yjXsbJVJxA*Xn)e_L}e*
zz~ab_q0DV5OvFr2*fYCN<v)#gQ2-%LT@$7ANa&07|Jq5n6gN&>x$3&LZ0Mr^UNbZ|
zqYn$j=cS~N-8mSxAk@u7aX}+xH*BJiAQgSy0h+b9(t%?l+!Qry9uxU++T;F-hIx8M
zkX^WhDWA0$x&jydl)9KkpN2huMnyj#j+;$4Y1wosqb*_p1IDT!!ZORjm!o2PIjn&x
zN+eq`?s>6_vXDL<^#yzUNS|6=pp8Bpw9npBBC^~(8ffks^;22tw@m?~DKRa;wu(L!
zYbXsKu?sE<a}Ale6(cv~;2n5mf&B?WFwfAv+4L2{E@RqdGvwv>v7AFBN)F%3aQiS!
z8l!tuFh(f%SiITk$@11c0(5a;rAugCzjX=4w}d_p)&N!iLB_z;IdX2nvA!u7bASb2
z3m6?N8H25bJ`DP#e70<zmb7G5x<dysT$T7_Wl{SiI}Qd(uBA(v)V&)o5C0h4BT30m
zh&P?~K{hw};nUsw$L@T;Na%7~w>)=fu(Xt{II=uhaE+uo95`T4OGQ4hYVgD|Lp#*&
zLA5XYJ$<#k?$@_ZJRE(?q?EwSeP5suWv4Fp)c#W6h<>?T-%InIGu4D7k|`(Dr!3j1
zV9qNj71@3qKOh_8OvGnq=iKj%`iCUco5-f|EwB8^=|r#gM6X1_pWoQ9^V$V<B!yTc
zM!F;!l7X~0I5xBO5oF;HxuHzSk}<U2q)Ov6;Xi7_kg{lfcqTtcNmE%JvOSIVtC!RI
zqOph3-(onFhXLC(hDrABD4#GsilLV>Zdo>|glr*(W7?hYm7qGC{v!}2TZ++f>tHgP
zQLKZ?MBr>#+vH1TvwA4lWvMSp8A+uRSKw{X--dSklv6#ebi0-G$o}z39V6K^UVS==
z>BQ7^(Z#(zH%0RLa@g!*g6tY9IcAP7NKo)wGB1ox02aZ(jRT#Ov&BXp2rU~J>CE{`
za;iCrx1?5o8bqWLjs(>6YOj$j1*@_lXK`PG|K||n{c(5LZT6AHk73HW+H3KDJY1#e
z`LWro^h|?o2;0nf4M7Vt?C^1Qa2@IG0a+#65@M!ygZ_i-i{$fXE8Q@LcWpLVhV5gq
zc6zIUcz^S>w~dL2UKQ{CtybBX1P*<Hu|<_L0-ROsATyBxx1)lYv?+#t><CRdML>^a
zo&Ac$vc<c$hjECwC37^S!TAzo$A@If%a`e6-?-6-G(<P}_~?q&x`fZu$iJLbJ|4=$
z`rNF$sbvEeh^o_rL2FE8)-0Eb8i(2ZJn(2yv^XPB$@UpA6=X^8?clPM*yr#X!s*2k
z!shVFz!*zEOiVl|c9l{m8eq2#w1|FgHl6MfkK)?{gxB?%9Sgl&Cet%peKzbId$26-
z-Hgg)l>8tcO~%Rmi`l09{79;K(MFMM7-YL$911q;@mkSvdu7_kLoNGGzJt)_v#=D2
z5FATcvRhnH=9MrJP~QWr3PD=N3Mb{jk!Gf&578BL&4^N2!TMySgQ{r3oaF><n)%Fk
z=|~{mNa8KZAAgZHBscH7x^M2H<I9q1z3(6WkDp!F-tW8NYTZ_c>v7Y0h3Ev!SF&Z&
zGHGxKN0cU<jr$%@ol7n_7;~B3Ix@D>#%8SiG_qlp{g||sP6@TNxtfw9$C#D>m^Bsy
z0WuCr8=nWCinr?HA^PY;l#aQv3sScnvk+>_Z{z;aKf3<5w~zMmpg?TXY#_Ng8>(gu
zfJ{KqLR0wlmTaC;+(OnE;0TuSWin}*YXXq*og??QQPJy`xF-+m_?~E0^V)7X2U{QU
zsas|2`mjax^XYVb2c}dVJNE519+YjQ%H_?J1YilYfkmY2n52R4X9T2RYJ*9auSFl;
zIPOb6aWX$fHesams`S9fa(veF4*J+X_x9XW(QmhHTLOA4mQ;g<qMDlFx>za#nWiu=
zTj;Xn(Ix|Y7)I*wdBG+AD0*7Pd8Nxf8Tz-B>s#dhdf5wOd?&GdJPQ50u9~%$V}dA{
z|EZm`0;y4f{7w>T5QSw1HxBhKtmm0o(+N^MJ80rFet|o)737^TXP1W)!Ek%odQ$>-
z!R4GZFWD3Q)HWqt-4wHNe8_;!))06mvBIbz+4vDQ$naQkhDP<M?=w;cY#TJiXB%nf
zd=-TjUpS2i1?=U(zxti)+)l;=#!1#IRPUqMwq423QmcH}(1xIt7HZ5=a~?Q~$VZq+
zNUhOL!4xqt1D1}xl~^u9BF%q-L8tX{m^`kU8iN{17dasnUGQ@kloQdb=5)Mm!_}oE
zPt)STSt^C>P!<4Kbsv%18DtT-u+FhJs#OClZMXD@Rr0|xTQY2mdP^3TiE;B2$xxb9
zlfDbOm#_`8lgcOAfpU^|?+JS^O82=Y?Nsuq=(p-zZ?=<$&+-ByveldeM5jXzjTH=d
z1u6%8l4Mxb>Xvb5qZX#-|D93az|x<-LM5Mu<aC4Hy$&QQdVQ|m_aSu5E;vUtJ)kJj
znvG=wwb1Lpt+0?udM4wWs|8mMz78DyUlaXZzo!omR8;K={7#z^Z8z;jZAJ9@a6GS5
z4+_GD7kkEhPqG7MG+1DShV(>Wz>;AEtdQBLWGD}t+5J0jGs`R0lr=gnZkw0ZE0dS5
zQEgFlK7mRiU~6AIHy`g^9INPBCwtXt!$^M(_n+gfY5P-(_c(kTwH3)r$hPf8!OA8!
zxy;6U3rjiYyp%0CR%sg72)fOBMZ>#sY#wapUVfG_m(?xPL(6&6H0aiovT>-aWNy92
zO<SbZak@*1qjU~V(lzlJM`;|4dZ4EtCJAH)!Eif)Fw&kEdlWA4wQjCkSI)603;895
zAc!$xI0IYFA!kb9ftSR{&zvF72pLos8pQ0a^w(4l>cDGP+|}GRRmge{0dbIdz#iNa
zuU2`jNqdRqB6@Yco;Es;3AK62s_67)Bsz0GFzKAZF3VOILk1{FCcrS2G_=Gtfc#d4
zeC&|~$RcLu88uZnNNZ?e^a>c&vrVw<RVQts714O;l__gu#9PW)zSE))-Ij27p$j|m
zdRjO7N5D;cq@uc4??i;D>>mVXud{m28R8?U7OWArO>y+)o0)<5>*{^Wqj?&BXW{&(
zy-RjO7ZJU>)yEAe=Lk5=a8qEmGu;=4EtXzjRR^JGIDo;r5oMZ#Kzur|Mry0x7n3ea
zIckffDcwbNNs1+CZ@uyWc3c`}#Zh`Pj8O`CO0*kv!S|=rq3X-aO%OjIjlCPY=70V#
LaXpsJob&(yi8r1B

diff --git a/data/MT_rep1_2_Ch6.fastq.gz b/data/MT_rep1_2_Ch6.fastq.gz
deleted file mode 100644
index bb7bbdac117a0965f4a41b71f8baa2bbac2efa26..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 20037
zcmV(zK<2+6iwFP!000001GRnIcI!B<?R)>me(2p@RVm*jOSWzlDbaa89OFEm|No0a
zU{1)A<0z}D_jaw7<=Ap65J6lfCcxqGe?R~DKk0nD{J+DYDsI=?@pw4ij)(lXT@UAT
z{a?`~@8X}Qf4J!KAMaE2@+8HS@>@~fb@{2hEpOxtQobrbb@E9U^K~hICSRGK=F8<L
z<agu({A~Gn{!#g7$Mlci{xQqL#~7zM<S${4^Y|I$`$v2{VjQP9$2d>Z<fmAK{I7oE
zF+QeIJ}~EN9*^lV=6Cx1-!zT+zx=uRG0W%6{Q2=8!}pTsujRG6o}Z%ol>c+`JIe2#
zf6@HC%9cfx+mZid;qvSGl`N^#R~!Z9D-MEtxoPs*EVH~2^?v3nMD)k^kH~+s2oG;h
z=k*oy=DPJ8^CQgpe$8XZf17>L&)4#Dd`ccwPLWoAU%3gMUm-%2KQXUX()ETka$}<Y
z2nXqs(*>6sB<l?IkXMr2wwNU!hcW;0<LE;e$Gmj;Z_K}WUe9TapV9YO6Q(J+n_QV+
z%&Q;5)DKl(H(h=Vb5@tftl|e*<|+1~BUyX$pWs5@g?-7hx>xHpS9KThWR<wuKI)yo
z4H03Jh~qprfT)7JBJ!%bJJo<l@$F~G=D{7xl24Cm(qDSc0%ehgY4GQd`@IzreJd}<
zLG^l^`cvEA?qQIdGz{Z?nyM^eAEvSGy}N}WzuI@rP!6~ASoXtz*<)Yw`g|>l)kaSq
zYN}E}Pi4jA8~NfQgjFqeyvV7h=vJ4j<TF`dRd&6SSLuzBuPQmTHbQChLU)rsWUqVK
z(_`MXd56eF`Nhv!E`@?mwk>3<PqGzazJb0Ktr)pKru@mcC;91iyp-otwNiJed0w(?
zud-U}Ld%Njs;i-rz0CbD*CaJ~d_7`AC%o?P)4V8vEPB)@{@q5Ow@>EP_O`cPHg>CT
z$Tiun<^7V)+dPHw(TCR8-etY<KZst{$J1qP<3yEZKk5=fv&CjPH33eoy}TH8_o~!U
zU)N3S^@F;z<+Jz}3L#cc-pD^nMbDDV%R(+zY<k)+`RgG1L=B-G%S)X<hu7`SL|mMo
z<ee~PXAq};PxSMxDvIK?(RUc~2=Qdyyb7rDdN{Ed^@$3CE5tx}y9pjT5a|=uJa{z+
zxSN|rpP&D}F7>p$Kd-LJLW+`4Bi@>PzFpbe>GgT1OxMt6^&j)oJ;^Wm>+y0b*7^=J
zE(;f7u9NOo5385;)-8IU)ZmNM@=(X7rU1U&EyBu`vUg&1-|72S>DTw;DKNkIuOE!l
za(hN-Ne&q1#}A@kuGM8FI?Ob!n5w9LtBb6zE;-x`I6iC$(Fyg|dZ~s$Uc(;si;NER
zK6tee`#$||Jyt${GAsCl=tbV|D<h|lIKlm@`I4KE)MtA-bd^DsUfmR2yI1?IAH#xc
z`~n-{p#K^SsOS7qazD3zsz%q1Luj4vL-y_2*A0~z{_M8159lW+zMzV+>0Q@_Q0G6_
zuIc>k*14-MKE`%W@*+R2t5r0UL>l-n4_!ul<FJSItJh!-%cz1a>hBq)(lS8NHRO}K
z08bB}E?P9?>pDM4;3Z*5(0}DiUbej7r@(I~ar%=j?fh6w0+$y?K`tG6tvt)FeqZv_
z;aVRy)?G}pPh3*9&-zQtosiuMJEy)MyICxrLv*9s!QH~RCpx-|WYMm3FKhlKI-Td2
z7uDR+xIg0hQ}71smj;c8<}s%R`;wQZ9KNsD-0K1|gwgHnG;mSZmf+ZYVr0&cM(nub
zg%DhgnWJfxpMrqb>4#*^!xV#$Q!_Q?&6V}gHI4U@f7GM%*)dJ`wrfJjfts&>_oEv-
zAKZ}5c$hz9tmnQjhw@UFIi#)HTiNtu*7C0J@-NXjSG2C_ZhMlKx8tcguN_;|a68d$
zsZ`0b4cVv~p^4#7boR+}MPcCdQ*>f#_&x7{7<gP3{d3WyWL~;;=Skx=`8vhb%8n0e
zLC)Ylqx>6Wvorpkqc`=RCwTq*=~@1UIQ<}cUiIr~9lh&@)C^61y6tgfBuV2m)p%xa
z4)4O#%Uyz@*EiJXi5^7UK}`!HY}`hlr%31Y%+_Z0?RVYVS&mr^n>tW8&DlKUL?_<m
z)lucn>Ya7G&i~Bbb+>79v)i@qS@gQNTvmxo@{m|8F%vA6QgaVP=X!H!<q|U%wc5Bc
zLf;+CngzD{qNWD<`D&|g9MS}l^VdLoHZQ@<w7OGSVtr2*dYW-}aM|2GrhCqU^u1jp
zSKf;AvA(VYxnzzQ8&r3-=H-Z25$&jNOHAiHOaM}J?*7P4uSpx@A<b~*rtUkot$B-k
zZ@ykJ@dfOcAN0MrREHHd;}Kp*MytAfnx7y%(R&d!T6KtbV8*e?jCfTI(vs~u&EygE
zC)FpxX10>}ypOR_=f|S<i`N<@_g*jiY;WcNX*Lz1S12b3wG%gc%&BbrLEVe$dR?V0
z2*gx&<Qtw^3@xBY&RjU7RaJinL`w|=9!b3!v;b<OcJMz8<KjjQ;$sw}xOz=JyjX_a
zG(A79uGRUx!DbM^F#}cCO~0kk3_^RRCJ7R%1*CaL%m*Zk=1$BWtbNjOa7T2LU?a=7
z_u!e%jGG@YOV$?l^OozaI2|tK%EWP90k;58a}X@4rB2Z9#C;qQ$1rCFHj!|vB7iB~
z35<Z$*smSi{^MrL_zSn;&Ep5jt5X(zi_OSyYMU3k)Dc?K%#7fRdMHF{03jW44E1@c
zBam9NUpT@9clcxM9+%{dB5*0+Ub{RC_%`}c?0`i1dy-eT%eg4dn~kn36NxOS5RR$~
z96Aw(Ucxio<+{W~G!pg&_RWInAa9dB@|1joz5KmLTz#GRg&@bi<k#z|+JteMt$B~7
zQswj(h0uKykI-n{8OE-1LvmnsxC5T*u3H9s<qP^eDc+FRtYa$DKYx2X#_73}amUo3
zKX2dkz|Y93j=UKJo(j8bUgxakc)YH&7KH1%IT4IT6bd4vIxK2LH36sutpWX-Cpx2k
zklCs#M5MX=LEG2S+#=f7hxk9g#^4d}JrtWjaJw)0rM|2!oO)_qKm}hFQ4x<0T>waz
zRO_{}qG3E?N5m=milB0G*VLhVOWl&=4-v@~jQb3I=o{AriAZy%Qcr%C#d4#oy}R_i
z8(jC{?)e%&Ww(9pb1u`jy*q`R(YT@xgLA{ZKbK8&>6+o6zjxj7aynhz@8Nzt?nr(-
z+^*;Px{=&##0vS<sUZ{9FvfB?+!PJeJbjji=EP_K!RYHzgCD>0hslCCT_-^l#f5nb
zRK{83miP!#Xvk?@&OVB+>1&PJfmY|Yhx|<#C88fRq|eXmw)3OEmQs(%nfgBNe#~D_
z$75M-k`}l64;I_eklDYHo}5vPMl24=sK&Mm?nHHPj9pWrJJ9_v!27JLt||&Zp`8S1
z&q?+SJIy+Z+=)qo6}ca_xdU!gR7_HC!l&Mt*v{jaf6(OO!{vW}5WPAUr*+cen4H2I
z!6?BgVFe?qOWgKq-7HPPWn6v_g<${-x39QXz7_vb7#H8v*0w>MRu^L(epvPLe6Ejc
zeb-&B8X8eGG6@V|Mx+B@rmCRZAsYIQ$OQ>qS~7N2s%RNsC4XjI-wL$(?rUgcQ0=_=
zc2DwC{#w_kRo0^NB31`Jqahr&JJMI&34Oui1?HbvYYhjyda+0-6A6zMnH``RiI?6i
z>AdIV<AOzaJjuJ^(*M`Dj~_%opDw3$T@@}W5ovTLq}~AY<lOCM3W30Af*Og)Mvr(^
zqXP{P5Y{0J`l|1*Ks#g{M*(UCL!YN-QEq*k1eeI^T$tvi=$yB>*Ff&!eh~dy6^C_=
zjb)@rC<>Sx>Z1xyE+8Y)b}O-XR-QHb0V<E%#=;s<J3>}#(Ziy`R$Y_T%r?82Ag~HG
z&+{{Vecq+GD7k>R)2KD&d^=_>Q2wCsm&37KZ*`>L8lTfkt4ziQfKZX<8#Q%}Od(aI
zmnUO_`D`S~0N9m(v*?RPXQu_lj!(RGiC9*fJ+8`;`Drim3hp}fE;uKIjK06`Nq)Xv
zi@I3j6GV<Q9s#(jk%&jKf^wVP1<%Yb5|GgfEKzA?kywpgbMxc~%=)(_w)MC3>zkX4
z);4aG?Jj7XqEH&ddeoWbeIs|d9n11`UB|EliM;`guu>k0m%3&xI(W}oL$v%Qa;Hat
zR=5F7aT75Nb&wuNB;hvo-;ZGwx<ogP3Z%K5VvjK|U$5`ZP2acqCr$#_YDq!^Az|8}
z^zO%)6+B81m~ZSXrH4KTe!E7lQ2SnPR$UiT%MEIw92pT2B~*gWF418VnOa#pO=4i^
z7XzqwlHhFo#S+^)ZP+}fFRwi9h1{fZFMJo#uXR-xn^4Y5;Lv3hjX;gTk`sdW1aS;y
z*FDZE%~D!L+6uFQ%uYV|Ms&lPCRxvBng%H{%};1z0q53!`z5~#d@)};D0^mK^x|4w
ztL>sIyzY{P=FG>)10mGv3B)1TWkD5Hb}hG}21OD@LL{6V%T+%t`a5T@+3I5RmKs}>
zSXZ`)0{1yGp6m|(WZui{)T?r{)f3rK8jzwo=vhfu8XTcTaG(+?V#&Beo@q)VoDor|
z1-8pO`4YrN6OCzV4B=rc8C}4zy$90<aGr!Yq6Ti78mUCG3_0X&zZ<_-xAJ<Zt{aHL
z=za=NXf}eV9Bxd}B`Sl0E|HZ(rfl5U+%6=C5tH|<O0Oj!CpS%g80y|-V>L;gNka$G
zS25nomdOA0!;n)l7Eu*+o-MG;m-KPyvXSovsrFOddw;vQ>z?Fwep=Osavj8C2eW7j
znw$58WD<#Ramj5W*ng<Nz@g(sL^TctMWn8(CUWl<(Xln-X7!(zYI!nfa1x=+^Aow5
zdJt@)fbBw{6!Va`OZh-TxG#EfIKJ$3XN7-f2sA4OwCJklh-QJdc@|OO@kxw~NY{x%
zMxxfpUVg@(U%uZ*VdvN9TB7)2r)SA8_4Tk0VL{|%p&0ZKU}=ojjWz(U<yl{}6xN{<
z2eS|n`zl+|On`T<h@36p^A$64)3+$)yxHvr(#Bzyj9zp@_WTc$m-TszOL@mog_s+t
zU5wW>cmrutSyQ&!mdVCvny%C!N0Xn(3e;`#8p8fuMf?5+yhQU4<aK;?U-Ig5El(TC
z5zp8<>aQPZuVN$u>2<l0%cLhcI7OpYz~do=!QG&vd*V`y-0yUG{CXE!^5OF3V|?m}
z%W<2qXo?)s<(RJQvuDrl!O!DfS@m?dovZ70Q)eO_l)tq9-(ZdX4Qxgt^v@W#sp{&9
zfO_@<_=3~C$DlanugBx*x~0uHD;Y)B*_0>cvDOY~I%<$8B}Y?p;LfNki9>e?h2f@1
zrczwvZJHiMylA|j(eNcLn9s)iien6)65vfrEy!MQyyqbQ-=VAjS$tGo^RMINLpJmM
zt#8U7L_djRU*|0hQ~@w1)Q5md5_2RW+}4?ph5kcg%))M`k@wgM$Z)}x^5fWdA%^UW
z!!QitcI$iyLkL~h`+oF?rmyO2cbk1b*E#=eTzNYDc^U40Y_IjXJ$9$_$E7Ozrk`W;
zG5PM?b@%3cJani27r+^aUTzoN2(Sba7sw?UZ7hN-;~n|n<QhC9diVhc?9P#n0Dwck
zeXH;9$ooHR0%EdKQqCE=SNt|0>{G|^gXHJS@w@>eL3K77v_PmZT|OC_Zk#cfjOXeI
zOR(6C9uP9#pW!<~ps4yTO!wg8AmFl(pHtsPEjdI#_~3Hl9i463e$+Mq;l@ZnGm||d
z)^TdI^JBi}Y*i8+5n1TpJ}18)yAkY3sNdD5zNn?L$PCQ6=Pm{XGLlCFs;KiusX`gT
zKxtfz*Z|Ot6|Ycp<EpQr7ke_vqAf(gwI;;xl;Fw#1Y$Ejc++-QJ&RtKMSa;o6lS(O
z`65td1}Ly>o)zV^YT}5C>4zT&OHz{^Vsqq(uJe``E!Wg#64#}`(*~)Tr{`7g*fXP9
z`ut&`OoetYPd}aWsuy*=vT~>jf&mLe%@ApQ$`UaFdChO4h3U>J>52St02R@qKWFhR
zN-k1raXu7bMj>U+sh4v0i9tT5E_kmQ=&TmKf7Oxs(F$vHR1h}r7Kz{I0CwqhoI|xI
z`sH>k%F}tBxS;w?;4m#9b7Z}VM4$)=3jeX1YBZ9FH%Ua%%2cER0iu^xmyJ4RIdkTf
zx8|&!0$IoqBP-(LlwZ5Y#<fjXXVc#|qoi$N9JSNMOG=k5LKeKvccved0NZEbi$m4j
z`j5`-8+ZYx%FAh+v%zh9m}zTWbF40*=B#unl7&42{-_=fs|+#%oE~kWhJg&|R<WFv
z&ORka9ksb7FQRl~1=uk)TVm91w{aMh7A#wN(%znFDyC^@q_0O-J52Yi0z+^gAQj7<
z*+bv6=;xxkoVLC#!dqKq9IzBA@CL9uFyl2p<t9g_gMy^^1;5l+6AQ79s`ACR{XJ&P
zQ}gZLxW=h>bGB2Nzh3J#jbbrwOWOvh07y<97gU2gPx?Oj(Yo=?C2FMwR7#*SBwL|Y
zE&Bcm=tW~Vm@Zha&sn<}0k3Cs@bA2y+4pU34w^d1lU?ICIhgIil9N5t{UG}JT&`I|
zC@C0a*%C4o(|I!@Ri6ncd_M3Rlvv5H^Hy^~^a}b-B>OuGhAc`g9(z#Uu4J0|g>lki
zagaVuG4}bTvTt)prx-<?rF%tcP;J)fW8>TnjbJ&oX`~3>DhQiHgnr)D_wrUAFZEX6
z4Up4c(_B=H&Vm<^ToLQBam#`0iVPNHNf3sOUO?8QHVb?~Bj0Izc+upq>it}6l%Wiy
zouo5R;P+`acaf*p^I?NVIu@Cha1Ajn$wCQXDyqlH5u!qGfFp$t?wmnkGLoE625K{m
zo$Xf_me1^+Qh@@BiR$j=sgV{4#TwP8OSR!qOxeb_;sAvNpKn0#oh<vK4><~im7Fea
z$m;Ne=*6|FUwb#x61W=|%#qm+5edP$NU#x`IuWW@{9Y?+sK-PWqCp&sueti`vNDR9
zjsI+M`Ww~2!wd1`vBsw5uI0zDFM4slZb1qLs4S*{DUiVXb`Z*Yvb$M%K_=rZy3s#I
zpnz&PmTa--zc=smY^|IWs?Et~$YD-iM<v<J^DXD>65>d}=6V4px|kiiGT-x6{V7~s
zbIr@|_xcyJ;PvISriwBAGP1w(@fpS@GXiABr8OL#jNpvo3RuG|L&YUFPz7OYkN4WP
zgH|4G?TKvYd0@%(J^hE?e0i`}UCWB-|B*iri4C;98T~-J@JyDxywn@kP>j^k1W=XS
zFSal!ecxMLmPqK=y`gT+vmxL~r!faWvQ&hLYkjuPW7~IQ`R}qSnsZZ1FIHC`{&o7Z
z?)~V))J|h{|G52j4P6$aY0l++c426J4q$waV}{{h=i_g+$R9&B`oF@WI91ZKdR-Gp
zP!<C-#t}qR3m#c$OSIsY&U(i9&RK--7j(*)hRJ|AY>n@cDN9M#X8jkyNmGc8;omPS
zpK3{5s`F;214u)+9l~}(sma>XQ4-<5o>maaN?Z^;jeM0vkev1QTj_4s#64DdYWP|7
z$U23UUzXBU{`-UIMb1|@=sQ3rG%gZqK>mf0-Wzxmt*6I2m&g?Ko-|PsGQf@mu8X?u
ztLXEKEcU9BXUtK*d(76V{A_-kABx9HbJMt2TRk6cr~L9ta%eK(7*FOa>PXS-9{5D$
z4YKkICn$I&dmadBKp&h1@VxQ|BzAndsP9*NBDbo4#D4x1Tc1<)*{IO-?DqMvS@NRV
zFvVcTBJ4Y&aa0%>hC(Hvz^t8v&Tp@<4JeX5U@szV1ER4OTuu2rOkuI#Lof1<0y+$j
zFicuBp2ny*TfCdVwK+EzlwBC3;?iX##W=OKG%ogIPI=|l{UCT%pAMTy#z<`*aH6Cv
z=(4q@J`d_Yw3tL0+V7Z_;SCWiId7bI<rXWcXpcfMf8Ez>Gq(v@`S!e^@3<Pg@c9vb
z5WK#gk89fwDw|`<r6xW)<F_wsiKW96KyS%gM+@jg)+C`0-&;i%(GGYs+rHF!;&z=8
z&P3h9LX`nvGg3y|%+j2z;rjewXxj>hdO#d7gnK*eL6K*vIvk31%c>CvQhJJS92f>I
zF4A@{WcZjC<{uoH))$t+CFsaE9zp!oKDO0udB_sRXH8;z#bW(NaOq$G_T5?*T{`$K
z+xRtFLCL}wVo;8n?MRo30NRt|I2m@XK>^&esV@SUaf#UM&!r~Q=Jyu;{YPGll=E(r
zN0$6rAFHk8SiNM_ki-l<k`$rDA@IZaCvvOXPIrWAJSq>)v6;&ekxx(B`$IGJSt%o9
zA3|iN(#a~%MF{x^2Tj->V@}d?6d!dkPY&DST{8BpPUEN0-qGYIKQY|z^GC?P=B~Qy
z(3MTYDm4_Q%|#o$!NE~%RpCS-LXY0)&W1gLS%#vl78@M-6jomSEA_><aMiee|Cz_F
z_JM!=;MGpI^R-^%khEVOB~|U!Vc#K$UJ1<)7%GF7PDU$^?aRcWNEd;9j7EY*iSA36
z^1~2l6=Y)KDWObuTmnA#(<4Ym(hL7DJGbn>bAC3?z1PmgX)Z-G`oT|5J}HU*xWCa)
zw@X=U<`H;nRAOSvGV8*8VP%ChI1(O-TH0QQ?_(caveJZOlE1&v*K7QJR=)Rm{)9cr
z&w|U;YwOMw3lurFv!o>mQ3s|ql}Ij>u^z!O0eXX|pX@SNKxXf19e=X!?;dYF{kv=M
zk06txDL;u`*4tGFuAU-kj!a$LQYABH9Q(B_A#+P#Gg^aUwCI;TzL70H&ZX6=+mN6w
z`8Ph_TniOnzJEx2b9^}WHwgbCdX*oW%4%Zo5@T`ra>s!R&tjrS?;36cV!nk|VJ*vG
zo*?K0LZgj=YlRo?LwL;HIA`0PH?}}(VDC#QWfo9o98r+azshx16rz?{c~hHhejKG{
zlRwqne;?d0i(XyM$L;VuT6af+B-_yFoxJT_+BG@?BgmS0vxGIW<qJ8U-M(-1eg3-c
zeC+S}I4-K7!>9zEls=GM&vw4m)ur4>4)n=!)d8)b{z6|z0vPH^0NA3{l31`nq64!?
zRau5{q+Q4MjMQXp2MktR<*LpuW4Gr+CrqhF2!OR_W!d~hkUL*)r{Z|s4ijTK&ITUG
zVAR+Uj`c(9%Jp&aVdRck46MNnA^XS;q7@iL<CNDn7-?5J@!}M`@B6$jg3ba9vj{GJ
zPUug)!fc~V4^+x)-*n9&qb22n2+7Bso=1m9k2!>5U-F`Sov7vAG9hYV%6SZ-0b$TP
zmdZhq1S#G)B_NO`A{VUL0LIgvR#|<fEsvE&UX2UWT`sApHtEj&!~{%;{c;m}of#%r
ztibWT<}cFM5_NdqG`nV)e7<<t8>4nE#Ke_rNGclSo>*vOy5^X5ve5@C>M>)(1HJ?N
zgs>H@o9yq7$y)av?ue@U{Ee&nOVq%3FWJ17eRY@GVtqNT>Hd~yT4qVckoN}J80*)3
zKFf&=@>zae=)#F*DpE;7xB9ZvztY^P2imtXJF+9R0(W;|>EML!+&Gj)kZYgS{W_go
zH@e+%Ga~w_D7O8&DY7uOm^C6##9k4%1B*i^>J!dU5MqIvJ-HX`*C&MezSH6FJf4eA
z&a{%DWzM8PRrBuXkt}+BI9xWg8E-I}Bk932We3*G;0%=~LhnQ%qph&UH1vP(_|)3I
zC>1h~I-2Zb>sOl_6x0AaJd`t{7d{1+OF;`75%o_uWs7AaD0IPW4+V=G+r8PS=lrxh
zZh&_bnAmxk7A7#~EIs$X87_y?s<FV;u8}8%?Qb1?R)2eDt5j-ua&c2*c#OS#$C~Oq
zKIaZBJHp(1&S9mGNqm}@0{WyA+0>nnp4FFq(a+~{Lld*TY2gUdEIMjt@6qNGL=HZt
z5%(6Rf+Y-S4xq+DIPrfedSBZyG_y!}r{BSr0j|ogC|OPey3ok%hPmGbpS9#r9ycMZ
z0W$Dguukw_q`~sR(Q%2$6reYjj!}&?z&SD^Mkc>>D!(`6PpSl_go=0bUYgrNABODA
zM<vH<o2D%9Ja*UN+=qO@bPob&xUBD?35DC$cPTmKW#2*+KvPI~aL!z|)>Bw8i_f5k
z%mUmPG{cZ8!(Ja_mtw@E8VT9zIv(uvvyJEeP;^7rHT~_@-EQ^mIy8OEqDWxV%`w#B
zI!(@HS%ZuPl(^>>W9SMeyo+JF4<UvzzaR4F^KU%3T5iM-(R&uXDi77hzT0Fa?r^l}
zk|ko?P-D3hra-Ji9sxPRunn;2X$j<_wd)FqE-<=G6X;|77;b$I%<eh)X#aH|{&{Y0
zrMr*CaIK5mhrenWRw4MVEWI`;_IKCx`342yip6FeCFPOU=DGOjvx>~Sy$+YdtvIg7
zD;X6sVp~!!Oc%klEm9&pQ?^JRAXs#sO?o63IFJGX#B($oqW35qw_(UDl-Jwmh0M#C
zH)&qJ?3CR!-6S&W$6nMt3|)H;rSpB4KhgRayykD^oL{&NG2ExIZQFa36@D6T;@Rt=
zAN^%daG?R$#hL+4h=L?okGYo7PzvP&IqT_wCnsq(f*_YzigKvL60iWi<;gwxHAF6K
z*-@e*pP%0thE)b{>6<vqJ<){=d%bKK&<O(}qbISIBjnNoLRlta#4mFQ7{vlTiP6f+
z&Tb}x0tKX{nm*Rea4O4d-8D_!6h(P1%c}k(uWguyv2|^AJKekU>2@#ge(+;;%O7Zl
zcI;e}7vD9v>RcUuFFyV_|Mq)%7r^eXKonW@Qx4#_*(wWbk?fzuaWl@$Rb=ySB=s|~
zSdNfAVvm?Z$Hqq_1gha{_u}gD14A&bhK6YY0)MrCQA4*WTf{LNzI#@XW*X}~$uGC^
zT-2|ECkAkyXe4~GL(5h&sNIwGhQ1d6{A0KdNCgq*52R7$6#vgn0{-F~*KF}!ZO<v&
z<x-rBjbCH*9+`wBvvSFJjv&imP65zofNdx-A`CENPMVo<$&e2IqUiItJoj(zz;ZS5
z>mNjy?#0u_yrUDzdk%ina;P)j03DvQX}5_JMH5kG#_*h#h-etTAu(fxc^`f7Ijb55
z>BSe=L0F$MZfBG_@sRiSIOI6RS5>wHAAg)q2ib4^1j^JrhP#aVZ~J>MG~oQ7L;(4v
zLa@7b486~me_!<T;ZUpyLy3h&rYkHuFnNv7l-xpfN0@nG(Il1@!H$lYv4$uJZsR{5
znYdZ&?{oeAB3qWcz8<#tGmk9*9K}8sk8yhbX~dA$u$d<s5!N#;vpNgz9MT(-M_l|S
zSB3rhWgfJ(YMP*5rQeTbBu&4b*Hwpw_LdgO90qZ~V@JlNj$|5yq#n_lV}&hpXa~4r
z$ZxFP;=Z-+I?nX%`fbF0d1sDOj{m~f-QMpOTrT<PLAX+{R4MqWWzIy5MwLt?ayy>u
z5_!Y{$zv$IC**Dm;~U1n5c|y-3Q1GLgMEq9S}HA;<)sVKwKpm0bt#@s!Va-nG~q$s
zXd`H(oM#apdWy}ktMB>gHM_U<R!0>DSUCr5d6K4<{fu<RS?;YJ{E6_RN3~@s$|f{^
zEa+5Bi^3x(<Iuxtvt)|W%6Qj#HWVY@+D|q3b9B=G@c?-uED9mR2%M5?6p)^FORHJ*
zsyeQXoHkjyCFwA69V0eu<!F!t7ZWvf_9!~|K=ifv<p&v~$}P$2dc@-*E?^gqHXgl|
z$fe<0V;FEnb>P~A6LjS<=B#|WoNt#>#@)N2ceinO?8>FeTfMv-UJ$t_RhjHKJIVwg
zqX%~|k=2V!Jea~PDmnJN>Ze$)WF;c`pW+#=yxZ8WxV=CUS0PYof57w6i`~nKjF*#d
z%H6~zi+(K*hYei{*%`_QNJmjLA;CRqA1XLs>}*=t;Z{7im?u7AT!XUpr~is;+m;+x
zHO9S}YWjF~yzPmu5XIrL9rm7_r5+rr0}zV2C??7QStTY5lbD$4kOgZz@8oR;g$A&y
zQ`+N!7R=8n>Y-dZdn1gRzJpMZwY<X_O<sGo#%>%uW=-VGeg-v6<~~oCe327WVwCCm
zB6L7Y*iWLD_4Q?#8VFFH^<)f;L)SGDRBKi84CCWMFJO}^>^N}=;MB>;@mn&%#+Pk0
ze4ZaR!hU*v4>j{~_MHoZ3)%R4fn@Vb**fIA<QjMH`>+R0WzkR9?FbHspbh*lmPirC
zN8=17LtngAToY&Kc}w#WQ`1Dyov9nhv9DYGZRK=+Bk=BH9%XzSfLqid_ZoXHGJ@m0
z4&{vEi*Sr>X~vQC2F@*s3PBtPG;+CTkw9Kz%uZa>hLim2toQlxMRpthyR@`^{h&@9
z)%L#~$%}*btgcIpsJMU;NzQ+wS=THL&0L9m2!NL0*;wl;PAOuu1Lw|pu*dQIS1K;#
z01uCOHvC;%v)A&Q85}3OL<aoNlEO6T+1?X9e=Um3^(Bh6dKyP-nAviuyV(Sl|6pWA
z7X8oDAD8ptbhwsX(+utDTAs>mbne6O(baWzz23`V82-H8%6nCQ+-~=eKMy&}@_A$D
zoUHBod)~{#C}mVR!b}gtkTV`)D-{QsHWcDsV~>blRp;Y|EB2S5D7Ozqd4r-@r+``f
zZq?^E-@e)F&(4O0J~2<v%`uTw8=Bo!Ki$gX>GD!+vbi~l@G)Ys=&fPN!G<d~O)8}-
z!;;El1P4ki-WzL3BcAxSXZ20nedx2P&WoBOa+jl%R*Bz1Eux^XIwY}g^mv|7&assE
zCdY8@c9Rddq4m3vTyeggFXd(+Fy<zZ5JmPUN6zSeHUJjM2^Jop1$r7C+wjRJvGz_H
zPHcUh0xdy#$5P@vMaLqUuwG^!SJY=;)ob6i8+_DbUc7|o;0?~wyBXSB2>lRrg5bTE
za3nb2oSX~&UOQKo{8E)01R8i0+fy(%ZIz~gI%S65$R#YN!V<uNQ@=Ibp}sG~t3C7n
zOP=goe(?PKn(xl%p5(>tSRbn6W-9K}p_3Gm2)#dP6GYwYKZLJ|UA~DlvZZqm=?#7!
z!hx@(P^i3q;Q-M1^2Wn*yf=?m{xfdCE=Z9@uj}LSw29-Qk)SvjEzu((&LP{=Tlo;E
z16jeI8yhTtZxJp#IZ?6!i(*=qPZF?sQvLOET2a1i#5q0M-r=&X{>bsEl7UDul9J5y
zk+-%K&gHWoQdb+Z(XWPoW{Gai?YHK3-?i?gOp)7kbNQtsnf^^(64=f}p&j`HT28^)
z9hlI=#Re*otR;qAU}XDRoBBJPP}7lTUT%+RWz=&hGqNXQzn(6V=h*%5+N;YEX>3h2
zAMb3cj04<)ILAH7{0ZiMfuvZsQ6q_406h9j%C>G=5D4b41+Vk^WCsc1ng^6-*T$8%
z^YwJy#Bu*iwFVpE<96x}?VCk@d7$@Y+vzd3kAS{@+Bl^b?9&4a&C@T%i@w_cK=^f%
zcN1ydjK~v)Zjd4F8LneoNqoZ5h}*0^%yFvZh7J2Xjl-rq8;d3<pP$3jgZMY2o*IU^
zrN~|x6hRst9$KCaGiHOzN3X&%*cbhhqs6UbBbk}GmV-RtG#Vk4J#5RU1?&r}of}%4
zh)*Wgr;QfA4&zL-nThBLWtayW#mz@y#7tD^fZG#st<Fo;0~ot3bkpTH_wJ8<m#tkJ
z+z@X4-V}<WzMZpq-_WHTM!Pb(w1!mUj)5lumaJ(#dV3==CnLx?b9pIRFKA+5`zto?
zi`(9Mw+%3@K%U&NX^T?rNM0WFtkLzsWZo^T-9$nxsqY*x=Iult9Ti8X4T%u_db)lv
zpMl$Osw{Af*V%z=wAD!@qYP^w^}zJ$c?!*fNoA!_q+L{e8l*E@0=PDeq0Vtzj$+%7
zaU6%~?t9-3dxO8T=v8sqlpU1E$>KkvIa>M1Y#r<ZxM$Da27)zs8jXh#tw{pYcszb~
z+>PVDJm&4cvZmol^nU0F;K~=yji;h+yIa@a#&g|W?oF`=&6LO6sV)wiaWhP};!-}h
zNQ?vSF>Z!Ad*a(Z+2{q*VPWu@pIbW=1BGSH-Bl*3>6|nZY*FKBCEW8iL<{zo%N`8+
zpd3>l1kBPy)u+9zB}-nNPSuNFQ^Y0m6kr;$XoMLy;W`#qSCbfs+5m0@d5#7W2p#fR
z`H_;OhqfBJqU(z4{M+xR%Rdk2;$Q!Lt`5IR!_mKGwB3+1a{q^K8h0#zKm9orUDL`a
z0y!MrwaqZ}&E=!L*LNJ(^4GdN%G2#wRGTI~j?;)sHOWRP>AD+1GFqb#4q(F!22lD{
z2SAd6EA%X0SQ5VvN_|b#=k<4!RT}1}il8V3ZjG#d{|Iw41;4B9r(1Entc|-COpL$i
zt(1&HX-cNJth*3!@^*3&JD|uUiO~vG9?jX@AU|}bzs%Izx#l6W=H`e2*jdTt5Fe)v
zkdw@U9{1q5QaXDs#rd#F(w)N&e)c*zTWsCL9{oNJu-r09&(WwYiK^9H&@cdgEE-uC
z(Wjtf7s)+54-@T#@fovyAG;w8lAh$uTN3f_4=efhu`l}hayY#l&*jJxaMq4Q7@gq4
zQe1-7bB;khX01hbwlf|AC4=)qJu&QGGw!T6zVGT;5oDemG5`FSHb?ABp1s-zf(59G
zdJ4Io5m7h`N-VD=bn|iWf?MSC?9z0sS3x;QGDP<GJp}t&5jWYUUuap@{;#Om&ZGlm
zcX>0VoM&;_98$$-Jy{hAON%@k)DW0|9I=Lm20izKbB`k{G|BpNjXX}1p+DHDusR;h
z%Mrkou+;ft(uS%5*~lZ-P<fGoJKD$g(BbBN8+<>EuF(^$rr0XwM)TDCZ6y`=ebKKu
zZCN3(0N>cjq7({NmaKUZsR+(#afVw!mjTz2B=0#O5&;cqMrdyQAxYn@9?t7GCP~dC
zRe5O7_8fZO%Ov1&a#D-i-RQ-wtn2H`Y}94AVS?3)sI=I*vQ`rJ%w@r(f>#F`nXKN#
zs)VAqj_g9<kV{uaUP}RHlq4YaOFg4N*#kL3m2sagEAgNNDWOnIpR&Y$6`_aIFx-55
zyA7S3TvE0}Ur4`Kj%oKKm(h8L^|@p=TjbfA3C;*eAm9xeA#5cJcC<$yqrMJf#!!g4
zi$`08824<uvwp@-sOnQ3uh;5$@$TOF#-IId^kM9!A-`9P(cQ=4<_0%p576bM9r_^3
z(~R*J`fuWxpNzYlBVAsun{%3y5&lR9^M)Ba&a+NSi5Ub+GxWN_(XedExd(kX+Tc7!
z^8W)gG0D-FfgFr&`hF79#WeV->$6X6_C&u51Y4fIjQv(;3>?Dw1(iWY3Sh~x(<Qov
zI0m->Sy_ZfJ>K}g300b(Pgz~Fn0I8~K7=vI6c6wD-Bh*8=Dj*sFU3Vz@8k?~OXLqE
zV>~gPwcHaaT(s&lyjNsZ(Hqymd+X3&ME^pNSbe8|XY2jhy8{-_8XnncyVSTpB{PeD
zD$CQ?Icf=V!`K;+_}uJf^DVr?25^bJF&4!uyy30p<_Y0{AI<Q0|8ae}%I}_TFwA|)
z>r+v@oI9Behrwz=gOQgZj&TIj5x#>jjb;dfGKAlt3$*>DO~z+|ZN}j=Bygkhk4d-_
z9X^Fw5a(&**|L9+6K9CUG5_z2zOR&K5<3|uxre)#^{qUg)<spvZJxMMK^uw0oulQg
zAOa<3W18E7I_ePe6Ql5mW{|+5)5E)WrN#N>sZ@v8^TB{gyjy(Ytq&Wq1I}C!jAw6(
z+Vzm1o{CLXl|`_~3ZwHZ8$eS%s=@$RjNc3}hYF54d+cK|`g0<)#=TYdwTkW>m;L6Q
zm1mEu^XatVZ`xJ&>+x0;hfM@)<Ty?dNIXT-5NHNt95w=?Cn#c`jA3$Uz@0T0L0+&{
zWL~ZNf-oxcN#dAMd(!9Nv(J`PZq(tZ5~)qorvx*T;Bm5B<RIQ05`_HAH$ooheVAM^
zx%}C=-D}a#l3&lo_5ea+i>$3K;R@E=o}{G@nejRgsdL7JMC(354J%8KQYC9X4t?WV
ze;;r6_U8LB|Ng>x3L*c_p}*YQrkC(-Du1tfCvAD7B(A+43Lu0iCnwU5T{pVA`RH8R
zT<fB_wJ!hEQ1<<g(aiOfpB6>288~AVBQjkShWADfV5mw~xcAoBN%WEijo1X@m_$3t
zy4x?H&vEnbcV0~lUk-bnT0<l&FF#}tni0{9>tTK1Cen{hPoi<f`~ovX0+Fk0y&1v8
zv51ZtA5$8z<T<BBqW7SU%3=P@)<~on*?XEJ88>CU^`UKww!mzuI`rLa&B{WN@m)6H
zonX+4Z%kt|{CjMioM$rJ`>vm5nA%>(c0J#Yr}|QC`|>e&ATsczYr{Z)_1DR2swkj1
zV?wDVjK(<uyI((zYzNSux&aB_rD`<D<iFU3QPPo7ODS2i#|cO91!4c{HrLtK*zhv!
zRerM^&o^UxtE-~z`_2#5Z}lz5u+w;-hP>B>Qt9@Jiz51^+{~6@b^#>aTNsOy0;#~H
zJ3B@<?EbMf&Lz3Ajjn+|;5;V0KY}y=(ct%0gU`O`<)J=oN2vi}AfFe3D+)}8(xIuz
zcw`QxRQ2qTK0SwxI3xe21;2MJroYY%TE(;Th7>qEd?UC^-p)FiWQYRb1sS;&7{j7*
z8x-37viHO8R=?z@=fnEg$%PjK{w~5*^1hwrqBb>Z8LR`kfg@)Qy$__zCW4VR=3bs=
z(G`UW%Knw|n2v5&jsE=1eo7~O-{du|KFRdBiJ{#nTtGhwcUamx6p<O4{QN9qS;l{M
zv;Lqle(X+>%c7Up>)N%^|1xMrh(_*=;!o&BRnnuCAvs*a0f9wC$8jU87BEBa<6rB`
z|L(DB)mz*EAuXAlC!oh}r$d&!I+VxFM(1Ihj%*2^o;QM%FC7WK*zBJB*<8LZx(*!E
zK*$rvUb^-Dml2`gF#wl`wd(VlCoZc2kY=ZTZsxvkTUU3tx;=MY4p;W_^elOOIM(Y~
zJ4m?6>3a5O18-SUHv)^u-r`b%PDZRF+XSHuvNe`%eA;VMUK1mJg**b{0Xtbou>)va
zoOJ5mp5)TFS(lscMIuY=&oe&vQc6vnT#tA@veZgf2Mhw3jzPI>+hhwAlDoGj5bAME
zY`BgXO@a#QC=D&L%V#}Z@Oej`X|108F!#c_kU<P_bj{%0VNY}+3>D`!BxhYMDLv!Y
zr!6xEx2I+>6O9<w!X7^K3Ov`?v6BtaaRLmhlYhfxBy~lO#|W@_wgcEA6B03p!f4M?
zZ%}@()?<U5d68qAR?}M{{c5Vq6Luf{V>f&+Z`HXhUJwdLx;_~K?BL+g6!PaRC$qus
zEW3Kg2x!$3<(Be`<3tlKx{kb^?$tGi=J&A+a-0WbmguD&obQKhs9S~EvcQAfiYn*i
zdOPm@efD=h`eBMiZ2xH6;Zoo0!+rb}F!CxreC1(t?qsxnEaV_03*b*o%X4IO`XriW
zAZ#-Xnm@;OK2=8@-7oS%<I1o9xal=jcUELlOnx_zt8bU%=IC@HF;+5MSl}SfM89<N
z7^B9qdkrh@NCd;kM143AahUPw!i!7SVt)?PbEthEoz7Lsaj9gRp*go>KkB^lCQEaZ
zbXA6^<s_}Ww?!id=eAk7oy-!7^WQ?Cv*a>~VKaq-zL-{w^;HLOnVBi-iOkQ4)EGTz
zVP5!{tV+o{hpYTA0?;obk4+1{3va!}esGgrzO{SJEtQ-=cs`sri_XqO2bvI&2h><2
zjxUMPL`C%h@q1cEsNpF`Bcp<WPkF+#UxN1y>G$g?1wUvxAK~TIeQIEpv(?kFtTyFz
zM6Jqv$1j>}5M(6K0&t1W+&LDU6u?DhyM#l|YICf@kyZ5um$Kj3j5GKW@oNvZvLv!q
z6FcWLoL7R)sIX+-;8JFqTq9?m?5_IhcBzk>1AClJTtHj3&CbHiKUHrJaK@3$j^+hV
zkFAQqLY>C|XuU0fpFt>5$%!aPakX#9aCbS1A7+W-qoyL+q2D3@JlFTW>68*FujoX@
zIJQzqE4tRZaBoA`4Z}@lFpV;V$!GcNkG<f%I^T-=dfAM`wb_^kz@?=vhs|q=(<c<W
zqJ0BBE*akhH7Sqo*5nohHu+al<S**<e9<RAB(bbL5xbrDGPW#vc{*=rYN4ROi9peq
zZfH=UImc8n;LXlBzkcb{^UPzN4a5g^68Vo$o8Wz|$UeTO3+&cg*7s-GI6_Lvn!hJ`
zq32Uo8}<ki%EYNb$#a@iWJe$Pe~FWotnz>cDE$Vr)vV`n8Azi*&pK~eXW~Kn%4ZvD
zDu*!Q*n2&3s=nrMv~6{yRxhJJB^Pbn$KX%-moEgknS1BYH~;&!DO>3l@IN{>5&cpf
zw&#a0>~3q>VQmbjZCZ5!$vfj33=_4YLIS?SZVBhi;yG*Rd#mqHRra_iU(d&0DAKrV
zFNL|KGPitYFIla$(cxHZh+>4568SwgO^SSI_B!acH>#*-5^s|mklnG+ybM+&H=6ZD
zJ>Y!e0S-}s^(atWoI)G=K^~npY6(0ydAURmS?pv;oOI+~ebY8wHxA9<W01otqfo#V
zn`lF+7{;_0$cgCZs=TcCI#LpXu$}<&QdEYC*N_c8+>X6l>f0T|HxjGB1=IwN3=>w{
zwL@FL1j$Xpqz9woujFhpR7Qy35~4^vKg)r8Vb%${(O5Q*c%3<oE%uK4T=*Aqe%Jh|
zgK>BFdU?B)8|Dnw$6J?u;xXyq{X{e4tgsMrS?Xtq<Q=h$xPWoc3ih)vT-dM1daJ?H
z(yY2?<!szk7W#n`KEmuY5uElQib@if^Xaf&^d;3`l?@Y5N$)0Ij;$<43rE$(k@^`y
zq*4q^5en8YEZ}=f3ANiWz6@P>-iMW_sx7N6?UtXwI~63Gce{tDi|EHoS#FA}2)isP
zf0F#8F=Ch6OSF0utIZKnKZ4z~-VDwfRuS%h@71II{@Y}2hfeV<w2%Y6{D56}CX1fK
z_u_TrIr~nRQ4HQjtR$n%Az8s7Oc$-6Wg!Pg1~-mFwmd)1c^nE+2>IoDcYofRd-LJP
z!Sx^IxhbW6Gg~PC=jc0`f;dcrPIb>_YiRSvAF}0}+8Gu1p&fIUpZ}lPD7yQ2Yg>2g
zKHO01X>_|gy;A#r-C`7M*<3Phy#cHpc+5z2w0GWyQzS<5BulUk9j#$AI;+vE&3p-H
zzRp2@dB~jCUq8%c(fmDhMnpf!Y1$jczY(N3?mGd7Bs(tgs2&&XghuRU$i;ari3J+u
z=fLIZ9CiPEH)4rkvgL4bLWdGCNA%?{xP|q1&c^+I<TPN?)2w8Cu>738ii?PTIb05#
zIdZJ0fHq^pSE5lP)6(Z+rrRx)7<ha#iI&5?P@`sj!vDg^na9e${aAK-ORb=W{3Ln~
z1P@yzcGGaj=4=)n)sw+k(alF2T!FE`&!Jn0f*iSJ3<ptt`Z;%*cfWmo@!6iQ8WRMP
zm;R4k-&V=M$d%yEx|AG?rp`Lj95V69G9pqMIzhJQ+YAnN-vhcO2NwAces~sq$T5tZ
zBis%$5@^VO+CQ6q==)LP?|+xq`@L?O=A-X_|7dcGQGE>Oak!0s48H0<>W?;u^|h#9
zJ^ee|0ZCW8S8n{i;FZ*sR~y|Mppr-yax_fLX}|@C4I{=3Ylbz23qUtiVv3)56bPbU
zxAN(`$~-TtxsKnT4<c7M$;=84T6UL_9MiZT$O&KSdVKY3932r6p#6^**jZVfQ5nmz
zN2!CWW!iw(aYDl&19OGR+_`S>VV0xf$FXtRx+!t_D0J_s4TB%`98x{7wYvUxtDEl9
z*KNMyQtb#{AF^?;>aV;=BeXeAQm~PF#{OFJ7ih0HmK@7X3<YtMe6kQBS<BC_9h|<`
zu=|s)=qLC5TCLrSr%h$m<9KXma2giNh;YUbRCi9&5L&-HJD5;iN^A&1W@|AKfc<a7
zxE1s7&FkMtERJn3cHD!8E@-X#2g$GJoV0BVjGkTfoDxVg=E?H4pq&sqiVlv<S<s+4
z6%{FuM<*1|KGNOrt3!rNTs_7oI?e6$kaWpzW#q_o4WsLQ-}%3ay^H8|eOaS3E^;Q8
zH&PH2baF(%2QW^!BVmehYa+d!IW2+$&n<3sIzSZhQ-}X;CksrE$5-6-c|XE8>pm|u
zTCGYxj4HVYQPlFauC~}ar$yOZNDzNh<cZmyK0291_+H0|s@;oh>0rVO(-94eR>Wqz
z=;Ny-!vey61>;sMvRS732bmSSFyQCmku9E=@%R2C>;cds`MJ2%n-g68b7%^!R(x__
zAqXiXC^<z8Uhz-1D;YC}z`C?RRpj4g(VMPo+ctaQe)NOd?!k$LABTSI2VuW>X+{<z
zNk2!BO|s$7f@SX?gU_yg7>A(BpLcuEQGtJcROedle;>l1VUGwbqL<}yJ#fZxY&|=W
zV8t~2=CA>5V3g1}K+BW*X<ey^>yu-zCZMgzV{iORf$?dZwj<89tp49_^!~s5KhFQG
z{?&E;(B6uVs;S&Z-}-M`Rex=(AN3UNb=N9!bW}r){)%d%f0hqm?UR*XNDd;~*PC&4
zOJk!q#ux}ES#&u<QqL7s8n70MM`^>l<3RL2FRT*+sMOC#7y8iW|6#A?l$^HarFG*_
z_)ArHT_{7i_D$=W(0iek)}fhN=f=GVHj7>z59?zcu&r(08&NcEz4VK}`;X1w8~eU^
z;uT?RIVaj)oStUxUG56L)q^bXx!J2bsN}^Omcq_PVbt1lqt%TgWr+5cozrC*NwPjg
zwDAL%j!RXRx9TZ5z4)x7CrmtcBd>0QAeIlsIQ3gSo=$zk_-UkXF>2!Yyi!bp41n+V
zLfBf48?8?D28MM6p%9xBS!FYafmF`sQE>EvU#QNwGv2L8&&ej|kqrAX8TLPT#OFBB
zHEL<)i#=FMM8C>8CYyMMCpav9iKG*F;_YZrwy8>lTO*qJyb&Z_G-f5rF`fly4dCDB
zs$cR`IS_h&eRmD!Xa~p)5KoY+ho59%x^#R7^>llppKqt4IBf@IMq;Qar+D^V6Q?0U
z>B)y=fhI9px8e((nUyRY&nIK=)AxrPd=swB`@K!_UhIRkKW6*No8k9UHj%t~ImQGA
z+TrLewCS?N%Z5w<mjcij$sMJ|BYwkbx=-8;#zb-JZigqKktXXdC(1qagr*0Dy`$|O
z_ABBDV&RGAoj$lYLbHWR#zX6}=qx~FgRxg|sf7o8s9vVMJ2tKmUx+Ijlx=6^C!ReY
zi9Wy(8)`M{ZouawY2f6@)pb+fnnnir%TXTxF6!f<$xf=7syO^Pj`w!RAxjQP^dZH?
zrnq%wT^;_o4fkOfr#8nt=jNlj3{8J`VQj1R)_i<aRaIQC{q>_CcQ?8mJXs&UoNa)t
z9Nn5^1$Y(>h&3)Psfg5;<kVwZ!#J+Gej_WiDeaPBW$mTEMcR(%KHO&SXP2||sYQA&
zr;*O*!8LO7bXM@T9Y5;we*3tI$ZZ!o*Smk#{csz*>uFDNVL)Gt?c8r_F}R+{TkTs%
zK#tHX47g*+Ymhw;V+PeCdJgDtZb4_hM<VB^{PQa+{f-a$@}|83Qsw^9H6nU(Jrvu5
zx)~`<dvwbIL?CyQ+-S3RB1uw-;4`O|I8aGQi^R6k5Kar#e|gr<OQ!y8b9<8K;}Q{h
zdviBYzuZpe%VB#c8&eVkpU4@CNaN_tP(ac@=FmWmO7wsTUlP-FYy@K({*@_1PbV#H
z*+z+iFI!4Wc0aVXD$skcr-S^U@0U}xt~nTu53Tqpwb@)P0W1#F@$=pWw86`fPT~xq
z_=UmI-$=eKExr&1HqT!N<byq|*NhK8sLPlid*wxu{CwIpdD!?~zsRM)6N?Aj1PSy(
zhB6BcmWYg}^Y)yqhB^zP8To0yI0X5-8tW@#(8J|-v!6-<vcoMqcR93fPjDg56sPlM
zgq(pW$h^TIV&h7T(MYn^tff>77`Mm>*ZKjB^_<wJ4p*=SD87kc*N{z=11(DVX-XpC
zt1By~ogy|eP}*N)bS!mZU-aT!uO}vw_~)$wo3#Z{Um`6Zd-uVOUkJZI>ipC0(baR@
zTwFMU>6J%d;LK#Bv7RUGK9o-&XKwmM%j0Ty3)vt1!2FU7xgUPl8M}GE-}a6buFKoG
zs5Z7ea?&NcmRvH7hJ=3<$kEgZWCq5b<#o46i*hs}oJdwEGFXv3xR@0+j$?lrgx;6^
z_28R(Ej;*~kM*(a-Q721*IvDEnzqSVdkl5kwY?wZB)cfX?cFraQwaA^+=k9iQzxKh
zzCK5?<G7otUT@WDb1WH>GYz>Mk0FU{H;<NZ!>fY>&z^0AA7bPlCqw?-XTM<q+n~t)
zu{wS1E%-iTLd`6bNg_sGXQ$R=rT^&f8U=M>_QS9X%vAa7DgV2%ao(V18#LoFl>$??
zjNl$gVmEq4!bA#qe0D&P<G+xkQI$Z?s!{p$6o-gYs-$K-7cyqbXGM7EL|i!=x)u(V
z5C}vYWQLB{V;iSf>N%Tr)B)%D*OjA8TxbUOn{Y#Sjk}&<ayoAtH&IYX#)Kf}5iJXF
z<N#v`usdg6iQcjr%x!SaKzh&#)xG&ulD=BU^*1Y0@Hp%IL{|kjv}1ekr*<5*ce4+Z
zlT&p2!gbykUE8_NU7PCm`|UQ=RoA<Z-D9-tS{Ossrb*k`STrU4lI^YN#4<UGd_Uk6
zY796q40$W6=MGGxv`lX>lq$aUY-7$WRm?13mL>Q>TT?GMfO4MeUJo!6q;Aq?Zs9}4
zyv9?bgK4rt^cvv{&X-+WDCcn(c^A>E&EdMFezD5P%Bf?@#z>CqjDh`p-QgH*l$cf=
zI}XT1>>HWRZ$-CKLnN>JVPkK%VLAp><b-wjoR{&~3S?@SpLa(nwF!8R3RzEnCxXqw
z3-n%Z;~5)pO2V5ZV)SLnDiFxu*a}42FXnOpY>w=6=Xe5?tm5}6+vLsGaV*D{CN;ms
z9yvBsuF(@Z=TUG8xgpXXwgbJZ=-12Xx~7CG76Bo-$S_&XS?vW8twnGM-Qm%+l16%<
zMY}|}jC-b9|BJ2pf9GKtjgH>g6J4P9!-fML(U4#b4J1JN$n3?jj(|Z!-kEHX4UIAS
z1%GnP=`h&9kv<i{oB8~V{`2uYFJzAk`7%4tOSAs+qeuwuvyI$c_2VrY_`_z=E!$q|
zfswTrNAxL0y98>Gkc48!B*Rn;kwwtTP%rZ2k8dc@|HBWx4f~Rx>&x~K!UWPcstg#S
z!r&7w+9A;Myd{n_8zgJk@5GuNIvL3|Vu40}ijV`PS7RKnZam*^bw3PJTGLJr;WEn+
zWa{p-j}|^f*Zy|z+orqSZe?FL<Fy}ULdV$O+HSaY*SczkrXKEXTlc};HGS#aPCz4H
zFZJs=O(^|4p232&(~<X<Y}aC?nY~yJ4liZ^ym~?l2RUjGpZn!C|K%+|uNa0;bGRq@
zNkX~e<p6QC+X9T@S&V+ge70(%f0mfg(jKiT9XHG~3?h1Ar>U<iP1DP-Kd%vJ%wIyw
zd8c8X_AN;Y9+VIoBFN)1Hrf;%^x&KU^rAmMh<-jCH~MbjdqSG}3}+KSWM?4@$?b3e
zU^~N#gK$NRgH%JkU)TX}8PKe#ee>A#J-p`ci)nifaS()h$h%=*^qj438W__;;ZDyA
z3b%Oz?BID{Z1hYr%g$hD8QwweV6>iIB=f6C?g!2v%{&<rF)pWQOUAzK+7tHh#HQz7
zLcaEgML)}w(5l+ZSYje-bI-lwY;$$Wq)~A>wYuVMD@1~@aVf_p0CbqlM!ZHG-*)2p
z12e1xhTAJ^VBE}#S_F(UvA9Wi1#%XnX}}<s@u5c)=_x+F4}I8;<SvrBT+3~$4iIgb
zvjVh*K`MM00&^NThguZ7dpNX!Xhbp>sUZO}++QONNgnE>mWki0#MAB18=48vtekZc
z#}A@kF6HUvq}Al?s3A-UwJ|xK6T+~@sIP)S0}_QJ(ZrD&3b#lO5h6S=5x$ZrmY{V*
z8rq9#Hh1G$ahJ1Y?OyC4Q^!Lg2L4=q0b&8Hp}jhC41-oAST=En+o7%uGw&mt0FtpK
zFvbBfz0rsKLIXBlfE<ecDJm}G1wMn9CZC+d8JzxD&dv?1)y5eQ#ut|zQ0NEr6dXaI
zeZL3Li0I|HemR&7Ey*6ys56&tfUM$_AVd*+2mBn#^-r7wj`|a)|2uT|C3LR8)%TZS
z&+}TfgD&FQu)n%{FXVxozUoL|FY|HdY~c4LuP%p|Q%sVvl^t0k31m(WOxcs<7j3+X
zxpfkwEU9-)d%WE@8|{GeiiGy6L!k2CS&mGdaX5Hh@IaRx0%phzGmM&+R`Y9Bj?+9o
zE}z$NR6aO4do{R8&f}C`-Ve7cc_~BBkM(A!Css_XcubB6!I>0|Kfrb6fW5RZO9)Ik
zitO#|CIZ#&9qWD4A(7gw_AtMU*4t^ii+qjum<SBm7Wy!Vfe@PTJds2pXzgtll9BJH
zumeqr=tYj+w<()uSdkei<8lIxSaO!mB5_#mX_no&%E=C?B5jB}2M$i=F^t%$Vfu_t
z)prz6?d2foc9P-8Lp$6DG#Sjk@5icZx~}$>oG7oIi-Z5OE&H~;f3)2f#`_OMvEv~>
zt<Lp&np%oH7MU>#jI*Qm$Aufok}|!QHR;pvkvI)7mYHfq@Ei9$+n2)1b11{=EzYHR
zen8E@^c-g(fvn3`a)(MFqMz%RN%!e_99v?`4q)d6t%`7ME?E=4javcy&guB5H#$an
zOmE;6{!X?!y?M$5KNj;i!~WAg?lq_%50#udxdEZEiDR?`hRKKH3M$};-Zr%J!v7fq
zj+JGfo<j>@^9{!VAxXPdCz*OFlrIlcCcpcLA}TA%dd9FCe)_mLa8Vx!YH=26KkGS1
za%z**(q|m9oz23|q#+T#IF##y_&|<GpnI@l>XE!g&dc(afLp;)sX^ZAGL8pS;wTib
z>#VgG4$iN__wWdvjD%B@hKv)$nTL-$x?CzSKHOv;{^aI3N}ofv?)m#>oaVEC&-?y)
zrz;R!*PFANz;*>G4viU@s>fOq(CK8ceT+86z}R7=fXBo>R}d1C6&Xbvp89U|2bp>o
zZA&|QZ?@CAmVG{9dhl#*tFtBM`UHwTffxDcb_2OA`uTJ!*P<i6MD4^|X>mPj!30mF
z#=#lT%sbE|0dOopKI*VbQ{b<#D4yhr{Q=nP$7YVnPg^~Ue!XtS5)flt2%O%cFB0bz
zP$p9hdQIXV=d?uHy2vstK#Rq80>^zji~{tqFT&{BXRrMe-}x^p{88xNRZ(wCZG>XI
zvr$vg^Z<!$B)<gC%2XtBMk=u=Pte9WYZ4*z$gNWKUtP|}<MpPD>ksRUXfcRl0mzWk
zJGLkK=~m}-byIjq-hOMpKo3Yn*t>;s{c)i?>#YF@ot!17i4_^}O&nV|;o089GjHz1
zcbdMbLIiwr$^nra+ao?hNi@47u#d;H^eP|Lv>|Lx{OWO4UIDnm_=P+PB2|u@1GMj8
zh6T30QK}MWin3s{&mnonC<a%3ryC1}W)5~4vUKqkVV6Uqq8Ih)c-?q)@IJ`*BE=tb
zr-)4Rm^Ic_5>s$?xNt-Xy}gy6c;Ewz7~<9A{Q2ZCtB|wS$@JTC)B%E7ftU9i%~T-r
zC{tN5uVFN>hNsh&W7pl~tfQRU>@X=*^lNe6_V+}ReU0J>s)(_^9Ba;&k{c%`87yhl
z7?6p)%|q5{MG=wRPAvBdO2(b<-aM8&rK`V(M~djxxmce=7>x}^JH^@oYRlI4L&N5+
z47YSZkT3@=x472X6iM7d_YD-gJTzd7i?{B1LDZS&Z^YFzUq+dhnKNBk|1j1<Oxc?R
zEuvS4<N3I?a+v;V^Kra+b4$my?2a*cC_N=lTm~nHb4&wJEcNlMvy)b-O^jKNPsz)r
zBUKK?9;aq%tEOvQuY?Kjo5|;|s<_>7Y2}2MUMRm)=$hhs_4(RvuIH0bhI@A|?#(gV
zI9KOPb+~q?LtX4ev03t*vTbHjpcj)6vK5NFHMk_B219<Ux<j=IVL5c~NARd4OH~Ye
z)!M(T`S%0-UrQ64tDhN6TwO5B5$h9R`aQ{o3tU!pxeegareLXhVgn}MfR*=T0463U
zi5s4vyO9d4)XrW3a%$XHIP$xPl@(f<!ZRVx=SM;>^VQJD7hUh=kZB<m<UCb~H{#a!
zMK8|fX`8xmo&<Y2l2s20(CCHHf)#dUVwzxLVipN%ED1jD7^-zf7mG;5Vl@t4j?Zl+
zBMITelbK}ngv^y2uWs-UeX$REuG6GFee>t2XM9b4{@cd89DdY}AyhhtiAdL;<id_D
zPsPjGJ?gy3X+(cLyFF~PM|LeI+COiDlf5+zC<Yz9Wx6ajCDP=7jNhri>{ba=D|{J!
z;QSJ~1D}f0g|X|EL8<Ld<87>6@8m3mrrGOZRMCrTbzIYD5<?j4$BD}zM4Nnw*Q~dL
zq)0TUlQ<bYOEZpP0V80Bzh0esgbXtXGZLa}FNaEW*V2LXCd<6gyvUPqgW`-fP9E~2
gUVh@4lbBE?P5y(-bm)55x#1uGA8P!7MV#~i0IEpumjD0&

diff --git a/data/samples.csv b/data/samples.csv
deleted file mode 100644
index bc50be2..0000000
--- a/data/samples.csv
+++ /dev/null
@@ -1 +0,0 @@
-1,MT,./data/MT_rep1_1_Ch6.fastq.gz,./data/MT_rep1_2_Ch6.fastq.gz
\ No newline at end of file
-- 
GitLab


From 1ab4e4140ca543955ddc1508602b13d4ed91e99b Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Mon, 30 Aug 2021 16:58:57 +0200
Subject: [PATCH 05/51] New demultiplex scripts for 10x #5

---
 bin/demuxStatsFromXML.R         | 204 ++++++++++++++++++++++++++++++++
 bin/extractInfoForDemuxStats.pl | 124 +++++++++++++++++++
 bin/extractReads.pl             |  27 ++++-
 3 files changed, 351 insertions(+), 4 deletions(-)
 create mode 100644 bin/demuxStatsFromXML.R
 create mode 100644 bin/extractInfoForDemuxStats.pl

diff --git a/bin/demuxStatsFromXML.R b/bin/demuxStatsFromXML.R
new file mode 100644
index 0000000..63d77a6
--- /dev/null
+++ b/bin/demuxStatsFromXML.R
@@ -0,0 +1,204 @@
+#!/usr/bin/env Rscript
+
+# R version : 4.0.4
+## module load system/R-4.0.4_gcc-9.3.0
+
+# demuxStatsFromXML.R
+# Reads a DemultiplexingStats XML file to extract and format the demultiplexing statistics (10X-oriented for now).
+# For each sample, this script collects all associated indexes and the number of reads found, including the number of barcodes read perfectly and the number read with one mismatch.
+# It also collects the indexes that are found very often but are not associated with any sample.
+# The percentage of fragments per sample over the total number of fragments is computed.
+
+## --------------------
+#       PACKAGES
+## --------------------
+library('xml2')
+library('stringr')
+library('optparse')
+
+## --------------------
+#       FUNCTIONS
+## --------------------
+concat_df = function(df1, df2, col.names) {
+	# Rename the columns of df2 to col.names, then return df1 with the rows of df2 appended.
+	names(df2)<-col.names
+	rbind(df1, df2)
+}
+
+## --------------------
+#       PARAMETERS
+## --------------------
+option_list = list(
+		# All arguments are compulsory 
+		make_option(c("-x", "--xml"), type = "character", default = NULL, metavar = "character", 
+				help = "Path to the DemultiplexingStats.xml file."),
+		make_option(c("-i", "--indexNumber"), type = "character", default = NULL, metavar = "character", 
+				help = "Path to the .indexNumber file."),
+		make_option(c("-d", "--demuxSum"), type = "character", default = NULL, metavar = "character", 
+				help = "Path to the demuxSummary.txt file.")
+)
+
+opt_parser = OptionParser(usage="Make demultiplexStats easier to read.", option_list = option_list)
+opt = parse_args(opt_parser)
+
+if(is.null(opt$xml) | is.null(opt$indexNumber) | is.null(opt$demuxSum)) {
+	stop("At least one argument is missing.\n", call. = FALSE)
+}
+
+## --------------------
+#          LOG
+## --------------------
+cat("\nLancement du script demuxStatsFromXML.R avec les options suivantes :\n")
+cat(paste0("\tFichier XML :\t\t", opt$xml, "\n"))
+cat(paste0("\tFichier IndexNumber :\t", opt$indexNumber, "\n"))
+cat(paste0("\tDemux Summary :\t\t" , opt$demuxSum, "\n"))
+launchDir<-getwd()
+cat(paste0("\nLe fichier de sortie sera Ã©crit dans le rÃ©pertoire :\t",launchDir , "\n\n"))
+
+## --------------------
+#          MAIN
+## --------------------
+xml<-read_xml(opt$xml)
+
+df<-data.frame()
+vec.names<-c("Project", "Sample", "Barcode", "bcCount", "bcPerfect", "bcOneMismatch")
+
+projects<-xml_find_all(xml, "//Project")
+
+cat("Lecture du XML\n")
+for (pr in seq_len(length(projects))){	# seq_len: no iteration at all when nothing was found (1:0 would yield 1, 0)
+	project<-xml_attr(projects[pr], "name")
+	Samples<-xml_children(projects[pr])
+	for (sample in seq_len(length(Samples))){
+		sample_name<-xml_attr(Samples[sample], "name")
+		xml_bc<-xml_children(Samples[sample])
+		barcode_names<-xml_attr(xml_bc, "name")
+		for (bc in seq_len(length(barcode_names))) {
+			if (barcode_names[bc] != "all"){	# the entry named "all" is skipped, only the other barcode entries are kept
+				lane_path<-xml_path(xml_children(xml_bc[bc]))
+				BarcodeCount<-xml_text(xml_find_all(xml, paste0(lane_path,"/BarcodeCount")))
+				PerfectBarcodeCount<-xml_text(xml_find_all(xml, paste0(lane_path,"/PerfectBarcodeCount")))
+				OneMismatchBarcodeCount<-xml_text(xml_find_all(xml, paste0(lane_path,"/OneMismatchBarcodeCount")))
+				# No OneMismatchBarcodeCount node under this path: store "-" as a placeholder
+				if (length(OneMismatchBarcodeCount) == 0) { OneMismatchBarcodeCount<-"-" }
+				# One row per barcode, appended to df under the vec.names column names
+				df_to_add<-data.frame(project,sample_name, barcode_names[bc], BarcodeCount, PerfectBarcodeCount, OneMismatchBarcodeCount)
+				df<-concat_df(df, df_to_add, vec.names)
+
+			}
+		}		
+	}
+}
+
+cat("RÃ©sumÃ© des informaqtions extraites (nombre d'Ã©chantillons par projet) :")
+table(df$Project)
+
+# ConcatÃ©nation des index multilples
+# Ecrire script pour gÃ©nÃ©rer ce fichier Ã  partir de la SS
+cat("\nLecture du fichier contenant le nombre d'index pour chaque Ã©chantillon.\n")
+indexNumber<-read.table(opt$indexNumber, header=TRUE, sep="\t")
+
+df2<-data.frame()
+df.defaultLine<-df[which(df$Project == "default"),]
+df2<-concat_df(df2, df.defaultLine, vec.names)
+
+cat("Rassemblement des statistiques par Ã©chantillons.\n")
+for (line in seq_len(nrow(indexNumber))){	# one pass per sample listed in the .indexNumber file; no pass when it is empty
+	mySample<-indexNumber[line, "Sample"]
+	mySampleNumber<-indexNumber[line, "NumberOfIndex"]
+	
+	# Single Index Case
+	if (mySampleNumber == 1) {
+		df.singleLine<-df[which(df$Sample == mySample),]
+		df2<-concat_df(df2, df.singleLine, vec.names)
+	}
+	# Dual and 4 Index Cases
+	else if (mySampleNumber > 1) {
+		sub.df<-df[which(str_detect(df$Sample, mySample)), ]
+		if (nrow(sub.df) == 0) { next }	# sample not found in the XML: nothing to merge for it
+		# Walk through the sub-data.frame and accumulate its rows into a single one
+		for (l in seq_len(nrow(sub.df))) {
+			sub.df.project<-sub.df[l, "Project"]
+			sub.df.barcode<-sub.df[l, "Barcode"]
+			sub.df.bcCount<-as.numeric(sub.df[l, "bcCount"])
+			sub.df.bcPerfect<-as.numeric(sub.df[l, "bcPerfect"])
+			sub.df.oneMismatch<-as.numeric(sub.df[l, "bcOneMismatch"])	# bcOneMismatch
+			
+			#print(paste(mySample, ":: Traitement du barcode :", sub.df.barcode))
+			
+			if (l == 1 ) {
+				sub.df.project.toAdd<-sub.df.project
+				sub.df.barcode.toAdd<-sub.df.barcode
+				sub.df.bcCount.toAdd<-sub.df.bcCount
+				sub.df.bcPerfect.toAdd<-sub.df.bcPerfect
+				sub.df.oneMismatch.toAdd<-sub.df.oneMismatch
+			} else {
+				sub.df.barcode.toAdd<-paste0(sub.df.barcode.toAdd, "+", sub.df.barcode)
+				sub.df.bcCount.toAdd<-sub.df.bcCount.toAdd+sub.df.bcCount
+				sub.df.bcPerfect.toAdd<-sub.df.bcPerfect.toAdd+sub.df.bcPerfect
+				sub.df.oneMismatch.toAdd<-sub.df.oneMismatch.toAdd+sub.df.oneMismatch
+			}
+		}	
+		
+		# Add the merged row to the data.frame (project taken from the first row, like the other accumulated fields)
+		df_to_add<-data.frame(sub.df.project.toAdd,mySample, sub.df.barcode.toAdd, sub.df.bcCount.toAdd, sub.df.bcPerfect.toAdd, sub.df.oneMismatch.toAdd)
+		df2<-concat_df(df2, df_to_add, vec.names)
+	}
+}
+	
+cat("RÃ©sumÃ© des inforamtions extraites (nombre d'Ã©chantillons par projet) :")
+table(df2$Project)
+
+## Recherche des index indeterminÃ©s
+cat("\nRecherche des index indÃ©terminÃ©s.\n")
+bcCount.min<-min(as.numeric(df2[df2$Project != "default", "bcCount"]))	# mask built on df2 itself: df rows are ordered differently, and -which() on no match would select nothing
+bcCount.threshold<-0.8*bcCount.min	
+
+# Rechercher tous les index trouvÃ©s au moins bcCount.threshold fois
+cat("Tentative de rÃ©cupÃ©rer des Ã©chantillons parmi les index retrouvÃ©s les plus frÃ©quemment.\n")
+cat("\tLecture du DemuxSummary.\n")
+linesToSkip<-as.numeric(system(paste("grep -n Most", shQuote(opt$demuxSum), "| cut -d':' -f1"), intern = TRUE))	# shQuote: the path goes through a shell, so it must survive spaces and metacharacters
+tabDemuxSum<-read.table(opt$demuxSum, skip=linesToSkip, col.names=c("Index", "Count"))
+
+tabUndetermined<-tabDemuxSum[which(tabDemuxSum$Count >= bcCount.threshold),]
+
+cat("\tRÃ©sumÃ© des inforamtions extraites :\n")
+cat(paste0("\tNombre d'index indÃ©terminÃ©s retrouvÃ©s :\t", dim(tabUndetermined)[1], "\n"))
+head(tabUndetermined)
+
+# Construction du dataFrame pour intÃ©gration Ã  df2
+df2.Projects<-unique(df2$Project)
+myProject<-df2.Projects[which(df2.Projects != "default")]
+
+### For each row of tabUndetermined, one row is appended to df2 (no row at all when no index reaches the threshold):
+df.tabUndetermined<-data.frame()
+for (i in seq_len(nrow(tabUndetermined))) {
+	df.tabUndetermined.tmp<-data.frame(myProject, "Undetermined", tabUndetermined[i, "Index"], tabUndetermined[i, "Count"], "-", "-")
+	df.tabUndetermined<-concat_df(df.tabUndetermined, df.tabUndetermined.tmp, vec.names)
+}
+# Merge only when something was collected: concat_df cannot rename the columns of a 0-column data.frame.
+if (ncol(df.tabUndetermined) > 0) { df2<-concat_df(df2, df.tabUndetermined, vec.names) }
+cat("\tLes index indÃ©terminÃ©s ont Ã©tÃ© ajoutÃ© au data.table.\n")
+
+## Soustraction des undertermined aux allOthers
+# recuperer les Count de tabUndetermined et soustraire la somme Ã  df2[which(df2$Project == "default"), "bcCount"]
+cat("\nQuelques calculs sur les donnÃ©es avant de les exporter.\n")
+cat("\tActualisation du nombre d'index 'AllOthers'.\n")
+undertermined.count<-sum(as.numeric(tabUndetermined[,"Count"]))
+df2[which(df2$Project == "default"), "bcCount"]<-as.numeric(df2[which(df2$Project == "default"), "bcCount"])-undertermined.count
+
+# Calcul pourcentages de chaque barcode
+cat("\tCalcul du pourcentage sur le nombre de fragments total.\n")
+totalOfFragments<-sum(as.numeric(df2$bcCount))
+
+percentOfFragment<-as.data.frame(round((as.numeric(df2[,"bcCount"])/totalOfFragments)*100, 2))
+rownames(percentOfFragment)<-rownames(df2)
+colnames(percentOfFragment)<-"percentageOfFragment"
+
+df2<-cbind(df2, percentOfFragment)
+
+# Export du data.frame
+cat("\nSauvegarde du data.frame.\n")
+write.table(df2, row.names = FALSE, quote = F, sep = "\t", file = paste0("DemultiplexStats_", myProject, ".csv"))
+cat(paste0("\tLe fichier suivant Ã  Ã©tÃ© crÃ©Ã© :\t", launchDir, "/DemultiplexStats_", myProject, ".csv\n"))
+cat("\nFin normale du script, on sort.\n")
diff --git a/bin/extractInfoForDemuxStats.pl b/bin/extractInfoForDemuxStats.pl
new file mode 100644
index 0000000..ccd29bb
--- /dev/null
+++ b/bin/extractInfoForDemuxStats.pl
@@ -0,0 +1,124 @@
+#!/usr/bin/perl -w
+binmode STDIN,  ':encoding(UTF-8)';
+binmode STDOUT, ':encoding(UTF-8)';
+binmode STDERR, ':encoding(UTF-8)';
+
+=head1 NAME
+
+ extractInfoForDemuxStats.pl
+ 
+=head1 DESCRIPTION
+
+ Extracts from a per-lane samplesheet (1) the sample names and (2) how many indexes are associated with each sample, then writes them to a <project>.indexNumber file.
+ 
+=head1 SYNOPSIS
+
+ extractInfoForDemuxStats.pl --sampleSheet
+
+=head1 OPTIONS
+
+ -sampleSheet|s : the samplesheet file
+ 
+=head1 EXEMPLES
+
+ perl extractInfoForDemuxStats.pl --sampleSheet 20210722_NOVASEQ6000_IEM_H3GHCDRXY_Lane1.csv 
+
+=head1 AUTHOR
+
+ Jules Sabban pour Plateforme genomique Toulouse (get-plage.bioinfo@genotoul.fr)
+ 
+=cut
+
+###################################################################
+#
+#						LIBRAIRIES
+#
+###################################################################
+use strict;
+use Getopt::Long;
+use utf8;
+
+###################################################################
+#
+#						INITIALISATION
+#
+####################################################################
+my $sampleSheet="";	# path to the samplesheet, filled in by --sampleSheet
+
+GetOptions ('sampleSheet=s' => \$sampleSheet,
+);
+
+if ($sampleSheet eq "") {	# mandatory option missing: print the usage and fail
+	print STDERR ("Please, give a file !\n");
+	print STDERR ("USAGE : extractInfoForDemuxStats.pl --sampleSheet <File>\n");
+	exit 1;	# non-zero status, so that the caller can tell the usage error from a success
+}
+
+#Lane,Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,Sample_Project,Description
+#Lane,Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,I5_Index_ID,index2,Sample_Project,Description
+
+# Find how many "*Index_ID" columns the header holds, and their positions
+# Find the position of the Sample_ID column
+# For each sample line, fetch its index ID(s)
+# If an index ID looks like XX-XX-XX, the number of indexes is 4
+# Otherwise it counts for 1
+# Sum the number of indexes over the line
+# Write the sample name with its number of indexes
+# Do not forget the header of the output file
+
+
+### Read the samplesheet:
+open (my $handle, '<', $sampleSheet) or do { print STDERR "Cannot read $sampleSheet: $!\n"; exit 1; };
+chomp(my @lines = <$handle>);
+close $handle;
+
+my $projectName="";	# taken from the second field of the "Infos" line
+my $sample_ID_position;	# column of Sample_ID, known once the "Lane" header line has been read
+my @index_ID_position=();	# columns whose header ends with "Index_ID"
+my %sample_info=();	# Sample_ID => number of indexes
+
+
+foreach my $line (@lines) {
+	my @cur_line = split(',', $line, -1);	# -1 keeps the trailing empty fields instead of dropping them
+	
+	# Look for the project name
+	if ($line =~ /^Infos/) {
+		$projectName = $cur_line[1];
+	}
+	
+	# Look for the positions of Sample_ID and of the Index_ID columns
+	elsif ($line =~ /^Lane/) {
+		while ( my ( $indice, $valeur ) = each @cur_line ) { 
+			if ($valeur eq "Sample_ID") { $sample_ID_position=$indice;}
+			if ($valeur =~ /Index_ID$/) { push(@index_ID_position, $indice);}
+		}
+	}
+
+	# Associate each Sample_ID with its number of indexes (data lines start with the lane number, one or more digits)
+	elsif ($line =~ m/^(\d+),/) {
+		next unless defined $sample_ID_position;	# data line met before any header line: no column to read from
+		my $sample_ID = $cur_line[$sample_ID_position];
+		my $index_number=0;
+		foreach my $pos (@index_ID_position) {
+			if ($cur_line[$pos] =~ /\w{2}-\w{2}-\w{2}/) { $index_number = 4; } else { $index_number += 1; }
+		}
+		$sample_info{$sample_ID} = $index_number;
+	}
+}
+
+# Write the output file: a header line, then one "sample<TAB>count" line per sample.
+my $content ="";
+$content.="Sample\tNumberOfIndex\n";
+foreach my $k (sort keys(%sample_info)) {	# sorted, so that the output is the same from one run to the next
+   $content.="$k\t$sample_info{$k}\n";
+}
+
+my $file2write = "$projectName.indexNumber";	# named after the project read from the "Infos" line
+
+open(my $fh, '>', $file2write) or do { print STDERR "Cannot write $file2write: $!\n"; exit 1; };
+print $fh $content;
+close $fh or do { print STDERR "Cannot close $file2write: $!\n"; exit 1; };	# buffered write errors surface at close
+
+
+
+
diff --git a/bin/extractReads.pl b/bin/extractReads.pl
index 2328434..2a1bfc8 100644
--- a/bin/extractReads.pl
+++ b/bin/extractReads.pl
@@ -58,8 +58,6 @@ use File::Copy "move";
 use Cwd 'abs_path';
 
 
-
-
 ###################################################################
 #
 #						MAIN
@@ -153,6 +151,7 @@ MAIN:
 	# Initialisation des variables
 	my $runExistsInNGL = 0;
 	my $NGLBiRunCreatedFile = 'RunNGL-Bi.created';
+	my $NGLBiReadsetCreatedFil = 'ReadsetsNGL-Bi.created';
 	my $NGLBiRunName = "";
 	my $NGLSQExperimentCode;
 	
@@ -196,7 +195,7 @@ MAIN:
 			my $checkPSS = check_my_samplesheet($lastPSS, $preSampleSheet);
 			
 			###############################################################
-			#					INTEGRATION NGL-Bi
+			#					CREATION RUN NGL-Bi
 			###############################################################
 			$NGLSQExperimentCode = getNGLSeqExperimentCode($preSampleSheet);
 			$runExistsInNGL = 1 if($NGLSQExperimentCode ne " -");
@@ -252,7 +251,7 @@ MAIN:
 			my $laneExtraite = '';
 			my $counterIEMFiles = 0; #counter to store the number of IEM files found in the bulk file
 			my $IEMFileContent = '';
-			my $IEMFilePrefixe = $preSampleSheet;
+			my $IEMFilePrefixe = $lastPSS;
 			$IEMFilePrefixe =~ s/BULKDEMUX/IEM/g; # Replace Bulk by IEM
 			$IEMFilePrefixe =~ s/.csv//g; # Supprime le .csv de la fin pour faciliter l'ajout du compteur de lanes
 			$IEMFilePrefixe .= '_Lane';
@@ -341,6 +340,26 @@ MAIN:
 				}
 			} else { $logger -> info("Nous sommes en mode test : pas besoin de sauvegarder InterOp"); }
 
+			###############################################################
+			#					CREATION READSETS NGL-Bi
+			###############################################################
+=head1 A_SUPPRIMER
+			if ($runExistsInNGL){
+				# parcours des dossier PipelineLogs_Lane*
+				
+				# recherche du $NGLBiReadsetCreatedFile
+				## Si trouvÃ© : on ne fait rien, les readsets existent deja
+				
+				
+				
+				
+				if (! -e $NGLBiReadsetCreatedFil){
+					# CREATION DES READSETS DANS NGL-BI	#   #   #   #   #   #   #   #   #   #   #
+					$logger -> info("Pas de fichier $NGLBiReadsetCreatedFil dans $raw_data/$dir -> Les readsets ne semblent ne pas exister dans NGL-Bi");
+				}
+			}
+=cut 
+
 			###############################################################
 			#					LANCEMENT DE NEXTFLOW
 			###############################################################
-- 
GitLab


From eff2c90a22d5235995b48fc4cda28e0d60729471 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Mon, 30 Aug 2021 17:00:19 +0200
Subject: [PATCH 06/51] New scripts for core pipeline #5

---
 modules/module_core.nf     | 247 +++++++++++++++++++++++++++++++++++++
 workflows/core_pipeline.nf | 142 +++++++++++++++++++++
 2 files changed, 389 insertions(+)
 create mode 100644 modules/module_core.nf
 create mode 100644 workflows/core_pipeline.nf

diff --git a/modules/module_core.nf b/modules/module_core.nf
new file mode 100644
index 0000000..8658f07
--- /dev/null
+++ b/modules/module_core.nf
@@ -0,0 +1,247 @@
+//params.sequencer = 'MiSeq'
+//params.rawdata_location = '/home/sbsuser/work/Nextflow/wf-illumina-nf/data_test/MiSeq/210713_M07406_0007_000000000-JRCVF_bad'
+params.outdir=''
+banksForConta = [ ]
+
+//mismatchNumber= params.sequencer == 'MiSeq'? 0 : 1
+
+
+process decoupageSS {
+	// Not used anymore
+	publishDir path: "${params.outdir}/SampleSheets" , mode: 'copy'
+	
+	input:
+		path multiSS
+		
+	output:
+		path '*'
+		
+	shell:
+	"""
+		extractReads.pl $multiSS NovaSeq
+	
+	"""
+}
+
+process prepareReadSetCreation {
+	publishDir path: "${params.outdir}" , mode: 'copy'
+	
+	input:
+		path sampleSheet
+		path runNGLBiCreated
+		
+	output:
+		file 'readSetCreation.info'
+		
+	script:
+	"""
+		extractInfoForReadSets.pl --sampleSheet $sampleSheet --runNGLBi $runNGLBiCreated
+	"""
+}
+
+process readsetNGLBiCreation {	// runs createNGLBiReadSets.pl on the info file; stdout goes to the .created file, stderr to the .log file
+	publishDir path: "${params.outdir}/NGLBi" , mode: 'copy', pattern: '*.created'
+	
+	executor 'local'	// directives use the `name value` form; the assignment form is not the documented syntax
+	beforeScript "export ENV_NGL='/save/sbsuser/scripts-ngs/NGL-Bi_client_Current/IG/SystemeInteractionNGL-Bi/'"
+	errorStrategy 'ignore'
+	
+	input : 
+		path infoFile
+	
+	output : 
+		path 'ReadsetsNGL-Bi.created', emit: readSetFile
+		path 'ReadsetsNGL-BiCreation.log', emit: readSetLog
+
+	script :
+	"""
+		createNGLBiReadSets.pl --infoFile $infoFile --env_ngl_bi \$ENV_NGL 2> ReadsetsNGL-BiCreation.log 1> ReadsetsNGL-Bi.created
+		
+	"""
+}
+
+process checkErrorFromNGLBi {
+	publishDir path: "${params.outdir}/NGLBi" , mode: 'copy'
+	
+	input:
+		path logFile
+		
+	output:
+		path 'ReadsetsNGL-BiCreation.log'
+		
+	script:
+	"""
+		checkErrorNGLScripts.pl --file $logFile
+	"""
+}
+
+process maskMaker {
+	publishDir path: "${params.outdir}/Demux" , mode: 'copy'
+	
+	input:
+		path SampleSheet
+		path RunInfoXML
+	
+	output:
+		path 'Run.conf'
+	
+	script:
+	"""
+		extractInfo.pl -s $SampleSheet -r $RunInfoXML
+	
+	"""
+}
+
+process bcl2fastq {
+	publishDir path: "${params.outdir}/Demux/Files" , mode: 'copy'
+	
+	echo true	// directive form (`name value`), so that the task stdout is actually forwarded
+	
+	input:
+		path SampleSheet
+		path Runconf
+		val mismatchNumber
+		path rawdata_location
+		
+	//output:
+		//path "*"
+		
+	shell:
+	"""
+		mask=\$(grep 'MASQUE' !{Runconf} | cut -d'=' -f2)
+		echo "bcl2fastq -p 10 -r 4 -w 4 \${mask} --barcode-mismatches !{mismatchNumber} --output-dir ./ -R !{rawdata_location} --sample-sheet !{SampleSheet} -l DEBUG"
+		
+	"""
+}
+
+process extractInfoForDemuxStats {
+	publishDir path: "${params.outdir}/Demux" , mode: 'copy'
+	
+	input:
+		path SampleSheet
+	
+	output:
+		path "*.indexNumber"
+	
+	script:
+	"""
+		extractInfoForDemuxStats.pl --sampleSheet $SampleSheet
+	
+	"""
+}
+
+process demultiplexStats {
+	publishDir path: "${params.outdir}/Demux" , mode: 'copy'
+	
+	module 'system/R-4.0.4_gcc-9.3.0'
+	
+	input:
+		path DemuxStatXML
+		path IndexNumberFile
+		path DemuxSummary
+	
+	output:
+		path 'demultiplexStats.log', emit: log
+		path "DemultiplexStats_*", emit: demultiplexStatsCSV
+	
+	script:
+	"""
+		Rscript ${projectDir}/bin/demuxStatsFromXML.R --xml $DemuxStatXML --indexNumber $IndexNumberFile --demuxSum $DemuxSummary > demultiplexStats.log
+	
+	"""
+}
+
+process fastqc {
+	publishDir path: "${params.outdir}/FastQC" , mode: 'copy'
+	
+	errorStrategy { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' }
+	maxRetries 3
+	module 'bioinfo/FastQC_v0.11.7'
+	executor 'slurm'
+	queue 'wflowq'
+	cpus 1 //{ 1 * task.attempt }
+	time { 45.m * task.attempt }
+	memory '1.GB'
+	
+	input:
+		tuple val(name), path(read)
+		
+	output:
+		path "*_fastqc.{zip,html}" , emit: ch_fastqc_result
+		// path log files
+		
+	script:
+	"""
+		fastqc -t $task.cpus --nogroup --noextract --outdir ./ ${read}
+	"""
+}
+
+
+process illuminaFilter {
+	publishDir path: "${params.outdir}/IlluminaFilter" , mode: 'copy', saveAs: { filename -> filename.endsWith('.fastq.gz') ? "${name}.fastq.gz" : filename }	// rename the fastq only: the *out log keeps its own name instead of colliding with it
+	
+	module 'bioinfo/fastq_illumina_filter-0.1'
+	executor 'slurm'
+	queue 'wflowq'
+	cpus { 1 * task.attempt }
+	time { 1.h * task.attempt }
+	memory '1.GB'
+	
+	input:
+		tuple val(name), path(read)
+	
+	output:
+		tuple val("$name"), path("*.fastq.gz"), emit: reads
+		path "*out", emit: log
+	
+	script:	// la sortie de gzip est redirigÃ©e, donc peut etre que le -c est inutile...
+	"""
+		zcat $read | fastq_illumina_filter --keep N -v 2> ${name}.out | gzip -c -f > good.fastq.gz
+		
+	"""
+	
+}
+
+process check_conta_bwa {
+	// aln command uses ~3.2GB memory and the sampe command uses ~5.4GB
+
+	module 'bioinfo/bwa-0.7.17'
+	time { 20.m * task.attempt }
+	memory { 10.GB * task.attempt }
+	
+	input:
+		tuple val(name), path(read)
+		each genomeRef
+		
+	output:
+		tuple val("${name}_${genomeName}"), path("*")
+		
+	script:
+	genomeName=file(genomeRef).simpleName
+	"""
+		
+		bwa aln $genomeRef $read 2>> ${name}_${genomeName}.err | bwa samse $genomeRef - $read > ${name}_${genomeName}.sam 2>> ${name}_${genomeName}.err
+	"""
+	//
+}
+
+process check_conta_samtools {	// writes to ${name}.txt the sorted first column of `samtools view -SF 260 ${name}.sam`
+	publishDir path: "${params.outdir}/CheckContamination" , mode: 'copy'
+	
+	module 'bioinfo/samtools-1.9'
+	time { 10.m * task.attempt }
+	
+	input:
+		tuple val(name), path("*")	
+		
+	output:
+		// Declared so that publishDir has something to copy: only declared outputs are published.
+		path "${name}.txt"
+		
+	script:
+	"""
+		samtools view -SF 260 ${name}.sam 2>> ${name}.err | cut -f1 - 2>> ${name}.err | sort - > ${name}.txt 2>> ${name}.err
+	"""
+}
+
+
diff --git a/workflows/core_pipeline.nf b/workflows/core_pipeline.nf
new file mode 100644
index 0000000..997a1bc
--- /dev/null
+++ b/workflows/core_pipeline.nf
@@ -0,0 +1,142 @@
+//params.sequencer = 'MiSeq'
+//params.rawdata_location = '/home/sbsuser/work/Nextflow/wf-illumina-nf/data_test/MiSeq/210713_M07406_0007_000000000-JRCVF_bad'
+
+params.outdir=''
+params.isMultiplex=''
+params.chemistry=''
+params.sequencer=''
+
+banksForConta = [ ]
+
+include {
+	prepareReadSetCreation;
+	readsetNGLBiCreation;
+	checkErrorFromNGLBi;
+	maskMaker;
+	bcl2fastq;
+	extractInfoForDemuxStats;
+	demultiplexStats;
+	fastqc;
+	illuminaFilter;
+	check_conta_bwa;
+	check_conta_samtools;
+} from '../modules/module_core.nf'
+
+
+
+//-------------------------------------------------
+
+inNGL=true
+forceNewReadset=false
+isResume=workflow.resume
+
+//-------------------------------------------------
+
+workflow Preprocessing {
+	/*
+	 * Decoupage samplesheet -> non
+	 * Creation readsets NGL-Bi -> oui !!
+	 * Sauvegarde NextCloud -> non
+	 * Decoupage jFlow ?? -> non a priori
+	 * 
+	 */
+	take:
+		sampleSheet
+		runNGLBiCreated
+		
+	main:
+	//if inNGL && (!isResume || forceNewReadset) {
+		prepareReadSetCreation(sampleSheet, runNGLBiCreated)
+		readsetNGLBiCreation(prepareReadSetCreation.out)
+		checkErrorFromNGLBi(readsetNGLBiCreation.out.readSetLog)
+	//}
+}
+
+
+workflow Demultiplexage {
+	
+	 //ecriture du masque
+	 //demux avec bcl2fastq / cellRanger
+	take:
+		SampleSheet
+		RunInfoXML
+		mismatchNumber
+		rawdata_location
+	
+	main:
+		maskMaker(SampleSheet, RunInfoXML)
+		bcl2fastq(SampleSheet,maskMaker.out,mismatchNumber,rawdata_location)
+}
+
+workflow DemuxStat_10x {
+	// creation du fichier Project.numberIndex avec extractInfoForDemuxStats.pl
+	// Extraction des stats avec demuxStatsFromXML.R
+	take:
+		SampleSheet
+		DemuxStatXML
+		DemuxSummary
+		//Read
+		
+	main:
+		extractInfoForDemuxStats(SampleSheet)
+		demultiplexStats(DemuxStatXML, extractInfoForDemuxStats.out, DemuxSummary)
+		//fastqc(Read)
+}
+
+workflow Check_conta {
+	// Liste des genomes
+	// pour chaque elem de list_Genomes, faire 
+	// check_conta_bwa(elem, channel.reads)
+	// check_conta_samtools(elem, check_conta_bwa.out)
+
+//alignement BWA
+//SAMTOOLS
+}
+
+workflow Core {
+	take:
+		ch_sampleSheet
+		//ch_runNGLBiCreated
+		//ch_RunInfoXML
+		ch_DemuxStatXML
+		ch_DemuxSummary
+		ch_read
+		banksForConta
+		//mismatchNumber
+		//rawdata_location
+		
+	main:
+		//Preprocessing(ch_sampleSheet, ch_runNGLBiCreated)
+		//Demultiplexage(ch_sampleSheet, ch_RunInfoXML, mismatchNumber, rawdata_location)	// A voir plus tard !
+		if (params.chemistry == '10X') {
+			//DemuxStat_10x(ch_sampleSheet, ch_DemuxStatXML, ch_DemuxSummary)
+		} else {
+			println "Les donnÃ©es ne sont pas 10X !"
+		}
+		if (params.sequencer == 'NovaSeq' && params.isMultiplex) {	// logical AND: isMultiplex (declared as '' in this file) is evaluated with Groovy truth
+			println "Les donnÃ©es ne nÃ©cessite pas de passer par IlluminaFilter"
+			ch_read_good = ch_read
+		} else {	// Si MiSeq ou Nova + noIndex
+			illuminaFilter(ch_read)
+			ch_read_good = illuminaFilter.out.reads
+		}
+		//fastqc(ch_read_good)
+		check_conta_bwa(ch_read_good, banksForConta)
+		check_conta_samtools(check_conta_bwa.out)
+		//checkConta
+}
+/*
+workflow core {
+	take:
+		ch_sampleSheet
+		ch_runNGLBiCreated
+	
+	main:
+		wf_preprocessing(ch_sampleSheet, ch_runNGLBiCreated)
+		if not noIndex { wf_demultiplexage(data) }
+		pr_illuminaFilter(data) // ou SubsetSeqFiles : dans quel cas on fait l'un ou l'autre ????
+		wf_check_conta(data)
+		pr_fastqc(data)
+		
+	emit:
+}*/
\ No newline at end of file
-- 
GitLab


From 46ebba47eb9d4aa5adacf97c744a42d1b1bcd0ef Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Mon, 30 Aug 2021 17:01:36 +0200
Subject: [PATCH 07/51] Creation of files for future sub-workflows

---
 modules/module_dna.nf     | 19 +++++++++++++++++++
 modules/module_test.nf    | 18 ++++++++++++++++++
 workflows/diversity_qc.nf |  0
 workflows/dna_qc.nf       | 22 ++++++++++++++++++++++
 workflows/rna_qc.nf       |  0
 5 files changed, 59 insertions(+)
 create mode 100644 modules/module_dna.nf
 create mode 100644 modules/module_test.nf
 create mode 100644 workflows/diversity_qc.nf
 create mode 100644 workflows/dna_qc.nf
 create mode 100644 workflows/rna_qc.nf

diff --git a/modules/module_dna.nf b/modules/module_dna.nf
new file mode 100644
index 0000000..f8cdc87
--- /dev/null
+++ b/modules/module_dna.nf
@@ -0,0 +1,19 @@
+process BWAInddex {
+	// BWA
+	
+	
+}
+
+
+process BWAAlignment {
+	
+	
+	
+}
+
+process AlignmentStats {
+	// PICARD + Samtools 
+	// ou  Qualimap ?
+	
+	
+}
\ No newline at end of file
diff --git a/modules/module_test.nf b/modules/module_test.nf
new file mode 100644
index 0000000..26f01c6
--- /dev/null
+++ b/modules/module_test.nf
@@ -0,0 +1,18 @@
+process bar {
+	publishDir path: "/home/sbsuser/work/Nextflow/wf-illumina-nf/results" , mode: 'copy'
+	
+	input:
+	  	path x
+		path y
+	  
+	output:
+	  	path 'bar.txt', emit: fichier_de_sortie
+		// path 'foo.txt', emit: other_file  
+	  
+	script:
+	"""
+		(cat $x; head $y ) > bar.txt
+    """
+}
+
+
diff --git a/workflows/diversity_qc.nf b/workflows/diversity_qc.nf
new file mode 100644
index 0000000..e69de29
diff --git a/workflows/dna_qc.nf b/workflows/dna_qc.nf
new file mode 100644
index 0000000..2c980cb
--- /dev/null
+++ b/workflows/dna_qc.nf
@@ -0,0 +1,22 @@
+// Juste un alignement
+
+
+
+
+
+
+
+
+
+
+workflow dna_qc {
+	take:
+		// sortie illuminaFilter ou SubSeqFiles
+		// genome ref
+		
+	main:
+		pr_BWAIndex(genome_ref)
+		pr_BWAAlignment(data)
+		pr_AlignementStats(data)
+		if pairedEnds pr_insertSizes(data)
+}
\ No newline at end of file
diff --git a/workflows/rna_qc.nf b/workflows/rna_qc.nf
new file mode 100644
index 0000000..e69de29
-- 
GitLab


From 56ec0d377ee6e1b5ac079dd0d50da5db8e114b6a Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Mon, 30 Aug 2021 17:02:28 +0200
Subject: [PATCH 08/51] worked on #5

---
 main.nf | 437 ++++++++++++--------------------------------------------
 1 file changed, 95 insertions(+), 342 deletions(-)

diff --git a/main.nf b/main.nf
index befd72c..3dcb1fb 100644
--- a/main.nf
+++ b/main.nf
@@ -1,379 +1,132 @@
 #!/usr/bin/env nextflow
 
+nextflow.enable.dsl=2
+
+//include { foo } from './some/module'
+
+//------------------------------
 
 /*
-Copyright INRAE 2021
-
-This software is a computer program whose purpose is to
-analyze high-throughput sequencing data.
-You can use, modify and/ or redistribute the software under the terms
-of license (see the LICENSE file for more details).
-The software is distributed in the hope that it will be useful,
-but "AS IS" WITHOUT ANY WARRANTY OF ANY KIND.
-Users are therefore encouraged to test the software's suitability as regards
-their requirements in conditions enabling the security of their systems and/or data.
-The fact that you are presently reading this means that you have had knowledge
-of the license and that you accept its terms.
-This script is based on : 
- - the nf-core guidelines . See https://nf-co.re/ for more information
- - the institut cury template https://github.com/bioinfo-pf-curie/geniac-template/
+ * WORKFLOWS
+ * Sub-workflows
+ * processes
+ */
 
-*/
 
+//include { decoupeSS as DECOUPE_SS } from './modules/module_test.nf'
 
+// Mettre ca dans des fichiers de config ??
 /*
-========================================================================================
-                         GeT/template
-========================================================================================
- GeT/template Analysis Pipeline.
- #### Homepage / Documentation
- https://github.com/get-nf/template
-----------------------------------------------------------------------------------------
+if DNA {
+	include { dna_qc as QC } from './workflows/dna_qc.nf'
+}
+if RNA {
+	include { rna_qc as QC } from './workflows/rna_qc.nf'
+}
+if amplicon {
+	if taille_insert dans itervalle {
+		include { diversity_qc as QC } from './workflows/diversity_qc.nf'
+	} else {
+		include { dna_qc as QC } from './workflows/dna_qc.nf'
+	}
+}
 */
+//------------------------------
+/*params.sequencer = 'NovaSeq'
+//params.raw_data = '/home/sbsuser/work/Nextflow/wf-illumina-nf/data_test/MiSeq/210713_M07406_0007_000000000-JRCVF_bad'
+params.raw_data = ''
+params.outdir = '/home/sbsuser/work/Nextflow/wf-illumina-nf/results/210722_A00318_0223_BH3GHCDRXY_Lane1_1627020907_10x'
 
-def helpMessage() {
-    log.info"""
+mismatchNumber= params.sequencer == 'MiSeq'? 0 : 1
 
-    Usage:
 
-    The typical command for running the pipeline is as follows:
 
-    nextflow run get-nf/template --inputdir '/path/to/data' --samplesheet 'samples.csv' -profile docker
+my_data_miseq=Channel.fromPath('./data_test/20210713_MISEQ_7_BULKDEMUX_JRCVF.csv')
+my_data_novaseq=Channel.fromPath('./data_test/20210607_NOVASEQ6000_BULKDEMUX_HFMH7DRXY.csv')
 
-    Mandatory arguments:
-      --inputdir                    Path to input directory 
-      -profile                      Configuration profile to use. Can use multiple (comma separated)
-                                    Available: conda, docker, singularity, path, genotoul, test and more.
 
-    Options:
-      --samplesheet                 Default inputdir/samples.csv eg: SAMPLE_ID,SAMPLE_NAME,path/to/R1/fastq/file,path/to/R2/fastq/file (for paired-end only)
-      --contaminant                 Name of iGenomes // To be discussed ????
-      --outdir                      The output directory where the results will be saved
-      --email                       Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits
-      --email_on_fail               Same as --email, except only send mail if the workflow is not successful
-      --maxMultiqcEmailFileSize     Theshold size for MultiQC report to be attached in notification email. If file generated by pipeline exceeds the threshold, it will not be attached (Default: 25MB)
-      
-      -name [str]                   Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic.
+//ch_ss=Channel.fromPath('/home/sbsuser/work/Nextflow/wf-illumina-nf/data_test/MiSeq/210713_M07406_0007_000000000-JRCVF_bad/PipelineLogs_Lane1/20210713_MISEQ_7_IEM_JRCVF_Lane1.csv')
+ch_ngl=Channel.fromPath('/home/sbsuser/work/Nextflow/wf-illumina-nf/data_test/MiSeq/210713_M07406_0007_000000000-JRCVF_bad/RunNGL-Bi.created')
+ch_runInfo=Channel.fromPath('/home/sbsuser/work/Nextflow/wf-illumina-nf/data_test/MiSeq/210713_M07406_0007_000000000-JRCVF_bad/RunInfo.xml')
+ch_ss=Channel.fromPath('/NovaSeq/data/210722_A00318_0223_BH3GHCDRXY/PipelineLogs_Lane1/20210722_NOVASEQ6000_IEM_H3GHCDRXY_Lane1.csv')
 
+*/
 
-    =======================================================
-    Available profiles
-    -profile test                    Run the test dataset
-    -profile conda                   Build a new conda environment before running the pipeline. Use `--condaCacheDir` to define the conda cache path
-    -profile path                    Use the installation path defined for all tools. Use `--globalPath` to define the installation path
-    -profile docker                  Use the Docker images for each process
-    -profile singularity             Use the singularity images for each process
-    -profile genologin               Run the workflow on the cluster, instead of locally
+// ------------- Test 10x ------------ //
+params.sequencer = 'NovaSeq'
+params.outdir = '/home/sbsuser/work/Nextflow/wf-illumina-nf/results/210722_A00318_0223_BH3GHCDRXY_Lane1_1627020907_10x'
+params.raw_data = ''
+params.data = '/work/sbsuser/data/NovaSeq/210722_A00318_0223_BH3GHCDRXY_Lane1_1627020907_10x'
+params.isMultiplex = true
+params.chemistry = '10X'
 
-    """.stripIndent()
-}
 
-// Show help message
-if (params.help) {
-    helpMessage()
-    exit 0
-}
+ch_ss = Channel.fromPath('/NovaSeq/data/210722_A00318_0223_BH3GHCDRXY/PipelineLogs_Lane1/20210722_NOVASEQ6000_IEM_H3GHCDRXY_Lane1.csv')
+ch_DemuxStatXML=Channel.fromPath(params.data+'/Stats/DemultiplexingStats.xml')
+ch_DemuxSummary=Channel.fromPath(params.data+'/Stats/DemuxSummaryF1L1.txt')
+ch_read=Channel
+	.fromPath(params.data+'/TregThymus/**_R1_*.fastq.gz')
+	//.fromPath(params.data+'/TregThymus/**_R{1,2}_*.fastq.gz')
+	.map{$it -> [$it.simpleName, $it]}
+	.groupTuple()
 
 
-// NOTE - THIS IS NOT USED IN THIS PIPELINE, EXAMPLE ONLY
+//banksForConta= [ file('/work/bank/bwadb/phi', followLinks: true), file('/work/bank/bwadb/ecoli536', followLinks: false), file('/work/bank/bwadb/yeast', followLinks: false), file('/save/ng6/TODO/HiSeqIndexedGenomes/new_struct/Betacoronavirus_SARSr-CoV/SARS-CoV-2/genome/BWA/nCoV-2019.reference', followLinks: false) ]
+banksForConta= [ '/work/bank/bwadb/phi.fa', '/work/bank/bwadb/ecoli536', '/work/bank/bwadb/yeast.nt', '/save/ng6/TODO/HiSeqIndexedGenomes/new_struct/Betacoronavirus_SARSr-CoV/SARS-CoV-2/genome/BWA/nCoV-2019.reference.fasta']
+	
 
-/*
- * Create a channel for input read files
- */
-// If you want to use the channel below in a process, define the following:
-//   input:
-//   file dir from inputDirCh
-//
-
-
-ch_inputdir = params.inputdir ? Channel.fromPath(params.inputdir, checkIfExists: true) : Channel.empty()
-
-// Create a channel for input read files
-if(params.samplesheet){
-  if(params.single_end){
-    Channel
-      .from(file("${params.samplesheet}"))
-      .splitCsv(header: false)
-      .map{ row -> [ row[0], [file(row[2])]] }
-      .into { ch_read_files_for_fastqc; ch_read_files_for_qc1; ch_read_files_for_assembly}
-  }else{
-    Channel
-      .from(file("${params.samplesheet}"))
-      .splitCsv(header: false)
-      .map{ row -> [ row[0], [file(row[2]), file(row[3])]] }
-      .into { ch_read_files_for_fastqc; ch_read_files_for_qc1; ch_read_files_for_assembly}
-   }
-  params.reads=false
-} else {
-  exit 1, "Expect a samplesheet and an input dir !"
-}
-/*
- * SET UP CONFIGURATION VARIABLES
- */
-// Has the run name been specified by the user?
-// this has the bonus effect of catching both -name and --name
-custom_runName = params.name
-if (!(workflow.runName ==~ /[a-z]+_[a-z]+/)) {
-    custom_runName = workflow.runName
-}
-// Stage config files
-ch_multiqc_config = file(params.multiqc_config, checkIfExists: true)
-ch_output_docs = file("$projectDir/docs/output.md", checkIfExists: true)
-
-
-def summary = [:]
-if (workflow.revision) summary['Pipeline Release'] = workflow.revision
-summary['Run Name']         = custom_runName ?: workflow.runName
-// TODO nf-core: Report custom parameters here
-summary['Input dir']            = params.inputdir
-summary['Sample sheet']        = params.samplesheet
-summary['Data Type']        = params.single_end ? 'Single-End' : 'Paired-End'
-summary['Max Resources']    = "$params.max_memory memory, $params.max_cpus cpus, $params.max_time time per job"
-if (workflow.containerEngine) summary['Container'] = "$workflow.containerEngine - $workflow.container"
-summary['Output dir']       = params.outdir
-summary['Launch dir']       = workflow.launchDir
-summary['Working dir']      = workflow.workDir
-summary['Script dir']       = workflow.projectDir
-summary['User']             = workflow.userName
-if (workflow.profile == 'awsbatch') {
-  summary['AWS Region']     = params.awsregion
-  summary['AWS Queue']      = params.awsqueue
-}
-summary['Config Profile'] = workflow.profile
-if (params.email || params.email_on_fail) {
-  summary['E-mail Address']    = params.email
-  summary['E-mail on failure'] = params.email_on_fail
-}
-log.info "-\033[2m--------------------------------------------------\033[0m-"
-log.info "-\033[2m----------------"+ workflow.manifest.name +" --\033[0m-"
-log.info "-\033[2m--------------------------------------------------\033[0m-"
-log.info summary.collect { k,v -> "${k.padRight(18)}: $v" }.join("\n")
-log.info "-\033[2m--------------------------------------------------\033[0m-"
 
-/*
- * Parse software version numbers
- */
-process get_software_versions {
-    publishDir "${params.outdir}/pipeline_info", mode: 'copy',
-        saveAs: { filename ->
-            if (filename.indexOf(".csv") > 0) filename
-            else null
-        }
-
-    output:
-    file 'software_versions_mqc.yaml' into software_versions_yaml
-    file "software_versions.csv"
-
-    script:
-    // TODO nf-core: Get all tools to print their version number here
-    """
-    echo $workflow.manifest.version > v_pipeline.txt
-    echo $workflow.nextflow.version > v_nextflow.txt
-    fastqc --version > v_fastqc.txt
-    multiqc --version > v_multiqc.txt
-    scrape_software_versions.py &> software_versions_mqc.yaml
-    """
-}
-/*
- * STEP 1 - FastQC
- */
-process fastqc {
-    tag "$name"
-    label 'process_medium'
-    publishDir "${params.outdir}/fastqc", mode: 'copy',
-        saveAs: { filename -> filename.indexOf(".zip") > 0 ? "zips/$filename" : "$filename" }
-
-    input:
-    set val(name), file(reads) from ch_read_files_for_fastqc
-
-    output:
-    file "*_fastqc.{zip,html}" into ch_fastqc_results_for_multiqc
-
-    script:
-    """
-    fastqc --quiet --threads $task.cpus $reads
-    """
-}
 
-/*
- * STEP 2 - Fake QC
- */
-process qc1 {
-    input:
-    set replicate_id, file(reads) from ch_read_files_for_qc1
+// -----------------------------
+// 	   Includes AFTER params !!
+// -----------------------------
+include { bar as FOO } from './modules/module_test.nf'
+include {
+	Preprocessing as Preprocess;
+	Core as CORE;
+} from './workflows/core_pipeline.nf'
 
-    output:
-    file("${replicate_id}.qc1") into ch_fastqc_raw_for_assembly
 
-    script:
-    """
-        echo "mkdir ${replicate_id} ; fastqc --nogroup --quiet -o ${replicate_id} --threads ${task.cpus} ${reads[0]} ${reads[1]}" > ${replicate_id}.qc1
-    """
-}
+// -----------------------------
 
-/*
- * STEP 3 - Fake assembly
- */
-process assembly {
-    input:
-    file (qc) from ch_fastqc_raw_for_assembly
-    set replicate_id, file(reads) from ch_read_files_for_assembly
-
-    output:
-    file("${replicate_id}.assembly") into ch_assembly_for_multiqc
-
-    script:
-    """
-        echo "ASSEMBLY ${replicate_id} ; " > ${replicate_id}.assembly
-    """
-}
+createDir = file(params.outdir).mkdir()
+println createDir ? "Creation du dossier "+ params.outdir : "Le dossier "+params.outdir + " existe deja."
 
-process workflow_summary {
-  
-  output:
-  file 'workflow_summary_mqc.yaml' into ch_workflow_summary_yaml
-
-  exec:
-  def yaml_file = task.workDir.resolve('workflow_summary_mqc.yaml')
-  yaml_file.text  = """
-  id: 'summary'
-  description: " - this information is collected when the pipeline is started."
-  section_name: 'Workflow Summary'
-  section_href: "${workflow.manifest.homePage}"
-  plot_type: 'html'
-  data: |
-        <dl class=\"dl-horizontal\">
-  ${summary.collect { k,v -> "            <dt>$k</dt><dd><samp>${v ?: '<span style=\"color:#999999;\">N/A</a>'}</samp></dd>" }.join("\n")}
-        </dl>
-  """.stripIndent()
+// -----------------------------
+workflow {
+	//test(my_data_miseq, my_data_novaseq)
+	//test.out.samplesheet.view()
+	CORE(ch_ss, /*ch_ngl, ch_runInfo, mismatchNumber, -*/ch_DemuxStatXML, ch_DemuxSummary, ch_read, banksForConta/*, params.raw_data*/ )
+	//println banksForConta
+	//ch_read.view()
 }
 
-/*
- * STEP - MultiQC
- */
-process multiqc {
-  
-  publishDir "${params.outdir}/MultiQC", mode: 'copy'
-
-  when:
-  !params.skip_multiQC
-
-  input:
-  file (multiqc_config) from ch_multiqc_config
-  file ('fastqc/*') from ch_fastqc_results_for_multiqc.collect().ifEmpty([])
-  // TODO get-nf: Add in log files from your new processes for MultiQC to find!
-  file ('software_versions/*') from software_versions_yaml.collect()
-  file ('workflowSummary/*') from ch_workflow_summary_yaml.collect()
-
-  output: 
-  file "*report.html" into ch_multiqc_report
-  file "*_data"
-  file "multiqc_plots"
-
-  script:
-  rtitle = custom_runName ? "--title \"$custom_runName\"" : ''
-  rfilename = custom_runName ? "--filename " + custom_runName.replaceAll('\\W','_').replaceAll('_+','_') + "_multiqc_report" : ''
-  """
-  multiqc -f $rtitle $rfilename  --config $multiqc_config .
-  """
-}
 
 /*
- * STEP - Output Description HTML
- */
-process output_documentation {
-    publishDir "${params.outdir}/pipeline_info", mode: 'copy'
-
-    input:
-    file output_docs from ch_output_docs
+workflow {
+	CORE_preprocessing(data)
+	CORE_demultiplexage(data)
+	CORE_filter(data)
+	QC(Core.out)
+}
 
-    output:
-    file "results_description.html"
 
-    script:
-    """
-    pandoc $output_docs -t html -o results_description.html
-    """
-}
+*/
 
-/*
- * Completion e-mail notification
- */
-workflow.onComplete {
-
-    // Set up the e-mail variables
-    def name_wf = workflow.manifest.name
-    def subject = "[$name_wf] Successful: $workflow.runName"
-    if (!workflow.success) {
-      subject = "[$name_wf] FAILED: $workflow.runName"
-    }
-    def email_fields = [:]
-    email_fields['version'] = workflow.manifest.version
-    email_fields['runName'] = custom_runName ?: workflow.runName
-    email_fields['success'] = workflow.success
-    email_fields['dateComplete'] = workflow.complete
-    email_fields['duration'] = workflow.duration
-    email_fields['exitStatus'] = workflow.exitStatus
-    email_fields['errorMessage'] = (workflow.errorMessage ?: 'None')
-    email_fields['errorReport'] = (workflow.errorReport ?: 'None')
-    email_fields['commandLine'] = workflow.commandLine
-    email_fields['projectDir'] = workflow.projectDir
-    email_fields['summary'] = summary
-    println(workflow)
-    
-    email_fields['summary']['Date Started'] = 11 // workflow.start
-    email_fields['summary']['Date Completed'] = 11 // workflow.complete
-    email_fields['summary']['Pipeline script file path'] = 'aaa' //workflow.scriptFile
-    email_fields['summary']['Pipeline script hash ID'] = 'aaa' //workflow.scriptId 
-    if (workflow.repository) email_fields['summary']['Pipeline repository Git URL'] = workflow.repository
-    if (workflow.commitId) email_fields['summary']['Pipeline repository Git Commit'] = workflow.commitId
-    if (workflow.revision) email_fields['summary']['Pipeline Git branch/tag'] = workflow.revision
-    if (workflow.container) email_fields['summary']['Docker image'] = workflow.container
-    email_fields['summary']['Nextflow Version'] = workflow.nextflow.version
-    email_fields['summary']['Nextflow Build'] = workflow.nextflow.build
-    email_fields['summary']['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp
-
-    // Check if we are only sending emails on failure
-    email_address = params.email
-    if (!params.email && params.email_on_fail && !workflow.success) {
-        email_address = params.email_on_fail
-    }
-
-    // Render the TXT template
-    def engine = new groovy.text.GStringTemplateEngine()
-    def tf = new File("$baseDir/assets/email_template.txt")
-    def txt_template = engine.createTemplate(tf).make(email_fields)
-    def email_txt = txt_template.toString()
-    
-    // Send the HTML e-mail
-    if (email_address) {
-      // Catch failures and try with plaintext
-      [ 'mail', '-s', subject, email_address ].execute() << email_txt
-      log.info "[$name_wf] Sent summary e-mail to $email_address (mail)"
-      log.info "$email_txt"
-    }
-
-    // Write summary e-mail HTML to a file
-    def output_d = new File( "${params.outdir}/pipeline_info/" )
-    if (!output_d.exists()) {
-      output_d.mkdirs()
-    }
-    def output_tf = new File( output_d, "pipeline_report.txt" )
-    output_tf.withWriter { w -> w << email_txt }
-    c_green = params.monochrome_logs ? '' : "\033[0;32m";
-    c_purple = params.monochrome_logs ? '' : "\033[0;35m";
-    c_red = params.monochrome_logs ? '' : "\033[0;31m";
-    c_reset = params.monochrome_logs ? '' : "\033[0m";
-
-    if (workflow.stats.ignoredCount > 0 && workflow.success) {
-      log.info "-${c_purple}Warning, pipeline completed, but with errored process(es) ${c_reset}"
-      log.info "-${c_red}Number of ignored errored process(es) : ${workflow.stats.ignoredCount} ${c_reset}"
-      log.info "-${c_green}Number of successfully ran process(es) : ${workflow.stats.succeedCount} ${c_reset}"
-    }
-    if (workflow.success) {
-        log.info "-${c_purple}[${name_wf}]${c_green} Pipeline completed successfully${c_reset}"
-    } else {
-        log.info "-${c_purple}[${name_wf}]${c_red} Pipeline completed with errors${c_reset}"
-    }
 
+workflow test {
+	// input channels
+	take:
+		input_ch_m
+		input_ch_n
+	
+	main:
+		FOO(input_ch_m, input_ch_n)
+		//DECOUPE_SS(input_ch)
+		//FOO.out.view()
+	// outputs
+	//emit:
+		//samplesheet = DECOUPE_SS.out
+		//my_output_2 = process_2.out
+	
 }
\ No newline at end of file
-- 
GitLab


From 89748dc010c90fb6dcabbf568c18781a996945d3 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Tue, 31 Aug 2021 15:01:31 +0200
Subject: [PATCH 09/51] NGL-Bi processes in independent module #4

---
 modules/module_NGL-Bi.nf | 54 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)
 create mode 100644 modules/module_NGL-Bi.nf

diff --git a/modules/module_NGL-Bi.nf b/modules/module_NGL-Bi.nf
new file mode 100644
index 0000000..654615f
--- /dev/null
+++ b/modules/module_NGL-Bi.nf
@@ -0,0 +1,54 @@
+params.outdir=''
+
+
+process prepareReadSetCreation {
+	// Runs extractInfoForReadSets.pl on the sample sheet and the NGL-Bi run file; readSetCreation.info is the declared result.
+	publishDir path: "${params.outdir}/NGLBi" , mode: 'copy'
+	
+	input:
+		path sampleSheet
+		path runNGLBiCreated
+	output:
+		path 'readSetCreation.info'	// 'path' rather than the legacy 'file' qualifier, like the other processes of this module
+		
+	script:
+	"""
+		extractInfoForReadSets.pl --sampleSheet $sampleSheet --runNGLBi $runNGLBiCreated
+	"""
+}
+
+process readsetNGLBiCreation {
+	// Calls createNGLBiReadSets.pl: stdout is captured as ReadsetsNGL-Bi.created, stderr as ReadsetsNGL-BiCreation.log.
+	publishDir path: "${params.outdir}/NGLBi" , mode: 'copy', pattern: '*.created'
+	// Directives use the documented "name value" form (no '=' assignment).
+	executor 'local'
+	beforeScript "export ENV_NGL='/save/sbsuser/scripts-ngs/NGL-Bi_client_Current/IG/SystemeInteractionNGL-Bi/'"
+	errorStrategy 'ignore'	// a failing task does not stop the run
+	
+	input:
+		path infoFile
+	
+	output:
+		path 'ReadsetsNGL-Bi.created', emit: readSetFile
+		path 'ReadsetsNGL-BiCreation.log', emit: readSetLog
+
+	script:
+	"""
+		createNGLBiReadSets.pl --infoFile $infoFile --env_ngl_bi \$ENV_NGL 2> ReadsetsNGL-BiCreation.log 1> ReadsetsNGL-Bi.created
+		
+	"""
+}
+
+process checkErrorFromNGLBi {
+	publishDir path: "${params.outdir}/NGLBi" , mode: 'copy'
+	
+	input:
+		path logFile
+		
+	output:
+		path 'ReadsetsNGL-BiCreation.log'
+		
+	script:
+	"""
+		checkErrorNGLScripts.pl --file $logFile
+	"""
+}
\ No newline at end of file
-- 
GitLab


From c9906ffb7eeaf03c72f2b7d0e7dbe310909fcee0 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Tue, 31 Aug 2021 15:02:23 +0200
Subject: [PATCH 10/51] New script to make stats after contaSearch #5

---
 bin/contaCounter.pl | 95 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 95 insertions(+)
 create mode 100644 bin/contaCounter.pl

diff --git a/bin/contaCounter.pl b/bin/contaCounter.pl
new file mode 100644
index 0000000..36bd328
--- /dev/null
+++ b/bin/contaCounter.pl
@@ -0,0 +1,95 @@
+#!/usr/bin/perl -w
+binmode STDIN,  ':encoding(UTF-8)';
+binmode STDOUT, ':encoding(UTF-8)';
+binmode STDERR, ':encoding(UTF-8)';
+
+=head1 NAME
+
+ contaCounter.pl
+ 
+=head1 DESCRIPTION
+
+ Make statistics on samtools outputs
+ 
+=head1 SYNOPSIS
+
+ contaCounter.pl <path_to_folder>
+
+=head1 OPTIONS
+
+
+ 
+=head1 EXEMPLES
+
+ perl contaCounter.pl ./
+
+=head1 AUTHOR
+
+ Jules Sabban pour Plateforme genomique Toulouse (get-plage.bioinfo@genotoul.fr)
+ 
+=cut
+
+###################################################################
+#
+#						LIBRAIRIES
+#
+###################################################################
+use strict;
+use Getopt::Long;
+use File::Basename;
+
+##################################################################
+#
+#						INITIALISATION
+#
+##################################################################
+my @files = glob($ARGV[0]."*.txt");
+#my @files = glob("/home/sbsuser/work/Nextflow/wf-illumina-nf/results/210722_A00318_0223_BH3GHCDRXY_Lane1_1627020907_10x/CheckContamination/*.txt");
+
+#print "FILE : @files\n";
+
+if (!@files) {	# an empty list has $#files == -1; the former "== 0" test fired only when exactly one file was found
+	print STDERR "[Erreur] Le repertoire $ARGV[0] ne contient aucun fichiers !\n";
+	exit 5;
+}
+
+my %hash;
+
+##################################################################
+#
+#							MAIN
+#
+##################################################################
+
+foreach my $file (@files) {
+	my $simpleFile = basename($file, ".txt");
+
+	# Contaminant name: last "_"-separated field of the file name
+	my $contaminant = (split /_/, $simpleFile)[-1];
+
+	# Sample name: file name minus its literal "_<contaminant>" suffix (\Q..\E: names may contain regex metacharacters)
+	(my $sampleName = $simpleFile) =~ s/_\Q$contaminant\E\z//;
+	my ($shortSampleName, $direction) = $sampleName =~ m/^([0-9a-zA-Z]*).*(R[12])/;
+	unless (defined $direction) { print STDERR "[Attention] Nom de fichier inattendu, ignore : $file\n"; next; }
+
+	# Count the lines in Perl instead of `wc -l $file | cut ...`, so the file name never reaches a shell
+	open(my $in, '<', $file) or die "[Erreur] Impossible de lire $file : $!\n";
+	my $count = 0;
+	$count++ while <$in>;
+	close $in;
+	$hash{"$shortSampleName($direction)"}{$contaminant} = $count;
+}
+
+# Build the report text; keys are sorted so the output order is reproducible
+my $contentToYAML = "Statistics from contamination search.\n";
+foreach my $sample (sort keys %hash) {
+	$contentToYAML .= "$sample:\n";
+	foreach my $conta (sort keys %{ $hash{$sample} }) {	# keys() on a bare hash reference is a fatal error since Perl 5.24
+		$contentToYAML .= "\t${conta}:$hash{$sample}{$conta}\n";
+	}
+}
+
+# Print info to file
+open(my $fh, '>', "summary.yaml") or do { print STDERR "[Erreur] Impossible de creer summary.yaml : $!\n"; exit 1 };
+print $fh $contentToYAML;
+close($fh) or do { print STDERR "[Erreur] Echec de l'ecriture de summary.yaml : $!\n"; exit 1 };	# buffered write errors surface at close
-- 
GitLab


From 61ecae459fd1efe05364b098e47788534ace96ca Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Tue, 31 Aug 2021 15:02:52 +0200
Subject: [PATCH 11/51] Worked on #4 and #5

---
 workflows/core_pipeline.nf | 54 +++++++++++++++++++++-----------------
 1 file changed, 30 insertions(+), 24 deletions(-)

diff --git a/workflows/core_pipeline.nf b/workflows/core_pipeline.nf
index 997a1bc..906edcd 100644
--- a/workflows/core_pipeline.nf
+++ b/workflows/core_pipeline.nf
@@ -9,20 +9,23 @@ params.sequencer=''
 banksForConta = [ ]
 
 include {
-	prepareReadSetCreation;
-	readsetNGLBiCreation;
-	checkErrorFromNGLBi;
 	maskMaker;
 	bcl2fastq;
 	extractInfoForDemuxStats;
 	demultiplexStats;
 	fastqc;
 	illuminaFilter;
-	check_conta_bwa;
-	check_conta_samtools;
+	search_conta_bwa as align;
+	search_conta_samtools as filter;
+	search_conta_summary as summary;
 } from '../modules/module_core.nf'
 
 
+include {
+	prepareReadSetCreation;
+	readsetNGLBiCreation as readsetCreation;
+	checkErrorFromNGLBi as checkError;
+} from '../modules/module_NGL-Bi.nf'
 
 //-------------------------------------------------
 
@@ -32,7 +35,7 @@ isResume=workflow.resume
 
 //-------------------------------------------------
 
-workflow Preprocessing {
+workflow NGLBi_readsets {
 	/*
 	 * Decoupage samplesheet -> non
 	 * Creation readsets NGL-Bi -> oui !!
@@ -47,8 +50,8 @@ workflow Preprocessing {
 	main:
 	//if inNGL && (!isResume || forceNewReadset) {
 		prepareReadSetCreation(sampleSheet, runNGLBiCreated)
-		readsetNGLBiCreation(prepareReadSetCreation.out)
-		checkErrorFromNGLBi(readsetNGLBiCreation.out.readSetLog)
+		readsetCreation(prepareReadSetCreation.out)
+		checkError(readsetCreation.out.readSetLog)	// readsetNGLBiCreation is included under the alias readsetCreation, so its outputs are read through the alias
 	//}
 }
 
@@ -69,28 +72,25 @@ workflow Demultiplexage {
 }
 
 workflow DemuxStat_10x {
-	// creation du fichier Project.numberIndex avec extractInfoForDemuxStats.pl
-	// Extraction des stats avec demuxStatsFromXML.R
 	take:
 		SampleSheet
 		DemuxStatXML
 		DemuxSummary
-		//Read
 		
 	main:
 		extractInfoForDemuxStats(SampleSheet)
 		demultiplexStats(DemuxStatXML, extractInfoForDemuxStats.out, DemuxSummary)
-		//fastqc(Read)
 }
 
-workflow Check_conta {
-	// Liste des genomes
-	// pour chaque elem de list_Genomes, faire 
-	// check_conta_bwa(elem, channel.reads)
-	// check_conta_samtools(elem, check_conta_bwa.out)
-
-//alignement BWA
-//SAMTOOLS
+workflow Search_conta {
+	take:
+		ch_read
+		banksForConta
+	
+	main:
+		align(ch_read, banksForConta)
+		filter(align.out)
+		summary(filter.out.collect())
 }
 
 workflow Core {
@@ -106,13 +106,17 @@ workflow Core {
 		//rawdata_location
 		
 	main:
-		//Preprocessing(ch_sampleSheet, ch_runNGLBiCreated)
+		//NGLBi_readsets(ch_sampleSheet, ch_runNGLBiCreated)
 		//Demultiplexage(ch_sampleSheet, ch_RunInfoXML, mismatchNumber, rawdata_location)	// A voir plus tard !
+		
+		// ----------- DemultiplexStat
 		if (params.chemistry == '10X') {
 			//DemuxStat_10x(ch_sampleSheet, ch_DemuxStatXML, ch_DemuxSummary)
 		} else {
 			println "Les donnÃ©es ne sont pas 10X !"
 		}
+		
+		// ----------- Illumina Filter 
 		if (params.sequencer == 'NovaSeq' & params.isMultiplex) {
 			println "Les donnÃ©es ne nÃ©cessite pas de passer par IlluminaFilter"
 			ch_read_good = ch_read
@@ -120,10 +124,12 @@ workflow Core {
 			illuminaFilter(ch_read)
 			ch_read_good = illuminaFilter.out.reads
 		}
+		
+		// ----------- FASTQC
 		//fastqc(ch_read_good)
-		check_conta_bwa(ch_read_good, banksForConta)
-		check_conta_samtools(check_conta_bwa.out)
-		//checkConta
+		
+		// ----------- CheckContamination
+		Search_conta(ch_read_good, banksForConta)
 }
 /*
 workflow core {
-- 
GitLab


From 2b810935f54fc771930ea2aff3ec43c1f83d922b Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Tue, 31 Aug 2021 15:03:21 +0200
Subject: [PATCH 12/51] Worked on #4 and #5

---
 modules/module_core.nf | 90 +++++++++++++-----------------------------
 1 file changed, 28 insertions(+), 62 deletions(-)

diff --git a/modules/module_core.nf b/modules/module_core.nf
index 8658f07..1df6b53 100644
--- a/modules/module_core.nf
+++ b/modules/module_core.nf
@@ -23,57 +23,7 @@ process decoupageSS {
 	"""
 }
 
-process prepareReadSetCreation {
-	publishDir path: "${params.outdir}" , mode: 'copy'
-	
-	input:
-		path sampleSheet
-		path runNGLBiCreated
-		
-	output:
-		file 'readSetCreation.info'
-		
-	script:
-	"""
-		extractInfoForReadSets.pl --sampleSheet $sampleSheet --runNGLBi $runNGLBiCreated
-	"""
-}
-
-process readsetNGLBiCreation {
-	publishDir path: "${params.outdir}/NGLBi" , mode: 'copy', pattern: '*.created'
-	
-	executor = 'local'
-	beforeScript = "export ENV_NGL='/save/sbsuser/scripts-ngs/NGL-Bi_client_Current/IG/SystemeInteractionNGL-Bi/'"
-	errorStrategy = { 'ignore' }
-	
-	input : 
-		path infoFile
-	
-	output : 
-		path 'ReadsetsNGL-Bi.created', emit: readSetFile
-		path 'ReadsetsNGL-BiCreation.log', emit: readSetLog
 
-	script :
-	"""
-		createNGLBiReadSets.pl --infoFile $infoFile --env_ngl_bi \$ENV_NGL 2> ReadsetsNGL-BiCreation.log 1> ReadsetsNGL-Bi.created
-		
-	"""
-}
-
-process checkErrorFromNGLBi {
-	publishDir path: "${params.outdir}/NGLBi" , mode: 'copy'
-	
-	input:
-		path logFile
-		
-	output:
-		path 'ReadsetsNGL-BiCreation.log'
-		
-	script:
-	"""
-		checkErrorNGLScripts.pl --file $logFile
-	"""
-}
 
 process maskMaker {
 	publishDir path: "${params.outdir}/Demux" , mode: 'copy'
@@ -93,7 +43,7 @@ process maskMaker {
 }
 
 process bcl2fastq {
-	publishDir path: "${params.outdir}/Demux/Files" , mode: 'copy'
+	publishDir path: "${params.outdir}/Demux/Reads" , mode: 'copy'
 	
 	echo=true
 	
@@ -115,7 +65,7 @@ process bcl2fastq {
 }
 
 process extractInfoForDemuxStats {
-	publishDir path: "${params.outdir}/Demux" , mode: 'copy'
+	publishDir path: "${params.outdir}/Demux/Stats" , mode: 'copy'
 	
 	input:
 		path SampleSheet
@@ -131,7 +81,7 @@ process extractInfoForDemuxStats {
 }
 
 process demultiplexStats {
-	publishDir path: "${params.outdir}/Demux" , mode: 'copy'
+	publishDir path: "${params.outdir}/Demux/Stats" , mode: 'copy'
 	
 	module 'system/R-4.0.4_gcc-9.3.0'
 	
@@ -202,12 +152,12 @@ process illuminaFilter {
 	
 }
 
-process check_conta_bwa {
+process search_conta_bwa {
 	// aln command uses ~3.2GB memory and the sampe command uses ~5.4GB
 
 	module 'bioinfo/bwa-0.7.17'
 	time { 20.m * task.attempt }
-	memory { 10.GB * task.attempt }
+	memory { 5.GB * task.attempt }
 	
 	input:
 		tuple val(name), path(read)
@@ -222,26 +172,42 @@ process check_conta_bwa {
 		
 		bwa aln $genomeRef $read 2>> ${name}_${genomeName}.err | bwa samse $genomeRef - $read > ${name}_${genomeName}.sam 2>> ${name}_${genomeName}.err
 	"""
-	//
 }
 
-process check_conta_samtools {
-	publishDir path: "${params.outdir}/CheckContamination" , mode: 'copy'
+process search_conta_samtools {
+	publishDir path: "${params.outdir}/ContaminationSearch" , mode: 'copy'
 	
 	module 'bioinfo/samtools-1.9'
 	time { 10.m * task.attempt }
 	
 	input:
-		tuple val(name), path("*")	
-		
+		tuple val(name), path("*")
+	
+	output:
+		//tuple val("$name"), path("*")
+		path("*")
+	
 	script:
 	"""
 		samtools view -SF 260 ${name}.sam 2>> ${name}.err | cut -f1 - 2>> ${name}.err | sort - > ${name}.txt 2>> ${name}.err
 	"""
+}
+
+process search_conta_summary {
+	publishDir path: "${params.outdir}/ContaminationSearch" , mode: 'copy'
 	
+	input:
+		//tuple val(name), path("*")
+		path("*")
+		
+	output:
+		path("*.yaml")
+		
+	script:
+	"""
+		contaCounter.pl ./
+	"""
 	
-
-
 }
 
 
-- 
GitLab


From 3f1975295603811b8a008c8b0cac72d3765c041f Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Tue, 31 Aug 2021 15:04:04 +0200
Subject: [PATCH 13/51] Minor changes for #5

---
 main.nf | 37 +------------------------------------
 1 file changed, 1 insertion(+), 36 deletions(-)

diff --git a/main.nf b/main.nf
index 3dcb1fb..3c1ff42 100644
--- a/main.nf
+++ b/main.nf
@@ -65,24 +65,18 @@ ch_ss = Channel.fromPath('/NovaSeq/data/210722_A00318_0223_BH3GHCDRXY/PipelineLo
 ch_DemuxStatXML=Channel.fromPath(params.data+'/Stats/DemultiplexingStats.xml')
 ch_DemuxSummary=Channel.fromPath(params.data+'/Stats/DemuxSummaryF1L1.txt')
 ch_read=Channel
-	.fromPath(params.data+'/TregThymus/**_R1_*.fastq.gz')
+	.fromPath(params.data+'/TregThymus/1ADT_S1_L001_R{1,2}_001.fastq.gz')
 	//.fromPath(params.data+'/TregThymus/**_R{1,2}_*.fastq.gz')
 	.map{$it -> [$it.simpleName, $it]}
 	.groupTuple()
 
-
-//banksForConta= [ file('/work/bank/bwadb/phi', followLinks: true), file('/work/bank/bwadb/ecoli536', followLinks: false), file('/work/bank/bwadb/yeast', followLinks: false), file('/save/ng6/TODO/HiSeqIndexedGenomes/new_struct/Betacoronavirus_SARSr-CoV/SARS-CoV-2/genome/BWA/nCoV-2019.reference', followLinks: false) ]
 banksForConta= [ '/work/bank/bwadb/phi.fa', '/work/bank/bwadb/ecoli536', '/work/bank/bwadb/yeast.nt', '/save/ng6/TODO/HiSeqIndexedGenomes/new_struct/Betacoronavirus_SARSr-CoV/SARS-CoV-2/genome/BWA/nCoV-2019.reference.fasta']
-	
-
-
 
 // -----------------------------
 // 	   Includes AFTER params !!
 // -----------------------------
 include { bar as FOO } from './modules/module_test.nf'
 include {
-	Preprocessing as Preprocess;
 	Core as CORE;
 } from './workflows/core_pipeline.nf'
 
@@ -101,32 +95,3 @@ workflow {
 	//ch_read.view()
 }
 
-
-/*
-workflow {
-	CORE_preprocessing(data)
-	CORE_demultiplexage(data)
-	CORE_filter(data)
-	QC(Core.out)
-}
-
-
-*/
-
-
-workflow test {
-	// input channels
-	take:
-		input_ch_m
-		input_ch_n
-	
-	main:
-		FOO(input_ch_m, input_ch_n)
-		//DECOUPE_SS(input_ch)
-		//FOO.out.view()
-	// outputs
-	//emit:
-		//samplesheet = DECOUPE_SS.out
-		//my_output_2 = process_2.out
-	
-}
\ No newline at end of file
-- 
GitLab


From 5db09526fac1aa3b0fcbf8ba4ede2e2d1a922fbc Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Tue, 31 Aug 2021 15:19:14 +0200
Subject: [PATCH 14/51] Rename output directories

---
 modules/module_core.nf | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/modules/module_core.nf b/modules/module_core.nf
index 1df6b53..7686f85 100644
--- a/modules/module_core.nf
+++ b/modules/module_core.nf
@@ -102,7 +102,7 @@ process demultiplexStats {
 }
 
 process fastqc {
-	publishDir path: "${params.outdir}/FastQC" , mode: 'copy'
+	publishDir path: "${params.outdir}/ReadsStats" , mode: 'copy'
 	
 	errorStrategy { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' }
 	maxRetries 3
@@ -196,6 +196,9 @@ process search_conta_samtools {
 process search_conta_summary {
 	publishDir path: "${params.outdir}/ContaminationSearch" , mode: 'copy'
 	
+	time { 10.m * task.attempt }
+	memory '1.GB'
+	
 	input:
 		//tuple val(name), path("*")
 		path("*")
-- 
GitLab


From 2f0062dfcdab91d25a7480ee458a5938f203831c Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Tue, 4 Jan 2022 09:17:28 +0100
Subject: [PATCH 15/51] #8 Add lists of genomes for contamination search

---
 nextflow.config | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/nextflow.config b/nextflow.config
index 87e3584..5aa1549 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -4,7 +4,13 @@
  * -------------------------------------------------
  * Default config options for all environments.
  */
-
+process{	// Default directives for every process; a process that sets its own value overrides these
+	 executor = 'slurm'	// submit tasks through SLURM
+	 queue = 'wflowq'	// SLURM queue (partition) name
+	 time='1h'	// default wall-time limit per task
+	 cpus = 1
+	 memory = 2.GB
+}
 // Global default params, used in configs
 params {
 
@@ -13,7 +19,9 @@ params {
   inputdir = "./data"
   samplesheet = "${params.inputdir}/samples.csv"
   single_end = false
-  outdir = './results'
+  outdir = '/home/sbsuser/work/Nextflow/wf-illumina-nf/results/210722_A00318_0223_BH3GHCDRXY_Lane1_1627020907_global'
+  genomesRefForConta = [ '/work/bank/bwadb/Escherichia_coli_FRIK2069', '/work/bank/bwadb/phi.fa', '/work/bank/bwadb/yeast.nt' ]
+  addBankForConta = ''		// Ajout ponctuel d'un ou plusieurs genomes
   skip_multiQC = false
 
   // Boilerplate options
-- 
GitLab


From 82900b1014b1c011a88f54c1e9599f8a29685153 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Tue, 4 Jan 2022 09:21:15 +0100
Subject: [PATCH 16/51] Fix error if no undetermined sequence is found

---
 bin/demuxStatsFromXML.R | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/bin/demuxStatsFromXML.R b/bin/demuxStatsFromXML.R
index 63d77a6..f250311 100644
--- a/bin/demuxStatsFromXML.R
+++ b/bin/demuxStatsFromXML.R
@@ -166,20 +166,25 @@ cat("\tRÃ©sumÃ© des inforamtions extraites :\n")
 cat(paste0("\tNombre d'index indÃ©terminÃ©s retrouvÃ©s :\t", dim(tabUndetermined)[1], "\n"))
 head(tabUndetermined)
 
+
 # Construction du dataFrame pour intÃ©gration Ã  df2
 df2.Projects<-unique(df2$Project)
 myProject<-df2.Projects[which(df2.Projects != "default")]
 
 ### Pour chaque ligne de tabUndertermined, on ajoute une ligne Ã  df2 :
-df.tabUndetermined<-data.frame()
-for (i in 1:dim(tabUndetermined)[1]) {
-	df.tabUndetermined.tmp<-data.frame(myProject, "Undetermined", tabUndetermined[i, "Index"], tabUndetermined[i, "Count"], "-", "-")
-	df.tabUndetermined<-concat_df(df.tabUndetermined, df.tabUndetermined.tmp, vec.names)
+if (dim(tabUndetermined)[1] != 0) {
+	df.tabUndetermined<-data.frame()
+	for (i in 1:dim(tabUndetermined)[1]) {
+		df.tabUndetermined.tmp<-data.frame(myProject, "Undetermined", tabUndetermined[i, "Index"], tabUndetermined[i, "Count"], "-", "-")
+		df.tabUndetermined<-concat_df(df.tabUndetermined, df.tabUndetermined.tmp, vec.names)
+	}
+	
+	df2<-concat_df(df2, df.tabUndetermined, vec.names)
+	cat("\tLes index indÃ©terminÃ©s ont Ã©tÃ© ajoutÃ© au data.table.\n")
+} else {
+	cat("\tAuncun index indÃ©terminÃ©s trouvÃ©s.\n")
 }
 
-df2<-concat_df(df2, df.tabUndetermined, vec.names)
-cat("\tLes index indÃ©terminÃ©s ont Ã©tÃ© ajoutÃ© au data.table.\n")
-
 ## Soustraction des undertermined aux allOthers
 # recuperer les Count de tabUndetermined et soustraire la somme Ã  df2[which(df2$Project == "default"), "bcCount"]
 cat("\nQuelques calculs sur les donnÃ©es avant de les exporter.\n")
-- 
GitLab


From 4308120eab4dbc680d7bad9601c597c275515341 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Tue, 4 Jan 2022 09:24:24 +0100
Subject: [PATCH 17/51] Move files to NF-CORE organisation

---
 main.nf                                       | 111 ++++----------
 modules/{ => local}/module_NGL-Bi.nf          |   0
 modules/{ => local}/module_core.nf            |   7 +-
 modules/{ => local}/module_dna.nf             |   0
 modules/local/module_reports.nf               |  35 +++++
 modules/{ => local}/module_test.nf            |   0
 .../local/10X_qc.nf                           |   0
 .../local}/core_pipeline.nf                   |  33 +----
 .../local/diversity_qc.nf                     |   0
 {workflows => sub-workflows/local}/dna_qc.nf  |   0
 sub-workflows/local/rna_qc.nf                 |   0
 workflow/illumina_qc.nf                       | 139 ++++++++++++++++++
 12 files changed, 211 insertions(+), 114 deletions(-)
 rename modules/{ => local}/module_NGL-Bi.nf (100%)
 rename modules/{ => local}/module_core.nf (89%)
 rename modules/{ => local}/module_dna.nf (100%)
 create mode 100644 modules/local/module_reports.nf
 rename modules/{ => local}/module_test.nf (100%)
 rename workflows/diversity_qc.nf => sub-workflows/local/10X_qc.nf (100%)
 rename {workflows => sub-workflows/local}/core_pipeline.nf (77%)
 rename workflows/rna_qc.nf => sub-workflows/local/diversity_qc.nf (100%)
 rename {workflows => sub-workflows/local}/dna_qc.nf (100%)
 create mode 100644 sub-workflows/local/rna_qc.nf
 create mode 100644 workflow/illumina_qc.nf

diff --git a/main.nf b/main.nf
index 3c1ff42..4ec72b3 100644
--- a/main.nf
+++ b/main.nf
@@ -1,97 +1,44 @@
 #!/usr/bin/env nextflow
 
-nextflow.enable.dsl=2
-
-//include { foo } from './some/module'
-
-//------------------------------
+nextflow.enable.dsl = 2
 
 /*
- * WORKFLOWS
- * Sub-workflows
- * processes
- */
-
+Copyright INRAE 2021
+
+This software is a computer program whose purpose is to
+analyze high-throughput sequencing data.
+You can use, modify and/ or redistribute the software under the terms
+of license (see the LICENSE file for more details).
+The software is distributed in the hope that it will be useful,
+but "AS IS" WITHOUT ANY WARRANTY OF ANY KIND.
+Users are therefore encouraged to test the software's suitability as regards
+their requirements in conditions enabling the security of their systems and/or data.
+The fact that you are presently reading this means that you have had knowledge
+of the license and that you accept its terms.
+This script is based on : 
+ - the nf-core guidelines . See https://nf-co.re/ for more information
+ - the Curie institute template https://github.com/bioinfo-pf-curie/geniac-template/
 
-//include { decoupeSS as DECOUPE_SS } from './modules/module_test.nf'
+*/
 
-// Mettre ca dans des fichiers de config ??
 /*
-if DNA {
-	include { dna_qc as QC } from './workflows/dna_qc.nf'
-}
-if RNA {
-	include { rna_qc as QC } from './workflows/rna_qc.nf'
-}
-if amplicon {
-	if taille_insert dans itervalle {
-		include { diversity_qc as QC } from './workflows/diversity_qc.nf'
-	} else {
-		include { dna_qc as QC } from './workflows/dna_qc.nf'
-	}
-}
+========================================================================================
+    NAMED WORKFLOW FOR PIPELINE
+========================================================================================
 */
-//------------------------------
-/*params.sequencer = 'NovaSeq'
-//params.raw_data = '/home/sbsuser/work/Nextflow/wf-illumina-nf/data_test/MiSeq/210713_M07406_0007_000000000-JRCVF_bad'
-params.raw_data = ''
-params.outdir = '/home/sbsuser/work/Nextflow/wf-illumina-nf/results/210722_A00318_0223_BH3GHCDRXY_Lane1_1627020907_10x'
-
-mismatchNumber= params.sequencer == 'MiSeq'? 0 : 1
-
-
-
-my_data_miseq=Channel.fromPath('./data_test/20210713_MISEQ_7_BULKDEMUX_JRCVF.csv')
-my_data_novaseq=Channel.fromPath('./data_test/20210607_NOVASEQ6000_BULKDEMUX_HFMH7DRXY.csv')
 
+include { ILLUMINA_QC } from './workflow/illumina_qc.nf'
 
-//ch_ss=Channel.fromPath('/home/sbsuser/work/Nextflow/wf-illumina-nf/data_test/MiSeq/210713_M07406_0007_000000000-JRCVF_bad/PipelineLogs_Lane1/20210713_MISEQ_7_IEM_JRCVF_Lane1.csv')
-ch_ngl=Channel.fromPath('/home/sbsuser/work/Nextflow/wf-illumina-nf/data_test/MiSeq/210713_M07406_0007_000000000-JRCVF_bad/RunNGL-Bi.created')
-ch_runInfo=Channel.fromPath('/home/sbsuser/work/Nextflow/wf-illumina-nf/data_test/MiSeq/210713_M07406_0007_000000000-JRCVF_bad/RunInfo.xml')
-ch_ss=Channel.fromPath('/NovaSeq/data/210722_A00318_0223_BH3GHCDRXY/PipelineLogs_Lane1/20210722_NOVASEQ6000_IEM_H3GHCDRXY_Lane1.csv')
+workflow QC_ANALYSIS {	// Named workflow that simply runs ILLUMINA_QC (included from ./workflow/illumina_qc.nf)
+    ILLUMINA_QC()
+}
 
+/*
+========================================================================================
+    RUN ALL WORKFLOWS
+========================================================================================
 */
 
-// ------------- Test 10x ------------ //
-params.sequencer = 'NovaSeq'
-params.outdir = '/home/sbsuser/work/Nextflow/wf-illumina-nf/results/210722_A00318_0223_BH3GHCDRXY_Lane1_1627020907_10x'
-params.raw_data = ''
-params.data = '/work/sbsuser/data/NovaSeq/210722_A00318_0223_BH3GHCDRXY_Lane1_1627020907_10x'
-params.isMultiplex = true
-params.chemistry = '10X'
-
-
-ch_ss = Channel.fromPath('/NovaSeq/data/210722_A00318_0223_BH3GHCDRXY/PipelineLogs_Lane1/20210722_NOVASEQ6000_IEM_H3GHCDRXY_Lane1.csv')
-ch_DemuxStatXML=Channel.fromPath(params.data+'/Stats/DemultiplexingStats.xml')
-ch_DemuxSummary=Channel.fromPath(params.data+'/Stats/DemuxSummaryF1L1.txt')
-ch_read=Channel
-	.fromPath(params.data+'/TregThymus/1ADT_S1_L001_R{1,2}_001.fastq.gz')
-	//.fromPath(params.data+'/TregThymus/**_R{1,2}_*.fastq.gz')
-	.map{$it -> [$it.simpleName, $it]}
-	.groupTuple()
-
-banksForConta= [ '/work/bank/bwadb/phi.fa', '/work/bank/bwadb/ecoli536', '/work/bank/bwadb/yeast.nt', '/save/ng6/TODO/HiSeqIndexedGenomes/new_struct/Betacoronavirus_SARSr-CoV/SARS-CoV-2/genome/BWA/nCoV-2019.reference.fasta']
-
-// -----------------------------
-// 	   Includes AFTER params !!
-// -----------------------------
-include { bar as FOO } from './modules/module_test.nf'
-include {
-	Core as CORE;
-} from './workflows/core_pipeline.nf'
-
-
-// -----------------------------
-
-createDir = file(params.outdir).mkdir()
-println createDir ? "Creation du dossier "+ params.outdir : "Le dossier "+params.outdir + " existe deja."
-
-// -----------------------------
 workflow {
-	//test(my_data_miseq, my_data_novaseq)
-	//test.out.samplesheet.view()
-	CORE(ch_ss, /*ch_ngl, ch_runInfo, mismatchNumber, -*/ch_DemuxStatXML, ch_DemuxSummary, ch_read, banksForConta/*, params.raw_data*/ )
-	//println banksForConta
-	//ch_read.view()
+    QC_ANALYSIS()
 }
-
diff --git a/modules/module_NGL-Bi.nf b/modules/local/module_NGL-Bi.nf
similarity index 100%
rename from modules/module_NGL-Bi.nf
rename to modules/local/module_NGL-Bi.nf
diff --git a/modules/module_core.nf b/modules/local/module_core.nf
similarity index 89%
rename from modules/module_core.nf
rename to modules/local/module_core.nf
index 7686f85..dc17401 100644
--- a/modules/module_core.nf
+++ b/modules/local/module_core.nf
@@ -102,7 +102,8 @@ process demultiplexStats {
 }
 
 process fastqc {
-	publishDir path: "${params.outdir}/ReadsStats" , mode: 'copy'
+	publishDir path: "${params.outdir}/ReadsStats" , mode: 'copy', pattern: '*.zip', saveAs: { filename -> "${name}.zip" }
+	publishDir path: "${params.outdir}/ReadsStats" , mode: 'copy', pattern: '*.html', saveAs: { filename -> "${name}.html" }
 	
 	errorStrategy { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' }
 	maxRetries 3
@@ -146,8 +147,7 @@ process illuminaFilter {
 	
 	script:	// la sortie de gzip est redirigÃ©e, donc peut etre que le -c est inutile...
 	"""
-		zcat $read | fastq_illumina_filter --keep N -v 2> ${name}.out | gzip -c -f > good.fastq.gz
-		
+		zcat $read | fastq_illumina_filter --keep N -v 2> ${name}.out | gzip -c -f > good.fastq.gz	
 	"""
 	
 }
@@ -169,7 +169,6 @@ process search_conta_bwa {
 	script:
 	genomeName=file(genomeRef).simpleName
 	"""
-		
 		bwa aln $genomeRef $read 2>> ${name}_${genomeName}.err | bwa samse $genomeRef - $read > ${name}_${genomeName}.sam 2>> ${name}_${genomeName}.err
 	"""
 }
diff --git a/modules/module_dna.nf b/modules/local/module_dna.nf
similarity index 100%
rename from modules/module_dna.nf
rename to modules/local/module_dna.nf
diff --git a/modules/local/module_reports.nf b/modules/local/module_reports.nf
new file mode 100644
index 0000000..397793f
--- /dev/null
+++ b/modules/local/module_reports.nf
@@ -0,0 +1,35 @@
+params.outdir=''
+
+summary = [:]
+
+process workflow_summary {
+	publishDir path: "${params.outdir}/Reports" , mode: 'copy'
+	// Writes workflow_summary_mqc.yaml in the task work dir: one <dt>/<dd> HTML row per entry of the script-level `summary` map.
+	output:
+	file 'workflow_summary_mqc.yaml'
+  
+	exec:
+	def yaml_file = task.workDir.resolve('workflow_summary_mqc.yaml')
+	yaml_file.text  = """
+  id: 'summary'
+  description: " - this information is collected when the pipeline is started."
+  section_name: 'Workflow Summary'
+  section_href: "${workflow.manifest.homePage}"
+  plot_type: 'html'
+  data: |
+        <dl class=\"dl-horizontal\">
+  ${summary.collect { k,v -> "            <dt>$k</dt><dd><samp>${v ?: '<span style=\"color:#999999;\">N/A</span>'}</samp></dd>" }.join("\n")}
+        </dl>
+  """.stripIndent()
+ }
+ 
+ 
+ workflow summary {
+	 take:
+	 	summary
+		// NOTE(review): workflow_summary declares no `input:` block, yet it is called with one argument below; confirm this call is valid.
+	main:
+		workflow_summary(summary)
+	 
+ }
+  
\ No newline at end of file
diff --git a/modules/module_test.nf b/modules/local/module_test.nf
similarity index 100%
rename from modules/module_test.nf
rename to modules/local/module_test.nf
diff --git a/workflows/diversity_qc.nf b/sub-workflows/local/10X_qc.nf
similarity index 100%
rename from workflows/diversity_qc.nf
rename to sub-workflows/local/10X_qc.nf
diff --git a/workflows/core_pipeline.nf b/sub-workflows/local/core_pipeline.nf
similarity index 77%
rename from workflows/core_pipeline.nf
rename to sub-workflows/local/core_pipeline.nf
index 906edcd..361f108 100644
--- a/workflows/core_pipeline.nf
+++ b/sub-workflows/local/core_pipeline.nf
@@ -1,10 +1,3 @@
-//params.sequencer = 'MiSeq'
-//params.rawdata_location = '/home/sbsuser/work/Nextflow/wf-illumina-nf/data_test/MiSeq/210713_M07406_0007_000000000-JRCVF_bad'
-
-params.outdir=''
-params.isMultiplex=''
-params.chemistry=''
-params.sequencer=''
 
 banksForConta = [ ]
 
@@ -18,14 +11,14 @@ include {
 	search_conta_bwa as align;
 	search_conta_samtools as filter;
 	search_conta_summary as summary;
-} from '../modules/module_core.nf'
+} from '../../modules/local/module_core.nf'
 
 
 include {
 	prepareReadSetCreation;
 	readsetNGLBiCreation as readsetCreation;
 	checkErrorFromNGLBi as checkError;
-} from '../modules/module_NGL-Bi.nf'
+} from '../../modules/local/module_NGL-Bi.nf'
 
 //-------------------------------------------------
 
@@ -57,7 +50,6 @@ workflow NGLBi_readsets {
 
 
 workflow Demultiplexage {
-	
 	 //ecriture du masque
 	 //demux avec bcl2fastq / cellRanger
 	take:
@@ -111,12 +103,12 @@ workflow Core {
 		
 		// ----------- DemultiplexStat
 		if (params.chemistry == '10X') {
-			//DemuxStat_10x(ch_sampleSheet, ch_DemuxStatXML, ch_DemuxSummary)
+			DemuxStat_10x(ch_sampleSheet, ch_DemuxStatXML, ch_DemuxSummary)
 		} else {
 			println "Les donnÃ©es ne sont pas 10X !"
 		}
 		
-		// ----------- Illumina Filter 
+		// ----------- Illumina Filter  // ou SubsetSeqFiles : dans quel cas on fait l'un ou l'autre ????
 		if (params.sequencer == 'NovaSeq' & params.isMultiplex) {
 			println "Les donnÃ©es ne nÃ©cessite pas de passer par IlluminaFilter"
 			ch_read_good = ch_read
@@ -126,23 +118,8 @@ workflow Core {
 		}
 		
 		// ----------- FASTQC
-		//fastqc(ch_read_good)
+		fastqc(ch_read_good)
 		
 		// ----------- CheckContamination
 		Search_conta(ch_read_good, banksForConta)
 }
-/*
-workflow core {
-	take:
-		ch_sampleSheet
-		ch_runNGLBiCreated
-	
-	main:
-		wf_preprocessing(ch_sampleSheet, ch_runNGLBiCreated)
-		if not noIndex { wf_demultiplexage(data) }
-		pr_illuminaFilter(data) // ou SubsetSeqFiles : dans quel cas on fait l'un ou l'autre ????
-		wf_check_conta(data)
-		pr_fastqc(data)
-		
-	emit:
-}*/
\ No newline at end of file
diff --git a/workflows/rna_qc.nf b/sub-workflows/local/diversity_qc.nf
similarity index 100%
rename from workflows/rna_qc.nf
rename to sub-workflows/local/diversity_qc.nf
diff --git a/workflows/dna_qc.nf b/sub-workflows/local/dna_qc.nf
similarity index 100%
rename from workflows/dna_qc.nf
rename to sub-workflows/local/dna_qc.nf
diff --git a/sub-workflows/local/rna_qc.nf b/sub-workflows/local/rna_qc.nf
new file mode 100644
index 0000000..e69de29
diff --git a/workflow/illumina_qc.nf b/workflow/illumina_qc.nf
new file mode 100644
index 0000000..0a25e4d
--- /dev/null
+++ b/workflow/illumina_qc.nf
@@ -0,0 +1,139 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+def helpMessage() {	// Prints the usage text below via log.info. NOTE(review): the usage line still names `get-nf/template`; confirm it matches this pipeline.
+    log.info"""
+
+    Usage:
+
+    The typical command for running the pipeline is as follows:
+
+    nextflow run get-nf/template --inputdir '/path/to/data' --samplesheet 'samples.csv' -profile docker
+
+    Mandatory arguments:
+      --inputdir                    Path to input directory 
+      -profile                      Configuration profile to use. Can use multiple (comma separated)
+                                    Available: conda, docker, singularity, path, genotoul, test and more.
+
+    Options:
+      --samplesheet                 Default inputdir/samples.csv eg: SAMPLE_ID,SAMPLE_NAME,path/to/R1/fastq/file,path/to/R2/fastq/file (for paired-end only)
+      --contaminant                 Name of iGenomes // To be discussed ????
+      --outdir                      The output directory where the results will be saved
+      --email                       Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits
+      --email_on_fail               Same as --email, except only send mail if the workflow is not successful
+      --maxMultiqcEmailFileSize     Theshold size for MultiQC report to be attached in notification email. If file generated by pipeline exceeds the threshold, it will not be attached (Default: 25MB)
+      
+      -name [str]                   Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic.
+
+
+    =======================================================
+    Available profiles
+    -profile test                    Run the test dataset
+    -profile conda                   Build a new conda environment before running the pipeline. Use `--condaCacheDir` to define the conda cache path
+    -profile path                    Use the installation path defined for all tools. Use `--globalPath` to define the installation path
+    -profile docker                  Use the Docker images for each process
+    -profile singularity             Use the singularity images for each process
+    -profile genologin               Run the workflow on the cluster, instead of locally
+
+    """.stripIndent()
+}
+
+// Show help message
+if (params.help) {
+    helpMessage()
+    exit 0
+}
+
+// -------------------------------------------------
+// 					PARAMS
+// -------------------------------------------------
+/*params.sequencer = 'NovaSeq'
+//params.raw_data = '/home/sbsuser/work/Nextflow/wf-illumina-nf/data_test/MiSeq/210713_M07406_0007_000000000-JRCVF_bad'
+//params.outdir = '/home/sbsuser/work/Nextflow/wf-illumina-nf/results/210722_A00318_0223_BH3GHCDRXY_Lane1_1627020907_10x'
+
+
+
+
+//my_data_miseq=Channel.fromPath('./data_test/20210713_MISEQ_7_BULKDEMUX_JRCVF.csv')
+//my_data_novaseq=Channel.fromPath('./data_test/20210607_NOVASEQ6000_BULKDEMUX_HFMH7DRXY.csv')
+
+
+//ch_ss=Channel.fromPath('/home/sbsuser/work/Nextflow/wf-illumina-nf/data_test/MiSeq/210713_M07406_0007_000000000-JRCVF_bad/PipelineLogs_Lane1/20210713_MISEQ_7_IEM_JRCVF_Lane1.csv')
+//ch_ngl=Channel.fromPath('/home/sbsuser/work/Nextflow/wf-illumina-nf/data_test/MiSeq/210713_M07406_0007_000000000-JRCVF_bad/RunNGL-Bi.created')
+//ch_runInfo=Channel.fromPath('/home/sbsuser/work/Nextflow/wf-illumina-nf/data_test/MiSeq/210713_M07406_0007_000000000-JRCVF_bad/RunInfo.xml')
+//ch_ss=Channel.fromPath('/NovaSeq/data/210722_A00318_0223_BH3GHCDRXY/PipelineLogs_Lane1/20210722_NOVASEQ6000_IEM_H3GHCDRXY_Lane1.csv')
+
+*/
+
+// ------------- Test 10x ------------ //
+
+params.sequencer = 'NovaSeq'
+params.outdir = '/home/sbsuser/work/Nextflow/wf-illumina-nf/results/210722_A00318_0223_BH3GHCDRXY_Lane1_1627020907_10x'	// In config file
+params.raw_data = ''
+params.data = '/home/sbsuser/work/Nextflow/wf-illumina-nf/data_test/NovaSeq/210722_A00318_0223_BH3GHCDRXY_Lane1_1627020907_10x'
+params.isMultiplex = true
+params.chemistry = '10X'
+ch_ss = Channel.fromPath(params.data+'/SampleSheet_global.csv')
+
+
+// ------------- Test MiSeq ------------ //
+/*
+params.sequencer = 'MiSeq'
+//params.outdir = '/home/sbsuser/work/Nextflow/wf-illumina-nf/results/211022_M01945_0364_000000000-DB246_rnaseq'	// In config file
+params.raw_data = ''
+params.data = '/home/sbsuser/work/Nextflow/wf-illumina-nf/data_test/MiSeq/211022_M01945_0364_000000000-DB246_rnaseq'
+params.isMultiplex = true
+params.chemistry = 'amplicon'
+*/
+
+
+//ch_ss = Channel.fromPath(params.data+'/SampleSheet.csv')
+ch_DemuxStatXML=Channel.fromPath(params.data+'/Stats/DemultiplexingStats.xml')
+ch_DemuxSummary=Channel.fromPath(params.data+'/Stats/DemuxSummaryF1L1.txt')
+ch_read=Channel
+	.fromPath(params.data+'/TregThymus/**_R{1,2}_*.fastq.gz')
+	//.fromPath(params.data+'/ROME/B20CG-*_R{1,2}_*.fastq.gz')
+	.map{$it -> [$it.simpleName, $it]}
+	.groupTuple()
+
+
+
+mismatchNumber = params.sequencer == 'MiSeq'? 0 : 1
+// Build the reference list without mutating params.genomesRefForConta: `+` returns a new list and accepts a single path or a list of paths.
+banksForConta = params.addBankForConta ? params.genomesRefForConta + params.addBankForConta : params.genomesRefForConta
+// mkdirs() also creates any missing parent directory of the output path.
+createDir = file(params.outdir).mkdirs()
+
+// -------------------------------------------------
+// 					INCLUDES
+// -------------------------------------------------
+// Mettre ca dans des fichiers de config ??
+/*
+if DNA {
+	include { dna_qc as QC } from '../sub-workflows/local/dna_qc.nf'
+}
+if RNA {
+	include { rna_qc as QC } from '../sub-workflows/local/rna_qc.nf'
+}
+if amplicon {
+	if taille_insert dans itervalle {
+		include { diversity_qc as QC } from '../sub-workflows/local/diversity_qc.nf'
+	} else {
+		include { dna_qc as QC } from '../sub-workflows/local/dna_qc.nf'
+	}
+}
+*/
+include { Core as CORE	} from '../sub-workflows/local/core_pipeline.nf'
+
+// -------------------------------------------------
+// 					WORKFLOW
+// -------------------------------------------------
+workflow ILLUMINA_QC {
+	// Runs the Core sub-workflow on the channels and reference list built at script level in this file.
+	CORE(ch_ss, ch_DemuxStatXML, ch_DemuxSummary, ch_read, banksForConta )		/*ch_ngl, ch_runInfo, mismatchNumber,  params.raw_data*/
+
+}
+
+
+
-- 
GitLab


From 9a338211a04cbdce3ea96d79859e35e438c7705f Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Wed, 16 Feb 2022 16:08:04 +0100
Subject: [PATCH 18/51] Add FastQ-Screen module #11

---
 assets/fastq_screen.conf             | 64 ++++++++++++++++++++++++++++
 modules/local/module_core.nf         | 55 ++++++++++++++++++++----
 sub-workflows/local/core_pipeline.nf | 34 +++++++++++----
 3 files changed, 136 insertions(+), 17 deletions(-)
 create mode 100644 assets/fastq_screen.conf

diff --git a/assets/fastq_screen.conf b/assets/fastq_screen.conf
new file mode 100644
index 0000000..78180ae
--- /dev/null
+++ b/assets/fastq_screen.conf
@@ -0,0 +1,64 @@
+# This is an example configuration file for FastQ Screen
+
+############################
+## Bowtie, Bowtie 2 or BWA #
+############################
+## If the Bowtie, Bowtie 2 or BWA binary is not in your PATH, you can set 
+## this value to tell the program where to find your chosen aligner.  Uncomment 
+## the relevant line below and set the appropriate location.  Please note, 
+## this path should INCLUDE the executable filename.
+
+#BOWTIE	/usr/local/bin/bowtie/bowtie
+#BOWTIE2 /usr/local/bioinfo/src/bowtie/bowtie2-2.4.4-linux-x86_64/bowtie2
+BWA /usr/local/bioinfo/src/bwa/bwa-0.7.15/bwa
+
+############################################
+## Bismark (for bisulfite sequencing only) #
+############################################
+## If the Bismark binary is not in your PATH then you can set this value to 
+## tell the program where to find it.  Uncomment the line below and set the 
+## appropriate location. Please note, this path should INCLUDE the executable 
+## filename.
+
+#BISMARK	/usr/local/bin/bismark/bismark
+
+############
+## Threads #
+############
+## Genome aligners can be made to run across multiple CPU cores to speed up 
+## searches.  Set this value to the number of cores you want for mapping reads.
+
+THREADS		8
+
+##############
+## DATABASES #
+##############
+## This section enables you to configure multiple genomes databases (aligner index 
+## files) to search against in your screen.  For each genome you need to provide a 
+## database name (which can't contain spaces) and the location of the aligner index 
+## files.
+##
+## The path to the index files SHOULD INCLUDE THE BASENAME of the index, e.g:
+## /data/public/Genomes/Human_Bowtie/GRCh37/Homo_sapiens.GRCh37
+## Thus, the index files (Homo_sapiens.GRCh37.1.bt2, Homo_sapiens.GRCh37.2.bt2, etc.) 
+## are found in a folder named 'GRCh37'.
+##
+## If, for example, the Bowtie, Bowtie2 and BWA indices of a given genome reside in 
+## the SAME FOLDER, a SINGLE path may be provided to ALL of the indices.  The index 
+## used will be the one compatible with the chosen aligner (as specified using the 
+## --aligner flag).  
+##
+## The entries shown below are only suggested examples, you can add as many DATABASE 
+## sections as required, and you can comment out or remove as many of the existing 
+## entries as desired.  We suggest including genomes and sequences that may be sources 
+## of contamination either because they were run on your sequencer previously, or may 
+## have contaminated your sample during the library preparation step.
+##
+## Genome of E. coli
+DATABASE	E.coli	/work/bank/bwadb/Escherichia_coli_FRIK2069
+
+## Sequence of PhiX
+DATABASE	PhiX	/work/bank/bwadb/phi.fa
+
+## Genome of yeast
+DATABASE	Yeast	/work/bank/bwadb/yeast.nt
diff --git a/modules/local/module_core.nf b/modules/local/module_core.nf
index dc17401..d037bcb 100644
--- a/modules/local/module_core.nf
+++ b/modules/local/module_core.nf
@@ -102,7 +102,7 @@ process demultiplexStats {
 }
 
 process fastqc {
-	publishDir path: "${params.outdir}/ReadsStats" , mode: 'copy', pattern: '*.zip', saveAs: { filename -> "${name}.zip" }
+	publishDir path: "${params.outdir}/ReadsStats" , mode: 'copy', pattern: '*.zip', saveAs: { filename -> "${name}_fastqc.zip" }
 	publishDir path: "${params.outdir}/ReadsStats" , mode: 'copy', pattern: '*.html', saveAs: { filename -> "${name}.html" }
 	
 	errorStrategy { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' }
@@ -129,7 +129,7 @@ process fastqc {
 
 
 process illuminaFilter {
-	publishDir path: "${params.outdir}/IlluminaFilter" , mode: 'copy', saveAs: { filename -> "${name}.fastq.gz" }
+	publishDir path: "${params.outdir}/IlluminaFilter" , mode: 'copy', pattern: '*.gz'/*, saveAs: { filename -> "${name}.fastq.gz" }*/
 	
 	module 'bioinfo/fastq_illumina_filter-0.1'
 	executor 'slurm'
@@ -143,18 +143,18 @@ process illuminaFilter {
 	
 	output:
 		tuple val("$name"), path("*.fastq.gz"), emit: reads
-		path "*out", emit: log
+		path("*.output"), emit: log
 	
-	script:	// la sortie de gzip est redirigÃ©e, donc peut etre que le -c est inutile...
+	script:
 	"""
-		zcat $read | fastq_illumina_filter --keep N -v 2> ${name}.out | gzip -c -f > good.fastq.gz	
+		zcat $read | fastq_illumina_filter --keep N -v 2> ${name}.output | gzip -c -f > ${name}_filtered.fastq.gz	
 	"""
 	
 }
 
 process search_conta_bwa {
 	// aln command uses ~3.2GB memory and the sampe command uses ~5.4GB
-
+	publishDir path: "${params.outdir}/ContaminationSearch/tmp" , mode: 'copy'
 	module 'bioinfo/bwa-0.7.17'
 	time { 20.m * task.attempt }
 	memory { 5.GB * task.attempt }
@@ -164,7 +164,7 @@ process search_conta_bwa {
 		each genomeRef
 		
 	output:
-		tuple val("${name}_${genomeName}"), path("*")
+		tuple val("${name}_${genomeName}"), path("${name}_${genomeName}.sam"), emit: sam
 		
 	script:
 	genomeName=file(genomeRef).simpleName
@@ -173,6 +173,26 @@ process search_conta_bwa {
 	"""
 }
 
+process BWA_ALIGNMENT {
+	publishDir path: "${params.outdir}/ContaminationSearch/tmp" , mode: 'copy'
+	// Runs `bwa mem` on the reads of one sample against one reference; `each genomeRef` repeats the task for every reference.
+	tag " $sample"
+	// NOTE(review): no `module` directive here, whereas search_conta_bwa loads 'bioinfo/bwa-0.7.17'; confirm bwa is on PATH.
+	input:
+		tuple val(sample), path(reads)
+		each genomeRef
+		
+	output:
+		//tuple val(sample), path("*.log"), emit: log
+		tuple val("${sample}_${genomeName}"), path("${sample}_${genomeName}.sam"), emit: sam
+		
+	script:
+	genomeName=file(genomeRef).simpleName	// no `def`: presumably so the output declaration above can interpolate it (verify)
+	"""
+		bwa mem ${genomeRef} ${reads} 1> ${sample}_${genomeName}.sam 2> ${sample}.log
+	"""
+}
+
 process search_conta_samtools {
 	publishDir path: "${params.outdir}/ContaminationSearch" , mode: 'copy'
 	
@@ -184,7 +204,7 @@ process search_conta_samtools {
 	
 	output:
 		//tuple val("$name"), path("*")
-		path("*")
+		path("*.txt")
 	
 	script:
 	"""
@@ -209,7 +229,24 @@ process search_conta_summary {
 	"""
 		contaCounter.pl ./
 	"""
-	
 }
 
 
+process FASTQSCREEN {
+	publishDir path: "${params.outdir}/ContaminationSearch/FastQ-Screen", mode: 'copy'
+	// Runs fastq_screen on the reads of one sample and emits the *.txt file(s) it produces, keyed by sample.
+	module 'bioinfo/FastQ-Screen-0.15.2'
+	
+	input:
+		tuple val(sample), path(reads)
+	
+	output:
+		tuple val(sample), path("*.txt"), emit: file
+	// Use the configuration shipped in the pipeline's assets/ directory so the command does not depend on where `nextflow run` is launched.
+	script:
+	"""
+		fastq_screen $reads --conf ${workflow.projectDir}/assets/fastq_screen.conf
+	"""
+}
+
+
diff --git a/sub-workflows/local/core_pipeline.nf b/sub-workflows/local/core_pipeline.nf
index 361f108..3017477 100644
--- a/sub-workflows/local/core_pipeline.nf
+++ b/sub-workflows/local/core_pipeline.nf
@@ -8,9 +8,10 @@ include {
 	demultiplexStats;
 	fastqc;
 	illuminaFilter;
-	search_conta_bwa as align;
-	search_conta_samtools as filter;
-	search_conta_summary as summary;
+	//BWA_ALIGNMENT as align;	//search_conta_bwa		//BWA_ALIGNMENT
+	//search_conta_samtools as filter;
+	//search_conta_summary as summary;
+	FASTQSCREEN;
 } from '../../modules/local/module_core.nf'
 
 
@@ -74,6 +75,7 @@ workflow DemuxStat_10x {
 		demultiplexStats(DemuxStatXML, extractInfoForDemuxStats.out, DemuxSummary)
 }
 
+/*
 workflow Search_conta {
 	take:
 		ch_read
@@ -81,9 +83,24 @@ workflow Search_conta {
 	
 	main:
 		align(ch_read, banksForConta)
-		filter(align.out)
+		filter(align.out.sam)
 		summary(filter.out.collect())
 }
+*/
+
+/*
+workflow Search_conta_debug {
+	take:
+		ch_read
+		banksForConta
+	
+	main:
+		illuminaFilter(ch_read)
+		fastqc(illuminaFilter.out.reads)
+		Search_conta(illuminaFilter.out.reads, banksForConta)
+}
+*/
+
 
 workflow Core {
 	take:
@@ -105,12 +122,12 @@ workflow Core {
 		if (params.chemistry == '10X') {
 			DemuxStat_10x(ch_sampleSheet, ch_DemuxStatXML, ch_DemuxSummary)
 		} else {
-			println "Les donnÃ©es ne sont pas 10X !"
+			System.out.println "Les donnÃ©es ne sont pas 10X !"
 		}
 		
 		// ----------- Illumina Filter  // ou SubsetSeqFiles : dans quel cas on fait l'un ou l'autre ????
 		if (params.sequencer == 'NovaSeq' & params.isMultiplex) {
-			println "Les donnÃ©es ne nÃ©cessite pas de passer par IlluminaFilter"
+			System.out.println "Les donnÃ©es ne nÃ©cessite pas de passer par IlluminaFilter"
 			ch_read_good = ch_read
 		} else {	// Si MiSeq ou Nova + noIndex
 			illuminaFilter(ch_read)
@@ -120,6 +137,7 @@ workflow Core {
 		// ----------- FASTQC
 		fastqc(ch_read_good)
 		
-		// ----------- CheckContamination
-		Search_conta(ch_read_good, banksForConta)
+		// ----------- ContaminationSearch
+		//Search_conta(ch_read_good, banksForConta)
+		FASTQSCREEN(ch_read_good)
 }
-- 
GitLab


From b4cccc1b9e255f9902198c206cbc7f65aed4319f Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Wed, 16 Feb 2022 16:18:16 +0100
Subject: [PATCH 19/51] Reorganization of the conf folder #12

---
 conf/base.config   | 139 ++++++++++++++++++++++++++++-----------------
 conf/path.config   |   7 ---
 conf/prod.config   |  34 +++++++++++
 conf/report.config |  33 +++++++++++
 conf/test.config   |  50 +++++++++-------
 5 files changed, 182 insertions(+), 81 deletions(-)
 delete mode 100644 conf/path.config
 create mode 100644 conf/prod.config
 create mode 100644 conf/report.config

diff --git a/conf/base.config b/conf/base.config
index 64b1c66..55b7046 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -1,57 +1,92 @@
-/*
- * -------------------------------------------------
- *  nf-core/template Nextflow base config file
- * -------------------------------------------------
- * A 'blank slate' config file, appropriate for general
- * use on most high performace compute environments.
- * Assumes that all software is installed and available
- * on the PATH. Runs in `local` mode - all jobs will be
- * run on the logged in environment.
- */
-
-process {
+// ========================================
+//				PARAMS
+//=========================================
+System.out.println "Chargement des paramÃ¨tres de base"
+// Fixed params
+params {
+	// EMPTY INITIALISATION OF INPUT PARAMS
+	inputdir = ""
+	outdir = ""			// base output directory for all analysis
+	//outdir="/home/sbsuser/work/Nextflow/wf-illumina-nf/results"	// base output directory for all analysis
+}
 
-  // TODO nf-core: Check the defaults for all processes
-  cpus = { check_max( 1 * task.attempt, 'cpus' ) }
-  memory = { check_max( 7.GB * task.attempt, 'memory' ) }
-  time = { check_max( 4.h * task.attempt, 'time' ) }
+import java.text.SimpleDateFormat
+SimpleDateFormat uniqueness_format = new SimpleDateFormat("yyyMMddHHmmss")
 
-  errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' }
-  maxRetries = 1
-  maxErrors = '-1'
+System.out.println "Lecture de la configuration de run"
+includeConfig "$launchDir/../params.config"
+System.out.println "Lecture de la configuration de run terminÃ©e !"
+// Dynamic params
+params {
+	// Extract run info
+	/*runName=params.inputdir.split('/')[-1]
+	machine=params.inputdir.split('/')[-2]
+	runInfo=runName.split('_')
+	run_date=runInfo[0]
+	machineID=runInfo[1]
+	fcID=runInfo[3]
+	lane=runInfo[4]
+	demuxUniqueness=runInfo[5]*/
+	//-----------------------
+	
+	uniqueness = uniqueness_format.format(new Date())
+	outdir=params.inputdir+"/nextflow/"+uniqueness
 
-  // Process-specific resource requirements
-  // NOTE - Only one of the labels below are used in the fastqc process in the main script.
-  //        If possible, it would be nice to keep the same label naming convention when
-  //        adding in your processes.
-  // TODO nf-core: Customise requirements for specific processes.
-  // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors
-  withLabel:process_low {
-    cpus = { check_max( 2 * task.attempt, 'cpus' ) }
-    memory = { check_max( 14.GB * task.attempt, 'memory' ) }
-    time = { check_max( 6.h * task.attempt, 'time' ) }
-  }
-  withLabel:process_medium {
-    cpus = { check_max( 6 * task.attempt, 'cpus' ) }
-    memory = { check_max( 42.GB * task.attempt, 'memory' ) }
-    time = { check_max( 8.h * task.attempt, 'time' ) }
-  }
-  withLabel:process_high {
-    cpus = { check_max( 12 * task.attempt, 'cpus' ) }
-    memory = { check_max( 84.GB * task.attempt, 'memory' ) }
-    time = { check_max( 10.h * task.attempt, 'time' ) }
-  }
-  withLabel:process_long {
-    time = { check_max( 20.h * task.attempt, 'time' ) }
-  }
-  withName:get_software_versions {
-    cache = false
-  }
+	//samplesheet="${run_date}*.csv"
+	
+	System.out.println "runName : "+runName
+	System.out.println "machine : "+machine
+	System.out.println "machineID : "+machineID
+	System.out.println "run_date : "+run_date
+	System.out.println "fcID : "+fcID
+	System.out.println "lane : "+lane
+	System.out.println "demuxUniqueness : "+demuxUniqueness
+	
+	System.out.println "uniqueness : "+uniqueness
+	System.out.println "outdir : "+outdir
 }
 
-params {
-  // Defaults only, expecting to be overwritten
-  max_memory = 12.GB
-  max_cpus = 8
-  max_time = 4.h
-}
+// ========================================
+//				PROCESS
+//=========================================
+process {
+	executor = 'slurm'
+	queue = 'wflowq'
+	time='1h'
+	cpus = 1
+	memory = 2.GB
+	
+	errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' }
+	maxRetries = 2
+	maxErrors = '-1'
+
+	// ----- WithName 
+	withName: BWA_ALIGNMENT {
+		module = ['bioinfo/bwa-0.7.17']
+	}
+	
+	
+	// ----- WithLabel
+	withLabel: littleJob {
+		executor = 'local'
+	}
+	
+	withLabel: samtools {
+		module = ['bioinfo/samtools-1.14']
+		//cpus = { 6 * task.attempt }
+	    //memory = { 8.GB * task.attempt }
+	    //time = { 3.h * task.attempt }
+	}
+	
+	withLabel: cigar {
+		module = ['system/Python-3.7.4:bioinfo/samtools-1.14']
+	}
+	
+	withLabel: qualimap {
+		module = ['system/R-3.4.3:bioinfo/qualimap-31-08-20']
+		beforeScript='unset DISPLAY'
+		//cpus = { 8 * task.attempt }
+	    //memory = { 2.GB * task.attempt }
+	    //time = { 3.h * task.attempt }
+	}
+}
\ No newline at end of file
diff --git a/conf/path.config b/conf/path.config
deleted file mode 100644
index 4e9c550..0000000
--- a/conf/path.config
+++ /dev/null
@@ -1,7 +0,0 @@
-//not tested.
-withName:fastqc {
-    process.beforeScript = "export PATH=/path/to/fastqc:$PATH" 
-}
-withName:multiqc {
-    process.beforeScript = "export PATH=/path/to/multiqc:$PATH" 
-}
\ No newline at end of file
diff --git a/conf/prod.config b/conf/prod.config
new file mode 100644
index 0000000..f46e5fb
--- /dev/null
+++ b/conf/prod.config
@@ -0,0 +1,34 @@
+// ========================================
+//				PROCESSES
+//=========================================
+process {
+	withLabel: ngl_bi {
+		executor = 'local'
+		beforeScript = "export NGL_BI_CLIENT='/save/sbsuser/scripts-ngs/NGL-Bi_client_Current'"
+		//errorStrategy = { 'ignore' }
+	}
+	
+	withLabel: samtools {
+		cpus = { 6 * task.attempt }
+	    memory = { 8.GB * task.attempt }
+	    time = { 3.h * task.attempt }
+	}
+	
+	withLabel: qualimap {
+		cpus = { 8 * task.attempt }
+	    memory = { 2.GB * task.attempt }
+	    time = { 3.h * task.attempt }
+	}
+	
+	
+	withName: BWA_ALIGNMENT {
+		cpus = { 6 * task.attempt }
+	    memory = { 8.GB * task.attempt }
+	    time = { 3.d * task.attempt }
+	}
+}
+
+// ========================================
+//				CONFIG FILES
+//=========================================
+includeConfig "$baseDir/conf/report.config"
\ No newline at end of file
diff --git a/conf/report.config b/conf/report.config
new file mode 100644
index 0000000..2c3ad2e
--- /dev/null
+++ b/conf/report.config
@@ -0,0 +1,33 @@
+// ========================================
+//				REPORTS
+//=========================================
+timeline {
+	enabled = true
+	file = "${params.outdir}/pipeline_info/execution_timeline.html"
+}
+
+trace {
+	enabled = true
+	file = "${params.outdir}/pipeline_info/execution_trace.txt"
+	fields = 'task_id,native_id,name,status,exit,realtime,%cpu,%mem,duration,script,rss'	// verifier ajout des champs
+}
+
+report {
+	enabled = true
+	file = "${params.outdir}/pipeline_info/execution_report.html"
+}
+
+dag {
+	enabled = true
+	file = "${params.outdir}/pipeline_info/pipeline_dag.svg"
+}
+
+manifest {
+	name = 'get-nextflow-ngl-bi/wf-nanopore-nf'
+	author = 'Jules Sabban'
+	homePage = 'https://forgemia.inra.fr/get-nextflow-ngl-bi/wf-illumina-nf'
+	description = 'Workflow for Nanopore data quality control'
+	mainScript = 'main.nf'
+	nextflowVersion = '>=0.32.0'
+	version = '1.0.0'
+}
\ No newline at end of file
diff --git a/conf/test.config b/conf/test.config
index ce7674c..8a01c75 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -1,22 +1,28 @@
-/*
- * -------------------------------------------------
- *  Nextflow config file for running tests
- * -------------------------------------------------
- * Defines bundled input files and everything required
- * to run a fast and simple test. Use as follows:
- *   nextflow run nf-core/template -profile test
- */
-
-params {
-  config_profile_name = 'Test profile'
-  config_profile_description = 'Minimal test dataset to check pipeline function'
-  // Limit resources so that this can run on Travis
-  max_cpus = 2
-  max_memory = 6.GB
-  max_time = 48.h
-  
-  // Input data
-  // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets
-  // TODO nf-core: Give any required params for the test so that command line flags are not needed
-  inputdir = './data'
-}
+// ========================================
+//				PROCESSES
+//=========================================
+process {
+	withLabel: ngl_bi {
+		executor = 'local'
+		beforeScript = "export NGL_BI_CLIENT='/work/sbsuser/test/jules/ngl-bi_client'"	// test
+		//errorStrategy = { 'ignore' }
+	}
+	
+	withLabel: samtools {
+		cpus = { 1 * task.attempt }
+	    memory = { 2.GB * task.attempt }
+	    time = { 10.m * task.attempt }
+	}
+	
+	withLabel: qualimap {
+		cpus = { 1 * task.attempt }
+	    memory = { 2.GB * task.attempt }
+	    time = { 10.m * task.attempt }
+	}
+}
+
+
+// ========================================
+//				CONFIG FILES
+//=========================================
+includeConfig "$baseDir/conf/report.config"
\ No newline at end of file
-- 
GitLab


From 3f30cdfddd29754d457446f61e783c68e500ff89 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Wed, 16 Feb 2022 16:23:32 +0100
Subject: [PATCH 20/51] Reorganize the nextflow config file #12

---
 nextflow.config | 166 +++++++++++++-----------------------------------
 1 file changed, 43 insertions(+), 123 deletions(-)

diff --git a/nextflow.config b/nextflow.config
index 5aa1549..2fa2203 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -1,71 +1,54 @@
-/*
- * -------------------------------------------------
- *  nf-core/template Nextflow config file
- * -------------------------------------------------
- * Default config options for all environments.
- */
-process{
-	 executor = 'slurm'
-	 queue = 'wflowq'
-	 time='1h'
-	 cpus = 1
-	 memory = 2.GB
-}
-// Global default params, used in configs
-params {
-
-  // Workflow flags
-  // TODO nf-core: Specify your pipeline's command line flags
-  inputdir = "./data"
-  samplesheet = "${params.inputdir}/samples.csv"
-  single_end = false
-  outdir = '/home/sbsuser/work/Nextflow/wf-illumina-nf/results/210722_A00318_0223_BH3GHCDRXY_Lane1_1627020907_global'
-  genomesRefForConta = [ '/work/bank/bwadb/Escherichia_coli_FRIK2069', '/work/bank/bwadb/phi.fa', '/work/bank/bwadb/yeast.nt' ]
-  addBankForConta = ''		// Ajout ponctuel d'un ou plusieurs genomes
-  skip_multiQC = false
-
-  // Boilerplate options
-  name = false
-  multiqc_config = "$baseDir/assets/multiqc_config.yaml"
-  tracedir = "${params.outdir}/pipeline_info"
-  email = false
-  email_on_fail = false
-  monochrome_logs = false
-  help = false
-  config_profile_description = false
-  config_profile_contact = false
-  config_profile_url = false
-  
-  // if use -profile path specify path where all binaries are stored
-  globalPath = "" 
-}
-
-params {
-  // Defaults only, expecting to be overwritten
-  max_memory = 20.GB
-  max_cpus = 4
-  max_time = 40.h
-}
+// ========================================
+//				PARAMS
+//=========================================
+// Global params
+params {	
+	// PARAMETRE POUR OUTILS
+	// TODO
+
+
+  	// CHECK CONTAMINATION
+  	genomesRefForConta = [ '/work/bank/bwadb/Escherichia_coli_FRIK2069', '/work/bank/bwadb/phi.fa', '/work/bank/bwadb/yeast.nt' ]
+  	addBankForConta = ''		// Ajout ponctuel d'un ou plusieurs genomes
+
+	// OTHERS
+	email="jules.sabban@inrae.fr"
+	email_on_fail="jules.sabban@inrae.fr"
+	email_bioinfo="get-plage.bioinfo@genotoul.fr"
+	email_labo="get-plage.labo@genotoul.fr"
+	
+	monochrome_logs = true
+	help = false
+	
+	config_profile_description = false	// ??
+	config_profile_contact = false	// ??
+	config_profile_url = false	// ??
+
+}
+System.out.println "Les paramÃ¨tres globaux sont chargÃ©s"
+// ========================================
+//				PROFILES
+//=========================================
+// Load base.config by default for all pipelines
+includeConfig "$baseDir/conf/base.config"
 
+System.out.println "Les configurations de bases sont chargÃ©es"
 
 // Container slug. Stable releases should specify release tag!
 // Developmental code should specify :dev
 process.container = "$baseDir/template-nf.sif"
 
-// Load base.config by default for all pipelines
-includeConfig 'conf/base.config'
-
 profiles {
-  conda { process.conda = "$baseDir/environment.yml" }
-  debug { process.beforeScript = 'echo $HOSTNAME' }
-  docker { docker.enabled = true }
-  singularity { singularity.enabled = true }
-  test { includeConfig 'conf/test.config' }
-  path { process.beforeScript = "export PATH=${params.globalPath}:$PATH" }
-  multipath { includeConfig 'conf/path.config' }
-  genotoul { includeConfig 'conf/genotoul.config' }
+	conda { process.conda = "$baseDir/environment.yml" }
+	debug { process.beforeScript = 'echo $HOSTNAME' }
+	docker { docker.enabled = true }
+	singularity { singularity.enabled = true }
+	test { includeConfig "$baseDir/conf/test.config" }
+	prod { includeConfig "$baseDir/conf/prod.config" }
 }
 
+System.out.println "Tous les profiles ont Ã©tÃ© analysÃ©s"
+
 // Avoid this error:
 // WARNING: Your kernel does not support swap limit capabilities or the cgroup is not mounted. Memory limited without swap.
 // Testing this in nf-core after discussion here https://github.com/nf-core/tools/pull/351, once this is established and works well, nextflow might implement this behavior as new default.
@@ -73,67 +56,4 @@ docker.runOptions = '-u \$(id -u):\$(id -g)'
 
 // Capture exit codes from upstream processes when piping
 process.shell = ['/bin/bash', '-euo', 'pipefail']
-
-timeline {
-  enabled = true
-  file = "${params.tracedir}/execution_timeline.html"
-}
-
-trace {
-  enabled = true
-  file = "${params.tracedir}/execution_trace.txt"
-  fields = 'task_id,name,status,exit,realtime,%cpu,rss'
-}
-
-report {
-  enabled = true
-  file = "${params.tracedir}/execution_report.html"
-}
-
-dag {
-  enabled = true
-  file = "${params.tracedir}/pipeline_dag.svg"
-}
-
-manifest {
-  name = 'get-nextflow-ngl-bi/template-nf'
-  author = 'CÃ©line Noirot'
-  homePage = 'https://forgemia.inra.fr/get-nextflow-ngl-bi/template-nf'
-  description = 'get workflow template'
-  mainScript = 'main.nf'
-  nextflowVersion = '>=0.32.0'
-  version = '1.0dev'
-}
-
-// Function to ensure that resource requirements don't go beyond
-// a maximum limit
-def check_max(obj, type) {
-  if (type == 'memory') {
-    try {
-      if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1)
-        return params.max_memory as nextflow.util.MemoryUnit
-      else
-        return obj
-    } catch (all) {
-      println "   ### ERROR ###   Max memory '${params.max_memory}' is not valid! Using default value: $obj"
-      return obj
-    }
-  } else if (type == 'time') {
-    try {
-      if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1)
-        return params.max_time as nextflow.util.Duration
-      else
-        return obj
-    } catch (all) {
-      println "   ### ERROR ###   Max time '${params.max_time}' is not valid! Using default value: $obj"
-      return obj
-    }
-  } else if (type == 'cpus') {
-    try {
-      return Math.min( obj, params.max_cpus as int )
-    } catch (all) {
-      println "   ### ERROR ###   Max cpus '${params.max_cpus}' is not valid! Using default value: $obj"
-      return obj
-    }
-  }
-}
+System.out.println "Sortie du nextflow.config"
\ No newline at end of file
-- 
GitLab


From 506301c7fba1ca94fab8a56b3e2b0d0cd8631f69 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Wed, 16 Feb 2022 16:41:33 +0100
Subject: [PATCH 21/51] Add alignment + Qualimap #14

---
 modules/local/module_dna.nf   | 150 ++++++++++++++++++++++++++++++++--
 sub-workflows/local/dna_qc.nf |  41 +++++-----
 2 files changed, 164 insertions(+), 27 deletions(-)

diff --git a/modules/local/module_dna.nf b/modules/local/module_dna.nf
index f8cdc87..8b03855 100644
--- a/modules/local/module_dna.nf
+++ b/modules/local/module_dna.nf
@@ -1,19 +1,153 @@
-process BWAInddex {
-	// BWA
+/*
+ *	Module pour l'alignement des reads ADN sur gÃ©nome de rÃ©fÃ©rence et des statistiques associÃ©es
+*/
+
+process BWA_ALIGNMENT { BWA_ALIGNMENT
+	publishDir path: "${params.outdir}/alignment/bwa" , mode: 'copy'
 	
+	tag " $sample"
 	
+	input:
+		tuple val(sample), path(reads)
+		
+	output:
+		tuple val(sample), path("*.log"), emit: log
+		tuple val(sample), path("*.sam"), emit: sam
+		
+	script:
+	"""
+		module list
+		bwa mem ${params.referenceGenome} ${reads} 1> ${sample}.sam 2> ${sample}.log
+	"""
 }
 
-
-process BWAAlignment {
+process SAMTOOLS_VIEW { 
+	publishDir path: "${params.outdir}/alignment/samtools" , mode: 'copy'
 	
+	tag "$sample"
 	
+	label 'samtools'
 	
+	input:
+		tuple val(sample), path(sam)
+		
+	output:
+		tuple val(sample), path("*.bam"), emit: bam
+		
+	script:
+	"""
+		samtools view -bS ${sam} > ${sample}.bam
+	"""
 }
 
-process AlignmentStats {
-	// PICARD + Samtools 
-	// ou  Qualimap ?
+process SAMTOOLS_SORT {
+	publishDir path: "${params.outdir}/alignment/samtools" , mode: 'copy'
+
+	tag "$sample"
 	
+	label 'samtools'
+
+	input:
+		tuple val(sample), path(bam)
+
+	output:
+		tuple val(sample), path("*.log"), emit: log
+		tuple val(sample), path("*.bam"), emit: bam
+		//path("*.bam"), emit: bam
+
+	script:	// Pourquoi unmerged ??? https://forgemia.inra.fr/genotoul-bioinfo/ng6/-/blob/master/workflows/components/bwa.py#L97
+	"""	
+		samtools sort ${bam} -o ${sample}_unmerged.bam 2>> ${sample}.log
+	"""
+}
+
+process QUALIMAP {
+	publishDir path: "${params.outdir}/alignmentStats/qualimap" , mode: 'copy'
+
+	tag "$sample"
+
+	label 'qualimap'
+
+	input:
+		tuple val(sample), path(bam)
+
+	output:
+		tuple val(sample), path("*.log"), emit: log
+		tuple val(sample), path("*"), emit: all
+		tuple val(sample), path("*.txt"), emit: report
+
+	script:
+	"""
+		qualimap bamqc -bam ${bam} 1> ${sample}.log
+	"""
+}
+
+/*
+process alignmentQualityStats {
+	publishDir path: "${params.outdir}/alignmentStats/cigar" , mode: 'copy'
 	
-}
\ No newline at end of file
+	label 'cigar'
+
+	input:
+		tuple val(sample), path(bam)
+
+	output:
+		tuple val(sample), path("*.log"), emit: log
+		tuple val(sample), path("*.csv"), emit: csv
+		tuple val(sample), path("*.png"), emit: graph
+
+	script:
+	cigarOptions = params.splitReads ? "--readsplit" : ""
+	
+	if (params.pairedEnd) {
+		"""
+			python
+			samtools view -F0x0100 ${bam} | cigarlineGraph.py -i - -t ${sample}_R1.csv ${sample}_R2.csv -o ${sample}_R1.png ${sample}_R2.png ${cigarOptions} 2> ${sample}.log
+		"""
+	} else {
+		"""
+			samtools view -F0x0100 ${bam} | cigarlineGraph.py -i - -t ${sample}_R1.csv ${cigarOptions} 2> ${sample}.log
+		"""
+	}
+}
+
+process alignmentSummary {
+	publishDir path: "${params.outdir}/alignmentStats/summary" , mode: 'copy'
+
+	label 'samtools'
+
+	input:
+		tuple val(sample), path(bam)
+
+	output:
+		tuple val(sample), path("*.stat"), emit: stat
+
+	script:
+	"""
+		samtools view -F0x0100 -bh ${bam} | samtools flagstat - > ${sample}.stat
+	"""
+}
+
+process readAlignementSummary  {	// addTreatment
+	publishDir path: "${params.outdir}/alignmentStats/summary" , mode: 'copy'
+
+	input:
+		tuple val(sample), path(statFile)
+
+	output:
+		tuple val(sample), path("*.log"), emit: log
+
+	script:
+	"""
+		alignementStatTreatment.pl --file ${statFile} 1> ${sample}.log
+	"""
+
+
+}
+
+		//alignmentQualityStats(samtoolsSort.out.bam)
+		//alignmentSummary(samtoolsSort.out.bam)
+		//readAlignementSummary(alignmentSummary.out.stat)
+		
+
+*/
\ No newline at end of file
diff --git a/sub-workflows/local/dna_qc.nf b/sub-workflows/local/dna_qc.nf
index 2c980cb..edfb190 100644
--- a/sub-workflows/local/dna_qc.nf
+++ b/sub-workflows/local/dna_qc.nf
@@ -1,22 +1,25 @@
-// Juste un alignement
-
-
-
-
-
-
-
-
-
-
-workflow dna_qc {
+// -------------------------------------------------
+// 					MODULES
+// -------------------------------------------------
+include { 	BWA_ALIGNMENT;
+			SAMTOOLS_VIEW;
+			SAMTOOLS_SORT;
+			QUALIMAP		} from "$baseDir/modules/local/module_dna.nf"
+
+
+// -------------------------------------------------
+// 					WORKFLOW
+// -------------------------------------------------
+workflow DNA_QC {
 	take:
-		// sortie illuminaFilter ou SubSeqFiles
-		// genome ref
-		
+		fastq
+			
 	main:
-		pr_BWAIndex(genome_ref)
-		pr_BWAAlignment(data)
-		pr_AlignementStats(data)
-		if pairedEnds pr_insertSizes(data)
+		BWA_ALIGNMENT(fastq)
+		SAMTOOLS_VIEW(BWA_ALIGNMENT.out.sam)
+		SAMTOOLS_SORT(SAMTOOLS_VIEW.out.bam)
+		QUALIMAP(SAMTOOLS_SORT.out.bam)
+		
+	emit:
+		qualimap_report = QUALIMAP.out.report
 }
\ No newline at end of file
-- 
GitLab


From 79bae003ec92af4ac8572f4d7bf862604f5e7b39 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Wed, 16 Feb 2022 16:43:00 +0100
Subject: [PATCH 22/51] Script for contamination counting #8

---
 bin/contaCounter.pl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/bin/contaCounter.pl b/bin/contaCounter.pl
index 36bd328..c470d79 100644
--- a/bin/contaCounter.pl
+++ b/bin/contaCounter.pl
@@ -71,7 +71,8 @@ foreach my $file (@files) {
 	# Extraction nom echantillon
 	@simpleNameToSplit = split("_${contaminant}", $simpleFile);
 	my $sampleName = $simpleNameToSplit[0];
-	my ($shortSampleName, $direction) = ($sampleName =~ m/(^[0-9a-zA-Z]*).*(R[1,2])/g);
+	my ($shortSampleName, $direction) = ($sampleName =~ m/^[0-9a-zA-Z]*-([0-9a-zA-Z_]*).*_(R[1,2])/g);
+	#print "FILE : $simpleFile \nSAMPLE : $shortSampleName \nDIRECTION :  $direction\n";
 	
 	# Comptage 
 	my $count = `wc -l $file | cut -d' ' -f1`;
-- 
GitLab


From 4a4af93966813050c1b41bf528be0f9b2e450c01 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Thu, 17 Feb 2022 16:28:46 +0100
Subject: [PATCH 23/51] Add multiQC module #13

---
 assets/multiqc_config.yaml           | 59 +++++++++++++++++++++++++--
 modules/local/module_core.nf         | 14 ++++++-
 modules/local/module_reports.nf      | 26 +++++++++++-
 sub-workflows/local/core_pipeline.nf |  4 ++
 workflow/illumina_qc.nf              | 60 +++++++++++++++++++++++++---
 5 files changed, 151 insertions(+), 12 deletions(-)

diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml
index d7106f3..e3e5c96 100644
--- a/assets/multiqc_config.yaml
+++ b/assets/multiqc_config.yaml
@@ -1,11 +1,64 @@
+## Report general informations
+title: "My Title"	# Change with option --title in command line in process
+#subtitle: "A subtitle to go underneath in grey"
+intro_text: "MultiQC reports summarise Quality Control analysis results."
+
 report_comment: >
-    This report has been generated by the <a href="https://forgemia.inra.fr/get-nextflow-ngl-bi/template-nf" target="_blank">nf-core/template</a>
+    This report has been generated by the <a href="https://forgemia.inra.fr/get-nextflow-ngl-bi/wf-illumina-nf" target="_blank">wf-illumina-nf</a>
     analysis pipeline. For information about how to interpret these results, please see the
-    <a href="https://forgemia.inra.fr/get-nextflow-ngl-bi/template-nf" target="_blank">documentation</a>.
+    <a href="https://forgemia.inra.fr/get-nextflow-ngl-bi/wf-illumina-nf" target="_blank">documentation</a>.
+
+show_analysis_paths: False
+show_analysis_time: False
+
+## Number formatting
+thousandsSep_format: " "
+
+## Plot config
+export_plots: true
+plots_force_interactive: true
+
+## Module config
 report_section_order:
     software_versions:
         order: -1000
     summary:
         order: -1001
+        
+module_order:
+  - fastqc:
+        name: "ReadsStats"
+        #info: "Analysis performed with FastQC, which is a quality control tool for high throughput sequence data, written by Simon Andrews at the Babraham Institute in Cambridge"
+        href: "http://www.bioinformatics.babraham.ac.uk/projects/fastqc/"
+        target: "FastQC"
+  - qualimap:
+        name: "AlignmentStat"
+        #info: "Analysis performed with QualiMap"
+        href: "http://qualimap.bioinfo.cipf.es/"
+        target: "QualiMap"
+  - fastq_screen:
+        name: "ContaminationSearch"
+        #info: "This section shows the module with different files"
+        target: "FastQ-Screen"
 
-export_plots: true
+# Pattern
+sp:
+  fastqc:
+    fn: "*.zip"
+  fastq_screen:
+    fn: '*_screen.txt'
+
+
+custom_logo: "./get_logo.png"
+custom_logo_url: "https://get.genotoul.fr/"
+custom_logo_title: "GeT-GenoToul"
+
+# FastQC
+#top_modules:         # Keep FastQC on top of the report
+#  - "fastqc"
+
+
+# FastQC-Screen
+fastqscreen_simpleplot: true
+
+# Qualimap
diff --git a/modules/local/module_core.nf b/modules/local/module_core.nf
index d037bcb..b5d43fb 100644
--- a/modules/local/module_core.nf
+++ b/modules/local/module_core.nf
@@ -114,11 +114,13 @@ process fastqc {
 	time { 45.m * task.attempt }
 	memory '1.GB'
 	
+	tag " $name"
+	
 	input:
 		tuple val(name), path(read)
 		
 	output:
-		path "*_fastqc.{zip,html}" , emit: ch_fastqc_result
+		tuple val(name), path("*_fastqc.{zip,html}") , emit: report
 		// path log files
 		
 	script:
@@ -138,6 +140,8 @@ process illuminaFilter {
 	time { 1.h * task.attempt }
 	memory '1.GB'
 	
+	tag " $name"
+	
 	input:
 		tuple val(name), path(read)
 	
@@ -199,6 +203,8 @@ process search_conta_samtools {
 	module 'bioinfo/samtools-1.9'
 	time { 10.m * task.attempt }
 	
+	tag " $sample"
+	
 	input:
 		tuple val(name), path("*")
 	
@@ -218,6 +224,8 @@ process search_conta_summary {
 	time { 10.m * task.attempt }
 	memory '1.GB'
 	
+	tag " $sample"
+	
 	input:
 		//tuple val(name), path("*")
 		path("*")
@@ -237,11 +245,13 @@ process FASTQSCREEN {
 	
 	module 'bioinfo/FastQ-Screen-0.15.2'
 	
+	tag " $sample"
+	
 	input:
 		tuple val(sample), path(reads)
 	
 	output:
-		tuple val(sample), path("*.txt"), emit: file
+		tuple val(sample), path("*.txt"), emit: report
 	
 	script:
 	"""
diff --git a/modules/local/module_reports.nf b/modules/local/module_reports.nf
index 397793f..047ae62 100644
--- a/modules/local/module_reports.nf
+++ b/modules/local/module_reports.nf
@@ -1,4 +1,6 @@
-params.outdir=''
+/*
+ *	Module pour la gÃ©nÃ©ration de rapports
+*/
 
 summary = [:]
 
@@ -32,4 +34,24 @@ process workflow_summary {
 		workflow_summary(summary)
 	 
  }
-  
\ No newline at end of file
+
+ 
+process MULTIQC {
+	publishDir path: "${params.outdir}/MultiQC" , mode: 'copy'
+	
+	module '/tools/share/Modules/bioinfo/MultiQC-v1.11'
+	
+	input:
+		path fastqc
+		path fastqscreen
+		path qualimap
+		
+	output:
+		path "*.html", emit: html
+	
+	script:
+	"""
+		module list
+		multiqc -f . --config $baseDir/assets/multiqc_config.yaml
+	"""	
+}
\ No newline at end of file
diff --git a/sub-workflows/local/core_pipeline.nf b/sub-workflows/local/core_pipeline.nf
index 3017477..3b8967a 100644
--- a/sub-workflows/local/core_pipeline.nf
+++ b/sub-workflows/local/core_pipeline.nf
@@ -140,4 +140,8 @@ workflow Core {
 		// ----------- ContaminationSearch
 		//Search_conta(ch_read_good, banksForConta)
 		FASTQSCREEN(ch_read_good)
+		
+	emit:
+		fastqc_report = fastqc.out.report
+		fastqscreen_report = FASTQSCREEN.out.report
 }
diff --git a/workflow/illumina_qc.nf b/workflow/illumina_qc.nf
index 0a25e4d..e600ef1 100644
--- a/workflow/illumina_qc.nf
+++ b/workflow/illumina_qc.nf
@@ -67,7 +67,7 @@ if (params.help) {
 */
 
 // ------------- Test 10x ------------ //
-
+/*
 params.sequencer = 'NovaSeq'
 params.outdir = '/home/sbsuser/work/Nextflow/wf-illumina-nf/results/210722_A00318_0223_BH3GHCDRXY_Lane1_1627020907_10x'	// In config file
 params.raw_data = ''
@@ -75,7 +75,7 @@ params.data = '/home/sbsuser/work/Nextflow/wf-illumina-nf/data_test/NovaSeq/2107
 params.isMultiplex = true
 params.chemistry = '10X'
 ch_ss = Channel.fromPath(params.data+'/SampleSheet_global.csv')
-
+*/
 
 // ------------- Test MiSeq ------------ //
 /*
@@ -87,7 +87,7 @@ params.isMultiplex = true
 params.chemistry = 'amplicon'
 */
 
-
+/*
 //ch_ss = Channel.fromPath(params.data+'/SampleSheet.csv')
 ch_DemuxStatXML=Channel.fromPath(params.data+'/Stats/DemultiplexingStats.xml')
 ch_DemuxSummary=Channel.fromPath(params.data+'/Stats/DemuxSummaryF1L1.txt')
@@ -96,13 +96,33 @@ ch_read=Channel
 	//.fromPath(params.data+'/ROME/B20CG-*_R{1,2}_*.fastq.gz')
 	.map{$it -> [$it.simpleName, $it]}
 	.groupTuple()
+*/
 
+// ------------- Test Amplicon ------------ //
+params.sequencer = 'MiSeq'
+//params.outdir = ''	// In config file
+params.raw_data = ''
+//params.data = '/home/sbsuser/work/Nextflow/wf-illumina-nf/data_test/NovaSeq/211129_A00318_0259_AHNMTTDSX2_Lane1_1638345606_dna'
+//params.isMultiplex = true
+//params.chemistry = 'Default'
+ch_ss = Channel.fromPath(params.samplesheet)		// utilitÃ© d'aprÃ¨s la SS dans un params ??
+ch_DemuxSummary=Channel.fromPath(params.inputdir+"/Stats/DemuxSummaryF1L*.txt")
+ch_DemuxStatXML=Channel.fromPath(params.inputdir+'/Stats/DemultiplexingStats.xml')
+//params.pairedEnd = true
+//params.splitReads = true	// ????
+//params.referenceGenome = '/save/ng6/TODO/HiSeqIndexedGenomes/new_struct/Quercus_robur/genome/GCA_900291515.1/BWA/GCA_900291515.1_Q_robur_v1_genomic.fna'
+ch_read=Channel
+	.fromPath(params.data+'/*_R{1,2}_*.fastq.gz')
+	.map{$it -> [$it.simpleName, $it]}
+	//.fromFilePairs(params.data+'/*_R{1,2}_*.fastq.gz')
+	//.groupTuple()
 
 
 mismatchNumber = params.sequencer == 'MiSeq'? 0 : 1
 
 banksForConta = params.addBankForConta ? params.genomesRefForConta << params.addBankForConta : params.genomesRefForConta
 
+System.out.println "On y est presque..."
 createDir = file(params.outdir).mkdir()
 
 // -------------------------------------------------
@@ -124,14 +144,44 @@ if amplicon {
 	}
 }
 */
-include { Core as CORE	} from '../sub-workflows/local/core_pipeline.nf'
-
+include { Core as CORE	} from "$baseDir/sub-workflows/local/core_pipeline.nf"
+include { DNA_QC		} from "$baseDir/sub-workflows/local/dna_qc.nf"
+include { MULTIQC		} from "$baseDir/modules/local/module_reports.nf"
+System.out.println "Tous les includes : OK"
 // -------------------------------------------------
 // 					WORKFLOW
 // -------------------------------------------------
 workflow ILLUMINA_QC {
 
 	CORE(ch_ss, ch_DemuxStatXML, ch_DemuxSummary, ch_read, banksForConta )		/*ch_ngl, ch_runInfo, mismatchNumber,  params.raw_data*/
+	
+	
+	if (params.chemistry == 'Default') {
+		DNA_QC(ch_read)
+	} else {
+		System.out.println "Pas de sous-workflow DNA_QC()"
+	  }
+
+
+	// MultiQC
+	MULTIQC(CORE.out.fastqc_report.collect{it[1]}.ifEmpty([]),
+			CORE.out.fastqscreen_report.collect{it[1]}.ifEmpty([]),
+			DNA_QC.out.qualimap_report.collect{it[1]}.ifEmpty([])
+	)
+	
+	/*	
+		if overlap, alors : 
+			diversity_qc sub-workflow
+			
+		else : 
+			if DNA, alors :
+				dna_qc sub-worflow
+			if RNA, alors :
+				rna_qc sub-workflow
+			if Methyl, alors :
+				methyl_qc sub-worflow
+		
+	*/
 
 }
 
-- 
GitLab


From b17a14e56cc314705efe3d31b8e3a7a97cf63dca Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Thu, 17 Feb 2022 16:30:13 +0100
Subject: [PATCH 24/51] Little optimisation of qualimap process #14

---
 modules/local/module_dna.nf | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/modules/local/module_dna.nf b/modules/local/module_dna.nf
index 8b03855..a3fdef5 100644
--- a/modules/local/module_dna.nf
+++ b/modules/local/module_dna.nf
@@ -67,18 +67,20 @@ process QUALIMAP {
 	tag "$sample"
 
 	label 'qualimap'
+	
+	errorStrategy = { 'ignore' }
 
 	input:
 		tuple val(sample), path(bam)
 
 	output:
 		tuple val(sample), path("*.log"), emit: log
-		tuple val(sample), path("*"), emit: all
-		tuple val(sample), path("*.txt"), emit: report
+		tuple val(sample), path("*/*"), emit: all	// ${sample}_stats/*
+		tuple val(sample), path("${sample}"), emit: report
 
 	script:
 	"""
-		qualimap bamqc -bam ${bam} 1> ${sample}.log
+		qualimap bamqc -bam ${bam} -outdir ${sample} 1> ${sample}.log
 	"""
 }
 
-- 
GitLab


From dfa4c39c912dab2197e18a28e8215d93b5014dca Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Thu, 17 Feb 2022 16:51:19 +0100
Subject: [PATCH 25/51] Make adaptive report title #13

---
 assets/multiqc_config.yaml      | 5 +++--
 modules/local/module_reports.nf | 3 +--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml
index e3e5c96..7358ead 100644
--- a/assets/multiqc_config.yaml
+++ b/assets/multiqc_config.yaml
@@ -1,7 +1,8 @@
 ## Report general informations
-title: "My Title"	# Change with option --title in command line in process
+# Change with option --title in command line in process
+title: "My Title"
 #subtitle: "A subtitle to go underneath in grey"
-intro_text: "MultiQC reports summarise Quality Control analysis results."
+intro_text: "This MultiQC report summarises Quality Control analysis results."
 
 report_comment: >
     This report has been generated by the <a href="https://forgemia.inra.fr/get-nextflow-ngl-bi/wf-illumina-nf" target="_blank">wf-illumina-nf</a>
diff --git a/modules/local/module_reports.nf b/modules/local/module_reports.nf
index 047ae62..7581ea5 100644
--- a/modules/local/module_reports.nf
+++ b/modules/local/module_reports.nf
@@ -51,7 +51,6 @@ process MULTIQC {
 	
 	script:
 	"""
-		module list
-		multiqc -f . --config $baseDir/assets/multiqc_config.yaml
+		multiqc -f . --config $baseDir/assets/multiqc_config.yaml --title ${params.project}
 	"""	
 }
\ No newline at end of file
-- 
GitLab


From 9f62401f814df9aa66f2fbb67bfb14623cb2a729 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Fri, 18 Feb 2022 15:12:58 +0100
Subject: [PATCH 26/51] Add parameter to trim samples name in multiqc report
 #13

---
 assets/multiqc_config.yaml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml
index 7358ead..f894b64 100644
--- a/assets/multiqc_config.yaml
+++ b/assets/multiqc_config.yaml
@@ -15,6 +15,11 @@ show_analysis_time: False
 ## Number formatting
 thousandsSep_format: " "
 
+## Sample name formatting
+extra_fn_clean_trim:
+  - "_filtered"
+  - "_unmerged"
+
 ## Plot config
 export_plots: true
 plots_force_interactive: true
-- 
GitLab


From 84eeef144f3c30ba93a1ac17a5f7dba25f835118 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Tue, 12 Jul 2022 16:09:20 +0200
Subject: [PATCH 27/51] Move file from Eclipse to Visual Studio

---
 bin/contaCounter.pl             | 192 ++++++------
 bin/createNGLBiReadSets.pl      | 252 +++++++--------
 bin/demuxStatsFromXML.R         | 418 ++++++++++++-------------
 bin/extractInfoForDemuxStats.pl | 248 +++++++--------
 bin/extractInfoForReadSets.pl   | 210 ++++++-------
 conf/prod.config                |  66 ++--
 conf/report.config              |  64 ++--
 conf/test.config                |  60 ++--
 main.nf                         |   3 +-
 modules/local/module_NGL-Bi.nf  | 106 +++----
 modules/local/module_core.nf    | 522 ++++++++++++++++----------------
 modules/local/module_dna.nf     | 308 +++++++++----------
 modules/local/module_reports.nf | 110 +++----
 modules/local/module_test.nf    |  35 ++-
 sub-workflows/local/dna_qc.nf   |  48 +--
 workflow/illumina_qc.nf         | 363 +++++++++++-----------
 16 files changed, 1503 insertions(+), 1502 deletions(-)

diff --git a/bin/contaCounter.pl b/bin/contaCounter.pl
index c470d79..5c4bb6c 100644
--- a/bin/contaCounter.pl
+++ b/bin/contaCounter.pl
@@ -1,96 +1,96 @@
-#!/usr/bin/perl -w
-binmode STDIN,  ':encoding(UTF-8)';
-binmode STDOUT, ':encoding(UTF-8)';
-binmode STDERR, ':encoding(UTF-8)';
-
-=head1 NAME
-
- contaCounter.pl
- 
-=head1 DESCRIPTION
-
- Make statistics on samtools outputs
- 
-=head1 SYNOPSIS
-
- contacounter.pl <pahto_to_folder>
-
-=head1 OPTIONS
-
-
- 
-=head1 EXEMPLES
-
- perl countaCounter.pl ./
-
-=head1 AUTHOR
-
- Jules Sabban pour Plateforme genomique Toulouse (get-plage.bioinfo@genotoul.fr)
- 
-=cut
-
-###################################################################
-#
-#						LIBRAIRIES
-#
-###################################################################
-use strict;
-use Getopt::Long;
-use File::Basename;
-
-##################################################################
-#
-#						INITIALISATION
-#
-##################################################################
-my @files = glob($ARGV[0]."*.txt");
-#my @files = glob("/home/sbsuser/work/Nextflow/wf-illumina-nf/results/210722_A00318_0223_BH3GHCDRXY_Lane1_1627020907_10x/CheckContamination/*.txt");
-
-#print "FILE : @files\n";
-
-if ($#files == 0) {
-	print STDERR "[Erreur] Le repertoire $ARGV[0] ne contient aucun fichiers !\n";
-	exit 5;
-}
-
-my %hash;
-
-##################################################################
-#
-#							MAIN
-#
-##################################################################
-
-foreach my $file (@files) {
-	my $simpleFile = basename($file,  ".txt");
-	
-	# Extraction nom contaminant
-	my @simpleNameToSplit = split("_", $simpleFile);
-	my $contaminant = $simpleNameToSplit[-1];
-	
-	# Extraction nom echantillon
-	@simpleNameToSplit = split("_${contaminant}", $simpleFile);
-	my $sampleName = $simpleNameToSplit[0];
-	my ($shortSampleName, $direction) = ($sampleName =~ m/^[0-9a-zA-Z]*-([0-9a-zA-Z_]*).*_(R[1,2])/g);
-	#print "FILE : $simpleFile \nSAMPLE : $shortSampleName \nDIRECTION :  $direction\n";
-	
-	# Comptage 
-	my $count = `wc -l $file | cut -d' ' -f1`;
-	
-	# Ajout dans le hash
-	$hash{"$shortSampleName($direction)"}{$contaminant}=$count;
-}
-
-# Extract info from hash
-my $contentToYAML = "Statistics from contamination search.\n";
-foreach my $sample (keys(%hash)) {
-	$contentToYAML.="$sample:\n";
-	foreach my $conta (keys($hash{$sample})){
-		$contentToYAML.="\t${conta}:$hash{$sample}{$conta}";
-	}
-}
-
-# Print info to file
-open(my $fh, '>', "summary.yaml") or exit 1;
-print $fh $contentToYAML;
-close $fh;
+#!/usr/bin/perl -w
+binmode STDIN,  ':encoding(UTF-8)';
+binmode STDOUT, ':encoding(UTF-8)';
+binmode STDERR, ':encoding(UTF-8)';
+
+=head1 NAME
+
+ contaCounter.pl
+ 
+=head1 DESCRIPTION
+
+ Make statistics on samtools outputs
+ 
+=head1 SYNOPSIS
+
+ contaCounter.pl <path_to_folder>
+
+=head1 OPTIONS
+
+
+ 
+=head1 EXEMPLES
+
+ perl contaCounter.pl ./
+
+=head1 AUTHOR
+
+ Jules Sabban pour Plateforme genomique Toulouse (get-plage.bioinfo@genotoul.fr)
+ 
+=cut
+
+###################################################################
+#
+#						LIBRAIRIES
+#
+###################################################################
+use strict;
+use Getopt::Long;
+use File::Basename;
+
+##################################################################
+#
+#						INITIALISATION
+#
+##################################################################
+my @files = glob($ARGV[0]."*.txt");
+#my @files = glob("/home/sbsuser/work/Nextflow/wf-illumina-nf/results/210722_A00318_0223_BH3GHCDRXY_Lane1_1627020907_10x/CheckContamination/*.txt");
+
+#print "FILE : @files\n";
+
+if (!@files) {	# no *.txt matched; $#files is -1 (not 0) for an empty array, so the old test fired on exactly one file
+	print STDERR "[Erreur] Le repertoire $ARGV[0] ne contient aucun fichiers !\n";
+	exit 5;
+}
+
+my %hash;
+
+##################################################################
+#
+#							MAIN
+#
+##################################################################
+
+foreach my $file (@files) {
+	my $simpleFile = basename($file,  ".txt");
+	
+	# Extraction nom contaminant
+	my @simpleNameToSplit = split("_", $simpleFile);
+	my $contaminant = $simpleNameToSplit[-1];
+	
+	# Extraction nom echantillon
+	@simpleNameToSplit = split("_${contaminant}", $simpleFile);
+	my $sampleName = $simpleNameToSplit[0];
+	my ($shortSampleName, $direction) = ($sampleName =~ m/^[0-9a-zA-Z]*-([0-9a-zA-Z_]*).*_(R[1,2])/g);
+	#print "FILE : $simpleFile \nSAMPLE : $shortSampleName \nDIRECTION :  $direction\n";
+	
+	# Comptage 
+	my $count = `wc -l $file | cut -d' ' -f1`;
+	
+	# Ajout dans le hash
+	$hash{"$shortSampleName($direction)"}{$contaminant}=$count;
+}
+
+# Extract info from hash
+my $contentToYAML = "Statistics from contamination search.\n";
+foreach my $sample (keys(%hash)) {	# one "<sample>(<direction>):" header per sample
+	$contentToYAML.="$sample:\n";
+	foreach my $conta (keys(%{$hash{$sample}})){	# explicit dereference: keys() on a hash reference is a compile error since Perl 5.24
+		$contentToYAML.="\t${conta}:$hash{$sample}{$conta}";	# the count still ends with the newline printed by wc/cut
+	}
+}
+
+# Print info to file
+open(my $fh, '>', "summary.yaml") or exit 1;
+print $fh $contentToYAML;
+close $fh;
diff --git a/bin/createNGLBiReadSets.pl b/bin/createNGLBiReadSets.pl
index fbfe6fd..e5cdf2e 100644
--- a/bin/createNGLBiReadSets.pl
+++ b/bin/createNGLBiReadSets.pl
@@ -1,127 +1,127 @@
-#!/usr/bin/perl -w
-binmode STDIN,  ':encoding(UTF-8)';
-binmode STDOUT, ':encoding(UTF-8)';
-binmode STDERR, ':encoding(UTF-8)';
-
-=head1 NAME
-
- createNGLBiReadSets.pl
- 
-=head1 DESCRIPTION
-
- Performe readSets creation on NGL-Bi
- 
-=head1 SYNOPSIS
-
- createNGLBiReadSets.pl --infoFile <path> --env_ngl_bi <ENV>
-
-=head1 OPTIONS
-
- --infoFile=s : path to the info file
- --env_ngl_bi=s : environment varible of ngl-bi
- 
-=head1 EXEMPLES
-
- perl createNGLBiReadSets.pl --infoFile <path> --env_ngl_bi <ENV>
-
-=head1 AUTHOR
-
- Jules Sabban pour Plateforme genomique Toulouse (get-plage.bioinfo@genotoul.fr)
- 
-=cut
-
-###################################################################
-#
-#						LIBRAIRIES
-#
-###################################################################
-use strict;
-use Getopt::Long;
-use Log::Log4perl  qw(:easy);;
-
-##################################################################
-#
-#						INITIALISATION
-#
-##################################################################
-Log::Log4perl -> easy_init( {   level    => $TRACE,
-                                utf8     => 1,
-                                layout   => '[%d][%p>createNGLBiReadSets.pl:L%L] %m%n' } );
-
-my $logger = Log::Log4perl -> get_logger();
-
-my $infoFile="";
-my $env_ngl_bi = "";
-
-GetOptions ('infoFile=s' => \$infoFile,
-			"env_ngl_bi=s" => \$env_ngl_bi, 	# environnement path of NGL-Bi
-);
-
-if ($env_ngl_bi eq "" || $infoFile eq "" ) {
-	$logger -> logdie("USAGE : createNGLBiReadSets.pl --infoFile <File> --env_ngl_bi <ENV>\n");
-}
-
-my $experimentName="";
-my $runName="";
-my $laneNumber="";
-my $script_path="/save/sbsuser/scripts-ngs/NGL-Bi_client_Current/GeT/perl"; # RÃ©pertoire des scripts de l'API NGL
-
-##################################################################
-#
-#						NGL-Bi ENVIRONMENT
-#
-##################################################################
-
-$ENV{APIPERL}=$env_ngl_bi;
-$ENV{CONFFILE}=$env_ngl_bi."conf/prod_illumina_qc.conf";
-$logger = Log::Log4perl -> get_logger('loadConfFile');
-unless ($ENV{CONFFILE}) {
-	$logger -> logdie("$0 : Database configuration file not defined ! Initialize 'CONFFILE' with configuration file path in your environment");
-}
-my $dbconf_file = $ENV{CONFFILE};
-unless (-f $dbconf_file) {
-	$logger -> logdie("$0 : Database configuration file does not exist : $dbconf_file. It's necessary for continue.");
-}
-open my $handle, '<', $dbconf_file;
-chomp ( my @lines = <$handle> );
-close $handle;
-foreach my $line (@lines) {
-	$line =~ s/#.*//o;
-	unless ($line) {next;}
-	if ($line =~ /(.*)=(.*)/o) {
-		my $key = $1;
-		my $value = $2;
-		$key =~ s/^\s*//o;
-		$key =~ s/\s*$//o;
-		$value =~ s/^\s*//o;
-		$value =~ s/^\s*//o;
-		$ENV{$key} = $value;
-	} else {
-		$logger -> logdie("$0 : Can't load variable to dababase configration file $dbconf_file in line : '$_'");
-	}
-}
-
-unshift @INC, $env_ngl_bi."Common_tools/src/perl/lib";
-unshift @INC, $env_ngl_bi."DB_tools/src/perl/lib";
-
-require illumina;
-require json;
-$logger -> info("\tVariables d'environnement pour NGL-Bi charÃ©es.");
-
-##################################################################
-#
-#						INFO FILE READING
-#
-##################################################################
-$experimentName=`grep "ExperimentName" $infoFile | cut -d';' -f2` or $logger -> logdie("[Erreur] grep ExperimentName impossible : $!");
-$runName=`grep "NGLBiRunName" $infoFile | cut -d';' -f2` or $logger -> logdie("[Erreur] grep NGLBiRunName impossible : $!");
-$laneNumber=`grep "LaneNumber" $infoFile | cut -d';' -f2` or $logger -> logdie("[Erreur] grep LaneNumber impossible : $!");
-
-chomp($experimentName);
-chomp($runName);
-chomp($laneNumber);
-
-
-my $commandNGLBiReadSets = "perl $script_path/createNGL-BiReadSets.pl --NGLBiRunCode $runName --NGLSqExperimentCode $experimentName --laneNumberToWorkOn $laneNumber";
-$logger -> info("\tCreation des readSets dans NGL-Bi : ".$commandNGLBiReadSets);
+#!/usr/bin/perl -w
+binmode STDIN,  ':encoding(UTF-8)';
+binmode STDOUT, ':encoding(UTF-8)';
+binmode STDERR, ':encoding(UTF-8)';
+
+=head1 NAME
+
+ createNGLBiReadSets.pl
+ 
+=head1 DESCRIPTION
+
+ Performs readSets creation in NGL-Bi
+ 
+=head1 SYNOPSIS
+
+ createNGLBiReadSets.pl --infoFile <path> --env_ngl_bi <ENV>
+
+=head1 OPTIONS
+
+ --infoFile=s : path to the info file
+ --env_ngl_bi=s : root path of the NGL-Bi environment (used as a prefix, so it must end with '/')
+ 
+=head1 EXEMPLES
+
+ perl createNGLBiReadSets.pl --infoFile <path> --env_ngl_bi <ENV>
+
+=head1 AUTHOR
+
+ Jules Sabban pour Plateforme genomique Toulouse (get-plage.bioinfo@genotoul.fr)
+ 
+=cut
+
+###################################################################
+#
+#						LIBRAIRIES
+#
+###################################################################
+use strict;
+use Getopt::Long;
+use Log::Log4perl  qw(:easy);;
+
+##################################################################
+#
+#						INITIALISATION
+#
+##################################################################
+Log::Log4perl -> easy_init( {   level    => $TRACE,
+                                utf8     => 1,
+                                layout   => '[%d][%p>createNGLBiReadSets.pl:L%L] %m%n' } );
+
+my $logger = Log::Log4perl -> get_logger();
+
+my $infoFile="";
+my $env_ngl_bi = "";
+
+GetOptions ('infoFile=s' => \$infoFile,
+			"env_ngl_bi=s" => \$env_ngl_bi, 	# environnement path of NGL-Bi
+);
+
+if ($env_ngl_bi eq "" || $infoFile eq "" ) {
+	$logger -> logdie("USAGE : createNGLBiReadSets.pl --infoFile <File> --env_ngl_bi <ENV>\n");
+}
+
+my $experimentName="";
+my $runName="";
+my $laneNumber="";
+my $script_path="/save/sbsuser/scripts-ngs/NGL-Bi_client_Current/GeT/perl"; # RÃ©pertoire des scripts de l'API NGL
+
+##################################################################
+#
+#						NGL-Bi ENVIRONMENT
+#
+##################################################################
+
+$ENV{APIPERL}=$env_ngl_bi;
+$ENV{CONFFILE}=$env_ngl_bi."conf/prod_illumina_qc.conf";
+$logger = Log::Log4perl -> get_logger('loadConfFile');
+unless ($ENV{CONFFILE}) {
+	$logger -> logdie("$0 : Database configuration file not defined ! Initialize 'CONFFILE' with configuration file path in your environment");
+}
+my $dbconf_file = $ENV{CONFFILE};
+unless (-f $dbconf_file) {
+	$logger -> logdie("$0 : Database configuration file does not exist : $dbconf_file. It's necessary for continue.");
+}
+open my $handle, '<', $dbconf_file or $logger -> logdie("$0 : Can't open database configuration file $dbconf_file : $!");
+chomp ( my @lines = <$handle> );
+close $handle;
+foreach my $line (@lines) {	# every remaining line must be "key = value"; the pair is exported into %ENV
+	$line =~ s/#.*//o;	# strip comments
+	unless ($line =~ /\S/) {next;}	# skip empty lines, including blanks left in front of a stripped comment
+	if ($line =~ /(.*)=(.*)/o) {
+		my $key = $1;
+		my $value = $2;
+		$key =~ s/^\s*//o;
+		$key =~ s/\s*$//o;
+		$value =~ s/^\s*//o;
+		$value =~ s/\s*$//o;	# trim the trailing side too (the leading side used to be trimmed twice)
+		$ENV{$key} = $value;
+	} else {
+		$logger -> logdie("$0 : Can't load variable to dababase configration file $dbconf_file in line : '$line'");	# report the offending line; $_ is not set by this loop
+	}
+}
+
+unshift @INC, $env_ngl_bi."Common_tools/src/perl/lib";
+unshift @INC, $env_ngl_bi."DB_tools/src/perl/lib";
+
+require illumina;
+require json;
+$logger -> info("\tVariables d'environnement pour NGL-Bi charÃ©es.");
+
+##################################################################
+#
+#						INFO FILE READING
+#
+##################################################################
+$experimentName=`grep "ExperimentName" $infoFile | cut -d';' -f2` or $logger -> logdie("[Erreur] grep ExperimentName impossible : $!");
+$runName=`grep "NGLBiRunName" $infoFile | cut -d';' -f2` or $logger -> logdie("[Erreur] grep NGLBiRunName impossible : $!");
+$laneNumber=`grep "LaneNumber" $infoFile | cut -d';' -f2` or $logger -> logdie("[Erreur] grep LaneNumber impossible : $!");
+
+chomp($experimentName);
+chomp($runName);
+chomp($laneNumber);
+
+
+my $commandNGLBiReadSets = "perl $script_path/createNGL-BiReadSets.pl --NGLBiRunCode $runName --NGLSqExperimentCode $experimentName --laneNumberToWorkOn $laneNumber";
+$logger -> info("\tCreation des readSets dans NGL-Bi : ".$commandNGLBiReadSets);
 my $result_commandNGLBiReadSets = `$commandNGLBiReadSets 2>&1`; $? and $logger -> logdie("[Erreur]Lancement de createNGL-BiReadSets.pl\n".$result_commandNGLBiReadSets);
\ No newline at end of file
diff --git a/bin/demuxStatsFromXML.R b/bin/demuxStatsFromXML.R
index f250311..1f33529 100644
--- a/bin/demuxStatsFromXML.R
+++ b/bin/demuxStatsFromXML.R
@@ -1,209 +1,209 @@
-#!/usr/bin/env Rscript
-
-# R version : 4.0.4
-## module load system/R-4.0.4_gcc-9.3.0
-
-# demuxStatsFromXML.R
-# Lecture d'un fichier XML pour extraction et mise ne forme des statistiques de dÃ©multiplexage (orientÃ© 10X pour le moment)
-# Par Ã©chantillon, ce script rÃ©cupÃ¨re tous les index associÃ©s, le nombre de reads trouvÃ©s, dont le nombre de barcodes lus parfaitement et le nombre de barcode lus avec un mismatch.
-# Ce sctipt rÃ©cupÃ¨re aussi les index trÃ¨s souvent retrouvÃ©s mais non associÃ© Ã  un echantillon
-# Le pourcentage du nombre de fragments par Ã©chantillon sur le nombre total est calculÃ©
-
-## --------------------
-#       PACKAGES
-## --------------------
-library('xml2')
-library('stringr')
-library('optparse')
-
-## --------------------
-#       FUNCTIONS
-## --------------------
-concat_df = function(df1, df2, col.names) {
-	colnames(df2)<-col.names
-	df_tmp<-rbind(df1, df2)
-	return(df_tmp)
-}
-
-## --------------------
-#       PARAMETERS
-## --------------------
-option_list = list(
-		# All arguments are compulsory 
-		make_option(c("-x", "--xml"), type = "character", default = NULL, metavar = "character", 
-				help = "Path to the DemultiplexingStats.xml file."),
-		make_option(c("-i", "--indexNumber"), type = "character", default = NULL, metavar = "character", 
-				help = "Path to the .indexNumber file."),
-		make_option(c("-d", "--demuxSum"), type = "character", default = NULL, metavar = "character", 
-				help = "Path to the demuxSummary.txt file.")
-)
-
-opt_parser = OptionParser(usage="Make demultiplexStats easier to read.", option_list = option_list)
-opt = parse_args(opt_parser)
-
-if(is.null(opt$xml) | is.null(opt$indexNumber) | is.null(opt$demuxSum)) {
-	stop("At least one argument is missing.\n", call. = FALSE)
-}
-
-## --------------------
-#          LOG
-## --------------------
-cat("\nLancement du script demuxStatsFromXML.R avec les options suivantes :\n")
-cat(paste0("\tFichier XML :\t\t", opt$xml, "\n"))
-cat(paste0("\tFichier IndexNumber :\t", opt$indexNumber, "\n"))
-cat(paste0("\tDemux Summary :\t\t" , opt$demuxSum, "\n"))
-launchDir<-getwd()
-cat(paste0("\nLe fichier de sortie sera Ã©crit dans le rÃ©pertoire :\t",launchDir , "\n\n"))
-
-## --------------------
-#          MAIN
-## --------------------
-xml<-read_xml(opt$xml)
-
-df<-data.frame()
-vec.names<-c("Project", "Sample", "Barcode", "bcCount", "bcPerfect", "bcOneMismatch")
-
-projects<-xml_find_all(xml, "//Project")
-
-cat("Lecture du XML\n")
-for (pr in 1:length(projects)){
-	project<-xml_attr(projects[pr], "name")
-	Samples<-xml_children(projects[pr])
-	for (sample in 1:length(Samples)){
-		sample_name<-xml_attr(Samples[sample], "name")
-		xml_bc<-xml_children(Samples[sample])
-		barcode_names<-xml_attr(xml_bc, "name")
-		for (bc in 1:length(barcode_names)) {
-			if (barcode_names[bc] != "all"){
-				lane_path<-xml_path(xml_children(xml_bc[bc]))
-				BarcodeCount<-xml_text(xml_find_all(xml, paste0(lane_path,"/BarcodeCount")))
-				PerfectBarcodeCount<-xml_text(xml_find_all(xml, paste0(lane_path,"/PerfectBarcodeCount")))
-				OneMismatchBarcodeCount<-xml_text(xml_find_all(xml, paste0(lane_path,"/OneMismatchBarcodeCount")))
-				
-				if (length(OneMismatchBarcodeCount) == 0) { OneMismatchBarcodeCount<-"-" }
-				
-				df_to_add<-data.frame(project,sample_name, barcode_names[bc], BarcodeCount, PerfectBarcodeCount, OneMismatchBarcodeCount)
-				df<-concat_df(df, df_to_add, vec.names)
-
-			}
-		}		
-	}
-}
-
-cat("RÃ©sumÃ© des informaqtions extraites (nombre d'Ã©chantillons par projet) :")
-table(df$Project)
-
-# ConcatÃ©nation des index multilples
-# Ecrire script pour gÃ©nÃ©rer ce fichier Ã  partir de la SS
-cat("\nLecture du fichier contenant le nombre d'index pour chaque Ã©chantillon.\n")
-indexNumber<-read.table(opt$indexNumber, header=TRUE, sep="\t")
-
-df2<-data.frame()
-df.defaultLine<-df[which(df$Project == "default"),]
-df2<-concat_df(df2, df.defaultLine, vec.names)
-
-cat("Rassemblement des statistiques par Ã©chantillons.\n")
-for (line in 1:dim(indexNumber)[1]){
-	mySample<-indexNumber[line, "Sample"]
-	mySampleNumber<-indexNumber[line, "NumberOfIndex"]
-	
-	# Single Index Case
-	if (mySampleNumber == 1) {
-		df.singleLine<-df[which(df$Sample == mySample),]
-		df2<-concat_df(df2, df.singleLine, vec.names)
-	}
-	# Dual et 4 Index Cases
-	else if (mySampleNumber > 1) {
-		sub.df<-df[which(str_detect(df$Sample, mySample)), ]
-		#print(sub.df)
-		# Parcours du sous-data.frame
-		for (l in 1:dim(sub.df)[1]) {
-			sub.df.project<-sub.df[l, "Project"]
-			sub.df.barcode<-sub.df[l, "Barcode"]
-			sub.df.bcCount<-as.numeric(sub.df[l, "bcCount"])
-			sub.df.bcPerfect<-as.numeric(sub.df[l, "bcPerfect"])
-			sub.df.oneMismatch<-as.numeric(sub.df[l, "bcOneMismatch"])	# bcOneMismatch
-			
-			#print(paste(mySample, ":: Traitement du barcode :", sub.df.barcode))
-			
-			if (l == 1 ) {
-				sub.df.project.toAdd<-sub.df.project
-				sub.df.barcode.toAdd<-sub.df.barcode
-				sub.df.bcCount.toAdd<-sub.df.bcCount
-				sub.df.bcPerfect.toAdd<-sub.df.bcPerfect
-				sub.df.oneMismatch.toAdd<-sub.df.oneMismatch
-			} else {
-				sub.df.barcode.toAdd<-paste0(sub.df.barcode.toAdd, "+", sub.df.barcode)
-				sub.df.bcCount.toAdd<-sub.df.bcCount.toAdd+sub.df.bcCount
-				sub.df.bcPerfect.toAdd<-sub.df.bcPerfect.toAdd+sub.df.bcPerfect
-				sub.df.oneMismatch.toAdd<-sub.df.oneMismatch.toAdd+sub.df.oneMismatch
-			}
-		}	
-		
-		# Add to data.frame
-		df_to_add<-data.frame(sub.df.project,mySample, sub.df.barcode.toAdd, sub.df.bcCount.toAdd, sub.df.bcPerfect.toAdd, sub.df.oneMismatch.toAdd)
-		df2<-concat_df(df2, df_to_add, vec.names)
-	}
-}
-	
-cat("RÃ©sumÃ© des inforamtions extraites (nombre d'Ã©chantillons par projet) :")
-table(df2$Project)
-
-## Recherche des index indeterminÃ©s
-cat("\nRecherche des index indÃ©terminÃ©s.\n")
-bcCount.min<-min(as.numeric(df2[-which(df$Project == "default"), "bcCount"]))
-bcCount.threshold<-0.8*bcCount.min	
-
-# Rechercher tous les index trouvÃ©s au moins bcCount.threshold fois
-cat("Tentative de rÃ©cupÃ©rer des Ã©chantillons parmi les index retrouvÃ©s les plus frÃ©quemment.\n")
-cat("\tLecture du DemuxSummary.\n")
-linesToSkip<-as.numeric(system(paste("grep -n Most", opt$demuxSum, "| cut -d':' -f1"), intern = TRUE))
-tabDemuxSum<-read.table(opt$demuxSum, skip=linesToSkip, col.names=c("Index", "Count"))
-
-tabUndetermined<-tabDemuxSum[which(tabDemuxSum$Count >= bcCount.threshold),]
-
-cat("\tRÃ©sumÃ© des inforamtions extraites :\n")
-cat(paste0("\tNombre d'index indÃ©terminÃ©s retrouvÃ©s :\t", dim(tabUndetermined)[1], "\n"))
-head(tabUndetermined)
-
-
-# Construction du dataFrame pour intÃ©gration Ã  df2
-df2.Projects<-unique(df2$Project)
-myProject<-df2.Projects[which(df2.Projects != "default")]
-
-### Pour chaque ligne de tabUndertermined, on ajoute une ligne Ã  df2 :
-if (dim(tabUndetermined)[1] != 0) {
-	df.tabUndetermined<-data.frame()
-	for (i in 1:dim(tabUndetermined)[1]) {
-		df.tabUndetermined.tmp<-data.frame(myProject, "Undetermined", tabUndetermined[i, "Index"], tabUndetermined[i, "Count"], "-", "-")
-		df.tabUndetermined<-concat_df(df.tabUndetermined, df.tabUndetermined.tmp, vec.names)
-	}
-	
-	df2<-concat_df(df2, df.tabUndetermined, vec.names)
-	cat("\tLes index indÃ©terminÃ©s ont Ã©tÃ© ajoutÃ© au data.table.\n")
-} else {
-	cat("\tAuncun index indÃ©terminÃ©s trouvÃ©s.\n")
-}
-
-## Soustraction des undertermined aux allOthers
-# recuperer les Count de tabUndetermined et soustraire la somme Ã  df2[which(df2$Project == "default"), "bcCount"]
-cat("\nQuelques calculs sur les donnÃ©es avant de les exporter.\n")
-cat("\tActualisation du nombre d'index 'AllOthers'.\n")
-undertermined.count<-sum(as.numeric(tabUndetermined[,"Count"]))
-df2[which(df2$Project == "default"), "bcCount"]<-as.numeric(df2[which(df2$Project == "default"), "bcCount"])-undertermined.count
-
-# Calcul pourcentages de chaque barcode
-cat("\tCalcul du pourcentage sur le nombre de fragments total.\n")
-totalOfFragments<-sum(as.numeric(df2$bcCount))
-
-percentOfFragment<-as.data.frame(round((as.numeric(df2[,"bcCount"])/totalOfFragments)*100, 2))
-rownames(percentOfFragment)<-rownames(df2)
-colnames(percentOfFragment)<-"percentageOfFragment"
-
-df2<-cbind(df2, percentOfFragment)
-
-# Export du data.frame
-cat("\nSauvegarde du data.frame.\n")
-write.table(df2, row.names = FALSE, quote = F, sep = "\t", file = paste0("DemultiplexStats_", myProject, ".csv"))
-cat(paste0("\tLe fichier suivant Ã  Ã©tÃ© crÃ©Ã© :\t", launchDir, "/DemultiplexStats_", myProject, ".csv\n"))
-cat("\nFin normale du script, on sort.\n")
+#!/usr/bin/env Rscript
+
+# R version : 4.0.4
+## module load system/R-4.0.4_gcc-9.3.0
+
+# demuxStatsFromXML.R
+# Lecture d'un fichier XML pour extraction et mise en forme des statistiques de démultiplexage (orienté 10X pour le moment)
+# Par échantillon, ce script récupère tous les index associés, le nombre de reads trouvés, dont le nombre de barcodes lus parfaitement et le nombre de barcodes lus avec un mismatch.
+# Ce script récupère aussi les index très souvent retrouvés mais non associés à un échantillon
+# Le pourcentage du nombre de fragments par échantillon sur le nombre total est calculé
+
+## --------------------
+#       PACKAGES
+## --------------------
+library('xml2')
+library('stringr')
+library('optparse')
+
+## --------------------
+#       FUNCTIONS
+## --------------------
+concat_df = function(df1, df2, col.names) {
+	colnames(df2)<-col.names
+	df_tmp<-rbind(df1, df2)
+	return(df_tmp)
+}
+
+## --------------------
+#       PARAMETERS
+## --------------------
+option_list = list(
+		# All arguments are compulsory 
+		make_option(c("-x", "--xml"), type = "character", default = NULL, metavar = "character", 
+				help = "Path to the DemultiplexingStats.xml file."),
+		make_option(c("-i", "--indexNumber"), type = "character", default = NULL, metavar = "character", 
+				help = "Path to the .indexNumber file."),
+		make_option(c("-d", "--demuxSum"), type = "character", default = NULL, metavar = "character", 
+				help = "Path to the demuxSummary.txt file.")
+)
+
+opt_parser = OptionParser(usage="Make demultiplexStats easier to read.", option_list = option_list)
+opt = parse_args(opt_parser)
+
+if(is.null(opt$xml) | is.null(opt$indexNumber) | is.null(opt$demuxSum)) {
+	stop("At least one argument is missing.\n", call. = FALSE)
+}
+
+## --------------------
+#          LOG
+## --------------------
+cat("\nLancement du script demuxStatsFromXML.R avec les options suivantes :\n")
+cat(paste0("\tFichier XML :\t\t", opt$xml, "\n"))
+cat(paste0("\tFichier IndexNumber :\t", opt$indexNumber, "\n"))
+cat(paste0("\tDemux Summary :\t\t" , opt$demuxSum, "\n"))
+launchDir<-getwd()
+cat(paste0("\nLe fichier de sortie sera Ã©crit dans le rÃ©pertoire :\t",launchDir , "\n\n"))
+
+## --------------------
+#          MAIN
+## --------------------
+xml<-read_xml(opt$xml)
+
+df<-data.frame()
+vec.names<-c("Project", "Sample", "Barcode", "bcCount", "bcPerfect", "bcOneMismatch")
+
+projects<-xml_find_all(xml, "//Project")
+
+cat("Lecture du XML\n")
+for (pr in 1:length(projects)){
+	project<-xml_attr(projects[pr], "name")
+	Samples<-xml_children(projects[pr])
+	for (sample in 1:length(Samples)){
+		sample_name<-xml_attr(Samples[sample], "name")
+		xml_bc<-xml_children(Samples[sample])
+		barcode_names<-xml_attr(xml_bc, "name")
+		for (bc in 1:length(barcode_names)) {
+			if (barcode_names[bc] != "all"){
+				lane_path<-xml_path(xml_children(xml_bc[bc]))
+				BarcodeCount<-xml_text(xml_find_all(xml, paste0(lane_path,"/BarcodeCount")))
+				PerfectBarcodeCount<-xml_text(xml_find_all(xml, paste0(lane_path,"/PerfectBarcodeCount")))
+				OneMismatchBarcodeCount<-xml_text(xml_find_all(xml, paste0(lane_path,"/OneMismatchBarcodeCount")))
+				
+				if (length(OneMismatchBarcodeCount) == 0) { OneMismatchBarcodeCount<-"-" }
+				
+				df_to_add<-data.frame(project,sample_name, barcode_names[bc], BarcodeCount, PerfectBarcodeCount, OneMismatchBarcodeCount)
+				df<-concat_df(df, df_to_add, vec.names)
+
+			}
+		}		
+	}
+}
+
+cat("RÃ©sumÃ© des informations extraites (nombre d'Ã©chantillons par projet) :")
+table(df$Project)
+
+# ConcatÃ©nation des index multilples
+# Ecrire script pour gÃ©nÃ©rer ce fichier Ã  partir de la SS
+cat("\nLecture du fichier contenant le nombre d'index pour chaque Ã©chantillon.\n")
+indexNumber<-read.table(opt$indexNumber, header=TRUE, sep="\t")
+
+df2<-data.frame()
+df.defaultLine<-df[which(df$Project == "default"),]
+df2<-concat_df(df2, df.defaultLine, vec.names)
+
+cat("Rassemblement des statistiques par Ã©chantillons.\n")
+# Build df2: one row per sample listed in the index-number table.
+# seq_len(nrow()) is used so that an empty table gives no iteration at all
+# (1:0 would run the body with the indices 1 and 0).
+for (line in seq_len(nrow(indexNumber))){
+	# Sample name and number of indexes read from the current line of the table.
+	mySample<-indexNumber[line, "Sample"]
+	mySampleNumber<-indexNumber[line, "NumberOfIndex"]
+	
+	# Single index case: the rows of df with this sample name are copied as they are.
+	if (mySampleNumber == 1) {
+		df.singleLine<-df[which(df$Sample == mySample),]
+		df2<-concat_df(df2, df.singleLine, vec.names)
+	}
+	# Dual and 4 index cases: the rows of df whose sample name contains mySample
+	# are merged into a single row (barcodes joined with "+", counts summed).
+	else if (mySampleNumber > 1) {
+		# fixed = TRUE: mySample is searched as a literal substring, so that
+		# characters such as "." or "+" in a sample name are not read as a regex.
+		# NOTE(review): still a substring match ("S1" also selects "S10") - confirm
+		# how multi-index samples are named before making it stricter.
+		sub.df<-df[which(grepl(mySample, df$Sample, fixed = TRUE)), ]
+		
+		# Stop with an explicit message rather than on an obscure data.frame()
+		# error when no row of df matches the sample.
+		if (nrow(sub.df) == 0) {
+			stop(paste0("No barcode found in the XML for sample '", mySample, "'."))
+		}
+		
+		# The project written in the merged row is the one of the last matching
+		# row of sub.df.
+		sub.df.project<-sub.df[nrow(sub.df), "Project"]
+		sub.df.barcode.toAdd<-paste(sub.df[, "Barcode"], collapse = "+")
+		# A non-numeric count (such as "-") is converted to NA, which makes the sum NA.
+		sub.df.bcCount.toAdd<-sum(as.numeric(sub.df[, "bcCount"]))
+		sub.df.bcPerfect.toAdd<-sum(as.numeric(sub.df[, "bcPerfect"]))
+		sub.df.oneMismatch.toAdd<-sum(as.numeric(sub.df[, "bcOneMismatch"]))
+		
+		# Add to data.frame
+		df_to_add<-data.frame(sub.df.project,mySample, sub.df.barcode.toAdd, sub.df.bcCount.toAdd, sub.df.bcPerfect.toAdd, sub.df.oneMismatch.toAdd)
+		df2<-concat_df(df2, df_to_add, vec.names)
+	}
+}
+	
+cat("RÃ©sumÃ© des informations extraites (nombre d'Ã©chantillons par projet) :")
+table(df2$Project)
+
+## Recherche des index indeterminÃ©s
+cat("\nRecherche des index indÃ©terminÃ©s.\n")
+bcCount.min<-min(as.numeric(df2[which(df2$Project != "default"), "bcCount"]))	# smallest count among the non-"default" rows; the test is done on df2 itself because df2 does not share the row positions of df
+bcCount.threshold<-0.8*bcCount.min	# 80% of that smallest count
+
+# Rechercher tous les index trouvÃ©s au moins bcCount.threshold fois
+cat("Tentative de rÃ©cupÃ©rer des Ã©chantillons parmi les index retrouvÃ©s les plus frÃ©quemment.\n")
+cat("\tLecture du DemuxSummary.\n")
+# Number of the first line containing "Most": every line up to and including it is skipped. Computed in R rather than with grep/cut so that the path never goes through a shell.
+linesToSkip<-grep("Most", readLines(opt$demuxSum), fixed = TRUE)[1]
+tabDemuxSum<-read.table(opt$demuxSum, skip=linesToSkip, col.names=c("Index", "Count"))
+tabUndetermined<-tabDemuxSum[which(tabDemuxSum$Count >= bcCount.threshold),]	# indexes counted at least bcCount.threshold times
+
+cat("\tRÃ©sumÃ© des inforamtions extraites :\n")
+cat(paste0("\tNombre d'index indÃ©terminÃ©s retrouvÃ©s :\t", dim(tabUndetermined)[1], "\n"))
+head(tabUndetermined)
+
+
+# Construction du dataFrame pour intÃ©gration Ã  df2
+df2.Projects<-unique(df2$Project)
+myProject<-df2.Projects[which(df2.Projects != "default")]
+
+### Pour chaque ligne de tabUndertermined, on ajoute une ligne Ã  df2 :
+if (dim(tabUndetermined)[1] != 0) {
+	df.tabUndetermined<-data.frame()
+	for (i in 1:dim(tabUndetermined)[1]) {
+		df.tabUndetermined.tmp<-data.frame(myProject, "Undetermined", tabUndetermined[i, "Index"], tabUndetermined[i, "Count"], "-", "-")
+		df.tabUndetermined<-concat_df(df.tabUndetermined, df.tabUndetermined.tmp, vec.names)
+	}
+	
+	df2<-concat_df(df2, df.tabUndetermined, vec.names)
+	cat("\tLes index indÃ©terminÃ©s ont Ã©tÃ© ajoutÃ© au data.table.\n")
+} else {
+	cat("\tAuncun index indÃ©terminÃ©s trouvÃ©s.\n")
+}
+
+## Subtract the undetermined indexes from the "AllOthers" count
+# take the Count values of tabUndetermined and subtract their sum from df2[which(df2$Project == "default"), "bcCount"]
+cat("\nQuelques calculs sur les donnÃ©es avant de les exporter.\n")
+cat("\tActualisation du nombre d'index 'AllOthers'.\n")
+undertermined.count<-sum(as.numeric(tabUndetermined[,"Count"]))
+df2[which(df2$Project == "default"), "bcCount"]<-as.numeric(df2[which(df2$Project == "default"), "bcCount"])-undertermined.count
+
+# Calcul pourcentages de chaque barcode
+cat("\tCalcul du pourcentage sur le nombre de fragments total.\n")
+totalOfFragments<-sum(as.numeric(df2$bcCount))
+
+percentOfFragment<-as.data.frame(round((as.numeric(df2[,"bcCount"])/totalOfFragments)*100, 2))
+rownames(percentOfFragment)<-rownames(df2)
+colnames(percentOfFragment)<-"percentageOfFragment"
+
+df2<-cbind(df2, percentOfFragment)
+
+# Export du data.frame
+cat("\nSauvegarde du data.frame.\n")
+write.table(df2, row.names = FALSE, quote = F, sep = "\t", file = paste0("DemultiplexStats_", myProject, ".csv"))
+cat(paste0("\tLe fichier suivant Ã  Ã©tÃ© crÃ©Ã© :\t", launchDir, "/DemultiplexStats_", myProject, ".csv\n"))
+cat("\nFin normale du script, on sort.\n")
diff --git a/bin/extractInfoForDemuxStats.pl b/bin/extractInfoForDemuxStats.pl
index ccd29bb..71218fc 100644
--- a/bin/extractInfoForDemuxStats.pl
+++ b/bin/extractInfoForDemuxStats.pl
@@ -1,124 +1,124 @@
-#!/usr/bin/perl -w
-binmode STDIN,  ':encoding(UTF-8)';
-binmode STDOUT, ':encoding(UTF-8)';
-binmode STDERR, ':encoding(UTF-8)';
-
-=head1 NAME
-
- extractInfoForDemuxStats.pl
- 
-=head1 DESCRIPTION
-
- Extract from the samplesheet of lane : (1) sample names and (2) how many index are associated. Ecriture dans un fichier .indexNumber
- 
-=head1 SYNOPSIS
-
- extractInfoForDemuxStats.pl --sampleSheet
-
-=head1 OPTIONS
-
- -sampleSheet|s : the samplesheet file
- 
-=head1 EXEMPLES
-
- perl extractInfoForDemuxStats.pl --sampleSheet 20210722_NOVASEQ6000_IEM_H3GHCDRXY_Lane1.csv 
-
-=head1 AUTHOR
-
- Jules Sabban pour Plateforme genomique Toulouse (get-plage.bioinfo@genotoul.fr)
- 
-=cut
-
-###################################################################
-#
-#						LIBRAIRIES
-#
-###################################################################
-use strict;
-use Getopt::Long;
-use utf8;
-
-###################################################################
-#
-#						INITIALISATION
-#
-####################################################################
-my $sampleSheet="";
-
-GetOptions ('sampleSheet=s' => \$sampleSheet,
-);
-
-if ($sampleSheet eq "") {
-	print STDERR ("Please, give a file !");
-	print STDERR ("USAGE : extractInfoForDemuxStats.pl --sampleSheet <File>\n");
-	exit 0;
-}
-
-#Lane,Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,Sample_Project,Description
-#Lane,Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,I5_Index_ID,index2,Sample_Project,Description
-
-# recuperer le nombre de fois oÃ¹ "*Index_ID" est Ã©crit et leur position
-# rÃ©cupere la position du sample_ID
-#Pour chaque ligne recupÃ©rer le ou les index_ID
-#Si index_ID =~ XX-XX-XX alors #index = 4
-#Sinon #index = 1
-#Faire la somme des #index par ligne
-#Ecrire le nom de l'Ã©chantillon et le nombre d'index associÃ©
-#Ne pas oublier l'entete du fichier de sortie
-
-
-### Lecture de la samplesheet :
-open (my $handle, '<', $sampleSheet) or exit 1;
-chomp(my @lines = <$handle>);
-close $handle;
-
-my $projectName="";
-my $sample_ID_position;
-my @index_ID_position=();
-my %sample_info=();
-
-
-foreach my $line (@lines) {
-	my @cur_line = split(',', $line);
-	
-	# Recherche du nom du projet
-	if ($line =~ /^Infos/) {
-		$projectName = $cur_line[1];
-	}
-	
-	# Recherche des positions des Sample_ID et des Index_ID
-	elsif ($line =~ /^Lane/) {
-		while ( my ( $indice, $valeur ) = each @cur_line ) { 
-			if ($valeur eq "Sample_ID") { $sample_ID_position=$indice;}
-			if ($valeur =~ /Index_ID$/) { push(@index_ID_position, $indice);}
-		}
-	}
-
-	# Association Sample_ID avec sont nombre d'index
-	elsif ($line =~ m/^(\d),/) {
-		my $sample_ID = $cur_line[$sample_ID_position];
-		my $index_number=0;
-		my @cur_index_ID = ();
-		foreach my $pos (@index_ID_position) {
-			if ($cur_line[$pos] =~ /\w{2}-\w{2}-\w{2}/) { $index_number = 4; } else { $index_number += 1; }
-		}
-		$sample_info{$sample_ID} = $index_number;
-	}
-}
-
-# ecriture du fichier de sortie :
-my $content ="";
-$content.="Sample\tNumberOfIndex\n";
-foreach my $k (keys(%sample_info)) {
-   $content.="$k\t$sample_info{$k}\n";
-}
-
-my $file2write = "$projectName.indexNumber";
-
-open(my $fh, '>', $file2write) or exit 1;
-print $fh $content;
-close $fh;
-
-
-
-
+#!/usr/bin/perl -w
+binmode STDIN,  ':encoding(UTF-8)';
+binmode STDOUT, ':encoding(UTF-8)';
+binmode STDERR, ':encoding(UTF-8)';
+
+=head1 NAME
+
+ extractInfoForDemuxStats.pl
+ 
+=head1 DESCRIPTION
+
+ Extract from the samplesheet of a lane: (1) the sample names and (2) the number of indexes associated with each sample. The result is written to a <project name>.indexNumber file.
+ 
+=head1 SYNOPSIS
+
+ extractInfoForDemuxStats.pl --sampleSheet
+
+=head1 OPTIONS
+
+ -sampleSheet|s : the samplesheet file
+ 
+=head1 EXEMPLES
+
+ perl extractInfoForDemuxStats.pl --sampleSheet 20210722_NOVASEQ6000_IEM_H3GHCDRXY_Lane1.csv 
+
+=head1 AUTHOR
+
+ Jules Sabban pour Plateforme genomique Toulouse (get-plage.bioinfo@genotoul.fr)
+ 
+=cut
+
+###################################################################
+#
+#						LIBRAIRIES
+#
+###################################################################
+use strict;
+use Getopt::Long;
+use utf8;
+
+###################################################################
+#
+#						INITIALISATION
+#
+####################################################################
+# Path of the samplesheet to parse (--sampleSheet); empty until given on the command line.
+my $sampleSheet = "";
+
+# An invalid option or a missing samplesheet is a usage error: the usage is printed
+# on STDERR and the exit status is non-zero so that the caller sees the failure.
+if (!GetOptions('sampleSheet=s' => \$sampleSheet) || $sampleSheet eq "") {
+	print STDERR "Please, give a file !\n";
+	print STDERR "USAGE : extractInfoForDemuxStats.pl --sampleSheet <File>\n";
+	exit 1;
+}
+
+#Lane,Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,Sample_Project,Description
+#Lane,Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,I5_Index_ID,index2,Sample_Project,Description
+
+# Find how many "*Index_ID" columns the header has, and their positions
+# Find the position of the Sample_ID column
+# For each data line, read the index ID(s)
+# If an index ID looks like XX-XX-XX then #index = 4
+# Otherwise #index = 1
+# Sum the #index values of the line
+# Write the sample name and its number of indexes
+# Do not forget the header of the output file
+
+
+### Read the whole samplesheet; report the reason and exit 1 when it cannot be opened.
+open (my $handle, '<', $sampleSheet) or do { print STDERR "Cannot open samplesheet '$sampleSheet': $!\n"; exit 1; };
+my @lines = <$handle>;
+close $handle;
+# Remove the line ending, including the "\r" of a samplesheet saved with CRLF,
+# which would otherwise stay in the last field of each line (e.g. the project name).
+s/\r?\n?\z// for @lines;
+
+my $projectName="";        # second field of the "Infos" line
+my $sample_ID_position;    # column index of Sample_ID in the "Lane" header line
+my @index_ID_position=();  # column indexes of the columns whose name ends with Index_ID
+my %sample_info=();        # Sample_ID => number of indexes
+
+foreach my $line (@lines) {
+	my @cur_line = split(',', $line);
+
+	# Project name
+	if ($line =~ /^Infos/) {
+		$projectName = $cur_line[1];
+	}
+	# Header line: record the position of Sample_ID and of every *Index_ID column
+	elsif ($line =~ /^Lane/) {
+		foreach my $indice (0 .. $#cur_line) {
+			if ($cur_line[$indice] eq "Sample_ID") { $sample_ID_position=$indice;}
+			if ($cur_line[$indice] =~ /Index_ID$/) { push(@index_ID_position, $indice);}
+		}
+	}
+	# Data line (starts with a one-digit lane number): count the indexes of the sample
+	elsif ($line =~ m/^(\d),/) {
+		my $sample_ID = $cur_line[$sample_ID_position];
+		my $index_number=0;
+		foreach my $pos (@index_ID_position) {
+			# An index ID of the form XX-XX-XX sets the count to 4; any other one adds 1.
+			if ($cur_line[$pos] =~ /\w{2}-\w{2}-\w{2}/) { $index_number = 4; } else { $index_number += 1; }
+		}
+		$sample_info{$sample_ID} = $index_number;
+	}
+}
+
+# Write the output file "<project name>.indexNumber": a header line, then one
+# "sample<TAB>number of indexes" line per sample. The samples are sorted so
+# that the same samplesheet always gives the same file.
+my $content = "Sample\tNumberOfIndex\n";
+foreach my $k (sort keys(%sample_info)) {
+   $content.="$k\t$sample_info{$k}\n";
+}
+my $file2write = "$projectName.indexNumber";
+
+open(my $fh, '>', $file2write) or do { print STDERR "Cannot write '$file2write': $!\n"; exit 1; };
+print $fh $content;
+close $fh or do { print STDERR "Cannot close '$file2write': $!\n"; exit 1; };
+
+
+
+
diff --git a/bin/extractInfoForReadSets.pl b/bin/extractInfoForReadSets.pl
index 36bdf05..b9a9dc1 100644
--- a/bin/extractInfoForReadSets.pl
+++ b/bin/extractInfoForReadSets.pl
@@ -1,105 +1,105 @@
-#!/usr/bin/perl -w
-binmode STDIN,  ':encoding(UTF-8)';
-binmode STDOUT, ':encoding(UTF-8)';
-binmode STDERR, ':encoding(UTF-8)';
-
-=head1 NAME
-
- extractInfoForReaSets.pl
- 
-=head1 DESCRIPTION
-
- Extract (from samplesheet and RunNGL-Bi.created) and emit relevant informations for readSets creation
- 
-=head1 SYNOPSIS
-
- extractInfoForReaSet.pl --sampleSheet --runNGLBi
-
-=head1 OPTIONS
-
- -sampleSheet|s : the samplesheet file
- -runNGLBi|s : the RunNGL-Bi.created file
- 
-=head1 EXEMPLES
-
- perl extractInfoForReaSet.pl --sampleSheet 20210607_NOVASEQ6000_BULKDEMUX_HFMH7DRXY.csv --runNGLBi RunNGL-Bi.created
-
-=head1 AUTHOR
-
- Jules Sabban pour Plateforme genomique Toulouse (get-plage.bioinfo@genotoul.fr)
- 
-=cut
-
-###################################################################
-#
-#						LIBRAIRIES
-#
-###################################################################
-use strict;
-use Getopt::Long;
-use utf8;
-
-###################################################################
-#
-#						INITIALISATION
-#
-###################################################################
-my $sampleSheet="";
-my $runNGLBiFile="";
-
-GetOptions ('samplesheet=s' => \$sampleSheet,
-			'runNGLBi=s'=> \$runNGLBiFile,
-);
-
-if ($sampleSheet eq "" || $runNGLBiFile eq "") {
-	print STDERR ("At least one argument is missing !");
-	print STDERR ("USAGE : extractInfoForReaSet.pl --sampleSheet <File> --runNGLBi <File>\n");
-	exit 0;
-}
-
-my $laneNumber;
-my $experimentName;
-my $runName;
-my $content;
-my $file2write="readSetCreation.info";
-
-###################################################################
-#
-#						MAIN
-#
-###################################################################
-## Extract informations from files
-### SamplSheet
-#### ExperimentName
-my $experimentName_ligne = `grep "Experiment Name" $sampleSheet | head -1`;
-($experimentName) = $experimentName_ligne =~ m/Experiment Name,(.+)$/;
-
-#### LaneNumber
-
-if ($sampleSheet =~ "_MISEQ_") {
-	$laneNumber = "1";
-} else {
-	open (my $handle, '<', $sampleSheet) or exit 1;
-	chomp(my @lines = <$handle>);
-	close $handle;
-	
-	foreach my $line (@lines) {
-		if ($line =~ m/^(\d),/) {
-			($laneNumber) = $line =~ m/^(\d),/;
-			last;
-		}
-	}
-}
-### RunNGL-Bi.created
-$runName = `cat $runNGLBiFile`;
-chomp($runName);
-
-## Write exit file
-$content.="ExperimentName;$experimentName\n";
-$content.="NGLBiRunName;$runName\n";
-$content.="LaneNumber;$laneNumber\n";
-
-open(my $fh, '>', $file2write) or exit 1;
-print $fh $content;
-close $fh;
-
+#!/usr/bin/perl -w
+binmode STDIN,  ':encoding(UTF-8)';
+binmode STDOUT, ':encoding(UTF-8)';
+binmode STDERR, ':encoding(UTF-8)';
+
+=head1 NAME
+
+ extractInfoForReadSets.pl
+ 
+=head1 DESCRIPTION
+
+ Extract (from samplesheet and RunNGL-Bi.created) and emit relevant informations for readSets creation
+ 
+=head1 SYNOPSIS
+
+ extractInfoForReadSets.pl --sampleSheet <File> --runNGLBi <File>
+
+=head1 OPTIONS
+
+ -sampleSheet|s : the samplesheet file
+ -runNGLBi|r : the RunNGL-Bi.created file
+ 
+=head1 EXEMPLES
+
+ perl extractInfoForReadSets.pl --sampleSheet 20210607_NOVASEQ6000_BULKDEMUX_HFMH7DRXY.csv --runNGLBi RunNGL-Bi.created
+
+=head1 AUTHOR
+
+ Jules Sabban pour Plateforme genomique Toulouse (get-plage.bioinfo@genotoul.fr)
+ 
+=cut
+
+###################################################################
+#
+#						LIBRAIRIES
+#
+###################################################################
+use strict;
+use Getopt::Long;
+use utf8;
+
+###################################################################
+#
+#						INITIALISATION
+#
+###################################################################
+my $sampleSheet="";     # --sampleSheet : samplesheet file
+my $runNGLBiFile="";    # --runNGLBi    : RunNGL-Bi.created file
+
+# Option names are matched case-insensitively by Getopt::Long (its default),
+# so --sampleSheet is accepted as well as --samplesheet.
+GetOptions ('samplesheet=s' => \$sampleSheet,
+			'runNGLBi=s'=> \$runNGLBiFile,
+);
+
+# A missing argument is a usage error: it is reported with a non-zero exit status.
+if ($sampleSheet eq "" || $runNGLBiFile eq "") {
+	print STDERR ("At least one argument is missing !\n");
+	print STDERR ("USAGE : extractInfoForReadSets.pl --sampleSheet <File> --runNGLBi <File>\n");
+	exit 1;
+}
+
+my ($laneNumber, $experimentName, $runName, $content);	# values gathered in the MAIN section
+my $file2write="readSetCreation.info";	# name of the output file
+
+###################################################################
+#
+#						MAIN
+#
+###################################################################
+## Extract information from the input files. They are read by Perl itself rather than through grep/cat, so that their paths are never interpreted by a shell.
+### SampleSheet
+open (my $ssHandle, '<', $sampleSheet) or do { print STDERR "Cannot open '$sampleSheet': $!\n"; exit 1; };
+my @lines = <$ssHandle>;
+close $ssHandle;
+s/\r?\n?\z// for @lines;	# remove the line ending, "\r\n" included
+#### ExperimentName: text after "Experiment Name," on the first line containing "Experiment Name"
+my ($experimentName_ligne) = grep { /Experiment Name/ } @lines;
+($experimentName) = $experimentName_ligne =~ m/Experiment Name,(.+)$/ if defined $experimentName_ligne;
+$experimentName = "" unless defined $experimentName;	# written as an empty value when not found
+#### LaneNumber: 1 when the file name contains _MISEQ_, otherwise the single digit opening the first data line
+if ($sampleSheet =~ "_MISEQ_") {
+	$laneNumber = "1";
+} else {
+	foreach my $line (@lines) {
+		if ($line =~ m/^(\d),/) { $laneNumber = $1; last; }
+	}
+}
+$laneNumber = "" unless defined $laneNumber;
+
+### RunNGL-Bi.created: whole content of the file, minus its final newline
+open (my $runHandle, '<', $runNGLBiFile) or do { print STDERR "Cannot open '$runNGLBiFile': $!\n"; exit 1; };
+$runName = do { local $/; <$runHandle> };
+close $runHandle;
+$runName = "" unless defined $runName;
+chomp($runName);
+
+## Write the output file
+$content.="ExperimentName;$experimentName\n";
+$content.="NGLBiRunName;$runName\n";
+$content.="LaneNumber;$laneNumber\n";
+open(my $fh, '>', $file2write) or do { print STDERR "Cannot write '$file2write': $!\n"; exit 1; };
+print $fh $content;
+close $fh;
+
diff --git a/conf/prod.config b/conf/prod.config
index f46e5fb..d1e2306 100644
--- a/conf/prod.config
+++ b/conf/prod.config
@@ -1,34 +1,34 @@
-// ========================================
-//				PROCESSES
-//=========================================
-process {
-	withLabel: ngl_bi {
-		executor = 'local'
-		beforeScript = "export NGL_BI_CLIENT='/save/sbsuser/scripts-ngs/NGL-Bi_client_Current'"
-		//errorStrategy = { 'ignore' }
-	}
-	
-	withLabel: samtools {
-		cpus = { 6 * task.attempt }
-	    memory = { 8.GB * task.attempt }
-	    time = { 3.h * task.attempt }
-	}
-	
-	withLabel: qualimap {
-		cpus = { 8 * task.attempt }
-	    memory = { 2.GB * task.attempt }
-	    time = { 3.h * task.attempt }
-	}
-	
-	
-	withName: BWA_ALIGNMENT {
-		cpus = { 6 * task.attempt }
-	    memory = { 8.GB * task.attempt }
-	    time = { 3.d * task.attempt }
-	}
-}
-
-// ========================================
-//				CONFIG FILES
-//=========================================
+// ========================================
+//				PROCESSES
+//=========================================
+process {
+	// 'ngl_bi' label: local executor, NGL_BI_CLIENT exported before the task script.
+	withLabel: ngl_bi {
+		executor = 'local'
+		beforeScript = "export NGL_BI_CLIENT='/save/sbsuser/scripts-ngs/NGL-Bi_client_Current'"
+		//errorStrategy = { 'ignore' }
+	}
+	// 'samtools' label: cpus, memory and time are each multiplied by task.attempt.
+	withLabel: samtools {
+		cpus = { 6 * task.attempt }
+	    memory = { 8.GB * task.attempt }
+	    time = { 3.h * task.attempt }
+	}
+	// 'qualimap' label: same scaling by task.attempt.
+	withLabel: qualimap {
+		cpus = { 8 * task.attempt }
+	    memory = { 2.GB * task.attempt }
+	    time = { 3.h * task.attempt }
+	}
+	// Process named BWA_ALIGNMENT: the time limit is expressed in days (3.d).
+	withName: BWA_ALIGNMENT {
+		cpus = { 6 * task.attempt }
+	    memory = { 8.GB * task.attempt }
+	    time = { 3.d * task.attempt }
+	}
+}
+
+// ========================================
+//				CONFIG FILES
+//=========================================
 includeConfig "$baseDir/conf/report.config"
\ No newline at end of file
diff --git a/conf/report.config b/conf/report.config
index 2c3ad2e..385b8ec 100644
--- a/conf/report.config
+++ b/conf/report.config
@@ -1,33 +1,33 @@
-// ========================================
-//				REPORTS
-//=========================================
-timeline {
-	enabled = true
-	file = "${params.outdir}/pipeline_info/execution_timeline.html"
-}
-
-trace {
-	enabled = true
-	file = "${params.outdir}/pipeline_info/execution_trace.txt"
-	fields = 'task_id,native_id,name,status,exit,realtime,%cpu,%mem,duration,script,rss'	// verifier ajout des champs
-}
-
-report {
-	enabled = true
-	file = "${params.outdir}/pipeline_info/execution_report.html"
-}
-
-dag {
-	enabled = true
-	file = "${params.outdir}/pipeline_info/pipeline_dag.svg"
-}
-
-manifest {
-	name = 'get-nextflow-ngl-bi/wf-nanopore-nf'
-	author = 'Jules Sabban'
-	homePage = 'https://forgemia.inra.fr/get-nextflow-ngl-bi/wf-illumina-nf'
-	description = 'Workflow for Nanopore data quality control'
-	mainScript = 'main.nf'
-	nextflowVersion = '>=0.32.0'
-	version = '1.0.0'
+// ========================================
+//				REPORTS
+//=========================================
+timeline {	// execution timeline, written under <outdir>/pipeline_info
+	enabled = true
+	file = "${params.outdir}/pipeline_info/execution_timeline.html"
+}
+
+trace {	// execution trace, restricted to the fields listed below
+	enabled = true
+	file = "${params.outdir}/pipeline_info/execution_trace.txt"
+	fields = 'task_id,native_id,name,status,exit,realtime,%cpu,%mem,duration,script,rss'	// TODO: check the added fields
+}
+
+report {	// execution report, written under <outdir>/pipeline_info
+	enabled = true
+	file = "${params.outdir}/pipeline_info/execution_report.html"
+}
+
+dag {	// pipeline graph, written as SVG under <outdir>/pipeline_info
+	enabled = true
+	file = "${params.outdir}/pipeline_info/pipeline_dag.svg"
+}
+
+manifest {	// metadata of the pipeline
+	name = 'get-nextflow-ngl-bi/wf-illumina-nf'	// project path, consistent with homePage
+	author = 'Jules Sabban'
+	homePage = 'https://forgemia.inra.fr/get-nextflow-ngl-bi/wf-illumina-nf'
+	description = 'Workflow for Illumina data quality control'
+	mainScript = 'main.nf'
+	nextflowVersion = '>=0.32.0'	// NOTE(review): main.nf uses `include`/`workflow` - confirm that this minimum version supports them
+	version = '1.0.0'
 }
\ No newline at end of file
diff --git a/conf/test.config b/conf/test.config
index 8a01c75..6f51d0e 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -1,28 +1,34 @@
-// ========================================
-//				PROCESSES
-//=========================================
-process {
-	withLabel: ngl_bi {
-		executor = 'local'
-		beforeScript = "export NGL_BI_CLIENT='/work/sbsuser/test/jules/ngl-bi_client'"	// test
-		//errorStrategy = { 'ignore' }
-	}
-	
-	withLabel: samtools {
-		cpus = { 1 * task.attempt }
-	    memory = { 2.GB * task.attempt }
-	    time = { 10.m * task.attempt }
-	}
-	
-	withLabel: qualimap {
-		cpus = { 1 * task.attempt }
-	    memory = { 2.GB * task.attempt }
-	    time = { 10.m * task.attempt }
-	}
-}
-
-
-// ========================================
-//				CONFIG FILES
-//=========================================
+// ========================================
+//				PROCESSES
+//=========================================
+process {
+	withLabel: ngl_bi {	// local executor, NGL_BI_CLIENT exported before the task script
+		executor = 'local'
+		beforeScript = "export NGL_BI_CLIENT='/work/sbsuser/test/jules/ngl-bi_client'"	// test
+		//errorStrategy = { 'ignore' }
+	}
+	// 'samtools' label: cpus, memory and time are each multiplied by task.attempt.
+	withLabel: samtools {
+		cpus = { 1 * task.attempt }
+	    memory = { 2.GB * task.attempt }
+	    time = { 10.m * task.attempt }
+	}
+	// 'qualimap' label: same scaling by task.attempt.
+	withLabel: qualimap {
+		cpus = { 1 * task.attempt }
+	    memory = { 2.GB * task.attempt }
+	    time = { 10.m * task.attempt }
+	}
+	// Process named BWA_ALIGNMENT.
+	withName: BWA_ALIGNMENT {
+		cpus = { 3 * task.attempt }
+	    memory = { 2.GB * task.attempt }
+	    time = { 1.h * task.attempt }
+	}
+}
+
+
+// ========================================
+//				CONFIG FILES
+//=========================================
 includeConfig "$baseDir/conf/report.config"
\ No newline at end of file
diff --git a/main.nf b/main.nf
index 4ec72b3..9de8476 100644
--- a/main.nf
+++ b/main.nf
@@ -26,8 +26,7 @@ This script is based on :
     NAMED WORKFLOW FOR PIPELINE
 ========================================================================================
 */
-
-include { ILLUMINA_QC } from './workflow/illumina_qc.nf'
+include { ILLUMINA_QC } from "$baseDir/workflow/illumina_qc.nf"
 
 workflow QC_ANALYSIS {
     ILLUMINA_QC()
diff --git a/modules/local/module_NGL-Bi.nf b/modules/local/module_NGL-Bi.nf
index 654615f..96f29d5 100644
--- a/modules/local/module_NGL-Bi.nf
+++ b/modules/local/module_NGL-Bi.nf
@@ -1,54 +1,54 @@
-params.outdir=''
-
-
-process prepareReadSetCreation {
-	publishDir path: "${params.outdir}/NGLBi" , mode: 'copy'
-	
-	input:
-		path sampleSheet
-		path runNGLBiCreated
-		
-	output:
-		file 'readSetCreation.info'
-		
-	script:
-	"""
-		extractInfoForReadSets.pl --sampleSheet $sampleSheet --runNGLBi $runNGLBiCreated
-	"""
-}
-
-process readsetNGLBiCreation {
-	publishDir path: "${params.outdir}/NGLBi" , mode: 'copy', pattern: '*.created'
-	
-	executor = 'local'
-	beforeScript = "export ENV_NGL='/save/sbsuser/scripts-ngs/NGL-Bi_client_Current/IG/SystemeInteractionNGL-Bi/'"
-	errorStrategy = { 'ignore' }
-	
-	input :
-		path infoFile
-	
-	output :
-		path 'ReadsetsNGL-Bi.created', emit: readSetFile
-		path 'ReadsetsNGL-BiCreation.log', emit: readSetLog
-
-	script :
-	"""
-		createNGLBiReadSets.pl --infoFile $infoFile --env_ngl_bi \$ENV_NGL 2> ReadsetsNGL-BiCreation.log 1> ReadsetsNGL-Bi.created
-		
-	"""
-}
-
-process checkErrorFromNGLBi {
-	publishDir path: "${params.outdir}/NGLBi" , mode: 'copy'
-	
-	input:
-		path logFile
-		
-	output:
-		path 'ReadsetsNGL-BiCreation.log'
-		
-	script:
-	"""
-		checkErrorNGLScripts.pl --file $logFile
-	"""
+params.outdir=''
+
+
+// Builds readSetCreation.info from the samplesheet and the RunNGL-Bi.created file.
+process prepareReadSetCreation {
+	publishDir path: "${params.outdir}/NGLBi" , mode: 'copy'
+	input:
+		path sampleSheet
+		path runNGLBiCreated
+	output:
+		// 'path' (rather than the older 'file' qualifier), like the other outputs of this module
+		path 'readSetCreation.info'
+		
+	script:
+	"""
+		extractInfoForReadSets.pl --sampleSheet $sampleSheet --runNGLBi $runNGLBiCreated
+	"""
+}
+
+// Runs createNGLBiReadSets.pl: stdout goes to ReadsetsNGL-Bi.created, stderr to ReadsetsNGL-BiCreation.log.
+process readsetNGLBiCreation {
+	publishDir path: "${params.outdir}/NGLBi" , mode: 'copy', pattern: '*.created'
+	// NOTE(review): written as assignments (name = value) while processes of module_core.nf use the directive form (e.g. executor 'slurm') - confirm that Nextflow applies these three settings.
+	executor = 'local'
+	beforeScript = "export ENV_NGL='/save/sbsuser/scripts-ngs/NGL-Bi_client_Current/IG/SystemeInteractionNGL-Bi/'"
+	errorStrategy = { 'ignore' }
+	
+	input :
+		path infoFile
+	output :
+		path 'ReadsetsNGL-Bi.created', emit: readSetFile
+		path 'ReadsetsNGL-BiCreation.log', emit: readSetLog
+	// \$ENV_NGL is escaped so that it is left for the shell to expand in the task script.
+	script :
+	"""
+		createNGLBiReadSets.pl --infoFile $infoFile --env_ngl_bi \$ENV_NGL 2> ReadsetsNGL-BiCreation.log 1> ReadsetsNGL-Bi.created
+		
+	"""
+}
+
+// Runs checkErrorNGLScripts.pl on the readset-creation log and publishes that log.
+process checkErrorFromNGLBi {
+	publishDir path: "${params.outdir}/NGLBi" , mode: 'copy'
+	input:
+		path logFile
+		
+	output:
+		path 'ReadsetsNGL-BiCreation.log'
+		
+	script:
+	"""
+		checkErrorNGLScripts.pl --file $logFile
+	"""
 }
\ No newline at end of file
diff --git a/modules/local/module_core.nf b/modules/local/module_core.nf
index b5d43fb..6ec5bc9 100644
--- a/modules/local/module_core.nf
+++ b/modules/local/module_core.nf
@@ -1,262 +1,262 @@
-//params.sequencer = 'MiSeq'
-//params.rawdata_location = '/home/sbsuser/work/Nextflow/wf-illumina-nf/data_test/MiSeq/210713_M07406_0007_000000000-JRCVF_bad'
-params.outdir=''
-banksForConta = [ ]
-
-//mismatchNumber= params.sequencer == 'MiSeq'? 0 : 1
-
-
-process decoupageSS {
-	// Not used anymore
-	publishDir path: "${params.outdir}/SampleSheets" , mode: 'copy'
-	
-	input:
-		path multiSS
-		
-	output:
-		path '*'
-		
-	shell:
-	"""
-		extractReads.pl $multiSS NovaSeq
-	
-	"""
-}
-
-
-
-process maskMaker {
-	publishDir path: "${params.outdir}/Demux" , mode: 'copy'
-	
-	input:
-		path SampleSheet
-		path RunInfoXML
-	
-	output:
-		path 'Run.conf'
-	
-	script:
-	"""
-		extractInfo.pl -s $SampleSheet -r $RunInfoXML
-	
-	"""
-}
-
-process bcl2fastq {
-	publishDir path: "${params.outdir}/Demux/Reads" , mode: 'copy'
-	
-	echo=true
-	
-	input:
-		path SampleSheet
-		path Runconf
-		val mismatchNumber
-		path rawdata_location
-		
-	//output:
-		//path "*"
-		
-	shell:
-	"""
-		mask=\$(grep 'MASQUE' !{Runconf} | cut -d'=' -f2)
-		echo "bcl2fastq -p 10 -r 4 -w 4 \${mask} --barcode-mismatches !{mismatchNumber} --output-dir ./ -R !{rawdata_location} --sample-sheet !{SampleSheet} -l DEBUG"
-		
-	"""
-}
-
-process extractInfoForDemuxStats {
-	publishDir path: "${params.outdir}/Demux/Stats" , mode: 'copy'
-	
-	input:
-		path SampleSheet
-	
-	output:
-		path "*.indexNumber"
-	
-	script:
-	"""
-		extractInfoForDemuxStats.pl --sampleSheet $SampleSheet
-	
-	"""
-}
-
-process demultiplexStats {
-	publishDir path: "${params.outdir}/Demux/Stats" , mode: 'copy'
-	
-	module 'system/R-4.0.4_gcc-9.3.0'
-	
-	input:
-		path DemuxStatXML
-		path IndexNumberFile
-		path DemuxSummary
-	
-	output:
-		path 'demultiplexStats.log', emit: log
-		path "DemultiplexStats_*", emit: demultiplexStatsCSV
-	
-	script:
-	"""
-		Rscript /home/sbsuser/work/Nextflow/wf-illumina-nf/wf-illumina-nf/bin/demuxStatsFromXML.R --xml $DemuxStatXML --indexNumber $IndexNumberFile --demuxSum $DemuxSummary > demultiplexStats.log
-	
-	"""
-}
-
-process fastqc {
-	publishDir path: "${params.outdir}/ReadsStats" , mode: 'copy', pattern: '*.zip', saveAs: { filename -> "${name}_fastqc.zip" }
-	publishDir path: "${params.outdir}/ReadsStats" , mode: 'copy', pattern: '*.html', saveAs: { filename -> "${name}.html" }
-	
-	errorStrategy { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' }
-	maxRetries 3
-	module 'bioinfo/FastQC_v0.11.7'
-	executor 'slurm'
-	queue 'wflowq'
-	cpus 1 //{ 1 * task.attempt }
-	time { 45.m * task.attempt }
-	memory '1.GB'
-	
-	tag " $name"
-	
-	input:
-		tuple val(name), path(read)
-		
-	output:
-		tuple val(name), path("*_fastqc.{zip,html}") , emit: report
-		// path log files
-		
-	script:
-	"""
-		fastqc -t $task.cpus --nogroup --noextract --outdir ./ ${read}
-	"""
-}
-
-
-process illuminaFilter {
-	publishDir path: "${params.outdir}/IlluminaFilter" , mode: 'copy', pattern: '*.gz'/*, saveAs: { filename -> "${name}.fastq.gz" }*/
-	
-	module 'bioinfo/fastq_illumina_filter-0.1'
-	executor 'slurm'
-	queue 'wflowq'
-	cpus { 1 * task.attempt }
-	time { 1.h * task.attempt }
-	memory '1.GB'
-	
-	tag " $name"
-	
-	input:
-		tuple val(name), path(read)
-	
-	output:
-		tuple val("$name"), path("*.fastq.gz"), emit: reads
-		path("*.output"), emit: log
-	
-	script:
-	"""
-		zcat $read | fastq_illumina_filter --keep N -v 2> ${name}.output | gzip -c -f > ${name}_filtered.fastq.gz	
-	"""
-	
-}
-
-process search_conta_bwa {
-	// aln command uses ~3.2GB memory and the sampe command uses ~5.4GB
-	publishDir path: "${params.outdir}/ContaminationSearch/tmp" , mode: 'copy'
-	module 'bioinfo/bwa-0.7.17'
-	time { 20.m * task.attempt }
-	memory { 5.GB * task.attempt }
-	
-	input:
-		tuple val(name), path(read)
-		each genomeRef
-		
-	output:
-		tuple val("${name}_${genomeName}"), path("${name}_${genomeName}.sam"), emit: sam
-		
-	script:
-	genomeName=file(genomeRef).simpleName
-	"""
-		bwa aln $genomeRef $read 2>> ${name}_${genomeName}.err | bwa samse $genomeRef - $read > ${name}_${genomeName}.sam 2>> ${name}_${genomeName}.err
-	"""
-}
-
-process BWA_ALIGNMENT {
-	publishDir path: "${params.outdir}/ContaminationSearch/tmp" , mode: 'copy'
-	
-	tag " $sample"
-	
-	input:
-		tuple val(sample), path(reads)
-		each genomeRef
-		
-	output:
-		//tuple val(sample), path("*.log"), emit: log
-		tuple val("${sample}_${genomeName}"), path("${sample}_${genomeName}.sam"), emit: sam
-		
-	script:
-	genomeName=file(genomeRef).simpleName
-	"""
-		bwa mem ${genomeRef} ${reads} 1> ${sample}_${genomeName}.sam 2> ${sample}.log
-	"""
-}
-
-process search_conta_samtools {
-	publishDir path: "${params.outdir}/ContaminationSearch" , mode: 'copy'
-	
-	module 'bioinfo/samtools-1.9'
-	time { 10.m * task.attempt }
-	
-	tag " $sample"
-	
-	input:
-		tuple val(name), path("*")
-	
-	output:
-		//tuple val("$name"), path("*")
-		path("*.txt")
-	
-	script:
-	"""
-		samtools view -SF 260 ${name}.sam 2>> ${name}.err | cut -f1 - 2>> ${name}.err | sort - > ${name}.txt 2>> ${name}.err
-	"""
-}
-
-process search_conta_summary {
-	publishDir path: "${params.outdir}/ContaminationSearch" , mode: 'copy'
-	
-	time { 10.m * task.attempt }
-	memory '1.GB'
-	
-	tag " $sample"
-	
-	input:
-		//tuple val(name), path("*")
-		path("*")
-		
-	output:
-		path("*.yaml")
-		
-	script:
-	"""
-		contaCounter.pl ./
-	"""
-}
-
-
-process FASTQSCREEN {
-	publishDir path: "${params.outdir}/ContaminationSearch/FastQ-Screen", mode: 'copy'
-	
-	module 'bioinfo/FastQ-Screen-0.15.2'
-	
-	tag " $sample"
-	
-	input:
-		tuple val(sample), path(reads)
-	
-	output:
-		tuple val(sample), path("*.txt"), emit: report
-	
-	script:
-	"""
-		fastq_screen $reads --conf $launchDir/../fastq_screen.conf
-	"""
+params.outdir=''	// utile ?
+banksForConta = [ ]	// utile ?
+
+//mismatchNumber= params.sequencer == 'MiSeq'? 0 : 1	// utile ?
+
+// Runs extractInfoForDemuxStats.pl on the samplesheet; outputs are copied to <outdir>/Demux/Stats.
+process extractInfoForDemuxStats {
+	publishDir path: "${params.outdir}/Demux/Stats" , mode: 'copy'
+	input:
+		path SampleSheet
+	
+	output:
+		path "*.indexNumber"	// the "<project name>.indexNumber" file written by the script
+	
+	script:
+	"""
+		extractInfoForDemuxStats.pl --sampleSheet $SampleSheet
+	
+	"""
+}
+
+// Runs bin/demuxStatsFromXML.R on the demultiplexing XML, the index-number file and the DemuxSummary.
+process demultiplexStats {
+	publishDir path: "${params.outdir}/Demux/Stats" , mode: 'copy'
+	module 'system/R-4.0.4_gcc-9.3.0'
+	
+	input:
+		path DemuxStatXML
+		path IndexNumberFile
+		path DemuxSummary
+	
+	output:
+		path 'demultiplexStats.log', emit: log
+		path "DemultiplexStats_*", emit: demultiplexStatsCSV
+	script:
+	// The R script is located from the pipeline directory (baseDir) rather than through an absolute path tied to one installation.
+	"""
+		Rscript ${baseDir}/bin/demuxStatsFromXML.R --xml $DemuxStatXML --indexNumber $IndexNumberFile --demuxSum $DemuxSummary > demultiplexStats.log
+	
+	"""
+}
+
+// Runs FastQC on one read file; the zip and html reports are published under <outdir>/ReadsStats.
+process fastqc {
+	publishDir path: "${params.outdir}/ReadsStats" , mode: 'copy', pattern: '*.zip', saveAs: { filename -> "${name}_fastqc.zip" }
+	publishDir path: "${params.outdir}/ReadsStats" , mode: 'copy', pattern: '*.html', saveAs: { filename -> "${name}.html" }
+	// Retry (at most maxRetries times) on the listed exit statuses, otherwise 'finish'.
+	errorStrategy { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' }
+	maxRetries 3
+	module 'bioinfo/FastQC_v0.11.7'
+	executor 'slurm'
+	queue 'wflowq'
+	cpus 1 //{ 1 * task.attempt }
+	time { 45.m * task.attempt }
+	memory '1.GB'
+	
+	tag " $name"
+	
+	input:
+		tuple val(name), path(read)
+		
+	output:
+		tuple val(name), path("*_fastqc.{zip,html}") , emit: report
+		// path log files
+	script:
+	"""
+		fastqc -t $task.cpus --nogroup --noextract --outdir ./ ${read}
+	"""
+}
+
+
+process illuminaFilter {
+	publishDir path: "${params.outdir}/IlluminaFilter" , mode: 'copy', pattern: '*.gz'/*, saveAs: { filename -> "${name}.fastq.gz" }*/
+	
+	module 'bioinfo/fastq_illumina_filter-0.1'
+	executor 'slurm'
+	queue 'wflowq'
+	cpus { 1 * task.attempt }
+	time { 1.h * task.attempt }
+	memory '1.GB'
+	
+	tag " $name"
+	
+	input:
+		tuple val(name), path(read)
+	
+	output:
+		tuple val("$name"), path("*.fastq.gz"), emit: reads
+		path("*.output"), emit: log
+	
+	script:
+	"""
+		zcat $read | fastq_illumina_filter --keep N -v 2> ${name}.output | gzip -c -f > ${name}_filtered.fastq.gz	
+	"""
+	
+}
+
+process search_conta_bwa {
+	// aln command uses ~3.2GB memory and the sampe command uses ~5.4GB
+	publishDir path: "${params.outdir}/ContaminationSearch/tmp" , mode: 'copy'
+	module 'bioinfo/bwa-0.7.17'
+	time { 20.m * task.attempt }
+	memory { 5.GB * task.attempt }
+	
+	input:
+		tuple val(name), path(read)
+		each genomeRef
+		
+	output:
+		tuple val("${name}_${genomeName}"), path("${name}_${genomeName}.sam"), emit: sam
+		
+	script:
+	genomeName=file(genomeRef).simpleName
+	"""
+		bwa aln $genomeRef $read 2>> ${name}_${genomeName}.err | bwa samse $genomeRef - $read > ${name}_${genomeName}.sam 2>> ${name}_${genomeName}.err
+	"""
+}
+
+process BWA_ALIGNMENT {
+	publishDir path: "${params.outdir}/ContaminationSearch/tmp" , mode: 'copy'
+	
+	tag " $sample"
+	
+	input:
+		tuple val(sample), path(reads)
+		each genomeRef
+		
+	output:
+		//tuple val(sample), path("*.log"), emit: log
+		tuple val("${sample}_${genomeName}"), path("${sample}_${genomeName}.sam"), emit: sam
+		
+	script:
+	genomeName=file(genomeRef).simpleName
+	"""
+		bwa mem ${genomeRef} ${reads} 1> ${sample}_${genomeName}.sam 2> ${sample}.log
+	"""
+}
+
+process search_conta_samtools {
+	publishDir path: "${params.outdir}/ContaminationSearch" , mode: 'copy'
+	
+	module 'bioinfo/samtools-1.9'
+	time { 10.m * task.attempt }
+	
+	tag " $sample"
+	
+	input:
+		tuple val(name), path("*")
+	
+	output:
+		//tuple val("$name"), path("*")
+		path("*.txt")
+	
+	script:
+	"""
+		samtools view -SF 260 ${name}.sam 2>> ${name}.err | cut -f1 - 2>> ${name}.err | sort - > ${name}.txt 2>> ${name}.err
+	"""
+}
+
+process search_conta_summary {
+	publishDir path: "${params.outdir}/ContaminationSearch" , mode: 'copy'
+	
+	time { 10.m * task.attempt }
+	memory '1.GB'
+	
+	tag " $sample"
+	
+	input:
+		//tuple val(name), path("*")
+		path("*")
+		
+	output:
+		path("*.yaml")
+		
+	script:
+	"""
+		contaCounter.pl ./
+	"""
+}
+
+
+process FASTQSCREEN {
+	publishDir path: "${params.outdir}/ContaminationSearch/FastQ-Screen", mode: 'copy'
+	
+	module 'bioinfo/FastQ-Screen-0.15.2'
+	
+	tag " $sample"
+	
+	input:
+		tuple val(sample), path(reads)
+	
+	output:
+		tuple val(sample), path("*.txt"), emit: report
+	
+	script:
+	"""
+		fastq_screen $reads --conf $launchDir/../fastq_screen.conf
+	"""
+}
+
+
+/* --------------------------------------------------------------------
+ * 								OLD PROCESS
+ * --------------------------------------------------------------------
+*/
+process decoupageSS {
+	// Not used anymore
+	publishDir path: "${params.outdir}/SampleSheets" , mode: 'copy'
+	
+	input:
+		path multiSS
+		
+	output:
+		path '*'
+		
+	shell:
+	"""
+		extractReads.pl $multiSS NovaSeq
+	
+	"""
+}
+
+
+
+process maskMaker {
+	publishDir path: "${params.outdir}/Demux" , mode: 'copy'
+	
+	input:
+		path SampleSheet
+		path RunInfoXML
+	
+	output:
+		path 'Run.conf'
+	
+	script:
+	"""
+		extractInfo.pl -s $SampleSheet -r $RunInfoXML
+	
+	"""
+}
+
+process bcl2fastq {
+	publishDir path: "${params.outdir}/Demux/Reads" , mode: 'copy'
+	
+	echo=true
+	
+	input:
+		path SampleSheet
+		path Runconf
+		val mismatchNumber
+		path rawdata_location
+		
+	//output:
+		//path "*"
+		
+	shell:
+	"""
+		mask=\$(grep 'MASQUE' !{Runconf} | cut -d'=' -f2)
+		echo "bcl2fastq -p 10 -r 4 -w 4 \${mask} --barcode-mismatches !{mismatchNumber} --output-dir ./ -R !{rawdata_location} --sample-sheet !{SampleSheet} -l DEBUG"
+		
+	"""
 }
-
-
diff --git a/modules/local/module_dna.nf b/modules/local/module_dna.nf
index a3fdef5..75e56eb 100644
--- a/modules/local/module_dna.nf
+++ b/modules/local/module_dna.nf
@@ -1,155 +1,155 @@
-/*
- *	Module pour l'alignement des reads ADN sur gÃ©nome de rÃ©fÃ©rence et des statistiques associÃ©es
-*/
-
-process BWA_ALIGNMENT { BWA_ALIGNMENT
-	publishDir path: "${params.outdir}/alignment/bwa" , mode: 'copy'
-	
-	tag " $sample"
-	
-	input:
-		tuple val(sample), path(reads)
-		
-	output:
-		tuple val(sample), path("*.log"), emit: log
-		tuple val(sample), path("*.sam"), emit: sam
-		
-	script:
-	"""
-		module list
-		bwa mem ${params.referenceGenome} ${reads} 1> ${sample}.sam 2> ${sample}.log
-	"""
-}
-
-process SAMTOOLS_VIEW { 
-	publishDir path: "${params.outdir}/alignment/samtools" , mode: 'copy'
-	
-	tag "$sample"
-	
-	label 'samtools'
-	
-	input:
-		tuple val(sample), path(sam)
-		
-	output:
-		tuple val(sample), path("*.bam"), emit: bam
-		
-	script:
-	"""
-		samtools view -bS ${sam} > ${sample}.bam
-	"""
-}
-
-process SAMTOOLS_SORT {
-	publishDir path: "${params.outdir}/alignment/samtools" , mode: 'copy'
-
-	tag "$sample"
-	
-	label 'samtools'
-
-	input:
-		tuple val(sample), path(bam)
-
-	output:
-		tuple val(sample), path("*.log"), emit: log
-		tuple val(sample), path("*.bam"), emit: bam
-		//path("*.bam"), emit: bam
-
-	script:	// Pourquoi unmerged ??? https://forgemia.inra.fr/genotoul-bioinfo/ng6/-/blob/master/workflows/components/bwa.py#L97
-	"""	
-		samtools sort ${bam} -o ${sample}_unmerged.bam 2>> ${sample}.log
-	"""
-}
-
-process QUALIMAP {
-	publishDir path: "${params.outdir}/alignmentStats/qualimap" , mode: 'copy'
-
-	tag "$sample"
-
-	label 'qualimap'
-	
-	errorStrategy = { 'ignore' }
-
-	input:
-		tuple val(sample), path(bam)
-
-	output:
-		tuple val(sample), path("*.log"), emit: log
-		tuple val(sample), path("*/*"), emit: all	// ${sample}_stats/*
-		tuple val(sample), path("${sample}"), emit: report
-
-	script:
-	"""
-		qualimap bamqc -bam ${bam} -outdir ${sample} 1> ${sample}.log
-	"""
-}
-
-/*
-process alignmentQualityStats {
-	publishDir path: "${params.outdir}/alignmentStats/cigar" , mode: 'copy'
-	
-	label 'cigar'
-
-	input:
-		tuple val(sample), path(bam)
-
-	output:
-		tuple val(sample), path("*.log"), emit: log
-		tuple val(sample), path("*.csv"), emit: csv
-		tuple val(sample), path("*.png"), emit: graph
-
-	script:
-	cigarOptions = params.splitReads ? "--readsplit" : ""
-	
-	if (params.pairedEnd) {
-		"""
-			python
-			samtools view -F0x0100 ${bam} | cigarlineGraph.py -i - -t ${sample}_R1.csv ${sample}_R2.csv -o ${sample}_R1.png ${sample}_R2.png ${cigarOptions} 2> ${sample}.log
-		"""
-	} else {
-		"""
-			samtools view -F0x0100 ${bam} | cigarlineGraph.py -i - -t ${sample}_R1.csv ${cigarOptions} 2> ${sample}.log
-		"""
-	}
-}
-
-process alignmentSummary {
-	publishDir path: "${params.outdir}/alignmentStats/summary" , mode: 'copy'
-
-	label 'samtools'
-
-	input:
-		tuple val(sample), path(bam)
-
-	output:
-		tuple val(sample), path("*.stat"), emit: stat
-
-	script:
-	"""
-		samtools view -F0x0100 -bh ${bam} | samtools flagstat - > ${sample}.stat
-	"""
-}
-
-process readAlignementSummary  {	// addTreatment
-	publishDir path: "${params.outdir}/alignmentStats/summary" , mode: 'copy'
-
-	input:
-		tuple val(sample), path(statFile)
-
-	output:
-		tuple val(sample), path("*.log"), emit: log
-
-	script:
-	"""
-		alignementStatTreatment.pl --file ${statFile} 1> ${sample}.log
-	"""
-
-
-}
-
-		//alignmentQualityStats(samtoolsSort.out.bam)
-		//alignmentSummary(samtoolsSort.out.bam)
-		//readAlignementSummary(alignmentSummary.out.stat)
-		
-
+/*
+ *	Module pour l'alignement des reads ADN sur gÃ©nome de rÃ©fÃ©rence et des statistiques associÃ©es
+*/
+
+process BWA_ALIGNMENT { BWA_ALIGNMENT
+	publishDir path: "${params.outdir}/alignment/bwa" , mode: 'copy'
+	
+	tag " $sample"
+	
+	input:
+		tuple val(sample), path(reads)
+		
+	output:
+		tuple val(sample), path("*.log"), emit: log
+		tuple val(sample), path("*.sam"), emit: sam
+		
+	script:
+	"""
+		module list
+		bwa mem ${params.referenceGenome} ${reads} 1> ${sample}.sam 2> ${sample}.log
+	"""
+}
+
+process SAMTOOLS_VIEW { 
+	publishDir path: "${params.outdir}/alignment/samtools" , mode: 'copy'
+	
+	tag "$sample"
+	
+	label 'samtools'
+	
+	input:
+		tuple val(sample), path(sam)
+		
+	output:
+		tuple val(sample), path("*.bam"), emit: bam
+		
+	script:
+	"""
+		samtools view -bS ${sam} > ${sample}.bam
+	"""
+}
+
+process SAMTOOLS_SORT {
+	publishDir path: "${params.outdir}/alignment/samtools" , mode: 'copy'
+
+	tag "$sample"
+	
+	label 'samtools'
+
+	input:
+		tuple val(sample), path(bam)
+
+	output:
+		tuple val(sample), path("*.log"), emit: log
+		tuple val(sample), path("*.bam"), emit: bam
+		//path("*.bam"), emit: bam
+
+	script:	// Pourquoi unmerged ??? https://forgemia.inra.fr/genotoul-bioinfo/ng6/-/blob/master/workflows/components/bwa.py#L97
+	"""	
+		samtools sort ${bam} -o ${sample}_unmerged.bam 2>> ${sample}.log
+	"""
+}
+
+process QUALIMAP {
+	publishDir path: "${params.outdir}/alignmentStats/qualimap" , mode: 'copy'
+
+	tag "$sample"
+
+	label 'qualimap'
+	
+	errorStrategy = { 'ignore' }
+
+	input:
+		tuple val(sample), path(bam)
+
+	output:
+		tuple val(sample), path("*.log"), emit: log
+		tuple val(sample), path("*/*"), emit: all	// ${sample}_stats/*
+		tuple val(sample), path("${sample}"), emit: report
+
+	script:
+	"""
+		qualimap bamqc -bam ${bam} -outdir ${sample} 1> ${sample}.log
+	"""
+}
+
+/*
+process alignmentQualityStats {
+	publishDir path: "${params.outdir}/alignmentStats/cigar" , mode: 'copy'
+	
+	label 'cigar'
+
+	input:
+		tuple val(sample), path(bam)
+
+	output:
+		tuple val(sample), path("*.log"), emit: log
+		tuple val(sample), path("*.csv"), emit: csv
+		tuple val(sample), path("*.png"), emit: graph
+
+	script:
+	cigarOptions = params.splitReads ? "--readsplit" : ""
+	
+	if (params.pairedEnd) {
+		"""
+			python
+			samtools view -F0x0100 ${bam} | cigarlineGraph.py -i - -t ${sample}_R1.csv ${sample}_R2.csv -o ${sample}_R1.png ${sample}_R2.png ${cigarOptions} 2> ${sample}.log
+		"""
+	} else {
+		"""
+			samtools view -F0x0100 ${bam} | cigarlineGraph.py -i - -t ${sample}_R1.csv ${cigarOptions} 2> ${sample}.log
+		"""
+	}
+}
+
+process alignmentSummary {
+	publishDir path: "${params.outdir}/alignmentStats/summary" , mode: 'copy'
+
+	label 'samtools'
+
+	input:
+		tuple val(sample), path(bam)
+
+	output:
+		tuple val(sample), path("*.stat"), emit: stat
+
+	script:
+	"""
+		samtools view -F0x0100 -bh ${bam} | samtools flagstat - > ${sample}.stat
+	"""
+}
+
+process readAlignementSummary  {	// addTreatment
+	publishDir path: "${params.outdir}/alignmentStats/summary" , mode: 'copy'
+
+	input:
+		tuple val(sample), path(statFile)
+
+	output:
+		tuple val(sample), path("*.log"), emit: log
+
+	script:
+	"""
+		alignementStatTreatment.pl --file ${statFile} 1> ${sample}.log
+	"""
+
+
+}
+
+		//alignmentQualityStats(samtoolsSort.out.bam)
+		//alignmentSummary(samtoolsSort.out.bam)
+		//readAlignementSummary(alignmentSummary.out.stat)
+		
+
 */
\ No newline at end of file
diff --git a/modules/local/module_reports.nf b/modules/local/module_reports.nf
index 7581ea5..e6887d0 100644
--- a/modules/local/module_reports.nf
+++ b/modules/local/module_reports.nf
@@ -1,56 +1,56 @@
-/*
- *	Module pour la gÃ©nÃ©ration de rapports
-*/
-
-summary = [:]
-
-process workflow_summary {
-	publishDir path: "${params.outdir}/Reports" , mode: 'copy'
-	
-	output:
-	file 'workflow_summary_mqc.yaml'
-  
-	exec:
-	def yaml_file = task.workDir.resolve('workflow_summary_mqc.yaml')
-	yaml_file.text  = """
-  id: 'summary'
-  description: " - this information is collected when the pipeline is started."
-  section_name: 'Workflow Summary'
-  section_href: "${workflow.manifest.homePage}"
-  plot_type: 'html'
-  data: |
-        <dl class=\"dl-horizontal\">
-  ${summary.collect { k,v -> "            <dt>$k</dt><dd><samp>${v ?: '<span style=\"color:#999999;\">N/A</a>'}</samp></dd>" }.join("\n")}
-        </dl>
-  """.stripIndent()
- }
- 
- 
- workflow summary {
-	 take:
-	 	summary
-		 
-	main:
-		workflow_summary(summary)
-	 
- }
-
- 
-process MULTIQC {
-	publishDir path: "${params.outdir}/MultiQC" , mode: 'copy'
-	
-	module '/tools/share/Modules/bioinfo/MultiQC-v1.11'
-	
-	input:
-		path fastqc
-		path fastqscreen
-		path qualimap
-		
-	output:
-		path "*.html", emit: html
-	
-	script:
-	"""
-		multiqc -f . --config $baseDir/assets/multiqc_config.yaml --title ${params.project}
-	"""	
+/*
+ *	Module pour la gÃ©nÃ©ration de rapports
+*/
+
+summary = [:]
+
+process workflow_summary {
+	publishDir path: "${params.outdir}/Reports" , mode: 'copy'
+	
+	output:
+	file 'workflow_summary_mqc.yaml'
+  
+	exec:
+	def yaml_file = task.workDir.resolve('workflow_summary_mqc.yaml')
+	yaml_file.text  = """
+  id: 'summary'
+  description: " - this information is collected when the pipeline is started."
+  section_name: 'Workflow Summary'
+  section_href: "${workflow.manifest.homePage}"
+  plot_type: 'html'
+  data: |
+        <dl class=\"dl-horizontal\">
+  ${summary.collect { k,v -> "            <dt>$k</dt><dd><samp>${v ?: '<span style=\"color:#999999;\">N/A</a>'}</samp></dd>" }.join("\n")}
+        </dl>
+  """.stripIndent()
+ }
+ 
+ 
+ workflow summary {
+	 take:
+	 	summary
+		 
+	main:
+		workflow_summary(summary)
+	 
+ }
+
+ 
+process MULTIQC {
+	publishDir path: "${params.outdir}/MultiQC" , mode: 'copy'
+	
+	module '/tools/share/Modules/bioinfo/MultiQC-v1.11'
+	
+	input:
+		path fastqc
+		path fastqscreen
+		path qualimap
+		
+	output:
+		path "*.html", emit: html
+	
+	script:
+	"""
+		multiqc -f . --config $baseDir/assets/multiqc_config.yaml --title ${params.project}
+	"""	
 }
\ No newline at end of file
diff --git a/modules/local/module_test.nf b/modules/local/module_test.nf
index 26f01c6..a15894d 100644
--- a/modules/local/module_test.nf
+++ b/modules/local/module_test.nf
@@ -1,18 +1,17 @@
-process bar {
-	publishDir path: "/home/sbsuser/work/Nextflow/wf-illumina-nf/results" , mode: 'copy'
-	
-	input:
-	  	path x
-		path y
-	  
-	output:
-	  	path 'bar.txt', emit: fichier_de_sortie
-		// path 'foo.txt', emit: other_file  
-	  
-	script:
-	"""
-		(cat $x; head $y ) > bar.txt
-    """
-}
-
-
+process bar {
+	publishDir path: "/home/sbsuser/work/Nextflow/wf-illumina-nf/results" , mode: 'copy'
+
+	input:
+		path x
+		path y
+
+	output:
+		path 'bar.txt', emit: fichier_de_sortie
+		// path 'foo.txt', emit: other_file
+
+	script:
+	"""
+		(cat $x; head $y ) > bar.txt
+    """
+}
+
diff --git a/sub-workflows/local/dna_qc.nf b/sub-workflows/local/dna_qc.nf
index edfb190..958d444 100644
--- a/sub-workflows/local/dna_qc.nf
+++ b/sub-workflows/local/dna_qc.nf
@@ -1,25 +1,25 @@
-// -------------------------------------------------
-// 					MODULES
-// -------------------------------------------------
-include { 	BWA_ALIGNMENT;
-			SAMTOOLS_VIEW;
-			SAMTOOLS_SORT;
-			QUALIMAP		} from "$baseDir/modules/local/module_dna.nf"
-
-
-// -------------------------------------------------
-// 					WORKFLOW
-// -------------------------------------------------
-workflow DNA_QC {
-	take:
-		fastq
-			
-	main:
-		BWA_ALIGNMENT(fastq)
-		SAMTOOLS_VIEW(BWA_ALIGNMENT.out.sam)
-		SAMTOOLS_SORT(SAMTOOLS_VIEW.out.bam)
-		QUALIMAP(SAMTOOLS_SORT.out.bam)
-		
-	emit:
-		qualimap_report = QUALIMAP.out.report
+// -------------------------------------------------
+// 					MODULES
+// -------------------------------------------------
+include { 	BWA_ALIGNMENT;
+			SAMTOOLS_VIEW;
+			SAMTOOLS_SORT;
+			QUALIMAP		} from "$baseDir/modules/local/module_dna.nf"
+
+
+// -------------------------------------------------
+// 					WORKFLOW
+// -------------------------------------------------
+workflow DNA_QC {
+	take:
+		fastq
+
+	main:
+		BWA_ALIGNMENT(fastq)
+		SAMTOOLS_VIEW(BWA_ALIGNMENT.out.sam)
+		SAMTOOLS_SORT(SAMTOOLS_VIEW.out.bam)
+		QUALIMAP(SAMTOOLS_SORT.out.bam)
+
+	emit:
+		qualimap_report = QUALIMAP.out.report
 }
\ No newline at end of file
diff --git a/workflow/illumina_qc.nf b/workflow/illumina_qc.nf
index e600ef1..1df8626 100644
--- a/workflow/illumina_qc.nf
+++ b/workflow/illumina_qc.nf
@@ -1,189 +1,186 @@
-#!/usr/bin/env nextflow
-
-nextflow.enable.dsl = 2
-
-def helpMessage() {
-    log.info"""
-
-    Usage:
-
-    The typical command for running the pipeline is as follows:
-
-    nextflow run get-nf/template --inputdir '/path/to/data' --samplesheet 'samples.csv' -profile docker
-
-    Mandatory arguments:
-      --inputdir                    Path to input directory 
-      -profile                      Configuration profile to use. Can use multiple (comma separated)
-                                    Available: conda, docker, singularity, path, genotoul, test and more.
-
-    Options:
-      --samplesheet                 Default inputdir/samples.csv eg: SAMPLE_ID,SAMPLE_NAME,path/to/R1/fastq/file,path/to/R2/fastq/file (for paired-end only)
-      --contaminant                 Name of iGenomes // To be discussed ????
-      --outdir                      The output directory where the results will be saved
-      --email                       Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits
-      --email_on_fail               Same as --email, except only send mail if the workflow is not successful
-      --maxMultiqcEmailFileSize     Theshold size for MultiQC report to be attached in notification email. If file generated by pipeline exceeds the threshold, it will not be attached (Default: 25MB)
-      
-      -name [str]                   Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic.
-
-
-    =======================================================
-    Available profiles
-    -profile test                    Run the test dataset
-    -profile conda                   Build a new conda environment before running the pipeline. Use `--condaCacheDir` to define the conda cache path
-    -profile path                    Use the installation path defined for all tools. Use `--globalPath` to define the installation path
-    -profile docker                  Use the Docker images for each process
-    -profile singularity             Use the singularity images for each process
-    -profile genologin               Run the workflow on the cluster, instead of locally
-
-    """.stripIndent()
-}
-
-// Show help message
-if (params.help) {
-    helpMessage()
-    exit 0
-}
-
-// -------------------------------------------------
-// 					PARAMS
-// -------------------------------------------------
-/*params.sequencer = 'NovaSeq'
-//params.raw_data = '/home/sbsuser/work/Nextflow/wf-illumina-nf/data_test/MiSeq/210713_M07406_0007_000000000-JRCVF_bad'
-//params.outdir = '/home/sbsuser/work/Nextflow/wf-illumina-nf/results/210722_A00318_0223_BH3GHCDRXY_Lane1_1627020907_10x'
-
-
-
-
-//my_data_miseq=Channel.fromPath('./data_test/20210713_MISEQ_7_BULKDEMUX_JRCVF.csv')
-//my_data_novaseq=Channel.fromPath('./data_test/20210607_NOVASEQ6000_BULKDEMUX_HFMH7DRXY.csv')
-
-
-//ch_ss=Channel.fromPath('/home/sbsuser/work/Nextflow/wf-illumina-nf/data_test/MiSeq/210713_M07406_0007_000000000-JRCVF_bad/PipelineLogs_Lane1/20210713_MISEQ_7_IEM_JRCVF_Lane1.csv')
-//ch_ngl=Channel.fromPath('/home/sbsuser/work/Nextflow/wf-illumina-nf/data_test/MiSeq/210713_M07406_0007_000000000-JRCVF_bad/RunNGL-Bi.created')
-//ch_runInfo=Channel.fromPath('/home/sbsuser/work/Nextflow/wf-illumina-nf/data_test/MiSeq/210713_M07406_0007_000000000-JRCVF_bad/RunInfo.xml')
-//ch_ss=Channel.fromPath('/NovaSeq/data/210722_A00318_0223_BH3GHCDRXY/PipelineLogs_Lane1/20210722_NOVASEQ6000_IEM_H3GHCDRXY_Lane1.csv')
-
-*/
-
-// ------------- Test 10x ------------ //
-/*
-params.sequencer = 'NovaSeq'
-params.outdir = '/home/sbsuser/work/Nextflow/wf-illumina-nf/results/210722_A00318_0223_BH3GHCDRXY_Lane1_1627020907_10x'	// In config file
-params.raw_data = ''
-params.data = '/home/sbsuser/work/Nextflow/wf-illumina-nf/data_test/NovaSeq/210722_A00318_0223_BH3GHCDRXY_Lane1_1627020907_10x'
-params.isMultiplex = true
-params.chemistry = '10X'
-ch_ss = Channel.fromPath(params.data+'/SampleSheet_global.csv')
-*/
-
-// ------------- Test MiSeq ------------ //
-/*
-params.sequencer = 'MiSeq'
-//params.outdir = '/home/sbsuser/work/Nextflow/wf-illumina-nf/results/211022_M01945_0364_000000000-DB246_rnaseq'	// In config file
-params.raw_data = ''
-params.data = '/home/sbsuser/work/Nextflow/wf-illumina-nf/data_test/MiSeq/211022_M01945_0364_000000000-DB246_rnaseq'
-params.isMultiplex = true
-params.chemistry = 'amplicon'
-*/
-
-/*
-//ch_ss = Channel.fromPath(params.data+'/SampleSheet.csv')
-ch_DemuxStatXML=Channel.fromPath(params.data+'/Stats/DemultiplexingStats.xml')
-ch_DemuxSummary=Channel.fromPath(params.data+'/Stats/DemuxSummaryF1L1.txt')
-ch_read=Channel
-	.fromPath(params.data+'/TregThymus/**_R{1,2}_*.fastq.gz')
-	//.fromPath(params.data+'/ROME/B20CG-*_R{1,2}_*.fastq.gz')
-	.map{$it -> [$it.simpleName, $it]}
-	.groupTuple()
-*/
-
-// ------------- Test Amplicon ------------ //
-params.sequencer = 'MiSeq'
-//params.outdir = ''	// In config file
-params.raw_data = ''
-//params.data = '/home/sbsuser/work/Nextflow/wf-illumina-nf/data_test/NovaSeq/211129_A00318_0259_AHNMTTDSX2_Lane1_1638345606_dna'
-//params.isMultiplex = true
-//params.chemistry = 'Default'
-ch_ss = Channel.fromPath(params.samplesheet)		// utilitÃ© d'aprÃ¨s la SS dans un params ??
-ch_DemuxSummary=Channel.fromPath(params.inputdir+"/Stats/DemuxSummaryF1L*.txt")
-ch_DemuxStatXML=Channel.fromPath(params.inputdir+'/Stats/DemultiplexingStats.xml')
-//params.pairedEnd = true
-//params.splitReads = true	// ????
-//params.referenceGenome = '/save/ng6/TODO/HiSeqIndexedGenomes/new_struct/Quercus_robur/genome/GCA_900291515.1/BWA/GCA_900291515.1_Q_robur_v1_genomic.fna'
-ch_read=Channel
-	.fromPath(params.data+'/*_R{1,2}_*.fastq.gz')
-	.map{$it -> [$it.simpleName, $it]}
-	//.fromFilePairs(params.data+'/*_R{1,2}_*.fastq.gz')
-	//.groupTuple()
-
-
-mismatchNumber = params.sequencer == 'MiSeq'? 0 : 1
-
-banksForConta = params.addBankForConta ? params.genomesRefForConta << params.addBankForConta : params.genomesRefForConta
-
-System.out.println "On y est presque..."
-createDir = file(params.outdir).mkdir()
-
-// -------------------------------------------------
-// 					INCLUDES
-// -------------------------------------------------
-// Mettre ca dans des fichiers de config ??
-/*
-if DNA {
-	include { dna_qc as QC } from '../sub-workflows/local/dna_qc.nf'
-}
-if RNA {
-	include { rna_qc as QC } from '../sub-workflows/local/rna_qc.nf'
-}
-if amplicon {
-	if taille_insert dans itervalle {
-		include { diversity_qc as QC } from '../sub-workflows/local/diversity_qc.nf'
-	} else {
-		include { dna_qc as QC } from '../sub-workflows/local/dna_qc.nf'
-	}
-}
-*/
-include { Core as CORE	} from "$baseDir/sub-workflows/local/core_pipeline.nf"
-include { DNA_QC		} from "$baseDir/sub-workflows/local/dna_qc.nf"
-include { MULTIQC		} from "$baseDir/modules/local/module_reports.nf"
-System.out.println "Tous les includes : OK"
-// -------------------------------------------------
-// 					WORKFLOW
-// -------------------------------------------------
-workflow ILLUMINA_QC {
-
-	CORE(ch_ss, ch_DemuxStatXML, ch_DemuxSummary, ch_read, banksForConta )		/*ch_ngl, ch_runInfo, mismatchNumber,  params.raw_data*/
-	
-	
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+def helpMessage() {
+    log.info"""
+
+    Usage:
+
+    The typical command for running the pipeline is as follows:
+
+    nextflow run get-nf/template --inputdir '/path/to/data' --samplesheet 'samples.csv' -profile docker
+
+    Mandatory arguments:
+      --inputdir                    Path to input directory
+      -profile                      Configuration profile to use. Can use multiple (comma separated)
+                                    Available: conda, docker, singularity, path, genotoul, test and more.
+
+    Options:
+      --samplesheet                 Default inputdir/samples.csv eg: SAMPLE_ID,SAMPLE_NAME,path/to/R1/fastq/file,path/to/R2/fastq/file (for paired-end only)
+      --contaminant                 Name of iGenomes // To be discussed ????
+      --outdir                      The output directory where the results will be saved
+      --email                       Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits
+      --email_on_fail               Same as --email, except only send mail if the workflow is not successful
+      --maxMultiqcEmailFileSize     Theshold size for MultiQC report to be attached in notification email. If file generated by pipeline exceeds the threshold, it will not be attached (Default: 25MB)
+      -name [str]                   Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic.
+
+
+    =======================================================
+    Available profiles
+    -profile test                    Run the test dataset
+    -profile conda                   Build a new conda environment before running the pipeline. Use `--condaCacheDir` to define the conda cache path
+    -profile path                    Use the installation path defined for all tools. Use `--globalPath` to define the installation path
+    -profile docker                  Use the Docker images for each process
+    -profile singularity             Use the singularity images for each process
+    -profile genologin               Run the workflow on the cluster, instead of locally
+
+    """.stripIndent()
+}
+
+// Show help message
+if (params.help) {
+    helpMessage()
+    exit 0
+}
+
+// -------------------------------------------------
+// 					PARAMS
+// -------------------------------------------------
+/*params.sequencer = 'NovaSeq'
+//params.raw_data = '/home/sbsuser/work/Nextflow/wf-illumina-nf/data_test/MiSeq/210713_M07406_0007_000000000-JRCVF_bad'
+//params.outdir = '/home/sbsuser/work/Nextflow/wf-illumina-nf/results/210722_A00318_0223_BH3GHCDRXY_Lane1_1627020907_10x'
+
+
+
+
+//my_data_miseq=Channel.fromPath('./data_test/20210713_MISEQ_7_BULKDEMUX_JRCVF.csv')
+//my_data_novaseq=Channel.fromPath('./data_test/20210607_NOVASEQ6000_BULKDEMUX_HFMH7DRXY.csv')
+
+
+//ch_ss=Channel.fromPath('/home/sbsuser/work/Nextflow/wf-illumina-nf/data_test/MiSeq/210713_M07406_0007_000000000-JRCVF_bad/PipelineLogs_Lane1/20210713_MISEQ_7_IEM_JRCVF_Lane1.csv')
+//ch_ngl=Channel.fromPath('/home/sbsuser/work/Nextflow/wf-illumina-nf/data_test/MiSeq/210713_M07406_0007_000000000-JRCVF_bad/RunNGL-Bi.created')
+//ch_runInfo=Channel.fromPath('/home/sbsuser/work/Nextflow/wf-illumina-nf/data_test/MiSeq/210713_M07406_0007_000000000-JRCVF_bad/RunInfo.xml')
+//ch_ss=Channel.fromPath('/NovaSeq/data/210722_A00318_0223_BH3GHCDRXY/PipelineLogs_Lane1/20210722_NOVASEQ6000_IEM_H3GHCDRXY_Lane1.csv')
+
+*/
+
+// ------------- Test 10x ------------ //
+/*
+params.sequencer = 'NovaSeq'
+params.outdir = '/home/sbsuser/work/Nextflow/wf-illumina-nf/results/210722_A00318_0223_BH3GHCDRXY_Lane1_1627020907_10x'	// In config file
+params.raw_data = ''
+params.data = '/home/sbsuser/work/Nextflow/wf-illumina-nf/data_test/NovaSeq/210722_A00318_0223_BH3GHCDRXY_Lane1_1627020907_10x'
+params.isMultiplex = true
+params.chemistry = '10X'
+ch_ss = Channel.fromPath(params.data+'/SampleSheet_global.csv')
+*/
+
+// ------------- Test MiSeq ------------ //
+/*
+params.sequencer = 'MiSeq'
+//params.outdir = '/home/sbsuser/work/Nextflow/wf-illumina-nf/results/211022_M01945_0364_000000000-DB246_rnaseq'	// In config file
+params.raw_data = ''
+params.data = '/home/sbsuser/work/Nextflow/wf-illumina-nf/data_test/MiSeq/211022_M01945_0364_000000000-DB246_rnaseq'
+params.isMultiplex = true
+params.chemistry = 'amplicon'
+*/
+
+/*
+//ch_ss = Channel.fromPath(params.data+'/SampleSheet.csv')
+ch_DemuxStatXML=Channel.fromPath(params.data+'/Stats/DemultiplexingStats.xml')
+ch_DemuxSummary=Channel.fromPath(params.data+'/Stats/DemuxSummaryF1L1.txt')
+ch_read=Channel
+	.fromPath(params.data+'/TregThymus/**_R{1,2}_*.fastq.gz')
+	//.fromPath(params.data+'/ROME/B20CG-*_R{1,2}_*.fastq.gz')
+	.map{$it -> [$it.simpleName, $it]}
+	.groupTuple()
+*/
+
+// ------------- Test Amplicon ------------ //
+params.sequencer = 'MiSeq'
+//params.outdir = ''	// In config file
+params.raw_data = ''
+//params.data = '/home/sbsuser/work/Nextflow/wf-illumina-nf/data_test/NovaSeq/211129_A00318_0259_AHNMTTDSX2_Lane1_1638345606_dna'
+//params.isMultiplex = true
+//params.chemistry = 'Default'
+ch_ss = Channel.fromPath(params.samplesheet)		// utilitÃ© d'aprÃ¨s la SS dans un params ??
+ch_DemuxSummary=Channel.fromPath(params.inputdir+"/Stats/DemuxSummaryF1L*.txt")
+ch_DemuxStatXML=Channel.fromPath(params.inputdir+'/Stats/DemultiplexingStats.xml')
+//params.pairedEnd = true
+//params.splitReads = true	// ????
+//params.referenceGenome = '/save/ng6/TODO/HiSeqIndexedGenomes/new_struct/Quercus_robur/genome/GCA_900291515.1/BWA/GCA_900291515.1_Q_robur_v1_genomic.fna'
+ch_read=Channel
+	.fromPath(params.data+'/*_R{1,2}_*.fastq.gz')
+	.map{$it -> [$it.simpleName, $it]}
+	//.fromFilePairs(params.data+'/*_R{1,2}_*.fastq.gz')
+	//.groupTuple()
+
+
+mismatchNumber = params.sequencer == 'MiSeq'? 0 : 1
+
+banksForConta = params.addBankForConta ? params.genomesRefForConta << params.addBankForConta : params.genomesRefForConta
+
+System.out.println "On y est presque..."
+createDir = file(params.outdir).mkdir()
+
+// -------------------------------------------------
+// 					INCLUDES
+// -------------------------------------------------
+// Mettre ca dans des fichiers de config ??
+/*
+if DNA {
+	include { dna_qc as QC } from '../sub-workflows/local/dna_qc.nf'
+}
+if RNA {
+	include { rna_qc as QC } from '../sub-workflows/local/rna_qc.nf'
+}
+if amplicon {
+	if taille_insert dans itervalle {
+		include { diversity_qc as QC } from '../sub-workflows/local/diversity_qc.nf'
+	} else {
+		include { dna_qc as QC } from '../sub-workflows/local/dna_qc.nf'
+	}
+}
+*/
+include { Core as CORE	} from "$baseDir/sub-workflows/local/core_pipeline.nf"
+include { DNA_QC		} from "$baseDir/sub-workflows/local/dna_qc.nf"
+include { MULTIQC		} from "$baseDir/modules/local/module_reports.nf"
+System.out.println "Tous les includes : OK"
+// -------------------------------------------------
+// 					WORKFLOW
+// -------------------------------------------------
+workflow ILLUMINA_QC {
+
+	CORE(ch_ss, ch_DemuxStatXML, ch_DemuxSummary, ch_read, banksForConta )		/*ch_ngl, ch_runInfo, mismatchNumber,  params.raw_data*/
+
+
 	if (params.chemistry == 'Default') {
 		DNA_QC(ch_read)
 	} else {
 		System.out.println "Pas de sous-workflow DNA_QC()"
 	  }
 
-
-	// MultiQC
-	MULTIQC(CORE.out.fastqc_report.collect{it[1]}.ifEmpty([]),
-			CORE.out.fastqscreen_report.collect{it[1]}.ifEmpty([]),
-			DNA_QC.out.qualimap_report.collect{it[1]}.ifEmpty([])
-	)
-	
-	/*	
-		if overlap, alors : 
-			diversity_qc sub-workflow
-			
-		else : 
-			if DNA, alors :
-				dna_qc sub-worflow
-			if RNA, alors :
-				rna_qc sub-workflow
-			if Methyl, alors :
-				methyl_qc sub-worflow
-		
-	*/
-
-}
-
-
-
+
+	// MultiQC
+	MULTIQC(CORE.out.fastqc_report.collect{it[1]}.ifEmpty([]),
+			CORE.out.fastqscreen_report.collect{it[1]}.ifEmpty([]),
+			DNA_QC.out.qualimap_report.collect{it[1]}.ifEmpty([])
+	)
+	/*
+		if overlap, alors :
+			diversity_qc sub-workflow
+
+		else :
+			if DNA, alors :
+				dna_qc sub-worflow
+			if RNA, alors :
+				rna_qc sub-workflow
+			if Methyl, alors :
+				methyl_qc sub-worflow
+	*/
+
+}
+
+
+
-- 
GitLab


From 09ad59483eb24a4d032464b8c8b21cbde21c38c1 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Tue, 12 Jul 2022 16:45:52 +0200
Subject: [PATCH 28/51] Simplifies code readability for demultiplexStat steps

---
 sub-workflows/local/core_pipeline.nf | 17 ++---------------
 1 file changed, 2 insertions(+), 15 deletions(-)

diff --git a/sub-workflows/local/core_pipeline.nf b/sub-workflows/local/core_pipeline.nf
index 3b8967a..8c32c27 100644
--- a/sub-workflows/local/core_pipeline.nf
+++ b/sub-workflows/local/core_pipeline.nf
@@ -64,16 +64,6 @@ workflow Demultiplexage {
 		bcl2fastq(SampleSheet,maskMaker.out,mismatchNumber,rawdata_location)
 }
 
-workflow DemuxStat_10x {
-	take:
-		SampleSheet
-		DemuxStatXML
-		DemuxSummary
-		
-	main:
-		extractInfoForDemuxStats(SampleSheet)
-		demultiplexStats(DemuxStatXML, extractInfoForDemuxStats.out, DemuxSummary)
-}
 
 /*
 workflow Search_conta {
@@ -119,11 +109,8 @@ workflow Core {
 		//Demultiplexage(ch_sampleSheet, ch_RunInfoXML, mismatchNumber, rawdata_location)	// A voir plus tard !
 		
 		// ----------- DemultiplexStat
-		if (params.chemistry == '10X') {
-			DemuxStat_10x(ch_sampleSheet, ch_DemuxStatXML, ch_DemuxSummary)
-		} else {
-			System.out.println "Les donnÃ©es ne sont pas 10X !"
-		}
+		extractInfoForDemuxStats(ch_sampleSheet)
+		demultiplexStats(ch_DemuxStatXML, extractInfoForDemuxStats.out, ch_DemuxSummary)
 		
 		// ----------- Illumina Filter  // ou SubsetSeqFiles : dans quel cas on fait l'un ou l'autre ????
 		if (params.sequencer == 'NovaSeq' & params.isMultiplex) {
-- 
GitLab


From de29f352834beaa2c598e8612be0d410846b8b56 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Wed, 13 Jul 2022 11:10:31 +0200
Subject: [PATCH 29/51] Change path to new source location for VS

---
 conf/test.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conf/test.config b/conf/test.config
index 6f51d0e..fa614b4 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -4,7 +4,7 @@
 process {
 	withLabel: ngl_bi {
 		executor = 'local'
-		beforeScript = "export NGL_BI_CLIENT='/work/sbsuser/test/jules/ngl-bi_client'"	// test
+		beforeScript = "export NGL_BI_CLIENT='/work/sbsuser/test/jules/VisualStudioSources/ngl-bi_client'"	// test
 		//errorStrategy = { 'ignore' }
 	}
 	
-- 
GitLab


From c1d2bd02d12f51986b8e1f1e27a1624b732fdef9 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Wed, 13 Jul 2022 11:25:23 +0200
Subject: [PATCH 30/51] Use absolute paths instead of relative ones

	reference : #16
---
 sub-workflows/local/core_pipeline.nf | 4 ++--
 workflow/illumina_qc.nf              | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/sub-workflows/local/core_pipeline.nf b/sub-workflows/local/core_pipeline.nf
index 8c32c27..80e5ee8 100644
--- a/sub-workflows/local/core_pipeline.nf
+++ b/sub-workflows/local/core_pipeline.nf
@@ -12,14 +12,14 @@ include {
 	//search_conta_samtools as filter;
 	//search_conta_summary as summary;
 	FASTQSCREEN;
-} from '../../modules/local/module_core.nf'
+} from "$baseDir/modules/local/module_core.nf"
 
 
 include {
 	prepareReadSetCreation;
 	readsetNGLBiCreation as readsetCreation;
 	checkErrorFromNGLBi as checkError;
-} from '../../modules/local/module_NGL-Bi.nf'
+} from "$baseDir/modules/local/module_NGL-Bi.nf"
 
 //-------------------------------------------------
 
diff --git a/workflow/illumina_qc.nf b/workflow/illumina_qc.nf
index 1df8626..b65350a 100644
--- a/workflow/illumina_qc.nf
+++ b/workflow/illumina_qc.nf
@@ -130,16 +130,16 @@ createDir = file(params.outdir).mkdir()
 // Mettre ca dans des fichiers de config ??
 /*
 if DNA {
-	include { dna_qc as QC } from '../sub-workflows/local/dna_qc.nf'
+	include { dna_qc as QC } from "$baseDir/sub-workflows/local/dna_qc.nf"
 }
 if RNA {
-	include { rna_qc as QC } from '../sub-workflows/local/rna_qc.nf'
+	include { rna_qc as QC } from "$baseDir/sub-workflows/local/rna_qc.nf"
 }
 if amplicon {
 	if taille_insert dans itervalle {
-		include { diversity_qc as QC } from '../sub-workflows/local/diversity_qc.nf'
+		include { diversity_qc as QC } from "$baseDir/sub-workflows/local/diversity_qc.nf"
 	} else {
-		include { dna_qc as QC } from '../sub-workflows/local/dna_qc.nf'
+		include { dna_qc as QC } from "$baseDir/sub-workflows/local/dna_qc.nf"
 	}
 }
 */
-- 
GitLab


From 410a6fc5e3ed7795226290322fc77326752fc9d3 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Tue, 31 Jan 2023 11:33:05 +0100
Subject: [PATCH 31/51] Add shared_modules

	And remove one useless file
	Ref : #26
---
 conf/base.config                     | 94 +++++++++++++++++++---------
 modules/local/module_reports.nf      | 56 -----------------
 sub-workflows/local/core_pipeline.nf |  2 +
 workflow/illumina_qc.nf              |  6 +-
 4 files changed, 70 insertions(+), 88 deletions(-)
 delete mode 100644 modules/local/module_reports.nf

diff --git a/conf/base.config b/conf/base.config
index 55b7046..76dd352 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -5,45 +5,33 @@ System.out.println "Chargement des paramÃ¨tres de base"
 // Fixed params
 params {
 	// EMPTY INITIALISATION OF INPUT PARAMS
+	referenceGenome = ''
 	inputdir = ""
-	outdir = ""			// base output directory for all analysis
-	//outdir="/home/sbsuser/work/Nextflow/wf-illumina-nf/results"	// base output directory for all analysis
+	outdir = "./"			// base output directory for all analysis
 }
 
 import java.text.SimpleDateFormat
 SimpleDateFormat uniqueness_format = new SimpleDateFormat("yyyMMddHHmmss")
 
-System.out.println "Lecture de la configuration de run"
+System.out.println "Lecture du fichier de configuration du run : $launchDir/../params.config"
 includeConfig "$launchDir/../params.config"
-System.out.println "Lecture de la configuration de run terminÃ©e !"
+
 // Dynamic params
 params {
-	// Extract run info
-	/*runName=params.inputdir.split('/')[-1]
-	machine=params.inputdir.split('/')[-2]
-	runInfo=runName.split('_')
-	run_date=runInfo[0]
-	machineID=runInfo[1]
-	fcID=runInfo[3]
-	lane=runInfo[4]
-	demuxUniqueness=runInfo[5]*/
-	//-----------------------
-	
-	uniqueness = uniqueness_format.format(new Date())
-	outdir=params.inputdir+"/nextflow/"+uniqueness
+	nf_uniqueness = uniqueness_format.format(new Date())
+	outdir= params.inputdir + "/nextflow/" + nf_uniqueness
 
-	//samplesheet="${run_date}*.csv"
-	
+	System.out.println ""
 	System.out.println "runName : "+runName
-	System.out.println "machine : "+machine
+	System.out.println "data : "+dataNature
+	System.out.println "sequencer : "+sequencer
 	System.out.println "machineID : "+machineID
 	System.out.println "run_date : "+run_date
 	System.out.println "fcID : "+fcID
 	System.out.println "lane : "+lane
 	System.out.println "demuxUniqueness : "+demuxUniqueness
-	
-	System.out.println "uniqueness : "+uniqueness
 	System.out.println "outdir : "+outdir
+	System.out.println ""
 }
 
 // ========================================
@@ -64,8 +52,7 @@ process {
 	withName: BWA_ALIGNMENT {
 		module = ['bioinfo/bwa-0.7.17']
 	}
-	
-	
+
 	// ----- WithLabel
 	withLabel: littleJob {
 		executor = 'local'
@@ -73,9 +60,6 @@ process {
 	
 	withLabel: samtools {
 		module = ['bioinfo/samtools-1.14']
-		//cpus = { 6 * task.attempt }
-	    //memory = { 8.GB * task.attempt }
-	    //time = { 3.h * task.attempt }
 	}
 	
 	withLabel: cigar {
@@ -85,8 +69,58 @@ process {
 	withLabel: qualimap {
 		module = ['system/R-3.4.3:bioinfo/qualimap-31-08-20']
 		beforeScript='unset DISPLAY'
-		//cpus = { 8 * task.attempt }
-	    //memory = { 2.GB * task.attempt }
-	    //time = { 3.h * task.attempt }
 	}
+}
+
+// ========================================
+//			   SHARED MODULES
+//=========================================
+params.shared_modules = '/home/sbsuser/work/Nextflow/shared_modules/ExportSources_Jules'
+
+process {
+    withName: GZIP {
+		ext.args = '-f'
+        publishDir = [
+            path: { "${params.outdir}/archives" },
+            mode: 'symlink',
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+            pattern: "*.gz"
+        ]
+	}
+
+    withName: GUNZIP {
+		ext.args = [
+			'-f'
+		].join(' ')
+	}
+
+	withName: SEQTK_SAMPLE {
+        ext.args = '-s100'
+        ext.args2 = 100000
+
+        module = 'bioinfo/seqtk-1.3'
+
+        publishDir = [
+            path: { "${params.outdir}/subset" },
+            mode: 'symlink',
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+            pattern: "*.fast{a,q}"
+        ]
+    }
+
+	withName: MULTIQC {
+        ext.args = [
+            "--config ${baseDir}/assets/multiqc_config.yaml",
+            params.project ? "--title '${params.project}'" : ''
+        ].join(' ')
+
+		module = '/tools/share/Modules/bioinfo/MultiQC-v1.11'
+
+		publishDir = [
+            path: { "${params.outdir}/MultiQC" },
+            mode: 'copy',
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+            pattern: "*.html"
+        ]
+    }
 }
\ No newline at end of file
diff --git a/modules/local/module_reports.nf b/modules/local/module_reports.nf
deleted file mode 100644
index e6887d0..0000000
--- a/modules/local/module_reports.nf
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- *	Module pour la gÃ©nÃ©ration de rapports
-*/
-
-summary = [:]
-
-process workflow_summary {
-	publishDir path: "${params.outdir}/Reports" , mode: 'copy'
-	
-	output:
-	file 'workflow_summary_mqc.yaml'
-  
-	exec:
-	def yaml_file = task.workDir.resolve('workflow_summary_mqc.yaml')
-	yaml_file.text  = """
-  id: 'summary'
-  description: " - this information is collected when the pipeline is started."
-  section_name: 'Workflow Summary'
-  section_href: "${workflow.manifest.homePage}"
-  plot_type: 'html'
-  data: |
-        <dl class=\"dl-horizontal\">
-  ${summary.collect { k,v -> "            <dt>$k</dt><dd><samp>${v ?: '<span style=\"color:#999999;\">N/A</a>'}</samp></dd>" }.join("\n")}
-        </dl>
-  """.stripIndent()
- }
- 
- 
- workflow summary {
-	 take:
-	 	summary
-		 
-	main:
-		workflow_summary(summary)
-	 
- }
-
- 
-process MULTIQC {
-	publishDir path: "${params.outdir}/MultiQC" , mode: 'copy'
-	
-	module '/tools/share/Modules/bioinfo/MultiQC-v1.11'
-	
-	input:
-		path fastqc
-		path fastqscreen
-		path qualimap
-		
-	output:
-		path "*.html", emit: html
-	
-	script:
-	"""
-		multiqc -f . --config $baseDir/assets/multiqc_config.yaml --title ${params.project}
-	"""	
-}
\ No newline at end of file
diff --git a/sub-workflows/local/core_pipeline.nf b/sub-workflows/local/core_pipeline.nf
index 80e5ee8..77f6b00 100644
--- a/sub-workflows/local/core_pipeline.nf
+++ b/sub-workflows/local/core_pipeline.nf
@@ -21,6 +21,8 @@ include {
 	checkErrorFromNGLBi as checkError;
 } from "$baseDir/modules/local/module_NGL-Bi.nf"
 
+include { GUNZIP	} from "${params.shared_modules}/gzip.nf"
+include { SEQTK_SAMPLE } from "${params.shared_modules}/seqtk.nf"
 //-------------------------------------------------
 
 inNGL=true
diff --git a/workflow/illumina_qc.nf b/workflow/illumina_qc.nf
index b65350a..d058dad 100644
--- a/workflow/illumina_qc.nf
+++ b/workflow/illumina_qc.nf
@@ -145,8 +145,10 @@ if amplicon {
 */
 include { Core as CORE	} from "$baseDir/sub-workflows/local/core_pipeline.nf"
 include { DNA_QC		} from "$baseDir/sub-workflows/local/dna_qc.nf"
-include { MULTIQC		} from "$baseDir/modules/local/module_reports.nf"
-System.out.println "Tous les includes : OK"
+//include { MULTIQC		} from "$baseDir/modules/local/module_reports.nf"
+include { MULTIQC		} from "${params.shared_modules}/multiqc.nf"
+include { workflow_summary as WORKFLOW_SUMMARY } from "${params.shared_modules}/workflow_summary.nf"
+
 // -------------------------------------------------
 // 					WORKFLOW
 // -------------------------------------------------
-- 
GitLab


From f1b6e9276e9122dd9549b63ae1ceb71e0695da90 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Tue, 31 Jan 2023 11:43:38 +0100
Subject: [PATCH 32/51] Add fastp to estimate duplicated reads

	Ref : #21
---
 assets/multiqc_config.yaml           |  5 +++++
 modules/local/module_core.nf         | 30 ++++++++++++++++++++++++++--
 sub-workflows/local/core_pipeline.nf | 14 ++++++++++---
 3 files changed, 44 insertions(+), 5 deletions(-)

diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml
index f894b64..fc109cd 100644
--- a/assets/multiqc_config.yaml
+++ b/assets/multiqc_config.yaml
@@ -42,6 +42,11 @@ module_order:
         #info: "Analysis performed with QualiMap"
         href: "http://qualimap.bioinfo.cipf.es/"
         target: "QualiMap"
+  - samtools:
+  - fastp:
+        name: "Duplicats"
+        href: "https://github.com/OpenGene/fastp"
+        target: "Fastp"
   - fastq_screen:
         name: "ContaminationSearch"
         #info: "This section shows the module with different files"
diff --git a/modules/local/module_core.nf b/modules/local/module_core.nf
index 6ec5bc9..cf7e9f0 100644
--- a/modules/local/module_core.nf
+++ b/modules/local/module_core.nf
@@ -198,6 +198,34 @@ process FASTQSCREEN {
 	"""
 }
 
+process DUPLICATED_READS {
+	
+	tag "$sample"
+
+	input:
+		tuple val(sample), path(fastq)
+
+	output:
+		tuple val(sample), path("*.json"), emit: json
+		tuple val(sample), path("*.log")
+
+	shell:
+	R1_name=file(fastq[0]).simpleName
+	R2_name=file(fastq[1]).simpleName
+	'''
+		fastp \
+		-i !{fastq[0]} \
+		-o !{R1_name}_dedupl.fastq \
+		-I !{fastq[1]} \
+		-O !{R2_name}_dedupl.fastq \
+		--disable_adapter_trimming \
+		--disable_quality_filtering \
+		--disable_length_filtering \
+		--json !{R1_name}_fastp.json \
+		2> !{R1_name}.log
+	'''
+}
+
 
 /* --------------------------------------------------------------------
  * 								OLD PROCESS
@@ -220,8 +248,6 @@ process decoupageSS {
 	"""
 }
 
-
-
 process maskMaker {
 	publishDir path: "${params.outdir}/Demux" , mode: 'copy'
 	
diff --git a/sub-workflows/local/core_pipeline.nf b/sub-workflows/local/core_pipeline.nf
index 77f6b00..cc8ac60 100644
--- a/sub-workflows/local/core_pipeline.nf
+++ b/sub-workflows/local/core_pipeline.nf
@@ -12,9 +12,9 @@ include {
 	//search_conta_samtools as filter;
 	//search_conta_summary as summary;
 	FASTQSCREEN;
+	DUPLICATED_READS;
 } from "$baseDir/modules/local/module_core.nf"
 
-
 include {
 	prepareReadSetCreation;
 	readsetNGLBiCreation as readsetCreation;
@@ -129,8 +129,16 @@ workflow Core {
 		// ----------- ContaminationSearch
 		//Search_conta(ch_read_good, banksForConta)
 		FASTQSCREEN(ch_read_good)
+		DUPLICATED_READS(
+			SEQTK_SAMPLE.out
+				.collect{it[1]}
+				.flatten()
+				.map { $it -> [ ($it.simpleName =~ /(.*)_R[1-2]_.*/)[0][1] , $it ] }
+				.groupTuple()
+		) // need fastq paired !!!
 		
 	emit:
-		fastqc_report = fastqc.out.report
-		fastqscreen_report = FASTQSCREEN.out.report
+		fastqc_report = fastqc.out.report ?: Channel.empty()
+		fastqscreen_report = FASTQSCREEN.out.report ?: Channel.empty()
+		fastp_report = DUPLICATED_READS.out.json
 }
-- 
GitLab


From 22fbb6759610cb36eed4dbbf0ff1fb8d5d550c47 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Tue, 31 Jan 2023 11:48:48 +0100
Subject: [PATCH 33/51] Add samtools flagstat module

	Ref : #17
---
 assets/multiqc_config.yaml    |  1 +
 modules/local/module_dna.nf   | 20 ++++++++++++++++++++
 sub-workflows/local/dna_qc.nf | 26 ++++++++++++++++++++------
 3 files changed, 41 insertions(+), 6 deletions(-)

diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml
index fc109cd..8a2b597 100644
--- a/assets/multiqc_config.yaml
+++ b/assets/multiqc_config.yaml
@@ -19,6 +19,7 @@ thousandsSep_format: " "
 extra_fn_clean_trim:
   - "_filtered"
   - "_unmerged"
+  - "_flagstat"
 
 ## Plot config
 export_plots: true
diff --git a/modules/local/module_dna.nf b/modules/local/module_dna.nf
index 75e56eb..894b3be 100644
--- a/modules/local/module_dna.nf
+++ b/modules/local/module_dna.nf
@@ -61,6 +61,26 @@ process SAMTOOLS_SORT {
 	"""
 }
 
+process SAMTOOLS_FLAGSTATS {
+	publishDir path: "${params.outdir}/alignmentStats/samtools" , mode: 'copy'
+
+	tag "$sample"
+	
+	label 'samtools'
+
+	input:
+		tuple val(sample), path(bam)
+
+	output:
+		tuple val(sample), path("*.log"), emit: log
+		tuple val(sample), path("*.txt"), emit: txt
+
+	script:
+	"""
+		samtools flagstat ${bam} > ${sample}_flagstat.txt 2>> ${sample}.log
+	"""
+}
+
 process QUALIMAP {
 	publishDir path: "${params.outdir}/alignmentStats/qualimap" , mode: 'copy'
 
diff --git a/sub-workflows/local/dna_qc.nf b/sub-workflows/local/dna_qc.nf
index 958d444..2b0557c 100644
--- a/sub-workflows/local/dna_qc.nf
+++ b/sub-workflows/local/dna_qc.nf
@@ -4,7 +4,9 @@
 include { 	BWA_ALIGNMENT;
 			SAMTOOLS_VIEW;
 			SAMTOOLS_SORT;
-			QUALIMAP		} from "$baseDir/modules/local/module_dna.nf"
+			SAMTOOLS_FLAGSTATS;
+			QUALIMAP;
+} from "$baseDir/modules/local/module_dna.nf"
 
 
 // -------------------------------------------------
@@ -15,11 +17,23 @@ workflow DNA_QC {
 		fastq
 
 	main:
-		BWA_ALIGNMENT(fastq)
-		SAMTOOLS_VIEW(BWA_ALIGNMENT.out.sam)
-		SAMTOOLS_SORT(SAMTOOLS_VIEW.out.bam)
-		QUALIMAP(SAMTOOLS_SORT.out.bam)
+		if ( "$params.referenceGenome" != '' ) {
+			BWA_ALIGNMENT(fastq)
+			SAMTOOLS_VIEW(BWA_ALIGNMENT.out.sam)
+			SAMTOOLS_SORT(SAMTOOLS_VIEW.out.bam)
+			SAMTOOLS_FLAGSTATS(SAMTOOLS_VIEW.out.bam)
+			QUALIMAP(SAMTOOLS_SORT.out.bam)
+
+			qualimap_report_emitted =  QUALIMAP.out.report
+			flagstats_output_emitted = SAMTOOLS_FLAGSTATS.out.txt
+
+		} else {
+			// If Qualimap and Samtools were not executed
+			qualimap_report_emitted =  Channel.empty()
+			flagstats_output_emitted = Channel.empty()
+		}
 
 	emit:
-		qualimap_report = QUALIMAP.out.report
+		qualimap_report = qualimap_report_emitted
+		flagstats_output = flagstats_output_emitted
 }
\ No newline at end of file
-- 
GitLab


From c86df0f4580ad0aefef922100183a4994fe85c6d Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Tue, 31 Jan 2023 11:53:45 +0100
Subject: [PATCH 34/51] Improve MultiQC calling

	Ref : # 21
---
 workflow/illumina_qc.nf | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/workflow/illumina_qc.nf b/workflow/illumina_qc.nf
index d058dad..256e725 100644
--- a/workflow/illumina_qc.nf
+++ b/workflow/illumina_qc.nf
@@ -163,11 +163,21 @@ workflow ILLUMINA_QC {
 		System.out.println "Pas de sous-workflow DNA_QC()"
 	  }
 
-
 	// MultiQC
-	MULTIQC(CORE.out.fastqc_report.collect{it[1]}.ifEmpty([]),
+	if ( "$params.referenceGenome" != '' ) {
+		System.out.println "CrÃ©ation de Channels vides pour les process non exÃ©cutÃ©s."
+		DNA_QC.out.qualimap_report = Channel.empty()
+		DNA_QC.out.flagstats_output = Channel.empty()
+	}
+	
+	MULTIQC(WORKFLOW_SUMMARY.out.ifEmpty([])
+		.mix(
+			CORE.out.fastqc_report.collect{it[1]}.ifEmpty([]),
 			CORE.out.fastqscreen_report.collect{it[1]}.ifEmpty([]),
-			DNA_QC.out.qualimap_report.collect{it[1]}.ifEmpty([])
+			CORE.out.fastp_report.collect{it[1]}.ifEmpty([]),
+			DNA_QC.out.qualimap_report.collect{it[1]}.ifEmpty([]),
+			DNA_QC.out.flagstats_output.collect{it[1]}.ifEmpty([])
+		).collect()
 	)
 	/*
 		if overlap, alors :
@@ -182,7 +192,4 @@ workflow ILLUMINA_QC {
 				methyl_qc sub-worflow
 	*/
 
-}
-
-
-
+}
\ No newline at end of file
-- 
GitLab


From bf6f934467dc1ea59803fa0a34783181f2fb6202 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Tue, 31 Jan 2023 11:55:00 +0100
Subject: [PATCH 35/51] Add fastp configuration

	Ref : # 21
---
 conf/base.config | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/conf/base.config b/conf/base.config
index 76dd352..2ce161c 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -53,6 +53,14 @@ process {
 		module = ['bioinfo/bwa-0.7.17']
 	}
 
+	withName: DUPLICATED_READS {
+		publishDir path: "${params.outdir}/Duplicats" , mode: 'copy', pattern: "*.log"
+		module = ['bioinfo/fastp-0.23.2']
+		time = { 5.h * task.attempt }
+		memory = { 3.GB * task.attempt }
+		cpus = { 3 * task.attempt }
+	}
+
 	// ----- WithLabel
 	withLabel: littleJob {
 		executor = 'local'
-- 
GitLab


From 4415e4f073d7b97ce47a455f79cdcb8dbd4033ad Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Tue, 31 Jan 2023 11:58:07 +0100
Subject: [PATCH 36/51] Add Gunzip step

	Ref : #22
---
 sub-workflows/local/core_pipeline.nf | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/sub-workflows/local/core_pipeline.nf b/sub-workflows/local/core_pipeline.nf
index cc8ac60..2020ecd 100644
--- a/sub-workflows/local/core_pipeline.nf
+++ b/sub-workflows/local/core_pipeline.nf
@@ -127,8 +127,11 @@ workflow Core {
 		fastqc(ch_read_good)
 		
 		// ----------- ContaminationSearch
-		//Search_conta(ch_read_good, banksForConta)
 		FASTQSCREEN(ch_read_good)
+
+		// ----------- Recherche Duplicats
+		GUNZIP(ch_read_good)
+		SEQTK_SAMPLE(GUNZIP.out)
 		DUPLICATED_READS(
 			SEQTK_SAMPLE.out
 				.collect{it[1]}
-- 
GitLab


From e90199f1ad1674a9ab9b680991be471d51e0230e Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Tue, 31 Jan 2023 12:03:45 +0100
Subject: [PATCH 37/51] Increase job time limit

	Ref : #24
---
 conf/base.config | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/conf/base.config b/conf/base.config
index 2ce161c..ab1f058 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -100,6 +100,8 @@ process {
 		ext.args = [
 			'-f'
 		].join(' ')
+
+		time = { 2.h * task.attempt }
 	}
 
 	withName: SEQTK_SAMPLE {
-- 
GitLab


From 7155f4cea3c4166cb48efdff8bbd9c0fe8cb3dd5 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Tue, 31 Jan 2023 12:12:26 +0100
Subject: [PATCH 38/51] Increase and clean params for fastqc

	Ref: #23
---
 conf/base.config                     | 21 +++++++++++++++++++++
 modules/local/module_core.nf         | 12 +-----------
 sub-workflows/local/core_pipeline.nf | 25 ++++++++++++++++---------
 3 files changed, 38 insertions(+), 20 deletions(-)

diff --git a/conf/base.config b/conf/base.config
index ab1f058..78238b1 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -61,6 +61,27 @@ process {
 		cpus = { 3 * task.attempt }
 	}
 
+	withName: FASTQC {
+		publishDir = [
+			path: "${params.outdir}/ReadsStats",
+			mode: 'symlink',
+			pattern: '*.zip',
+			saveAs: { filename -> "${name}_fastqc.zip" }
+		]
+		publishDir = [
+			path: "${params.outdir}/ReadsStats",
+			mode: 'copy',
+			pattern: '*.html',
+			saveAs: { filename -> "${name}.html" }
+		]
+		
+		errorStrategy { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' }
+		maxRetries = 3
+		module = ['bioinfo/FastQC_v0.11.7']
+		time = { 1.h * task.attempt }
+
+	}
+
 	// ----- WithLabel
 	withLabel: littleJob {
 		executor = 'local'
diff --git a/modules/local/module_core.nf b/modules/local/module_core.nf
index cf7e9f0..c703614 100644
--- a/modules/local/module_core.nf
+++ b/modules/local/module_core.nf
@@ -40,18 +40,8 @@ process demultiplexStats {
 	"""
 }
 
-process fastqc {
-	publishDir path: "${params.outdir}/ReadsStats" , mode: 'copy', pattern: '*.zip', saveAs: { filename -> "${name}_fastqc.zip" }
-	publishDir path: "${params.outdir}/ReadsStats" , mode: 'copy', pattern: '*.html', saveAs: { filename -> "${name}.html" }
+process FASTQC {
 	
-	errorStrategy { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' }
-	maxRetries 3
-	module 'bioinfo/FastQC_v0.11.7'
-	executor 'slurm'
-	queue 'wflowq'
-	cpus 1 //{ 1 * task.attempt }
-	time { 45.m * task.attempt }
-	memory '1.GB'
 	
 	tag " $name"
 	
diff --git a/sub-workflows/local/core_pipeline.nf b/sub-workflows/local/core_pipeline.nf
index 2020ecd..ac469b9 100644
--- a/sub-workflows/local/core_pipeline.nf
+++ b/sub-workflows/local/core_pipeline.nf
@@ -1,16 +1,23 @@
 
-banksForConta = [ ]
+// -------------------------------------------------
+// 					CORE PIPELINE
+// -------------------------------------------------
+/*
+ * Creation readsets NGL-Bi -> plus tard
+ * Statistiques de dÃ©multiplexage
+ * QC des reads
+ * Recherche contaminations
+ * Recherche duplicats
+*/
 
+// -------------------------------------------------
+// 					MODULES
+// -------------------------------------------------
 include {
-	maskMaker;
-	bcl2fastq;
 	extractInfoForDemuxStats;
 	demultiplexStats;
-	fastqc;
+	FASTQC;
 	illuminaFilter;
-	//BWA_ALIGNMENT as align;	//search_conta_bwa		//BWA_ALIGNMENT
-	//search_conta_samtools as filter;
-	//search_conta_summary as summary;
 	FASTQSCREEN;
 	DUPLICATED_READS;
 } from "$baseDir/modules/local/module_core.nf"
@@ -124,7 +131,7 @@ workflow Core {
 		}
 		
 		// ----------- FASTQC
-		fastqc(ch_read_good)
+		FASTQC(ch_read_good)
 		
 		// ----------- ContaminationSearch
 		FASTQSCREEN(ch_read_good)
@@ -141,7 +148,7 @@ workflow Core {
 		) // need fastq paired !!!
 		
 	emit:
-		fastqc_report = fastqc.out.report ?: Channel.empty()
+		fastqc_report = FASTQC.out.report ?: Channel.empty()
 		fastqscreen_report = FASTQSCREEN.out.report ?: Channel.empty()
 		fastp_report = DUPLICATED_READS.out.json
 }
-- 
GitLab


From aebe17d298e87db8bd6d2919e6680ec4d58150f0 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Tue, 31 Jan 2023 16:45:37 +0100
Subject: [PATCH 39/51] Remove old scripts

---
 bin/checkErrorNGLScripts.pl | 80 -------------------------------
 bin/contaCounter.pl         | 96 -------------------------------------
 modules/.gitkeep            |  0
 3 files changed, 176 deletions(-)
 delete mode 100644 bin/checkErrorNGLScripts.pl
 delete mode 100644 bin/contaCounter.pl
 delete mode 100644 modules/.gitkeep

diff --git a/bin/checkErrorNGLScripts.pl b/bin/checkErrorNGLScripts.pl
deleted file mode 100644
index c8a2d87..0000000
--- a/bin/checkErrorNGLScripts.pl
+++ /dev/null
@@ -1,80 +0,0 @@
-#!/usr/bin/perl -w
-binmode STDIN,  ':encoding(UTF-8)';
-binmode STDOUT, ':encoding(UTF-8)';
-binmode STDERR, ':encoding(UTF-8)';
-
-=head1 NAME
-
- checkErrorNGLScripts.pl
- 
-=head1 DESCRIPTION
-
- Read log from NGL scripts and search any errors
- 
-=head1 SYNOPSIS
-
- checkErrorNGLScripts.pl --file <path>
-
-=head1 OPTIONS
-
- --file=s : path to a log file
- 
-=head1 EXEMPLES
-
- perl checkErrorNGLScripts.pl --file <path>
-
-=head1 AUTHOR
-
- Jules Sabban pour Plateforme genomique Toulouse (get-plage.bioinfo@genotoul.fr)
- 
-=cut
-
-###################################################################
-#
-#						LIBRAIRIES
-#
-###################################################################
-use strict;
-use Getopt::Long;
-
-##################################################################
-#
-#						INITIALISATION
-#
-##################################################################
-my $file = "";
-
-GetOptions( 
-	"file=s" => \$file, 	# path to error file
-);
-
-if ($file eq "") {
-	print STDERR ("USAGE : checkErrorNGLScripts.pl --file <LOG_FILE>\n");
-	exit 1;
-}
-
-##################################################################
-#
-#							MAIN
-#
-##################################################################
-open my $handle, '<', $file or die "Lecture du fichier $file impossible : $!\n";
-chomp( my @lines = <$handle> );
-close $handle;
-my $ErrorExists = 0;
-foreach my $line (@lines) {
-	if ($line =~ /Erreur/ || $line =~ /ERROR/ || $line =~ /error/) {
-		$ErrorExists = 1;
-		last;
-	}
-}
-
-if ($ErrorExists) {
-	foreach my $line (@lines) {
-		print STDERR "$line\n";	
-	}
-} else {
-	foreach my $line (@lines) {
-		print STDOUT "$line\n";	
-	}
-}
\ No newline at end of file
diff --git a/bin/contaCounter.pl b/bin/contaCounter.pl
deleted file mode 100644
index 5c4bb6c..0000000
--- a/bin/contaCounter.pl
+++ /dev/null
@@ -1,96 +0,0 @@
-#!/usr/bin/perl -w
-binmode STDIN,  ':encoding(UTF-8)';
-binmode STDOUT, ':encoding(UTF-8)';
-binmode STDERR, ':encoding(UTF-8)';
-
-=head1 NAME
-
- contaCounter.pl
- 
-=head1 DESCRIPTION
-
- Make statistics on samtools outputs
- 
-=head1 SYNOPSIS
-
- contacounter.pl <pahto_to_folder>
-
-=head1 OPTIONS
-
-
- 
-=head1 EXEMPLES
-
- perl countaCounter.pl ./
-
-=head1 AUTHOR
-
- Jules Sabban pour Plateforme genomique Toulouse (get-plage.bioinfo@genotoul.fr)
- 
-=cut
-
-###################################################################
-#
-#						LIBRAIRIES
-#
-###################################################################
-use strict;
-use Getopt::Long;
-use File::Basename;
-
-##################################################################
-#
-#						INITIALISATION
-#
-##################################################################
-my @files = glob($ARGV[0]."*.txt");
-#my @files = glob("/home/sbsuser/work/Nextflow/wf-illumina-nf/results/210722_A00318_0223_BH3GHCDRXY_Lane1_1627020907_10x/CheckContamination/*.txt");
-
-#print "FILE : @files\n";
-
-if ($#files == 0) {
-	print STDERR "[Erreur] Le repertoire $ARGV[0] ne contient aucun fichiers !\n";
-	exit 5;
-}
-
-my %hash;
-
-##################################################################
-#
-#							MAIN
-#
-##################################################################
-
-foreach my $file (@files) {
-	my $simpleFile = basename($file,  ".txt");
-	
-	# Extraction nom contaminant
-	my @simpleNameToSplit = split("_", $simpleFile);
-	my $contaminant = $simpleNameToSplit[-1];
-	
-	# Extraction nom echantillon
-	@simpleNameToSplit = split("_${contaminant}", $simpleFile);
-	my $sampleName = $simpleNameToSplit[0];
-	my ($shortSampleName, $direction) = ($sampleName =~ m/^[0-9a-zA-Z]*-([0-9a-zA-Z_]*).*_(R[1,2])/g);
-	#print "FILE : $simpleFile \nSAMPLE : $shortSampleName \nDIRECTION :  $direction\n";
-	
-	# Comptage 
-	my $count = `wc -l $file | cut -d' ' -f1`;
-	
-	# Ajout dans le hash
-	$hash{"$shortSampleName($direction)"}{$contaminant}=$count;
-}
-
-# Extract info from hash
-my $contentToYAML = "Statistics from contamination search.\n";
-foreach my $sample (keys(%hash)) {
-	$contentToYAML.="$sample:\n";
-	foreach my $conta (keys($hash{$sample})){
-		$contentToYAML.="\t${conta}:$hash{$sample}{$conta}";
-	}
-}
-
-# Print info to file
-open(my $fh, '>', "summary.yaml") or exit 1;
-print $fh $contentToYAML;
-close $fh;
diff --git a/modules/.gitkeep b/modules/.gitkeep
deleted file mode 100644
index e69de29..0000000
-- 
GitLab


From de2be4c0b0b9629ee80cd4a03879ad5df354419c Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Tue, 31 Jan 2023 16:54:18 +0100
Subject: [PATCH 40/51] Improvements of demultiplexStat analysis

---
 bin/demuxStatsFromXML.R      | 13 +++++++++----
 modules/local/module_core.nf |  8 ++++----
 2 files changed, 13 insertions(+), 8 deletions(-)
 mode change 100644 => 100755 bin/demuxStatsFromXML.R

diff --git a/bin/demuxStatsFromXML.R b/bin/demuxStatsFromXML.R
old mode 100644
new mode 100755
index 1f33529..1aec58c
--- a/bin/demuxStatsFromXML.R
+++ b/bin/demuxStatsFromXML.R
@@ -78,11 +78,12 @@ for (pr in 1:length(projects)){
 				lane_path<-xml_path(xml_children(xml_bc[bc]))
 				BarcodeCount<-xml_text(xml_find_all(xml, paste0(lane_path,"/BarcodeCount")))
 				PerfectBarcodeCount<-xml_text(xml_find_all(xml, paste0(lane_path,"/PerfectBarcodeCount")))
+				if (length(PerfectBarcodeCount) == 0) { PerfectBarcodeCount<-0 }
 				OneMismatchBarcodeCount<-xml_text(xml_find_all(xml, paste0(lane_path,"/OneMismatchBarcodeCount")))
 				
-				if (length(OneMismatchBarcodeCount) == 0) { OneMismatchBarcodeCount<-"-" }
-				
-				df_to_add<-data.frame(project,sample_name, barcode_names[bc], BarcodeCount, PerfectBarcodeCount, OneMismatchBarcodeCount)
+				if (length(OneMismatchBarcodeCount) == 0) { OneMismatchBarcodeCount<- "-"}
+			
+				df_to_add<-data.frame(project, sample_name, barcode_names[bc], BarcodeCount, PerfectBarcodeCount, OneMismatchBarcodeCount)
 				df<-concat_df(df, df_to_add, vec.names)
 
 			}
@@ -114,7 +115,8 @@ for (line in 1:dim(indexNumber)[1]){
 	}
 	# Dual et 4 Index Cases
 	else if (mySampleNumber > 1) {
-		sub.df<-df[which(str_detect(df$Sample, mySample)), ]
+		#sub.df<-df[which(str_detect(df$Sample, mySample)), ]
+		sub.df<-df[which(df$Sample == mySample), ]
 		#print(sub.df)
 		# Parcours du sous-data.frame
 		for (l in 1:dim(sub.df)[1]) {
@@ -204,6 +206,9 @@ df2<-cbind(df2, percentOfFragment)
 
 # Export du data.frame
 cat("\nSauvegarde du data.frame.\n")
+myProject<-"DEBUG"
+# mettre des 0 Ã  la place des NA dans df2
 write.table(df2, row.names = FALSE, quote = F, sep = "\t", file = paste0("DemultiplexStats_", myProject, ".csv"))
+# Ecrire un fichier par valeur de myProject ! Cas ou il y a plusieurs projets sur la mÃªme lane.
 cat(paste0("\tLe fichier suivant Ã  Ã©tÃ© crÃ©Ã© :\t", launchDir, "/DemultiplexStats_", myProject, ".csv\n"))
 cat("\nFin normale du script, on sort.\n")
diff --git a/modules/local/module_core.nf b/modules/local/module_core.nf
index c703614..6584a87 100644
--- a/modules/local/module_core.nf
+++ b/modules/local/module_core.nf
@@ -20,9 +20,9 @@ process extractInfoForDemuxStats {
 }
 
 process demultiplexStats {
-	publishDir path: "${params.outdir}/Demux/Stats" , mode: 'copy'
+	publishDir path: "${params.outdir}/Demux" , mode: 'copy'
 	
-	module 'system/R-4.0.4_gcc-9.3.0'
+	//module 'system/R-4.0.4_gcc-9.3.0'		// Ne fonctionne pas !
 	
 	input:
 		path DemuxStatXML
@@ -35,8 +35,8 @@ process demultiplexStats {
 	
 	script:
 	"""
-		Rscript /home/sbsuser/work/Nextflow/wf-illumina-nf/wf-illumina-nf/bin/demuxStatsFromXML.R --xml $DemuxStatXML --indexNumber $IndexNumberFile --demuxSum $DemuxSummary > demultiplexStats.log
-	
+		module load system/R-4.0.4_gcc-9.3.0
+		demuxStatsFromXML.R --xml $DemuxStatXML --indexNumber $IndexNumberFile --demuxSum $DemuxSummary > demultiplexStats.log
 	"""
 }
 
-- 
GitLab


From 5a08226cec77d82cb120f3adc55867b47f38e09a Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Tue, 31 Jan 2023 16:56:31 +0100
Subject: [PATCH 41/51] Make scripts runnable

---
 bin/extractInfo.pl              |    0
 bin/extractInfoForDemuxStats.pl |    0
 bin/extractInfoForReadSets.pl   |    0
 bin/extractReads.pl             | 1012 +++++++++++++++----------------
 4 files changed, 506 insertions(+), 506 deletions(-)
 mode change 100644 => 100755 bin/extractInfo.pl
 mode change 100644 => 100755 bin/extractInfoForDemuxStats.pl
 mode change 100644 => 100755 bin/extractInfoForReadSets.pl
 mode change 100644 => 100755 bin/extractReads.pl

diff --git a/bin/extractInfo.pl b/bin/extractInfo.pl
old mode 100644
new mode 100755
diff --git a/bin/extractInfoForDemuxStats.pl b/bin/extractInfoForDemuxStats.pl
old mode 100644
new mode 100755
diff --git a/bin/extractInfoForReadSets.pl b/bin/extractInfoForReadSets.pl
old mode 100644
new mode 100755
diff --git a/bin/extractReads.pl b/bin/extractReads.pl
old mode 100644
new mode 100755
index 2a1bfc8..a3f5b2b
--- a/bin/extractReads.pl
+++ b/bin/extractReads.pl
@@ -1,506 +1,506 @@
-#!/usr/bin/perl -w
-binmode STDIN,  ':encoding(UTF-8)';
-binmode STDOUT, ':encoding(UTF-8)';
-binmode STDERR, ':encoding(UTF-8)';
-
-=head1 NAME
-
- extractReads.pl
- 
-=head1 DESCRIPTION
-
- Initailisation du pipeline wf-Illumina-nf
- Decoupage de la samplesheet
- Creation du run dans NGL-Bi
- Parametrage et lancement des analyses qualite via wf-Illumina-nf/main.nf
- 
-=head1 SYNOPSIS
-
- extractReads.pl -h | |-sequencer|s type_sequencer] 2>> /work/sbsuser/Logs/cronMACHINE.txt
-
-=head1 OPTIONS
-
- -sequencer|s : Type de sequenceur (MiSeq ou NovaSeq) -> Obligatoire
- -test|t : Activer le mode test -> Facultatif
- -mailTest|m : Preciser l'adresse mail a laquelle envoyer les messages de log -> obligatoire si test
- -samplesheetDemux|i : i comme IEM pour prÃ©ciser la samplesheet Ã© prendre en compte -> Facultatif
- -jFlow|j : pour prÃ©ciser la feuille jflow Ã© prendre en compte  -> Facultatif
- 
-=head1 EXEMPLES
-
- perl extractReads.pl -s MiSeq
- perl extractReads.pl -s MiSeq -t -m hermione.granger@poudlard.uk
-
- 
-=head1 DEPENDENCIES
-
- - Web service permettant la recuperation des adresses mails a partir de l'id 
-
-=head1 AUTHOR
- Jules Sabban pour Plateforme genomique Toulouse (get-plage.bioinfo@genotoul.fr)
- 
-=cut
-
-###################################################################
-#
-#						LIBRAIRIES
-#
-###################################################################
-use strict;
-use Getopt::Long;
-use utf8;
-use Log::Log4perl ();
-use Log::Log4perl qw(:easy);#FATAL ERROR WARN INFO DEBUG TRACE
-#use File::Util;
-use File::chdir;
-use File::Copy "cp";
-use File::Copy "move";
-use Cwd 'abs_path';
-
-
-###################################################################
-#
-#						MAIN
-#
-###################################################################
-MAIN:
-{
-	###############################################################
-	#						INITIALISATION
-	###############################################################	
-	
-	# Initialisation du log
-	Log::Log4perl -> easy_init( { 	level    => $TRACE,
-                            		utf8     => 1,
-                            		layout   => '[%d][%p> extractReads.pl:L%L %M] %m%n' } );
-	my $logger = Log::Log4perl -> get_logger();
-
-	# RÃ©cupÃ©ration des options
-	my $help = 0 ;
-	my $sequencer = "";
-	my $demuxType_int;
-	my $demuxType;
-	my $file_samplesheet = "";
-	my $file_jflow = "";
-	my $arg_timestamp = ""; # on supprime
-	my $arg_jobid = "";	# on supprime
-	my $mailTEST = "";
-	my $checkTest = "";
-
-	GetOptions ('help|h' => \$help, 
-				'sequencer|s=s' => \$sequencer,
-				'samplesheetDemux|i:s'=> \$file_samplesheet, # i forIEM...
-				'jFlow|j:s'=> \$file_jflow,
-				'timestamp:i'=>\$arg_timestamp,
-				'demuxJobid:s'=>\$arg_jobid,
-				'mailTesteur|m:s' => \$mailTEST,
-				'isTest|t' => \$checkTest,
-	);
-	
-	if($help){
-		pod2usage(-verbose => 1 );
-	}
-
-	print STDERR "\n";
-	print STDERR "#    #    #    #    #    #    #    #    #    #\n";
-	print STDERR "#    #    extractReads.pl is happening  #    #\n";
-	print STDERR "#    #    #    #    #    #    #    #    #    #\n";
-	print STDERR "\n";
-		
-	$logger -> info("VÃ©rification des arguments");
-
-	# Verification du sÃ©quenceur
-	$sequencer ne ""? $logger -> info("\tSequenceur = " . $sequencer) : $logger -> logdie("\tPas de sÃ©quenceur prÃ©cisÃ©...");
-	unless ($sequencer eq "MiSeq" or $sequencer eq "NovaSeq"){
-		$logger -> logdie("Erreur dans le nom du sequenceur : ".$sequencer." n'existe pas");
-	}
-
-	# vÃ©rification de la SS
-	$file_samplesheet ne "" ? $logger -> info("\tSamplesheet fournie = " . $file_samplesheet ." !") : $logger -> info("\tPas de samplesheet fournie!");
-	
-	# Gestion du test et/ou des mails
-	$mailTEST ne ""? $logger -> info("\tmailTEST = " . $mailTEST) : $logger -> info("\tPas de mailTEST!");
-	$checkTest ne ""? $logger -> info("\tcheckTEST = " . $checkTest) : $logger -> info("\tPas en mode test!");
-	$checkTest = $checkTest ne ""? 1 : 0;
-	# Si on est en test, on veut une adresse mail!
-	$logger -> logdie("MODE TEST ACTIVE, MERCI DE DONNER UN MAIL AVEC L'OPTION -m MONMAIL\@MONSERVEUR") if( ($checkTest) && ($mailTEST eq "") );
-	my $raw_data="";
-	my $path_to_scripts="";
-	if ($checkTest) {
-		$raw_data = $sequencer eq "MiSeq"? "/home/sbsuser/work/Nextflow/wf-illumina-nf/data_test/MiSeq" : "/home/sbsuser/work/Nextflow/wf-illumina-nf/data_test/NovaSeq";
-		$path_to_scripts=abs_path($0);
-	} else {
-		$raw_data="/$sequencer";
-		$path_to_scripts=abs_path($0);
-	}
-	$logger -> info("\tLes donnÃ©es brutes sont ici : $raw_data");
-	
-	# Configuration API NGL-Bi
-	my $ngl_api_base_prod = "/save/sbsuser/scripts-ngs/NGL-Bi_client_Current/IG/SystemeInteractionNGL-Bi/";
-	my $ngl_api_base_test = "/save/devcrgs/src/NGL_REST_Client/ngl-bi_client/IG/SystemeInteractionNGL-Bi/";
-	my $ngl_api_base = $checkTest? $ngl_api_base_test : $ngl_api_base_prod;
-	my $ngl_bi_scripts="/save/sbsuser/scripts-ngs/NGL-Bi_client_Current/GeT/perl";
-	$ENV{'APIPERL'}=$ngl_api_base;
-	$ENV{'CONFFILE'}=$ngl_api_base."conf/prod_illumina_qc.conf";
-	loadConfFile();
-	unshift @INC,  $ngl_api_base."Common_tools/src/perl/lib/";
-	unshift @INC,  $ngl_api_base."DB_tools/src/perl/lib/";
-	require illumina;
-	require json;
-	$logger -> info("Variables d'environnement pour NGL-Bi chargÃ©es depuis : ".$ngl_api_base);
-	# Initialisation des variables
-	my $runExistsInNGL = 0;
-	my $NGLBiRunCreatedFile = 'RunNGL-Bi.created';
-	my $NGLBiReadsetCreatedFil = 'ReadsetsNGL-Bi.created';
-	my $NGLBiRunName = "";
-	my $NGLSQExperimentCode;
-	
-	# ParamÃ©trage gÃ©nÃ©ral
-	my $prefixLogFolder = "PipelineLogs_Lane";
-
-	
-	###############################################################
-	#					RECHERCHE SAMPLESHEET
-	###############################################################
-	## Recherche SS
-	### parcours des sous rÃ©pertoires de /$sequencer
-	my $regexpPSS = '^[0-9]{8}_.*_BULKDEMUX_.*csv$';
-	#my @run_directories = $f -> list_dir('/'.$sequencer => {dirs_only = 1, no_fsdots = 1}=; # ls 
-	my @run_directories = `ls $raw_data`; $? and $logger -> logdie("[Erreur] Impossible de rÃ©cupÃ©er la liste des dossiers de $raw_data}");
-	foreach my $dir (@run_directories){
-		chomp($dir);
-	    #my @RunInfo = ();
-	    my @RunInfo = split("_", $dir); # [$#dir]
-	    # Extraction des infos contenues dans le nom du rÃ©pertoire
-		my $runDate = $RunInfo[0];
-		my ($annee, $mois, $jour) = ($runDate =~ m/([0-9]{2})([0-9]{2})([0-9]{2})/);
-		my $sequencerID = $RunInfo[1];
-		my $barcodeFlowcell; # Sert Ã© l'unicitÃ© des noms des .fastq.gz
-		if ($RunInfo[3] =~ m/000000000-/){
-			my @FCBarcode = split('-', $RunInfo[3]);
-			$barcodeFlowcell = $FCBarcode[$#FCBarcode];
-		} else {
-			$barcodeFlowcell = $RunInfo[3];
-		}
-
-		# Recherche de la SS
-		$logger -> info("Recherche de SampleSheet dans $raw_data/$dir");
-		chdir "$raw_data/$dir" or $logger -> logdie("[Erreur] Impossible de se dÃ©placer dans $raw_data/$dir");
-		#$CWD = "$raw_data/$dir" or $logger -> logdie("[Erreur] Impossible de se dÃ©placer dans $raw_data/$dir");
-		my $preSampleSheet = "PreSampleSheet.csv";
-		my $lastPSS = `ls -t | egrep $regexpPSS | head -1`; $? and $logger -> logdie("[Erreur] Recup de la derniere BulkSS");
-		chomp($lastPSS);
-		if( $lastPSS ne ""){
-			$logger -> info("Check de PSS ".$lastPSS);
-			my $checkPSS = check_my_samplesheet($lastPSS, $preSampleSheet);
-			
-			###############################################################
-			#					CREATION RUN NGL-Bi
-			###############################################################
-			$NGLSQExperimentCode = getNGLSeqExperimentCode($preSampleSheet);
-			$runExistsInNGL = 1 if($NGLSQExperimentCode ne " -");
-			if ($runExistsInNGL){
-				if (! -e $NGLBiRunCreatedFile){
-					# INTEGRATION DU RUN A NGL-BI	#   #   #   #   #   #   #   #   #   #   #
-					$logger -> info("Pas de fichier $NGLBiRunCreatedFile dans $raw_data/$dir -> Le run NGL-Bi semble ne pas exister ");
-					my $commandNGLBiRun = "perl $ngl_bi_scripts/createNGL-BiRun.pl --sequencer $sequencer --NGLSqExperimentCode $NGLSQExperimentCode";
-					$logger -> info("\tCreation du run avec : ".$commandNGLBiRun);
-					my $result_commandNGLBiRun = `$commandNGLBiRun 2>&1`; 
-						$? and $logger -> logdie("[Erreur]Lancement de createNGL-BiRun.pl\n".$result_commandNGLBiRun);
-					$logger -> info("\n".$result_commandNGLBiRun);
-				}else{
-					$logger -> info("Le run existe dÃ©jÃ  dans NGL-Bi");
-				}
-			}else{
-				$logger -> info("\tRun en autonomie : n'existe pas dans NGL-SQ");
-				`touch $NGLBiRunCreatedFile`; $? and $logger -> logdie("[Erreur] Impossible de crÃ©er le fichier");
-			}
-		} else {
-			$logger -> logdie("Aucune SampleSheet trouvÃ©e dans $raw_data/$dir");
-		}
-	
-		# Recherche du fichier de fin de run
-		my $file2checkForEndOfRun = $sequencerID eq "M07093" ? "RTAComplete.txt" : "CopyComplete.txt";
-		if (! -e $file2checkForEndOfRun){
-			$logger -> info("Pas de fichier de fin de run -> sortie du script!");
-			exit;
-		} else {
-			# DÃ©tection du nombre de lane
-			$logger -> info("DÃ©tection du nombre de headers") ;
-			my $nbHeader = `grep "Header" $preSampleSheet | wc -l` ;  $? and $logger -> logdie("Comptage de [Header] en echec");
-			chomp($nbHeader);
-			$logger -> info("\t$preSampleSheet -> Nb de [header] = ".$nbHeader );
-			
-			# CrÃ©ation des rÃ©pertoires de logs par lane
-			$logger -> info("DÃ©tection des rÃ©pertoires de log");
-			foreach my $count (1..$nbHeader){
-				my $logFolder = $prefixLogFolder.$count;
-				if (! -d "$raw_data/$dir/$logFolder"){ # Si le rep n'existe pas, alors on le crÃ©e
-					$logger -> info("\tCrÃ©ation du rÃ©pertoire".$logFolder." + chmod 770" );
-					mkdir "$raw_data/$dir/$logFolder" or $logger -> logdie("Impossible de crÃ©er le rÃ©pertoire ".$logFolder );
-					chmod 0770, "$raw_data/$dir/$logFolder" or $logger -> logdie($!);
-				} else {
-					$logger -> info("\tLe rÃ©pertoire ".$logFolder." existe dÃ©jÃ©");
-				}
-			}
-			
-			###############################################################
-			#					DECOUPAGE SAMPLESHEET
-			###############################################################
-			$logger -> info("DÃ©coupe de ".$preSampleSheet) ;
-			my $laneExtraite = '';
-			my $counterIEMFiles = 0; #counter to store the number of IEM files found in the bulk file
-			my $IEMFileContent = '';
-			my $IEMFilePrefixe = $lastPSS;
-			$IEMFilePrefixe =~ s/BULKDEMUX/IEM/g; # Replace Bulk by IEM
-			$IEMFilePrefixe =~ s/.csv//g; # Supprime le .csv de la fin pour faciliter l'ajout du compteur de lanes
-			$IEMFilePrefixe .= '_Lane';
-		
-			open my $handle, '<', $preSampleSheet;
-			chomp(my @lines = <$handle>);
-			close $handle;
-			
-			foreach my $line (@lines) {
-				if ($line eq '[Header]'){
-					if($counterIEMFiles > 0){ # a 1st line  was already found and $IEMFileContent contains a single IEM file content
-						# ecriture du fichier
-						my $subSampleSheet = "$raw_data/$dir/${prefixLogFolder}${laneExtraite}/${IEMFilePrefixe}_IEM_Lane${laneExtraite}.csv";
-						print2file($IEMFileContent, $subSampleSheet);
-					}
-					$IEMFileContent = '';
-					$counterIEMFiles++;
-				}
-				$IEMFileContent .= $line."\n";
-				($laneExtraite) = $line =~ m/^(\d),/;
-				$laneExtraite = '1' if ($sequencer eq 'MiSeq' );
-			}
-			# ecriture du dernier fichier
-			my $subSampleSheet = "$raw_data/$dir/${prefixLogFolder}${laneExtraite}/${IEMFilePrefixe}_IEM_Lane${laneExtraite}.csv";
-			print2file($IEMFileContent, $subSampleSheet);
-		
-			# DÃ©sactivation de la SampleSheet
-			$logger -> info("DÃ©sactivation de la SampleSheet.");
-			move($lastPSS, $lastPSS.".old") or $logger -> logdie("Le renommage de ".$lastPSS." en .old est en erreur ".$!);
-			
-			###############################################################
-			#					INTEROP DANS NEXTCLOUD
-			###############################################################
-			if (!$checkTest){
-				# RÃ©cupÃ©ration de l'annÃ©e pour le rÃ©pertoire de destination
-				my $year = "20".$annee;
-				
-				# Ecriture de la commande de synchronisation
-				my $aws_source = "$raw_data/$dir/";
-				my $aws_target = "s3://partage/externes/Illumina-SAV/$sequencer/$year/$dir"; #X:\partage\externes\Illumina-SAV\NovaSeq		[$#dir]
-				my $aws_prefixcmd = "aws s3 --endpoint-url https://s3r-tls.stockage.inra.fr";
-				
-				# Ecriture du script de lancement de synchronisation
-				my $aws_script_file = "scriptAWS_$sequencerID.sbatch";
-				my $aws_script = "#!/bin/sh \n";
-				$aws_script .= "#SBATCH -p wflowq\n#SBATCH -t 20\n#SBATCH --mem-per-cpu=200M\n";
-				$aws_script .= "#SBATCH -J $aws_script_file\n#SBATCH -e %x.e%j\n#SBATCH -o %x.o%j\n\n";
-				$aws_script .= "module load system/Python-3.6.7_shared\n";
-				$aws_script .= "$aws_prefixcmd sync $aws_source $aws_target ";
-				$aws_script .= "--exclude \"*\" --include \"[Rr]un[A-Za-z]*.xml\" --include \"InterOp/[A-Za-z]*.bin\" ";
-				$aws_script .= "--exclude \"InterOp/C[0-9]*.1*\"\n";
-				print2file($aws_script, "$aws_source/$aws_script_file");
-				
-				
-				# Lancement du script
-				my $sleepLastingForAWS = 300;
-				my $aws_launchcmd = "sbatch $aws_script_file";
-				my $aws_joboutput = `$aws_launchcmd`; $? and $logger -> logdie("Commande $aws_launchcmd impossible : ".$!);
-				my ($aws_jobID) = $aws_joboutput =~ m/Submitted batch job (\d+)/;
-				chomp($aws_jobID);
-				$logger -> info("\tDossier " . $aws_source." -> JobID : ".$aws_jobID."\nCommande exÃ©cutÃ©e : " . $aws_launchcmd );
-				
-				# Attente de la fin du job
-				my $boolOver = is_my_jobID_over($aws_jobID);
-				while (!$boolOver){
-					$boolOver = is_my_jobID_over($aws_jobID);
-					if (!$boolOver){
-						$logger -> info("\tEn attente de la fin de $aws_jobID, Ã© dans ".($sleepLastingForAWS/60)." minutes!");
-						sleep($sleepLastingForAWS); # toutes les 5 minutes (*60 = 300)
-					}
-				}
-				
-				# VÃ©rification qu'on est bon, sinon envoi d'un mail pour prÃ©venir
-				if (-e $aws_script_file.".e".$aws_jobID){
-					$logger -> info("\tLe fichier d'erreur pour AWS existe bien!");
-					if (! -z $aws_script_file.".e".$aws_jobID){
-						my $testObjectPrefixe = $checkTest? "[TEST]" : "";
-						$logger -> error("\tLe fichier d'erreur pour AWS n'est pas vide, il a dÃ© se passer quelque chose de louche, Ã© investiguer!" );
-						my $mailRecipients = $checkTest? $mailTEST :'get-plage.bioinfo@genotoul.fr';
-						my $mailContent = "Une erreur est survenue lors de la copie des fichiers SAV vers CEPH avec la commande contenue dans\n${aws_source}${aws_script_file}.\n\n";
-						$mailContent .= "Le fichier d'erreur contient \n".`cat $aws_script_file.e$aws_jobID`;
-						send_and_check_my_email($mailContent, "${$testObjectPrefixe}Erreur sauvegarde SAV sur CEPH", $mailRecipients, $mailRecipients);
-					}else{
-						$logger -> info("\tLe fichier d'erreur pour AWS est vide, j'aime quand un plan se dÃ©roule sans accroc!");
-					}
-				}
-			} else { $logger -> info("Nous sommes en mode test : pas besoin de sauvegarder InterOp"); }
-
-			###############################################################
-			#					CREATION READSETS NGL-Bi
-			###############################################################
-=head1 A_SUPPRIMER
-			if ($runExistsInNGL){
-				# parcours des dossier PipelineLogs_Lane*
-				
-				# recherche du $NGLBiReadsetCreatedFile
-				## Si trouvÃ© : on ne fait rien, les readsets existent deja
-				
-				
-				
-				
-				if (! -e $NGLBiReadsetCreatedFil){
-					# CREATION DES READSETS DANS NGL-BI	#   #   #   #   #   #   #   #   #   #   #
-					$logger -> info("Pas de fichier $NGLBiReadsetCreatedFil dans $raw_data/$dir -> Les readsets ne semblent ne pas exister dans NGL-Bi");
-				}
-			}
-=cut 
-
-			###############################################################
-			#					LANCEMENT DE NEXTFLOW
-			###############################################################
-			# crÃ©ation du dossier dans /work, se dÃ©placer dedans et lancer nextflow
-					
-		} # Fichier de fin de run trouvÃ©
-	} # fin parcours des rÃ©pertoires
-}
-
-###################################################################
-#
-#						FONCTIONS
-#
-###################################################################
-
-sub print2file {
-	my ($content, $file2write) = @_;
-	my $logger = Log::Log4perl -> get_logger('print2file');
-	$logger -> info("\tEcriture du fichier $file2write");
-	open(my $fh, '>', $file2write) or exit 1;
-	print $fh $content;
-	close $fh;
-}
-
-sub check_my_samplesheet{
-	my ($file2check, $file2write) = @_;
-	my $logger = Log::Log4perl -> get_logger('check_my_samplesheet');
-
-	my $isfile2checkwindows;
-	my $isfile2checklinux;
-	
-	$logger -> info("Etude de $file2check");
-	if (-s $file2check){ # $file2check exists and has a non zero size
-		$logger -> info("VÃ©rification des fins de ligne");
-		$isfile2checkwindows = is_my_file_Windows($file2check);
-		$logger -> info("Sortie de is_my_file_Windows : " . $isfile2checkwindows);
-		if ($isfile2checkwindows){
-			$logger -> warn($file2check." a des fins de ligne Windows : on le convertit!");
-			convert_file_2_linux($file2check);
-			my $isfile2checkwindows2 = is_my_file_Windows($file2check);
-			if ($isfile2checkwindows2){
-				$logger -> logdie("La conversion dos2linux n'a pas fonctionnÃ©!");
-			} else {
-				$logger -> info("La conversion dos2linux a fonctionnÃ©!");
-			}
-		}else {
-			$logger -> info("Donc fins de ligne de " . $file2check . " : Linux");
-		}
-		
-		$logger -> info("Etude de $file2write");
-		if(-s $file2write){# $file2write a une taille diffÃ©rente de 0 byte
-			if( $file2write eq $file2check ){#Fichier correct
-				$logger -> info($file2write." est dÃ©jÃ© l'Ã©quivalent de ".$file2check.", on garde!");
-			}else{#Renommer le nouveau fichier CSV $file2write et l'ancien OLD_$file2write
-				chomp($file2check);
-				$logger -> info("Copie de ".$file2write." en OLD_$file2write");
-				cp($file2write,"OLD_$file2write") or $logger -> logdie("Impossible de copier le fichier ".$file2write);
-				$logger -> info("Copie de ".$file2check." en ".$file2write);
-				cp($file2check,$file2write)or $logger -> logdie("Impossible de copier le fichier ".$file2check);
-			}
-		}else{#Si $file2write est vide, on en fait une copie avec le nom correct
-			chomp($file2check);
-			$logger -> info("Copie de ".$file2check." en ".$file2write);
-			cp($file2check,$file2write)or $logger -> logdie("Impossible de copier le fichier ".$file2check);
-		}
-		return 1;
-	}else{
-		$logger -> info("Il n'y a pas de SampleSheet ".$file2check);
-		return 0;
-	}
-}
-
-# RÃ©cupere le code d'expÃ©rience NGL-SQ dans une samplesheet
-sub getNGLSeqExperimentCode{
-	my ($samplesheet) = @_;
-	my $logger = Log::Log4perl -> get_logger('getNGLSeqExperimentCode');
-	my $NGLSQExperimentCode = "";
-	my $experimentName_ligne = `grep "Experiment Name" $samplesheet | head -1` ;  $? and $logger -> logdie("RÃ©cupÃ©ration de 'Experiment Name' dans '".$samplesheet."' en echec" );
-	($NGLSQExperimentCode) = $experimentName_ligne =~ m/Experiment Name,(.+)$/;
-	$logger -> info("NGLSQExperimentCode : ".$NGLSQExperimentCode);
-	$logger -> info("L'expÃ©rience ne sera pas rentrÃ©e dans NGL-Bi car pas de correspondance dans NGL-SQ") if($NGLSQExperimentCode eq '-');
-	$logger -> logdie("Echec de la rÃ©cup du code d'expÃ©rience") if($NGLSQExperimentCode eq "");  
-	return $NGLSQExperimentCode;
-}
-
-# Charge les variables d'environnement du fichier de configuration NGL
-sub loadConfFile{
-	my $logger = Log::Log4perl -> get_logger('loadConfFile');
-	unless ($ENV{CONFFILE}) {
-		$logger -> logdie("$0: Database configuration file not defined ! Initialize 'CONFFILE' with configuration file path in your environment");
-	};
-	my $dbconf_file = $ENV{CONFFILE};
-	unless (-f $dbconf_file) {
-		$logger -> logdie("$0: Database configuration file not exist: $dbconf_file. It's necessary for continue");
-	};
-	open my $handle, '<', $dbconf_file;
-	chomp( my @lines = <$handle> );
-	close $handle;
-	foreach my $line (@lines) {
-		$line =~ s/#.*//o;
-		unless ($line) { next; }
-		if ($line =~ /(.*)=(.*)/o) {
-			my $key = $1;
-			my $value = $2;
-			$key =~ s/^\s*//o;
-			$key =~ s/\s*$//o;
-			$value =~ s/^\s*//o;
-			$value =~ s/\s*$//o;
-			$ENV{$key} = $value;
-		}else {
-			$logger -> logdie("$0: Can't load variable to database configuration file $dbconf_file in line: '$_'");
-		}
-	}
-}
-
-=head2 function is_my_file_Windows
-
-	Title		 : is_my_file_Windows
-	Usage		 : $boolean = is_my_file_Windows($file);
-	Prerequisite : None
-	Function	 : Retourne 0 si les fins de ligne du fichier sont linux, 1 si Windows
-	Returns	     : Nombre
-	Args		 : $file, string
-	Globals	     : none
-
-=cut
-
-sub is_my_file_Windows {
-	my ($file) = @_ ;
-	my $logger = Log::Log4perl -> get_logger('is_my_file_Windows');
-	$logger -> info("Fichier en entrÃ©e : " . $file);
-	my $fileOutput;
-	my $ismyfileWindows = 0;
-	
-	$fileOutput = `file $file`; $? and $logger -> logdie("[Erreur]Lancement de file");
-	chomp($fileOutput);
-	$logger -> info("Message de sortie : " . $fileOutput);
-	if ($fileOutput =~ /with CRLF.* line terminators/){
-		$logger -> info("Le fichier est Windows");
-		$ismyfileWindows = 1;
-	}
-	return $ismyfileWindows;	
-}
-
+#!/usr/bin/perl -w
+binmode STDIN,  ':encoding(UTF-8)';
+binmode STDOUT, ':encoding(UTF-8)';
+binmode STDERR, ':encoding(UTF-8)';
+
+=head1 NAME
+
+ extractReads.pl
+ 
+=head1 DESCRIPTION
+
+ Initialisation du pipeline wf-Illumina-nf
+ Decoupage de la samplesheet
+ Creation du run dans NGL-Bi
+ Parametrage et lancement des analyses qualite via wf-Illumina-nf/main.nf
+ 
+=head1 SYNOPSIS
+
+ extractReads.pl -h | [-sequencer|s type_sequencer] 2>> /work/sbsuser/Logs/cronMACHINE.txt
+
+=head1 OPTIONS
+
+ -sequencer|s : Type de sequenceur (MiSeq ou NovaSeq) -> Obligatoire
+ -isTest|t : Activer le mode test -> Facultatif
+ -mailTesteur|m : Preciser l'adresse mail a laquelle envoyer les messages de log -> obligatoire si test
+ -samplesheetDemux|i : i comme IEM pour preciser la samplesheet a prendre en compte -> Facultatif
+ -jFlow|j : pour preciser la feuille jflow a prendre en compte  -> Facultatif
+ 
+=head1 EXEMPLES
+
+ perl extractReads.pl -s MiSeq
+ perl extractReads.pl -s MiSeq -t -m hermione.granger@poudlard.uk
+
+ 
+=head1 DEPENDENCIES
+
+ - Web service permettant la recuperation des adresses mails a partir de l'id 
+
+=head1 AUTHOR
+ Jules Sabban pour Plateforme genomique Toulouse (get-plage.bioinfo@genotoul.fr)
+ 
+=cut
+
+###################################################################
+#
+#						LIBRAIRIES
+#
+###################################################################
+use strict;
+use Getopt::Long;
+use utf8;
+use Log::Log4perl ();
+use Log::Log4perl qw(:easy);#FATAL ERROR WARN INFO DEBUG TRACE
+#use File::Util;
+use File::chdir;
+use File::Copy "cp";
+use File::Copy "move";
+use Cwd 'abs_path';
+
+
+###################################################################
+#
+#						MAIN
+#
+###################################################################
+MAIN:
+{
+	###############################################################
+	#						INITIALISATION
+	###############################################################	
+	
+	# Initialisation du log
+	Log::Log4perl -> easy_init( { 	level    => $TRACE,
+                            		utf8     => 1,
+                            		layout   => '[%d][%p> extractReads.pl:L%L %M] %m%n' } );
+	my $logger = Log::Log4perl -> get_logger();
+
+	# RÃ©cupÃ©ration des options
+	my $help = 0 ;
+	my $sequencer = "";
+	my $demuxType_int;
+	my $demuxType;
+	my $file_samplesheet = "";
+	my $file_jflow = "";
+	my $arg_timestamp = ""; # on supprime
+	my $arg_jobid = "";	# on supprime
+	my $mailTEST = "";
+	my $checkTest = "";
+
+	GetOptions ('help|h' => \$help, 
+				'sequencer|s=s' => \$sequencer,
+				'samplesheetDemux|i:s'=> \$file_samplesheet, # i forIEM...
+				'jFlow|j:s'=> \$file_jflow,
+				'timestamp:i'=>\$arg_timestamp,
+				'demuxJobid:s'=>\$arg_jobid,
+				'mailTesteur|m:s' => \$mailTEST,
+				'isTest|t' => \$checkTest,
+	);
+	
+	if($help){
+		pod2usage(-verbose => 1 );
+	}
+
+	print STDERR "\n";
+	print STDERR "#    #    #    #    #    #    #    #    #    #\n";
+	print STDERR "#    #    extractReads.pl is happening  #    #\n";
+	print STDERR "#    #    #    #    #    #    #    #    #    #\n";
+	print STDERR "\n";
+		
+	$logger -> info("VÃ©rification des arguments");
+
+	# Verification du sÃ©quenceur
+	$sequencer ne ""? $logger -> info("\tSequenceur = " . $sequencer) : $logger -> logdie("\tPas de sÃ©quenceur prÃ©cisÃ©...");
+	unless ($sequencer eq "MiSeq" or $sequencer eq "NovaSeq"){
+		$logger -> logdie("Erreur dans le nom du sequenceur : ".$sequencer." n'existe pas");
+	}
+
+	# vÃ©rification de la SS
+	$file_samplesheet ne "" ? $logger -> info("\tSamplesheet fournie = " . $file_samplesheet ." !") : $logger -> info("\tPas de samplesheet fournie!");
+	
+	# Gestion du test et/ou des mails
+	$mailTEST ne ""? $logger -> info("\tmailTEST = " . $mailTEST) : $logger -> info("\tPas de mailTEST!");
+	$checkTest ne ""? $logger -> info("\tcheckTEST = " . $checkTest) : $logger -> info("\tPas en mode test!");
+	$checkTest = $checkTest ne ""? 1 : 0;
+	# Si on est en test, on veut une adresse mail!
+	$logger -> logdie("MODE TEST ACTIVE, MERCI DE DONNER UN MAIL AVEC L'OPTION -m MONMAIL\@MONSERVEUR") if( ($checkTest) && ($mailTEST eq "") );
+	my $raw_data="";
+	my $path_to_scripts="";
+	if ($checkTest) {
+		$raw_data = $sequencer eq "MiSeq"? "/home/sbsuser/work/Nextflow/wf-illumina-nf/data_test/MiSeq" : "/home/sbsuser/work/Nextflow/wf-illumina-nf/data_test/NovaSeq";
+		$path_to_scripts=abs_path($0);
+	} else {
+		$raw_data="/$sequencer";
+		$path_to_scripts=abs_path($0);
+	}
+	$logger -> info("\tLes donnÃ©es brutes sont ici : $raw_data");
+	
+	# Configuration API NGL-Bi
+	my $ngl_api_base_prod = "/save/sbsuser/scripts-ngs/NGL-Bi_client_Current/IG/SystemeInteractionNGL-Bi/";
+	my $ngl_api_base_test = "/save/devcrgs/src/NGL_REST_Client/ngl-bi_client/IG/SystemeInteractionNGL-Bi/";
+	my $ngl_api_base = $checkTest? $ngl_api_base_test : $ngl_api_base_prod;
+	my $ngl_bi_scripts="/save/sbsuser/scripts-ngs/NGL-Bi_client_Current/GeT/perl";
+	$ENV{'APIPERL'}=$ngl_api_base;
+	$ENV{'CONFFILE'}=$ngl_api_base."conf/prod_illumina_qc.conf";
+	loadConfFile();
+	unshift @INC,  $ngl_api_base."Common_tools/src/perl/lib/";
+	unshift @INC,  $ngl_api_base."DB_tools/src/perl/lib/";
+	require illumina;
+	require json;
+	$logger -> info("Variables d'environnement pour NGL-Bi chargÃ©es depuis : ".$ngl_api_base);
+	# Initialisation des variables
+	my $runExistsInNGL = 0;
+	my $NGLBiRunCreatedFile = 'RunNGL-Bi.created';
+	my $NGLBiReadsetCreatedFil = 'ReadsetsNGL-Bi.created';
+	my $NGLBiRunName = "";
+	my $NGLSQExperimentCode;
+	
+	# ParamÃ©trage gÃ©nÃ©ral
+	my $prefixLogFolder = "PipelineLogs_Lane";
+
+	
+	###############################################################
+	#					RECHERCHE SAMPLESHEET
+	###############################################################
+	## Recherche SS
+	### parcours des sous rÃ©pertoires de /$sequencer
+	my $regexpPSS = '^[0-9]{8}_.*_BULKDEMUX_.*csv$';
+	#my @run_directories = $f -> list_dir('/'.$sequencer => {dirs_only = 1, no_fsdots = 1}=; # ls 
+	my @run_directories = `ls $raw_data`; $? and $logger -> logdie("[Erreur] Impossible de rÃ©cupÃ©er la liste des dossiers de $raw_data}");
+	foreach my $dir (@run_directories){
+		chomp($dir);
+	    #my @RunInfo = ();
+	    my @RunInfo = split("_", $dir); # [$#dir]
+	    # Extraction des infos contenues dans le nom du rÃ©pertoire
+		my $runDate = $RunInfo[0];
+		my ($annee, $mois, $jour) = ($runDate =~ m/([0-9]{2})([0-9]{2})([0-9]{2})/);
+		my $sequencerID = $RunInfo[1];
+		my $barcodeFlowcell; # Sert Ã© l'unicitÃ© des noms des .fastq.gz
+		if ($RunInfo[3] =~ m/000000000-/){
+			my @FCBarcode = split('-', $RunInfo[3]);
+			$barcodeFlowcell = $FCBarcode[$#FCBarcode];
+		} else {
+			$barcodeFlowcell = $RunInfo[3];
+		}
+
+		# Recherche de la SS
+		$logger -> info("Recherche de SampleSheet dans $raw_data/$dir");
+		chdir "$raw_data/$dir" or $logger -> logdie("[Erreur] Impossible de se dÃ©placer dans $raw_data/$dir");
+		#$CWD = "$raw_data/$dir" or $logger -> logdie("[Erreur] Impossible de se dÃ©placer dans $raw_data/$dir");
+		my $preSampleSheet = "PreSampleSheet.csv";
+		my $lastPSS = `ls -t | egrep $regexpPSS | head -1`; $? and $logger -> logdie("[Erreur] Recup de la derniere BulkSS");
+		chomp($lastPSS);
+		if( $lastPSS ne ""){
+			$logger -> info("Check de PSS ".$lastPSS);
+			my $checkPSS = check_my_samplesheet($lastPSS, $preSampleSheet);
+			
+			###############################################################
+			#					CREATION RUN NGL-Bi
+			###############################################################
+			$NGLSQExperimentCode = getNGLSeqExperimentCode($preSampleSheet);
+			$runExistsInNGL = 1 if($NGLSQExperimentCode ne " -");
+			if ($runExistsInNGL){
+				if (! -e $NGLBiRunCreatedFile){
+					# INTEGRATION DU RUN A NGL-BI	#   #   #   #   #   #   #   #   #   #   #
+					$logger -> info("Pas de fichier $NGLBiRunCreatedFile dans $raw_data/$dir -> Le run NGL-Bi semble ne pas exister ");
+					my $commandNGLBiRun = "perl $ngl_bi_scripts/createNGL-BiRun.pl --sequencer $sequencer --NGLSqExperimentCode $NGLSQExperimentCode";
+					$logger -> info("\tCreation du run avec : ".$commandNGLBiRun);
+					my $result_commandNGLBiRun = `$commandNGLBiRun 2>&1`; 
+						$? and $logger -> logdie("[Erreur]Lancement de createNGL-BiRun.pl\n".$result_commandNGLBiRun);
+					$logger -> info("\n".$result_commandNGLBiRun);
+				}else{
+					$logger -> info("Le run existe dÃ©jÃ  dans NGL-Bi");
+				}
+			}else{
+				$logger -> info("\tRun en autonomie : n'existe pas dans NGL-SQ");
+				`touch $NGLBiRunCreatedFile`; $? and $logger -> logdie("[Erreur] Impossible de crÃ©er le fichier");
+			}
+		} else {
+			$logger -> logdie("Aucune SampleSheet trouvÃ©e dans $raw_data/$dir");
+		}
+	
+		# Recherche du fichier de fin de run
+		my $file2checkForEndOfRun = $sequencerID eq "M07093" ? "RTAComplete.txt" : "CopyComplete.txt";
+		if (! -e $file2checkForEndOfRun){
+			$logger -> info("Pas de fichier de fin de run -> sortie du script!");
+			exit;
+		} else {
+			# DÃ©tection du nombre de lane
+			$logger -> info("DÃ©tection du nombre de headers") ;
+			my $nbHeader = `grep "Header" $preSampleSheet | wc -l` ;  $? and $logger -> logdie("Comptage de [Header] en echec");
+			chomp($nbHeader);
+			$logger -> info("\t$preSampleSheet -> Nb de [header] = ".$nbHeader );
+			
+			# CrÃ©ation des rÃ©pertoires de logs par lane
+			$logger -> info("DÃ©tection des rÃ©pertoires de log");
+			foreach my $count (1..$nbHeader){
+				my $logFolder = $prefixLogFolder.$count;
+				if (! -d "$raw_data/$dir/$logFolder"){ # Si le rep n'existe pas, alors on le crÃ©e
+					$logger -> info("\tCrÃ©ation du rÃ©pertoire".$logFolder." + chmod 770" );
+					mkdir "$raw_data/$dir/$logFolder" or $logger -> logdie("Impossible de crÃ©er le rÃ©pertoire ".$logFolder );
+					chmod 0770, "$raw_data/$dir/$logFolder" or $logger -> logdie($!);
+				} else {
+					$logger -> info("\tLe rÃ©pertoire ".$logFolder." existe dÃ©jÃ©");
+				}
+			}
+			
+			###############################################################
+			#					DECOUPAGE SAMPLESHEET
+			###############################################################
+			$logger -> info("DÃ©coupe de ".$preSampleSheet) ;
+			my $laneExtraite = '';
+			my $counterIEMFiles = 0; #counter to store the number of IEM files found in the bulk file
+			my $IEMFileContent = '';
+			my $IEMFilePrefixe = $lastPSS;
+			$IEMFilePrefixe =~ s/BULKDEMUX/IEM/g; # Replace Bulk by IEM
+			$IEMFilePrefixe =~ s/.csv//g; # Supprime le .csv de la fin pour faciliter l'ajout du compteur de lanes
+			$IEMFilePrefixe .= '_Lane';
+		
+			open my $handle, '<', $preSampleSheet;
+			chomp(my @lines = <$handle>);
+			close $handle;
+			
+			foreach my $line (@lines) {
+				if ($line eq '[Header]'){
+					if($counterIEMFiles > 0){ # a 1st line  was already found and $IEMFileContent contains a single IEM file content
+						# ecriture du fichier
+						my $subSampleSheet = "$raw_data/$dir/${prefixLogFolder}${laneExtraite}/${IEMFilePrefixe}_IEM_Lane${laneExtraite}.csv";
+						print2file($IEMFileContent, $subSampleSheet);
+					}
+					$IEMFileContent = '';
+					$counterIEMFiles++;
+				}
+				$IEMFileContent .= $line."\n";
+				($laneExtraite) = $line =~ m/^(\d),/;
+				$laneExtraite = '1' if ($sequencer eq 'MiSeq' );
+			}
+			# ecriture du dernier fichier
+			my $subSampleSheet = "$raw_data/$dir/${prefixLogFolder}${laneExtraite}/${IEMFilePrefixe}_IEM_Lane${laneExtraite}.csv";
+			print2file($IEMFileContent, $subSampleSheet);
+		
+			# DÃ©sactivation de la SampleSheet
+			$logger -> info("DÃ©sactivation de la SampleSheet.");
+			move($lastPSS, $lastPSS.".old") or $logger -> logdie("Le renommage de ".$lastPSS." en .old est en erreur ".$!);
+			
+			###############################################################
+			#					INTEROP DANS NEXTCLOUD
+			###############################################################
+			if (!$checkTest){
+				# RÃ©cupÃ©ration de l'annÃ©e pour le rÃ©pertoire de destination
+				my $year = "20".$annee;
+				
+				# Ecriture de la commande de synchronisation
+				my $aws_source = "$raw_data/$dir/";
+				my $aws_target = "s3://partage/externes/Illumina-SAV/$sequencer/$year/$dir"; #X:\partage\externes\Illumina-SAV\NovaSeq		[$#dir]
+				my $aws_prefixcmd = "aws s3 --endpoint-url https://s3r-tls.stockage.inra.fr";
+				
+				# Ecriture du script de lancement de synchronisation
+				my $aws_script_file = "scriptAWS_$sequencerID.sbatch";
+				my $aws_script = "#!/bin/sh \n";
+				$aws_script .= "#SBATCH -p wflowq\n#SBATCH -t 20\n#SBATCH --mem-per-cpu=200M\n";
+				$aws_script .= "#SBATCH -J $aws_script_file\n#SBATCH -e %x.e%j\n#SBATCH -o %x.o%j\n\n";
+				$aws_script .= "module load system/Python-3.6.7_shared\n";
+				$aws_script .= "$aws_prefixcmd sync $aws_source $aws_target ";
+				$aws_script .= "--exclude \"*\" --include \"[Rr]un[A-Za-z]*.xml\" --include \"InterOp/[A-Za-z]*.bin\" ";
+				$aws_script .= "--exclude \"InterOp/C[0-9]*.1*\"\n";
+				print2file($aws_script, "$aws_source/$aws_script_file");
+				
+				
+				# Lancement du script
+				my $sleepLastingForAWS = 300;
+				my $aws_launchcmd = "sbatch $aws_script_file";
+				my $aws_joboutput = `$aws_launchcmd`; $? and $logger -> logdie("Commande $aws_launchcmd impossible : ".$!);
+				my ($aws_jobID) = $aws_joboutput =~ m/Submitted batch job (\d+)/;
+				chomp($aws_jobID);
+				$logger -> info("\tDossier " . $aws_source." -> JobID : ".$aws_jobID."\nCommande exÃ©cutÃ©e : " . $aws_launchcmd );
+				
+				# Attente de la fin du job
+				my $boolOver = is_my_jobID_over($aws_jobID);
+				while (!$boolOver){
+					$boolOver = is_my_jobID_over($aws_jobID);
+					if (!$boolOver){
+						$logger -> info("\tEn attente de la fin de $aws_jobID, Ã© dans ".($sleepLastingForAWS/60)." minutes!");
+						sleep($sleepLastingForAWS); # toutes les 5 minutes (*60 = 300)
+					}
+				}
+				
+				# VÃ©rification qu'on est bon, sinon envoi d'un mail pour prÃ©venir
+				if (-e $aws_script_file.".e".$aws_jobID){
+					$logger -> info("\tLe fichier d'erreur pour AWS existe bien!");
+					if (! -z $aws_script_file.".e".$aws_jobID){
+						my $testObjectPrefixe = $checkTest? "[TEST]" : "";
+						$logger -> error("\tLe fichier d'erreur pour AWS n'est pas vide, il a dÃ© se passer quelque chose de louche, Ã© investiguer!" );
+						my $mailRecipients = $checkTest? $mailTEST :'get-plage.bioinfo@genotoul.fr';
+						my $mailContent = "Une erreur est survenue lors de la copie des fichiers SAV vers CEPH avec la commande contenue dans\n${aws_source}${aws_script_file}.\n\n";
+						$mailContent .= "Le fichier d'erreur contient \n".`cat $aws_script_file.e$aws_jobID`;
+						send_and_check_my_email($mailContent, "${$testObjectPrefixe}Erreur sauvegarde SAV sur CEPH", $mailRecipients, $mailRecipients);
+					}else{
+						$logger -> info("\tLe fichier d'erreur pour AWS est vide, j'aime quand un plan se dÃ©roule sans accroc!");
+					}
+				}
+			} else { $logger -> info("Nous sommes en mode test : pas besoin de sauvegarder InterOp"); }
+
+			###############################################################
+			#					CREATION READSETS NGL-Bi
+			###############################################################
+=head1 A_SUPPRIMER
+			if ($runExistsInNGL){
+				# parcours des dossier PipelineLogs_Lane*
+				
+				# recherche du $NGLBiReadsetCreatedFile
+				## Si trouvÃ© : on ne fait rien, les readsets existent deja
+				
+				
+				
+				
+				if (! -e $NGLBiReadsetCreatedFil){
+					# CREATION DES READSETS DANS NGL-BI	#   #   #   #   #   #   #   #   #   #   #
+					$logger -> info("Pas de fichier $NGLBiReadsetCreatedFil dans $raw_data/$dir -> Les readsets ne semblent ne pas exister dans NGL-Bi");
+				}
+			}
+=cut 
+
+			###############################################################
+			#					LANCEMENT DE NEXTFLOW
+			###############################################################
+			# crÃ©ation du dossier dans /work, se dÃ©placer dedans et lancer nextflow
+					
+		} # Fichier de fin de run trouvÃ©
+	} # fin parcours des rÃ©pertoires
+}
+
+###################################################################
+#
+#						FONCTIONS
+#
+###################################################################
+
+sub print2file {	# Writes $content to $file2write (truncating any existing file); logdie on any I/O failure
+	my ($content, $file2write) = @_;
+	my $logger = Log::Log4perl -> get_logger('print2file');
+	$logger -> info("\tEcriture du fichier $file2write");
+	open(my $fh, '>', $file2write) or $logger -> logdie("Impossible d'ouvrir $file2write en ecriture : ".$!);	# was a silent 'exit 1'
+	print $fh $content;
+	close($fh) or $logger -> logdie("Erreur a la fermeture de $file2write : ".$!);	# buffered write errors surface at close
+}
+
+sub check_my_samplesheet{
+	# Checks that $file2check is non-empty with Linux line endings (converting it if needed), then installs it as $file2write.
+	# Returns 1 when $file2check was usable, 0 when it is missing or empty; logdie when the conversion or a copy fails.
+	my ($file2check, $file2write) = @_;
+	my $logger = Log::Log4perl -> get_logger('check_my_samplesheet');
+	chomp($file2check);	# tolerate a newline-terminated name (e.g. raw `ls` output) before any file test is done
+	
+	$logger -> info("Etude de $file2check");
+	if (-s $file2check){ # $file2check exists and has a non zero size
+		$logger -> info("VÃ©rification des fins de ligne");
+		my $isfile2checkwindows = is_my_file_Windows($file2check);
+		$logger -> info("Sortie de is_my_file_Windows : " . $isfile2checkwindows);
+		if ($isfile2checkwindows){
+			$logger -> warn($file2check." a des fins de ligne Windows : on le convertit!");
+			convert_file_2_linux($file2check);
+			my $isfile2checkwindows2 = is_my_file_Windows($file2check);	# re-check to make sure the conversion worked
+			if ($isfile2checkwindows2){
+				$logger -> logdie("La conversion dos2linux n'a pas fonctionnÃ©!");
+			} else {
+				$logger -> info("La conversion dos2linux a fonctionnÃ©!");
+			}
+		}else {
+			$logger -> info("Donc fins de ligne de " . $file2check . " : Linux");
+		}
+		
+		$logger -> info("Etude de $file2write");
+		if(-s $file2write){# $file2write already exists with a non zero size
+			if( $file2write eq $file2check ){# same file name: nothing to copy
+				$logger -> info($file2write." est dÃ©jÃ© l'Ã©quivalent de ".$file2check.", on garde!");
+			}else{
+				# Keep the previous $file2write as OLD_$file2write before overwriting it with $file2check
+				$logger -> info("Copie de ".$file2write." en OLD_$file2write");
+				cp($file2write,"OLD_$file2write") or $logger -> logdie("Impossible de copier le fichier ".$file2write." : ".$!);
+				$logger -> info("Copie de ".$file2check." en ".$file2write);
+				cp($file2check,$file2write) or $logger -> logdie("Impossible de copier le fichier ".$file2check." : ".$!);
+			}
+		}else{
+			# $file2write is missing or empty: simply copy $file2check under the expected name
+			$logger -> info("Copie de ".$file2check." en ".$file2write);
+			cp($file2check,$file2write) or $logger -> logdie("Impossible de copier le fichier ".$file2check." : ".$!);
+		}
+		return 1;
+	}else{
+		$logger -> info("Il n'y a pas de SampleSheet ".$file2check);
+		return 0;
+	}
+}
+
+# Returns the NGL-SQ experiment code found on the first 'Experiment Name,<code>' line of $samplesheet; logdie if there is none
+sub getNGLSeqExperimentCode{
+	my ($samplesheet) = @_;
+	my $logger = Log::Log4perl -> get_logger('getNGLSeqExperimentCode');
+	my $experimentName_ligne = `grep "Experiment Name" $samplesheet | head -1` ;  $? and $logger -> logdie("RÃ©cupÃ©ration de 'Experiment Name' dans '".$samplesheet."' en echec" );
+	# A failed match returns an empty list and leaves the code undef: fail before it is concatenated or compared
+	my ($NGLSQExperimentCode) = $experimentName_ligne =~ m/Experiment Name,(.+)$/;
+	$logger -> logdie("Echec de la rÃ©cup du code d'expÃ©rience") if(!defined($NGLSQExperimentCode) || $NGLSQExperimentCode eq "");
+	$logger -> info("NGLSQExperimentCode : ".$NGLSQExperimentCode);
+	$logger -> info("L'expÃ©rience ne sera pas rentrÃ©e dans NGL-Bi car pas de correspondance dans NGL-SQ") if($NGLSQExperimentCode eq '-');
+	return $NGLSQExperimentCode;
+}
+
+# Loads every KEY=VALUE pair of the NGL configuration file named by $ENV{CONFFILE} into %ENV; logdie on any problem
+sub loadConfFile{
+	my $logger = Log::Log4perl -> get_logger('loadConfFile');
+	unless ($ENV{CONFFILE}) {
+		$logger -> logdie("$0: Database configuration file not defined ! Initialize 'CONFFILE' with configuration file path in your environment");
+	};
+	my $dbconf_file = $ENV{CONFFILE};
+	unless (-f $dbconf_file) {
+		$logger -> logdie("$0: Database configuration file not exist: $dbconf_file. It's necessary for continue");
+	};
+	open(my $handle, '<', $dbconf_file) or $logger -> logdie("$0: Can't open database configuration file $dbconf_file: $!");
+	chomp( my @lines = <$handle> );
+	close $handle;
+	foreach my $line (@lines) {
+		$line =~ s/#.*//o;	# drop comments
+		next if ($line =~ /^\s*$/);	# skip empty and whitespace-only lines
+		if ($line =~ /^([^=]*)=(.*)$/o) {	# split on the FIRST '=' so that a value may itself contain '='
+			my $key = $1;
+			my $value = $2;
+			$key =~ s/^\s*//o;
+			$key =~ s/\s*$//o;
+			$value =~ s/^\s*//o;
+			$value =~ s/\s*$//o;
+			$ENV{$key} = $value;
+		}else {
+			$logger -> logdie("$0: Can't load variable to database configuration file $dbconf_file in line: '$line'");
+		}
+	}
+}
+
+=head2 function is_my_file_Windows
+
+	Title		 : is_my_file_Windows
+	Usage		 : $boolean = is_my_file_Windows($file);
+	Prerequisite : None
+	Function	 : Retourne 0 si les fins de ligne du fichier sont linux, 1 si Windows
+	Returns	     : Nombre
+	Args		 : $file, string
+	Globals	     : none
+
+=cut
+
+sub is_my_file_Windows {
+	# Runs file(1) on $file and checks whether its description announces CRLF line terminators.
+	# Returns 1 for Windows (CRLF) line endings, 0 otherwise; logdie when the `file` command fails.
+	my ($file) = @_ ;
+	my $logger = Log::Log4perl -> get_logger('is_my_file_Windows');
+	$logger -> info("Fichier en entrÃ©e : " . $file);
+	
+	my $description = `file $file`; $? and $logger -> logdie("[Erreur]Lancement de file");
+	chomp($description);
+	$logger -> info("Message de sortie : " . $description);
+	unless ($description =~ /with CRLF.* line terminators/){
+		return 0;
+	}
+	$logger -> info("Le fichier est Windows");
+	return 1;
+}
+
-- 
GitLab


From 63d01818d9ff10c439b69c47d8a633e50fe9e578 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Tue, 31 Jan 2023 17:00:53 +0100
Subject: [PATCH 42/51] Script for Treatment

	Ref: #28
---
 bin/alignementStatTreatment.pl | 202 +++++++++++++++++++++++++++++++++
 1 file changed, 202 insertions(+)
 create mode 100755 bin/alignementStatTreatment.pl

diff --git a/bin/alignementStatTreatment.pl b/bin/alignementStatTreatment.pl
new file mode 100755
index 0000000..54b7f88
--- /dev/null
+++ b/bin/alignementStatTreatment.pl
@@ -0,0 +1,202 @@
+#!/usr/bin/perl -w
+binmode STDIN,  ':encoding(UTF-8)';
+binmode STDOUT, ':encoding(UTF-8)';
+binmode STDERR, ':encoding(UTF-8)';
+
+=head1 NAME
+
+ alignmentStatTreatment.pl
+ 
+=head1 DESCRIPTION
+
+ Lit les fichiers de sortie d'alignement et ajoute les informations extraites au treatment NGL-Bi
+ 
+=head1 SYNOPSIS
+
+ alignmentStatTreatment.pl --file <path>
+
+=head1 OPTIONS
+
+ --file=s : path to a stat file
+ 
+=head1 EXEMPLES
+
+ perl alignmentStatTreatment.pl --file /path/to/my/file.stat
+
+=head1 AUTHOR
+
+ Jules Sabban pour Plateforme genomique Toulouse (get-plage.bioinfo@genotoul.fr)
+ 
+=cut
+
+###################################################################
+#
+#						LIBRAIRIES
+#
+###################################################################
+use strict;
+use Getopt::Long;
+use Log::Log4perl;
+
+##################################################################
+#
+#						INITIALISATION
+#
+##################################################################
+Log::Log4perl -> init('/home/sbsuser/save/scripts-ngs/NGL-Bi_client_Current/IG/SystemeInteractionNGL-Bi/conf/log4perl.conf');
+my $logger = Log::Log4perl->get_logger("MyLog");
+
+my $file = "";	# path given with --file; empty means "not provided"
+
+GetOptions( 
+	"file=s" => \$file, 	# path to statistic file
+) or $logger -> logdie("USAGE : alignmentStatTreatment.pl --file <STAT_FILE>");	# stop on unknown or malformed options
+
+if ($file eq "") {
+	$logger -> warn("USAGE : alignmentStatTreatment.pl --file <STAT_FILE>\n");
+	$logger -> logdie("At least one argument is missing !");	# logs and dies in one call, without relying on the return value of fatal()
+}
+
+##################################################################
+#
+#							MAIN
+#
+##################################################################
+MAIN:
+{	
+	# Hash collecting the values to insert into the NGL-Bi treatment, keyed by statistic name
+	my %TreatmentProperties = ();
+
+	# Patterns applied to every line of the statistics file (capture 1 = count, capture 2 = percentage when present)
+	my $total_regex = '(\d+) .*in total';									# total number of reads
+	my $qcfailure_regex = '(\d+ \+ (\d+) in total)|((\d+) QC failure)';		# QC failures: capture 2 or capture 4 depending on the line format
+	my $duplicates_regex = '(\d+) .*duplicates';							# duplicates
+	my $mapped_regex = '(\d+) .*mapped \(([^:]*).*\)';						# mapped; NOTE(review): capture 2 keeps any whitespace preceding ':'
+	my $paired_regex = '(\d+) .*paired in sequencing';						# paired in sequencing
+	my $read1_regex = '(\d+) .*read1';										# read1
+	my $read2_regex = '(\d+) .*read2';										# read2
+	my $matemapped_regex = '(\d+) .*with itself and mate mapped';			# with itself and mate mapped
+	my $properlypaired_regex = '(\d+) .*properly paired \(([^:]*).*\)';		# properly paired
+	my $singletons_regex = '(\d+) .*singletons \(([^:]*).*\)';				# singletons
+	my $mapch1_regex = '(\d+) .*with mate mapped to a different chr';		# mate mapped to a different chr
+	my $supplementary_regex = '(\d+).*supplementary';						# supplementary
+	
+	# Read the whole statistics file; open() reports failure through its return value and $!, not through $?
+	open(my $openFile, '<', $file) or $logger -> logdie("Impossible d'ouvrir le fichier $file : $!");
+	chomp( my @lines = <$openFile> );
+	close $openFile;
+	
+	foreach my $line (@lines) {
+		#$logger -> info("Evaluation de la ligne : ". $line);
+		if ($line =~  qr/$total_regex/) {
+			$TreatmentProperties{"total"} = $1;
+			$logger -> info("total_regex a ete trouvee et vaut : ". $TreatmentProperties{"total"});
+		}
+		if ($line =~  qr/$qcfailure_regex/) {
+			if (defined($2) && $2 ne '') {	# capture 2 is undef when the 'QC failure' alternative matched
+				$TreatmentProperties{"qcfailure"} = $2;
+			} else {
+				$TreatmentProperties{"qcfailure"} = $4;
+			}
+			
+			$logger -> info("qcfailure a ete trouvee et vaut : ". $TreatmentProperties{"qcfailure"});
+		}
+		if ($line =~  qr/$duplicates_regex/) {
+			$TreatmentProperties{"duplicates"} = $1;
+			$logger -> info("duplicates a ete trouvee et vaut : ". $TreatmentProperties{"duplicates"});
+		}
+		if ($line =~  qr/$mapped_regex/) {
+			if (index($line,'primary') != -1) {	# 'primary mapped' lines are stored under their own keys
+				$TreatmentProperties{"primary_mapped_nb"} = $1;
+				$TreatmentProperties{"primary_mapped_perc"} = $2;
+				$logger -> info("primary_mapped_nb a ete trouvee et vaut : ". $TreatmentProperties{"primary_mapped_nb"});
+				$logger -> info("primary_mapped_perc a ete trouvee et vaut : ". $TreatmentProperties{"primary_mapped_perc"});
+			} else {
+				$TreatmentProperties{"mapped_nb"} = $1;
+				$TreatmentProperties{"mapped_perc"} = $2;
+				$logger -> info("mapped_nb a ete trouvee et vaut : ". $TreatmentProperties{"mapped_nb"});
+				$logger -> info("mapped_perc a ete trouvee et vaut : ". $TreatmentProperties{"mapped_perc"});
+			}
+		}
+		if ($line =~  qr/$paired_regex/) {
+			$TreatmentProperties{"paired"} = $1;
+			$logger -> info("paired a ete trouvee et vaut : ". $TreatmentProperties{"paired"});
+		}
+		if ($line =~  qr/$read1_regex/) {
+			$TreatmentProperties{"read1"} = $1;
+			$logger -> info("read1 a ete trouvee et vaut : ". $TreatmentProperties{"read1"});
+		}
+		if ($line =~  qr/$read2_regex/) {
+			$TreatmentProperties{"read2"} = $1;
+			$logger -> info("read2 a ete trouvee et vaut : ". $TreatmentProperties{"read2"});
+		}
+		if ($line =~  qr/$matemapped_regex/) {
+			$TreatmentProperties{"matemapped"} = $1;
+			$logger -> info("matemapped a ete trouvee et vaut : ". $TreatmentProperties{"matemapped"});
+		}
+		if ($line =~  qr/$properlypaired_regex/) {
+			$TreatmentProperties{"properlypaired_nb"} = $1;
+			$TreatmentProperties{"properlypaired_perc"} = $2;
+			$logger -> info("properlypaired_nb a ete trouvee et vaut : ". $TreatmentProperties{"properlypaired_nb"});
+			$logger -> info("properlypaired_perc a ete trouvee et vaut : ". $TreatmentProperties{"properlypaired_perc"});
+		}
+		if ($line =~  qr/$singletons_regex/) {
+			$TreatmentProperties{"singletons_nb"} = $1;
+			$TreatmentProperties{"singletons_perc"} = $2;
+			$logger -> info("singletons_nb a ete trouvee et vaut : ". $TreatmentProperties{"singletons_nb"});
+			$logger -> info("singletons_perc a ete trouvee et vaut : ". $TreatmentProperties{"singletons_perc"});
+		}
+		if ($line =~  qr/$mapch1_regex/ && index($line,'mapQ') == -1) {	# ignore the '(mapQ>=...)' variant of this line
+			$TreatmentProperties{"mapch1"} = $1;
+			$logger -> info("mapch1 a ete trouvee et vaut : ". $TreatmentProperties{"mapch1"});
+		}
+		if ($line =~  qr/$supplementary_regex/) {
+			$TreatmentProperties{"supplementary"} = $1;
+			$logger -> info("supplementary a ete trouvee et vaut : ". $TreatmentProperties{"supplementary"});
+		}
+	}
+
+
+	## Insertion of the treatment into NGL-Bi
+	## TODO
+
+}
+$logger -> info("Fin normale du script.");
+
+##################################################################
+#
+#						FUNCTIONS
+#
+##################################################################
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-- 
GitLab


From 59ae4478a9737c019e5645e563e2aa9a81114be9 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Tue, 31 Jan 2023 17:01:54 +0100
Subject: [PATCH 43/51] Add example for params config file

---
 params.config_example | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)
 create mode 100644 params.config_example

diff --git a/params.config_example b/params.config_example
new file mode 100644
index 0000000..0bd525e
--- /dev/null
+++ b/params.config_example
@@ -0,0 +1,19 @@
+params {
+    inputdir="/home/sbsuser/work/data/NovaSeq/230116_A00318_0372_BHNKY7DRX2_Lane1_1673933427_10x"
+    samplesheet = inputdir+'/SampleSheet.csv'
+    project = 'MAGICs'
+    data=inputdir+'/'+project
+    isMultiplex = true
+    dataNature = 'DNA'
+    //pairedEnd = true
+    splitReads = true
+    referenceGenome = ''
+    addBankForConta = ''
+    runName='Test_10X'
+    sequencer='NovaSeq'
+    run_date='230116'
+    machineID='NOVA'
+    fcID='BHNKY7DRX2'
+    lane='1'
+    demuxUniqueness='1673933427'
+}
\ No newline at end of file
-- 
GitLab


From fbb3d2be3a2b1a9345a069e4176d1a2b7e9a5fe3 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Tue, 31 Jan 2023 17:02:36 +0100
Subject: [PATCH 44/51] Improve sample name filtering in MultiQC

---
 assets/multiqc_config.yaml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml
index 8a2b597..528c27c 100644
--- a/assets/multiqc_config.yaml
+++ b/assets/multiqc_config.yaml
@@ -19,7 +19,10 @@ thousandsSep_format: " "
 extra_fn_clean_trim:
   - "_filtered"
   - "_unmerged"
+  - "_unmerged_stats"
   - "_flagstat"
+  - "_subset"
+  - "_screen"
 
 ## Plot config
 export_plots: true
-- 
GitLab


From 48050e5088bd2a3aae2cffac591feca2a817e0d8 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Tue, 31 Jan 2023 17:03:06 +0100
Subject: [PATCH 45/51] Make script runnable

---
 bin/createNGLBiReadSets.pl | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 mode change 100644 => 100755 bin/createNGLBiReadSets.pl

diff --git a/bin/createNGLBiReadSets.pl b/bin/createNGLBiReadSets.pl
old mode 100644
new mode 100755
-- 
GitLab


From 4fe217705b44ed4ba44b1503864fdda546a18753 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Tue, 31 Jan 2023 17:04:11 +0100
Subject: [PATCH 46/51] Remove old process

	Ref: #28
---
 modules/local/module_core.nf | 135 +++++++++++++++++------------------
 1 file changed, 67 insertions(+), 68 deletions(-)

diff --git a/modules/local/module_core.nf b/modules/local/module_core.nf
index 6584a87..ca2a7bf 100644
--- a/modules/local/module_core.nf
+++ b/modules/local/module_core.nf
@@ -1,7 +1,6 @@
-params.outdir=''	// utile ?
-banksForConta = [ ]	// utile ?
-
-//mismatchNumber= params.sequencer == 'MiSeq'? 0 : 1	// utile ?
+/*
+ *	Module pour les analyses de base du pipeline
+*/
 
 process extractInfoForDemuxStats {
 	publishDir path: "${params.outdir}/Demux/Stats" , mode: 'copy'
@@ -85,27 +84,6 @@ process illuminaFilter {
 	
 }
 
-process search_conta_bwa {
-	// aln command uses ~3.2GB memory and the sampe command uses ~5.4GB
-	publishDir path: "${params.outdir}/ContaminationSearch/tmp" , mode: 'copy'
-	module 'bioinfo/bwa-0.7.17'
-	time { 20.m * task.attempt }
-	memory { 5.GB * task.attempt }
-	
-	input:
-		tuple val(name), path(read)
-		each genomeRef
-		
-	output:
-		tuple val("${name}_${genomeName}"), path("${name}_${genomeName}.sam"), emit: sam
-		
-	script:
-	genomeName=file(genomeRef).simpleName
-	"""
-		bwa aln $genomeRef $read 2>> ${name}_${genomeName}.err | bwa samse $genomeRef - $read > ${name}_${genomeName}.sam 2>> ${name}_${genomeName}.err
-	"""
-}
-
 process BWA_ALIGNMENT {
 	publishDir path: "${params.outdir}/ContaminationSearch/tmp" , mode: 'copy'
 	
@@ -126,53 +104,11 @@ process BWA_ALIGNMENT {
 	"""
 }
 
-process search_conta_samtools {
-	publishDir path: "${params.outdir}/ContaminationSearch" , mode: 'copy'
-	
-	module 'bioinfo/samtools-1.9'
-	time { 10.m * task.attempt }
-	
-	tag " $sample"
-	
-	input:
-		tuple val(name), path("*")
-	
-	output:
-		//tuple val("$name"), path("*")
-		path("*.txt")
-	
-	script:
-	"""
-		samtools view -SF 260 ${name}.sam 2>> ${name}.err | cut -f1 - 2>> ${name}.err | sort - > ${name}.txt 2>> ${name}.err
-	"""
-}
-
-process search_conta_summary {
-	publishDir path: "${params.outdir}/ContaminationSearch" , mode: 'copy'
-	
-	time { 10.m * task.attempt }
-	memory '1.GB'
-	
-	tag " $sample"
-	
-	input:
-		//tuple val(name), path("*")
-		path("*")
-		
-	output:
-		path("*.yaml")
-		
-	script:
-	"""
-		contaCounter.pl ./
-	"""
-}
-
-
 process FASTQSCREEN {
 	publishDir path: "${params.outdir}/ContaminationSearch/FastQ-Screen", mode: 'copy'
 	
 	module 'bioinfo/FastQ-Screen-0.15.2'
+	time { 1.h * task.attempt }
 	
 	tag " $sample"
 	
@@ -276,3 +212,66 @@ process bcl2fastq {
 		
 	"""
 }
+
+process search_conta_bwa {
+	// Aligns one read file against one reference per task (bwa aln piped into bwa samse). Author's memory note: aln ~3.2GB, sampe ~5.4GB (NOTE(review): the script runs samse, not sampe - confirm)
+	publishDir path: "${params.outdir}/ContaminationSearch/tmp" , mode: 'copy'
+	module 'bioinfo/bwa-0.7.17'
+	time { 20.m * task.attempt }	// time limit scales with the attempt number
+	memory { 5.GB * task.attempt }	// memory request scales with the attempt number
+	
+	input:
+		tuple val(name), path(read)
+		each genomeRef	// the process is run once per reference in genomeRef
+		
+	output:
+		tuple val("${name}_${genomeName}"), path("${name}_${genomeName}.sam"), emit: sam
+		
+	script:
+	genomeName=file(genomeRef).simpleName	// short label derived from the reference path, used in the output file names
+	"""
+		bwa aln $genomeRef $read 2>> ${name}_${genomeName}.err | bwa samse $genomeRef - $read > ${name}_${genomeName}.sam 2>> ${name}_${genomeName}.err
+	"""
+}
+
+process search_conta_samtools {
+	publishDir path: "${params.outdir}/ContaminationSearch" , mode: 'copy'
+	// Lists, sorted, the names of the reads kept by `samtools view -F 260` from ${name}.sam into ${name}.txt
+	module 'bioinfo/samtools-1.9'
+	time { 10.m * task.attempt }
+	
+	tag " $name"	// was "$sample", which this process never declares; the only value received as input is 'name'
+	
+	input:
+		tuple val(name), path("*")
+	
+	output:
+		//tuple val("$name"), path("*")
+		path("*.txt")
+	
+	script:
+	"""
+		samtools view -SF 260 ${name}.sam 2>> ${name}.err | cut -f1 - 2>> ${name}.err | sort - > ${name}.txt 2>> ${name}.err
+	"""
+}
+
+process search_conta_summary {
+	publishDir path: "${params.outdir}/ContaminationSearch" , mode: 'copy'
+	
+	time { 10.m * task.attempt }	// time limit scales with the attempt number
+	memory '1.GB'
+	
+	tag " $sample"	// NOTE(review): no 'sample' variable is declared by this process (its input is an anonymous path) - confirm this resolves at runtime
+	
+	input:
+		//tuple val(name), path("*")
+		path("*")
+		
+	output:
+		path("*.yaml")
+		
+	script:
+	"""
+		contaCounter.pl ./
+	"""
+}
\ No newline at end of file
-- 
GitLab


From 818768eef36045d532d331bc5e5fd288bb9add6f Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Tue, 31 Jan 2023 17:09:25 +0100
Subject: [PATCH 47/51] Cleanning code

---
 conf/prod.config                     |   1 +
 modules/local/module_dna.nf          |   3 +-
 nextflow.config                      |  10 +--
 sub-workflows/local/core_pipeline.nf |  55 +-------------
 sub-workflows/local/dna_qc.nf        |   9 +++
 workflow/illumina_qc.nf              | 106 ++++-----------------------
 6 files changed, 31 insertions(+), 153 deletions(-)

diff --git a/conf/prod.config b/conf/prod.config
index d1e2306..b36b1a7 100644
--- a/conf/prod.config
+++ b/conf/prod.config
@@ -1,3 +1,4 @@
+System.out.println "Chargement des paramÃ¨tres de la config PROD"
 // ========================================
 //				PROCESSES
 //=========================================
diff --git a/modules/local/module_dna.nf b/modules/local/module_dna.nf
index 894b3be..ea95679 100644
--- a/modules/local/module_dna.nf
+++ b/modules/local/module_dna.nf
@@ -16,7 +16,6 @@ process BWA_ALIGNMENT { BWA_ALIGNMENT
 		
 	script:
 	"""
-		module list
 		bwa mem ${params.referenceGenome} ${reads} 1> ${sample}.sam 2> ${sample}.log
 	"""
 }
@@ -104,6 +103,8 @@ process QUALIMAP {
 	"""
 }
 
+
+
 /*
 process alignmentQualityStats {
 	publishDir path: "${params.outdir}/alignmentStats/cigar" , mode: 'copy'
diff --git a/nextflow.config b/nextflow.config
index 2fa2203..26777bd 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -1,16 +1,11 @@
 // ========================================
 //				PARAMS
-//=========================================
+// =========================================
 // Global params
 params {	
 	// PARAMETRE POUR OUTILS
 	// TODO
 
-
-  	// CHECK CONTAMINATION
-  	genomesRefForConta = [ '/work/bank/bwadb/Escherichia_coli_FRIK2069', '/work/bank/bwadb/phi.fa', '/work/bank/bwadb/yeast.nt' ]
-  	addBankForConta = ''		// Ajout ponctuel d'un ou plusieurs genomes
-
 	// OTHERS
 	email="jules.sabban@inrae.fr"
 	email_on_fail="jules.sabban@inrae.fr"
@@ -23,9 +18,8 @@ params {
 	config_profile_description = false	// ??
 	config_profile_contact = false	// ??
 	config_profile_url = false	// ??
-
 }
-System.out.println "Les paramÃ¨tres globaux sont chargÃ©s"
+
 // ========================================
 //				PROFILES
 //=========================================
diff --git a/sub-workflows/local/core_pipeline.nf b/sub-workflows/local/core_pipeline.nf
index ac469b9..9ac1545 100644
--- a/sub-workflows/local/core_pipeline.nf
+++ b/sub-workflows/local/core_pipeline.nf
@@ -25,7 +25,6 @@ include {
 include {
 	prepareReadSetCreation;
 	readsetNGLBiCreation as readsetCreation;
-	checkErrorFromNGLBi as checkError;
 } from "$baseDir/modules/local/module_NGL-Bi.nf"
 
 include { GUNZIP	} from "${params.shared_modules}/gzip.nf"
@@ -40,11 +39,8 @@ isResume=workflow.resume
 
 workflow NGLBi_readsets {
 	/*
-	 * Decoupage samplesheet -> non
 	 * Creation readsets NGL-Bi -> oui !!
 	 * Sauvegarde NextCloud -> non
-	 * Decoupage jFlow ?? -> non a priori
-	 * 
 	 */
 	take:
 		sampleSheet
@@ -59,63 +55,16 @@ workflow NGLBi_readsets {
 }
 
 
-workflow Demultiplexage {
-	 //ecriture du masque
-	 //demux avec bcl2fastq / cellRanger
-	take:
-		SampleSheet
-		RunInfoXML
-		mismatchNumber
-		rawdata_location
-	
-	main:
-		maskMaker(SampleSheet, RunInfoXML)
-		bcl2fastq(SampleSheet,maskMaker.out,mismatchNumber,rawdata_location)
-}
-
-
-/*
-workflow Search_conta {
-	take:
-		ch_read
-		banksForConta
-	
-	main:
-		align(ch_read, banksForConta)
-		filter(align.out.sam)
-		summary(filter.out.collect())
-}
-*/
-
-/*
-workflow Search_conta_debug {
-	take:
-		ch_read
-		banksForConta
-	
-	main:
-		illuminaFilter(ch_read)
-		fastqc(illuminaFilter.out.reads)
-		Search_conta(illuminaFilter.out.reads, banksForConta)
-}
-*/
-
-
-workflow Core {
+workflow CORE {
 	take:
 		ch_sampleSheet
 		//ch_runNGLBiCreated
-		//ch_RunInfoXML
 		ch_DemuxStatXML
 		ch_DemuxSummary
 		ch_read
-		banksForConta
-		//mismatchNumber
-		//rawdata_location
 		
 	main:
-		//NGLBi_readsets(ch_sampleSheet, ch_runNGLBiCreated)
-		//Demultiplexage(ch_sampleSheet, ch_RunInfoXML, mismatchNumber, rawdata_location)	// A voir plus tard !
+		//NGLBi_readsets(ch_sampleSheet, ch_runNGLBiCreated)	// Fait dans NGS_Illumina, Ã  voir plus tard pour le dÃ©placer ici
 		
 		// ----------- DemultiplexStat
 		extractInfoForDemuxStats(ch_sampleSheet)
diff --git a/sub-workflows/local/dna_qc.nf b/sub-workflows/local/dna_qc.nf
index 2b0557c..794f7aa 100644
--- a/sub-workflows/local/dna_qc.nf
+++ b/sub-workflows/local/dna_qc.nf
@@ -1,3 +1,12 @@
+// -------------------------------------------------
+// 					DNA QC
+// -------------------------------------------------
+/*
+ * QC des donnÃ©es ADN :
+ * 		- Alignement contre gÃ©nome de rÃ©fÃ©rence
+ * 		- Rapport d'alignement avec Qualimap
+*/
+
 // -------------------------------------------------
 // 					MODULES
 // -------------------------------------------------
diff --git a/workflow/illumina_qc.nf b/workflow/illumina_qc.nf
index 256e725..778ec1e 100644
--- a/workflow/illumina_qc.nf
+++ b/workflow/illumina_qc.nf
@@ -9,12 +9,11 @@ def helpMessage() {
 
     The typical command for running the pipeline is as follows:
 
-    nextflow run get-nf/template --inputdir '/path/to/data' --samplesheet 'samples.csv' -profile docker
+    nextflow run get-nf/template -profile prod -ansi-log false
 
     Mandatory arguments:
-      --inputdir                    Path to input directory
       -profile                      Configuration profile to use. Can use multiple (comma separated)
-                                    Available: conda, docker, singularity, path, genotoul, test and more.
+                                    Available: prod / dev.
 
     Options:
       --samplesheet                 Default inputdir/samples.csv eg: SAMPLE_ID,SAMPLE_NAME,path/to/R1/fastq/file,path/to/R2/fastq/file (for paired-end only)
@@ -45,105 +44,30 @@ if (params.help) {
 }
 
 // -------------------------------------------------
-// 					PARAMS
+// 					CHANNELS
 // -------------------------------------------------
-/*params.sequencer = 'NovaSeq'
-//params.raw_data = '/home/sbsuser/work/Nextflow/wf-illumina-nf/data_test/MiSeq/210713_M07406_0007_000000000-JRCVF_bad'
-//params.outdir = '/home/sbsuser/work/Nextflow/wf-illumina-nf/results/210722_A00318_0223_BH3GHCDRXY_Lane1_1627020907_10x'
-
-
-
-
-//my_data_miseq=Channel.fromPath('./data_test/20210713_MISEQ_7_BULKDEMUX_JRCVF.csv')
-//my_data_novaseq=Channel.fromPath('./data_test/20210607_NOVASEQ6000_BULKDEMUX_HFMH7DRXY.csv')
-
-
-//ch_ss=Channel.fromPath('/home/sbsuser/work/Nextflow/wf-illumina-nf/data_test/MiSeq/210713_M07406_0007_000000000-JRCVF_bad/PipelineLogs_Lane1/20210713_MISEQ_7_IEM_JRCVF_Lane1.csv')
-//ch_ngl=Channel.fromPath('/home/sbsuser/work/Nextflow/wf-illumina-nf/data_test/MiSeq/210713_M07406_0007_000000000-JRCVF_bad/RunNGL-Bi.created')
-//ch_runInfo=Channel.fromPath('/home/sbsuser/work/Nextflow/wf-illumina-nf/data_test/MiSeq/210713_M07406_0007_000000000-JRCVF_bad/RunInfo.xml')
-//ch_ss=Channel.fromPath('/NovaSeq/data/210722_A00318_0223_BH3GHCDRXY/PipelineLogs_Lane1/20210722_NOVASEQ6000_IEM_H3GHCDRXY_Lane1.csv')
-
-*/
-
-// ------------- Test 10x ------------ //
-/*
-params.sequencer = 'NovaSeq'
-params.outdir = '/home/sbsuser/work/Nextflow/wf-illumina-nf/results/210722_A00318_0223_BH3GHCDRXY_Lane1_1627020907_10x'	// In config file
-params.raw_data = ''
-params.data = '/home/sbsuser/work/Nextflow/wf-illumina-nf/data_test/NovaSeq/210722_A00318_0223_BH3GHCDRXY_Lane1_1627020907_10x'
-params.isMultiplex = true
-params.chemistry = '10X'
-ch_ss = Channel.fromPath(params.data+'/SampleSheet_global.csv')
-*/
-
-// ------------- Test MiSeq ------------ //
-/*
-params.sequencer = 'MiSeq'
-//params.outdir = '/home/sbsuser/work/Nextflow/wf-illumina-nf/results/211022_M01945_0364_000000000-DB246_rnaseq'	// In config file
-params.raw_data = ''
-params.data = '/home/sbsuser/work/Nextflow/wf-illumina-nf/data_test/MiSeq/211022_M01945_0364_000000000-DB246_rnaseq'
-params.isMultiplex = true
-params.chemistry = 'amplicon'
-*/
-
-/*
-//ch_ss = Channel.fromPath(params.data+'/SampleSheet.csv')
-ch_DemuxStatXML=Channel.fromPath(params.data+'/Stats/DemultiplexingStats.xml')
-ch_DemuxSummary=Channel.fromPath(params.data+'/Stats/DemuxSummaryF1L1.txt')
-ch_read=Channel
-	.fromPath(params.data+'/TregThymus/**_R{1,2}_*.fastq.gz')
-	//.fromPath(params.data+'/ROME/B20CG-*_R{1,2}_*.fastq.gz')
-	.map{$it -> [$it.simpleName, $it]}
-	.groupTuple()
-*/
-
-// ------------- Test Amplicon ------------ //
-params.sequencer = 'MiSeq'
-//params.outdir = ''	// In config file
-params.raw_data = ''
-//params.data = '/home/sbsuser/work/Nextflow/wf-illumina-nf/data_test/NovaSeq/211129_A00318_0259_AHNMTTDSX2_Lane1_1638345606_dna'
-//params.isMultiplex = true
-//params.chemistry = 'Default'
-ch_ss = Channel.fromPath(params.samplesheet)		// utilitÃ© d'aprÃ¨s la SS dans un params ??
+ch_ss = Channel.fromPath(params.samplesheet)
 ch_DemuxSummary=Channel.fromPath(params.inputdir+"/Stats/DemuxSummaryF1L*.txt")
 ch_DemuxStatXML=Channel.fromPath(params.inputdir+'/Stats/DemultiplexingStats.xml')
-//params.pairedEnd = true
-//params.splitReads = true	// ????
-//params.referenceGenome = '/save/ng6/TODO/HiSeqIndexedGenomes/new_struct/Quercus_robur/genome/GCA_900291515.1/BWA/GCA_900291515.1_Q_robur_v1_genomic.fna'
+
+// fastq one by one
 ch_read=Channel
 	.fromPath(params.data+'/*_R{1,2}_*.fastq.gz')
 	.map{$it -> [$it.simpleName, $it]}
-	//.fromFilePairs(params.data+'/*_R{1,2}_*.fastq.gz')
-	//.groupTuple()
 
+// fastq paired
+//ch_read_merged=Channel.fromFilePairs(params.data+'/*_R{1,2}_*.fastq.gz')
 
-mismatchNumber = params.sequencer == 'MiSeq'? 0 : 1
 
-banksForConta = params.addBankForConta ? params.genomesRefForConta << params.addBankForConta : params.genomesRefForConta
+mismatchNumber = params.sequencer == 'MiSeq'? 0 : 1
+//banksForConta = params.addBankForConta ? params.genomesRefForConta << params.addBankForConta : params.genomesRefForConta
 
-System.out.println "On y est presque..."
 createDir = file(params.outdir).mkdir()
 
 // -------------------------------------------------
 // 					INCLUDES
 // -------------------------------------------------
-// Mettre ca dans des fichiers de config ??
-/*
-if DNA {
-	include { dna_qc as QC } from "$baseDir/sub-workflows/local/dna_qc.nf"
-}
-if RNA {
-	include { rna_qc as QC } from "$baseDir/sub-workflows/local/rna_qc.nf"
-}
-if amplicon {
-	if taille_insert dans itervalle {
-		include { diversity_qc as QC } from "$baseDir/sub-workflows/local/diversity_qc.nf"
-	} else {
-		include { dna_qc as QC } from "$baseDir/sub-workflows/local/dna_qc.nf"
-	}
-}
-*/
-include { Core as CORE	} from "$baseDir/sub-workflows/local/core_pipeline.nf"
+include { CORE			} from "$baseDir/sub-workflows/local/core_pipeline.nf"
 include { DNA_QC		} from "$baseDir/sub-workflows/local/dna_qc.nf"
 //include { MULTIQC		} from "$baseDir/modules/local/module_reports.nf"
 include { MULTIQC		} from "${params.shared_modules}/multiqc.nf"
@@ -153,14 +77,14 @@ include { workflow_summary as WORKFLOW_SUMMARY } from "${params.shared_modules}/
 // 					WORKFLOW
 // -------------------------------------------------
 workflow ILLUMINA_QC {
+	WORKFLOW_SUMMARY()
 
-	CORE(ch_ss, ch_DemuxStatXML, ch_DemuxSummary, ch_read, banksForConta )		/*ch_ngl, ch_runInfo, mismatchNumber,  params.raw_data*/
-
+	CORE(ch_ss, ch_DemuxStatXML, ch_DemuxSummary, ch_read)		/*ch_ngl, ch_runInfo, mismatchNumber,  params.raw_data*/
 
-	if (params.chemistry == 'Default') {
+	if (params.dataNature == 'DNA') {
 		DNA_QC(ch_read)
 	} else {
-		System.out.println "Pas de sous-workflow DNA_QC()"
+		System.out.println "Le QC des donnÃ©es non ADN n'est pas prit en charge pour le moment."
 	  }
 
 	// MultiQC
-- 
GitLab


From cce52c2beff7924b3f9a9a9cd3d7f7bf97f141f6 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Tue, 31 Jan 2023 17:10:19 +0100
Subject: [PATCH 48/51] Add scripts for futures QC pipelines

	- diversity QC
	- RNA QC
---
 sub-workflows/local/diversity_qc.nf | 22 ++++++++++++++++++++++
 sub-workflows/local/rna_qc.nf       |  6 ++++++
 2 files changed, 28 insertions(+)

diff --git a/sub-workflows/local/diversity_qc.nf b/sub-workflows/local/diversity_qc.nf
index e69de29..8bc288d 100644
--- a/sub-workflows/local/diversity_qc.nf
+++ b/sub-workflows/local/diversity_qc.nf
@@ -0,0 +1,22 @@
+
+/*
+	Paired-end read merging (FLASH)
+		if this is a 16S analysis AND a reference bank is provided, then:
+			Assignment on a subset of sequences
+*/
+
+// -------------------------------------------------
+// 					MODULES
+// -------------------------------------------------
+include { 			} from "$baseDir/modules/local/module_diversity.nf"
+
+
+// -------------------------------------------------
+// 					WORKFLOW
+// -------------------------------------------------
+workflow DIVERSITY_QC {
+	take:
+		fastq	// single declared input; nothing consumes it yet
+	main:
+		// Placeholder: no process is called in this sub-workflow yet
+}
\ No newline at end of file
diff --git a/sub-workflows/local/rna_qc.nf b/sub-workflows/local/rna_qc.nf
index e69de29..fe778d2 100644
--- a/sub-workflows/local/rna_qc.nf
+++ b/sub-workflows/local/rna_qc.nf
@@ -0,0 +1,6 @@
+/*
+	alignementSTAR
+		alignementStat
+		insertSizeDistribution
+
+*/
\ No newline at end of file
-- 
GitLab


From d4422afdf7b3b260659c0047704499f0a917bbff Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Tue, 31 Jan 2023 17:12:00 +0100
Subject: [PATCH 49/51] Add scripts for DTM

---
 bin/DTM/circlize_v2.R    | 114 +++++++++++++++++++++++++++++++++++++++
 bin/DTM/make_bedgraph.sh | 100 ++++++++++++++++++++++++++++++++++
 2 files changed, 214 insertions(+)
 create mode 100644 bin/DTM/circlize_v2.R
 create mode 100644 bin/DTM/make_bedgraph.sh

diff --git a/bin/DTM/circlize_v2.R b/bin/DTM/circlize_v2.R
new file mode 100644
index 0000000..f4c9d69
--- /dev/null
+++ b/bin/DTM/circlize_v2.R
@@ -0,0 +1,114 @@
+#!/usr/bin/env Rscript
+
+#install.packages("circlize",repos = "http://cran.us.r-project.org")
+#BiocManager::install("rtracklayer")
+#BiocManager::install("ComplexHeatmap")
+library(rtracklayer)
+library(circlize)
+library(ComplexHeatmap)
+
+# Command-line arguments: <chunk_size> <comma-separated list of bedgraph files>
+args <- commandArgs(trailingOnly=TRUE)
+# Exactly two arguments are required: otherwise stop with the usage message
+if (length(args) != 2) {
+	stop("Exactly two arguments must be supplied in the following order:
+	\n 1. an integer for chunk_size /!\\ too small (10) will take forever, too big (1000000) will cause clustering err: 10000 or 100000 recommended 
+	\n 2. followed by all input.bedgraph files separated by commas and NO spaces
+	\n ex: circlize_v2.R 100000 filtered_Sdomesticus6.bedgraph,filtered_Sdomesticus4.bedgraph", call.=FALSE)
+} else if (length(args) == 2) {
+	chunk_size <- as.numeric(args[1]) # bin width (bp) used to average the coverage
+	list_bedgraphs <- strsplit(args[2], ", *")[[1]] # split on commas as documented above; spaces after a comma are tolerated
+}
+
+# Initialize empty matrix to plot. Each column will hold chunked data from one sample.
+cov_matrix <- c()
+loop <- 1 # loop counter (column index of the current sample in cov_matrix)
+
+for (bedgraph in list_bedgraphs){
+	# Import bedgraph generated with -bga
+	print(paste0("Loading bedgraph ", bedgraph))
+	BedFile <- rtracklayer::import(bedgraph, format = "bed")
+	print(paste0("Loaded. Binning data by ", chunk_size, "bp intervals"))
+
+	# Extract coverage values and weigh by width
+	coverage_points <- as.numeric(BedFile@elementMetadata@listData[["name"]])*as.numeric(BedFile@ranges@width)
+
+	# Reduce data
+	pos_start=BedFile@ranges@start # extract start positions from bed object
+	chr <- 0 # chromosome counter
+	c <- 0 # chunk counter
+	# chunk_size=10000 #10k, 100k... defined in args
+	chunks <- c() # index of the last line before each chunk start (i.e. the end of the previous chunk)
+	chr_factors <- c() # reduced vector of chromosomes to use as split factors (one entry per chunk)
+
+	for (i in seq_along(pos_start)){
+		val <- pos_start[i]
+		if(val == 1){ # a start position of 1 is treated as the first interval of a new chromosome
+			c <- 0 # reset count
+			chr <- chr+1 # next chromosome
+		}
+		if (val > chunk_size * c){
+			c <- c+1 # next chunk (10k, 20k, 30k...)
+			chr_factors <- c(chr_factors, toString(BedFile@seqnames@values[chr])) # save corresponding chr
+			chunks <- c(chunks, i-1) # save coordinate
+		}
+	}
+	chunks <- c(chunks, length(pos_start)) # sentinel: close the last chunk so it is averaged too (keeps one value per chr_factors entry)
+	# Calculate averages of each chunk
+	values_avg <- c()
+	for (i in seq_len(length(chunks)-1)){ # i starts at 1; seq_len() yields no iteration when there is no chunk
+		start <-chunks[i]+1
+		x <- i+1
+		end <- chunks[x]
+		diff <- (pos_start[end]-pos_start[start])+1
+		if (start==end){ # If only one line in chunk, divide by the width of that single interval
+		  diff= as.numeric(BedFile@ranges@width)[start]
+		}
+		values_avg <- c(values_avg, sum(coverage_points[start:end])/diff)
+	}
+	# Example: verify second value in bash with 
+	#head -n 74952 bga_zeros_scaled_doublefiltered_Sdomesticus6_S6_L001_R1_001_subset_unmerged.bedgraph | tail -n 35055 |  awk -F'\t' '{ sum += $4*($3-$2); n++ } END { if (n > 0) print sum / n; }'
+
+	# Append to matrix (one column per sample, named after the bedgraph file)
+	cov_matrix <- cbind(cov_matrix, values_avg)
+	colnames(cov_matrix)[loop] <- basename(bedgraph)
+	loop <- loop+1
+}
+
+# Order of samples (printed once, as a single comma-separated line)
+print(paste0("Samples plot order (ext->int) ", paste(colnames(cov_matrix), collapse=", ")))
+
+# Plot: colour scale shared by all samples (blue = 0, grey = median, red = max)
+print("Generating graph")
+bed_min <- 0
+bed_med <- median(cov_matrix)
+#nintyninth_percentile <- floor(length(values_avg)*0.01) # Index of top 1 percent of sorted points
+#bed_max <- head(sort(cov_matrix,decreasing=TRUE),n=nintyninth_percentile)[nintyninth_percentile] # largest of 99% to avoid outliers
+bed_max <- max(cov_matrix) # maximum over every sample, like bed_med (values_avg only holds the last sample)
+col_fun <- colorRamp2(c(bed_min, bed_med, bed_max), c("blue","gray85", "red"))
+split <- factor(chr_factors, levels = BedFile@seqnames@values) # one sector per chromosome, taken from the last loaded bedgraph
+
+# Reduce track width (default=0.2) if multiple samples
+circos.clear()
+circos.par(RESET = TRUE)
+if((ncol(cov_matrix)>1) && (ncol(cov_matrix)<=4)){
+  circos.par("track.height" = 0.1)
+} else if(ncol(cov_matrix)>4){
+  circos.par("track.height" = 0.05)
+}
+
+filename <- paste(basename(bedgraph), ".jpeg", sep="") # output is named after the last bedgraph of the list
+jpeg(file=filename, units="in", width=5, height=5, res=150, pointsize = 8)
+
+# Very important that we do not cluster to not change the order!
+for(i in seq_len(ncol(cov_matrix))) {       # for-loop over columns (samples), one track each
+	circos.heatmap(cov_matrix[,i], col=col_fun, split=split, cluster = FALSE, show.sector.labels = TRUE)
+}
+circos.clear()
+
+legend_title <- paste("Genome coverage (normalized RMP)\n", chunk_size, "bp resolution\n")
+lgd_heat <- Legend(title = legend_title, col_fun = col_fun,
+                   labels_gp = gpar(fontsize = 6), title_gp = gpar(fontsize = 8), grid_width = unit(0.25, "cm"))
+grid.draw(lgd_heat)
+
+dev.off()
diff --git a/bin/DTM/make_bedgraph.sh b/bin/DTM/make_bedgraph.sh
new file mode 100644
index 0000000..1eab891
--- /dev/null
+++ b/bin/DTM/make_bedgraph.sh
@@ -0,0 +1,100 @@
+#!/bin/bash
+#SBATCH --mail-user=jules.sabban@inrae.fr
+#SBATCH --mail-type=BEGIN,END,FAIL
+#SBATCH -p wflowq
+#SBATCH -t 4-00
+#SBATCH --mem-per-cpu=12G
+#SBATCH -e %x_%j.err
+#SBATCH -o %x_%j.log
+
+#### USAGE ###
+<< usageMessage
+USAGE : sbatch -J make_bedgraph_bacterium --array=1-6 make_bedgraph.sh <bam_folder> <names_of_chromosomes_file> <chrom_pattern_to_remove>
+EXAMPLE : sbatch -J make_bedgraph_pic --array=1-6 make_bedgraph.sh ../samtools ../chrom_names "JANXI\|CM"
+
+<chrom_pattern_to_remove> is mandatory, but can be an empty string
+usageMessage
+
+#### ARGUMENT ####
+I_DIR=$1 # path to samtools outputs
+I_NAMES=$2	# path to chrom_names file
+R_PATTERN=$3	# chr pattern to remove from bedgraph file
+
+#### MODULES ####
+module load bioinfo/samtools-1.16.1
+module load bioinfo/bedtools-2.27.1
+
+
+
+replace_chr_names() {
+	# Rewrite the "SN:" names in the header of ${BAM_PATH} and write the result to filtered_${S_NAME}.bam.
+	# $I_NAMES is read as one "<old_name> <new_name>" pair per line (whitespace separated).
+	echo -e "Replace chr names"
+	local OLD NEW
+	local SED_ARGS=(-e '')	# the empty script keeps the sed call valid when $I_NAMES has no line
+	while read -r OLD NEW || [[ -n $OLD ]]	# "|| -n" keeps a last line that has no trailing newline
+	do
+		SED_ARGS+=(-e "s/SN:${OLD}/SN:${NEW}/")
+	done < "$I_NAMES"
+	# A single sed applies every substitution in file order; arguments stay quoted, so no "sh -c" string is built.
+	# note the - is on purpose, -c adds chr in front
+	samtools view -H "${BAM_PATH}" | sed "${SED_ARGS[@]}" | samtools reheader - "$BAM_PATH" > "filtered_${S_NAME}.bam"
+}
+
+
+
+#samtools index chr_${S_NAME}.bam
+#cp chr_${S_NAME}.bam filtered_${S_NAME}.bam
+
+# filter out unplaced contigs
+#samtools view  chr_${S_NAME}.bam `seq 1 18` X Y -b > filtered_${S_NAME}.bam
+
+index_bam(){	# index filtered_${S_NAME}.bam, the file written by replace_chr_names
+	echo -e "Indexing filtered BAM"
+	samtools index "filtered_${S_NAME}.bam"	# quoted so the sample name is never word-split or globbed
+}
+
+
+# no longer need intermediary chr renamed bam/bai
+#rm chr_${S_NAME}.bam chr_${S_NAME}.bam.bai
+
+make_bedgraph(){
+	# Scale factor reads per million: "-f 0" requires no flag, so -c counts every record of the filtered BAM
+	n_reads=$(samtools view -f 0 -c "filtered_${S_NAME}.bam")
+	[[ $n_reads -gt 0 ]] || { echo "No reads counted in filtered_${S_NAME}.bam: cannot compute a scaling factor" >&2; exit 1; }
+	scale=$(bc <<< "scale=6;1000000/${n_reads}")	# e.g. 0.000808
+	echo -e "Scaling factor ${scale}. On to bedgraph generation"
+	# bedgraph of the scaled coverage, written to zeros_scaled_${S_NAME}.bedgraph
+	bedtools genomecov -ibam "filtered_${S_NAME}.bam" -bga -scale "${scale}" > "zeros_scaled_${S_NAME}.bedgraph"
+}
+
+remove_unwanted_scaffold(){
+	# Even though bam was filtered, still have 0 values for unplaced scaffolds: drop the bedgraph lines matching $R_PATTERN (grep basic regex)
+	if [[ -n $R_PATTERN ]]
+	then
+		grep -v -- "$R_PATTERN" "zeros_scaled_${S_NAME}.bedgraph" > "zeros_scaled_filtered_${S_NAME}.bedgraph"	# quoted + "--": spaces or a leading "-" stay part of the pattern
+		rm "zeros_scaled_${S_NAME}.bedgraph"
+	else	# empty pattern: nothing to filter, only rename the file
+		mv "zeros_scaled_${S_NAME}.bedgraph" "zeros_scaled_filtered_${S_NAME}.bedgraph"
+	fi
+}
+
+
+
+main() {
+	# Process the BAM whose rank in the file list equals this SLURM array task id (one file per task).
+	# The list is sorted so that every task of the array sees the files in the same order.
+	BAM=$(find "$I_DIR" -type f -name '*R1*unmerged.bam' -execdir basename '{}' ';' | sort | sed -n "${SLURM_ARRAY_TASK_ID}p")
+	if [[ -z $BAM ]]; then
+		echo "No BAM matches task ${SLURM_ARRAY_TASK_ID} in ${I_DIR}" >&2
+		exit 1
+	fi
+	echo -e "Traitement de ${BAM}"
+	BAM_PATH="${I_DIR}/${BAM}"	# NOTE(review): assumes the BAM sits directly in $I_DIR although find recurses - confirm
+	S_NAME=$(basename "$BAM" .bam)
+
+	# Each step consumes the file written by the previous one, so stop at the first failure
+	replace_chr_names && index_bam && make_bedgraph && remove_unwanted_scaffold
+}
+
+main
\ No newline at end of file
-- 
GitLab


From 20b73046b1fde12b62d43dac12ece21e53002c63 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Tue, 31 Jan 2023 17:23:13 +0100
Subject: [PATCH 50/51] Move example config files

---
 assets/{fastq_screen.conf => fastq_screen.conf_example} | 0
 params.config_example => assets/params.config_example   | 0
 2 files changed, 0 insertions(+), 0 deletions(-)
 rename assets/{fastq_screen.conf => fastq_screen.conf_example} (100%)
 rename params.config_example => assets/params.config_example (100%)

diff --git a/assets/fastq_screen.conf b/assets/fastq_screen.conf_example
similarity index 100%
rename from assets/fastq_screen.conf
rename to assets/fastq_screen.conf_example
diff --git a/params.config_example b/assets/params.config_example
similarity index 100%
rename from params.config_example
rename to assets/params.config_example
-- 
GitLab


From 50884d69282d372b08187a1c87046d72117f70f4 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Tue, 31 Jan 2023 17:30:21 +0100
Subject: [PATCH 51/51] Begin the README.md writing

---
 README.md | 92 ++++++++++---------------------------------------------
 1 file changed, 17 insertions(+), 75 deletions(-)

diff --git a/README.md b/README.md
index 9f44619..33b4fbf 100644
--- a/README.md
+++ b/README.md
@@ -4,78 +4,20 @@
 [![pipeline status](https://forgemia.inra.fr/get-nextflow-ngl-bi/template-nf/badges/master/pipeline.svg)](https://forgemia.inra.fr/get-nextflow-ngl-bi/template-nf//-/commits/master)
 
 
-# Ce repository est un template pour les workflows Get
-
-Ce workflow et ses diffÃ©rentes configurations permettent :
-- d'executer un pipeline a partir d'un fichier samples.csv
-- d'utiliser une image singularity ou conda ou path (cf profils)
-- d'executer un multiqc 
-- de tracer les versions des logiciels
-- d'envoyer un email Ã  la fin du pipeline --email toto@fai.fr
-- de gÃ©nÃ©rer automatiquement une image singularity et de la mettre a disposition dans le registry de la forge.
-
-## Comment utiliser ce rÃ©pository ?
-
-Cloner le repo
-```
-git clone git@forgemia.inra.fr:get-nextflow-ngl-bi/template-nf.git
-```
-
-Voici la liste des fichiers a rÃ©cupÃ©rer avec leur utilitÃ© :
-- `asset` code pour email et config de multiQC
-- `conf` configurations utilisÃ©es dans `nextflow.config`    
-    - base : conf gÃ©nÃ©rale
-    - path : si profile utilisÃ© est --multipath ajouter un block par process ayant des dÃ©pendances
-    - test : chaque pipeline devra avoir un profil de test pour tester les pipelines
-    - genomes : devra peut-etre etre centralisÃ© ailleurs pour avoir un seul fichier contenant les genomes utilisÃ©s par la pf.
-
-- `doc/output.md` : ce fichier devra etre copiÃ© et modifiÃ© avec la description des outputs du pipeline. Ce fichier est ensuite converti en html dans le repertoires de resultats du pipelines.
-
-- `.gitlab-ci.yml` si vous souhaitez avoir la gÃ©nÃ©ration automatique de l'image singularity Ã  partir des fichiers `Singularityfile` et `environment.yml` mettez ce fichier Ã  la racine de votre projet. L'image sera ensuite recupÃ©rable avec la commande suivante :
-```
-singularity pull template-nf.sif oras://registry.forgemia.inra.fr/get-nextflow-ngl-bi/template-nf/template-nf:latest
-```
-
-- les fichiers `CHANGELOG.md`, `LICENCE`, `README.md` a utiliser et modifier
-
-- `main.nf` : le pipeline
-- `nextflow.config` : la conf gÃ©nÃ©rale du pipeline
-- pour le reproductibilitÃ© : `Singularityfile` et `environment.yml` (si besoin en plus: `Dockerfile`)
-
-## Et apres ?
-- nomenclature: les channels doivent etre nommÃ©e comme suis: ch_FILE1_for_PROCESS_DESTINATION
-- mettre en place des donnÃ©es de tests
-- lorsque l'on code un process : 
-    - utiliser les labels (pour la memoire, cpu, temps) dÃ©finis dans base.config
-    - ajouter les logiciels utilisÃ©s dans get_software_versions
-- documenter le quick start ci-dessous et supprimer le paragraphe 'Ce repository est un template pour les workflows Get'
-- completer le `doc/output.md` et le `doc/usage.md`
-- tagger un pipeline dÃ¨s que les fonctionnalitÃ©s attendues sont codÃ©es
-
-
-
-> La documentation suivante est a modifier et a garder. La precedente est a supprimer. 
-
-## Introduction
-
-The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It comes with docker and singularity containers making installation trivial and results highly reproducible.
-
-## Quick Start
-
-i. Install [`nextflow`](https://nf-co.re/usage/installation)
-
-ii. Install one of [`singularity`](https://www.sylabs.io/guides/3.0/user-guide/) or [`conda`](https://conda.io/miniconda.html)
-
-iii. Clone the pipeline and download the singularity pipeline 
-
-```bash
-git clone git@forgemia.inra.fr:get-nextflow-ngl-bi/template-nf.git
-cd template-nf
-singularity pull template-nf.sif oras://registry.forgemia.inra.fr/get-nextflow-ngl-bi/template-nf/template-nf:latest
-```
-iv. Run the pipeline
-
-```bash
-nextflow run pathto/template-nf/main.nf -profile test,singularity
-```
-
+# The wf-illumina-nf pipeline
+This pipeline performs the QC of data from Illumina sequencers.  
+
+## How to use it?
+The pipeline begins after the NGS_Illumina pipeline, which ends by demultiplexing the raw data. Five elements are needed in the demultiplexing output directory:
+- one fastq files folder per project
+- the SampleSheet.csv
+- the nextflow outputs folder
+- the params.config file
+- the fastqScreen configuration file
+
+Examples of the params.config and fastqScreen configuration files are available in the assets folder.
+
+Example of a basic command line to launch the pipeline (from the nextflow folder):  
+```bash 
+sbatch -J nf-illumina_BHNKY7DRX2_1 -p wflowq -t 3-00 --mem 5GB --wrap="module load bioinfo/Nextflow-v21.04.1; cd /home/sbsuser/work/data/NovaSeq/230116_A00318_0372_BHNKY7DRX2_Lane1_1673933427_10x/nextflow; nextflow run /work/sbsuser/test/jules/VisualStudioSources/wf-illumina-nf/main.nf -profile prod -ansi-log false"
+```
\ No newline at end of file
-- 
GitLab