| Starting Database | 	 IMGT database but with IGHV3-7*02 extended by two bases (GA), and with the IGHV3-7*02 with a 3’-end that reads GCGAGGGA, (i.e. two variants of IGHV3-7*02)  
	(see Notes regarding other databases used but whose results are not reported)  | 
| Settings | 	 	 
		- Type of sequences: Choose ‘Ig’ or ‘TCR’.
 
	 
# 
sequence_type: Ig
			 
			- Barcoding settings
 
		 
		 
		- If you have a random barcode sequence (unique molecular identifier) at the 5’ end,
 
		- set this to its length. Leave at 0 when you have no 5’ barcode.
 
	 
# 
barcode_length_5prime: 0
		 
		- Same as above, but for the 3’ end of the sequence. Leave at 0 when you have no 3’ barcode.
 
		- Currently, you cannot have a barcode in both ends, so at least one of the two settings
 
		- must be zero.
 
	 
# 
barcode_length_3prime: 0
		 
		- When barcoding is enabled, sequences that have identical barcode and CDR3 are
 
		- collapsed into a single consensus sequence.
 
		- If you set this to false, no collapsing and consensus taking is done and
 
		- only the barcode is removed from each sequence.
 
	 
# 
barcode_consensus: true
		 
		- When grouping by barcode and CDR3, the CDR3 location is either detected with a
 
		- regular expression or a ‘pseudo’ CDR3 sequence is used, which is at a
 
		- pre-defined position within the sequence.
 
	 
#
	
		- Set this configuration option to a region like [-80, -60] to use a pseudo
 
		- CDR3 located at bases 80 to 60 counted from the 3’ end. (Use negative numbers to
 
		- count from the 3’ end, positive ones to count from the 5’ end. The most 5’
 
		- base has index 0.)
 
	 
#
	
		- Set this to ‘detect’ (with quotation marks) in order to use CDR3s
 
		- detected by regular expression. This assumes that the input contains
 
		- VH sequences!
 
	 
#
	
		- Set this to false (no quotation marks) in order to only group by barcode, not by CDR3.
 
	 
# 
# The value must use plain ASCII quotes: typographic quotes (‘ ’) are not YAML
# quote indicators and would become part of the string itself.
cdr3_location: 'detect'  # Works only with VH sequences!
		 
		- When you use a RACE protocol, then the sequences have a run of G nucleotides in the beginning
 
		- which need to be removed when barcodes are used. If you use RACE, set this to true.
 
		- The G nucleotides are assumed to be in the 5’ end (but after the barcode if it exists).
 
	 
# 
race_g: false
			 
			- Primer-related settings
 
		 
		 
		- If set to true, it is assumed that the forward primer is always at the 5’ end
 
		- of the first read and that the reverse primer is always at the 5’ end of the
 
		- second read. If it can also be the other way, set this to false.
 
		- This setting has no effect if no primer sequences are defined below.
 
	 
# 
stranded: false
	
		- List of 5’ primers
 
	 
# 
# List of 5' primers. One primer sequence per list item; items are indented
# with spaces (YAML forbids tabs) and carry exactly one '-' so the value is a
# flat list of strings rather than a list of nested lists.
forward_primers:
  - AGCTACAGAGTCCCAGGTCCA
  - ACAGGYGCCCACTCYSAG
  - TTGCTMTTTTAARAGGTGTCCAGTGTG
  - CTCCCAGATGGGTCCTGTC
  - ACCGTCCYGGGTCTTGTC
  - CTGTTCTCCAAGGGWGTCTSTG
  - CATGGGGTGTCCTGTCACA
 
	 
	
		- List of 3’ primers
 
	 
# 
# List of 3' primers. One primer sequence per list item; items are indented
# with spaces (YAML forbids tabs) and carry exactly one '-' so the value is a
# flat list of strings rather than a list of nested lists.
reverse_primers:
  - GCAGGCCTTTTTGGCCNNNNNGCCGATGGGCCCTTGGTGGAGGCTGA  # IgG
  - GCAGGCCTTTTTGGCCNNNNNGGGGCATTCTCACAGGAGACGAGGGGGAAAAG  # IgM
 
	 
		 
		- Work only on this number of reads (for quick test runs). Set to false to
 
		- process all reads.
 
	 
#
#limit: false
		 
		- Filter out merged reads that are shorter than this length.
 
	 
# 
minimum_merged_read_length: 300
		 
		- Read merging program. Choose either ‘pear’ or ‘flash’.
 
		- pear merges more reads, but is slower.
 
	 
#
#merge_program: pear
		 
		- Maximum overlap (-M) for the flash read merger.
 
		- If you use pear, this is ignored.
 
	 
# 
flash_maximum_overlap: 300
		 
		- Do not mention the original FASTA or FASTQ sequence names in the
 
		- assigned.tab files, but instead use names <samplename>_seq<number>,
 
		- where <number> is a running number starting at 1.
 
	 
#  true: yes, rename
#  false: no, do not rename
# 
rename: true
		 
		- Whether debugging is enabled or not. Currently, if this is set to true,
 
		- some large intermediate files that would otherwise be deleted will be
 
		- kept.
 
	 
# 
debug: false
		 
		- The “seed value” is an arbitrary number used to get reproducible
 
		- runs. Two runs that use the same software version, the same seed
 
		- and otherwise the same configuration will give identical results.
 
	 
#
	
		- Set this to false in order to use a different seed each run.
 
		- The results will then not be exactly reproducible.
 
	 
# 
seed: 1
		 
		- The preprocessing filter is always applied directly after running IgBLAST,
 
		- even if no gene discovery is requested.
 
	 
# 
preprocessing_filter:
  # Minimum percentage of the V gene that a match must cover
  v_coverage: 90
  # Minimum percentage of the J gene that a match must cover
  j_coverage: 60
  # Highest E-value allowed for a V gene match
  v_evalue: 0.001
			 
			- Candidate discovery settings
 
		 
		 
		- When discovering new V genes, ignore whether a J gene has been assigned
 
		- and also ignore its %SHM.
 
	 
#  true: yes, ignore the J
#  false: do not ignore J assignment, do not ignore its %SHM
# 
ignore_j: false
		 
		- When clustering sequences to discover new genes, subsample to this number of
 
		- sequences. Higher is slower.
 
	 
# 
subsample: 1000
		 
		- When computing the Ds_exact column, consider only D hits that
 
		- cover the reference D gene sequence by at least this percentage.
 
	 
# 
d_coverage: 70
			 
			- V candidate filtering (germline filtering) settings
 
		 
		 
		- Filtering criteria applied to candidate sequences in all iterations except the last.
 
	 
# 
pre_germline_filter:
  # Minimum number of unique CDR3s (within exact matches)
  unique_cdr3s: 2
  # Minimum number of unique J genes (within exact matches)
  unique_js: 2
  # Add database sequences to the whitelist
  whitelist: true
  # Minimum number of sequences assigned to cluster
  cluster_size: 0
  # Merge sequences if they have at most this number of differences
  differences: 0
  # Whether to allow non-productive sequences containing stop codons
  allow_stop: true
  # Threshold for removal of cross-mapping artifacts (set to 0 to disable)
  cross_mapping_ratio: 0.02
  # Required minimum ratio of clonotype counts between alleles of the same gene
  clonotype_ratio: 0.12
  # Required minimum ratio of "exact" counts between alleles of the same gene
  exact_ratio: 0.12
  # Minimum Ds_exact ratio between alleles
  unique_d_ratio: 0.3
  # Check Ds_exact ratio only if highest-expressed allele has at least this
  # Ds_exact count
  unique_d_threshold: 10
		 
		- Filtering criteria applied to candidate sequences in the last iteration.
 
		- These should be more strict than the pre_germline_filter criteria.
 
	 
# 
germline_filter:
  # Minimum number of unique CDR3s (within exact matches)
  unique_cdr3s: 5
  # Minimum number of unique J genes (within exact matches)
  unique_js: 3
  # Add database sequences to the whitelist
  whitelist: true
  # Minimum number of sequences assigned to cluster
  cluster_size: 100
  # Merge sequences if they have at most this number of differences
  differences: 0
  # Whether to allow non-productive sequences containing stop codons
  allow_stop: false
  # Threshold for removal of cross-mapping artifacts (set to 0 to disable)
  cross_mapping_ratio: 0.02
  # Required minimum ratio of clonotype counts between alleles of the same gene
  clonotype_ratio: 0.12
  # Required minimum ratio of "exact" counts between alleles of the same gene
  exact_ratio: 0.12
  # Minimum Ds_exact ratio between alleles
  unique_d_ratio: 0.3
  # Check Ds_exact ratio only if highest-expressed allele has at least this
  # Ds_exact count
  unique_d_threshold: 10
			 
			- J discovery settings
 
		 
	j_discovery:
  allele_ratio: 0.2         # Required minimum ratio between alleles of a single gene
  cross_mapping_ratio: 0.1  # Threshold for removal of cross-mapping artifacts.
  propagate: true           # Use J genes discovered in iteration 1 in subsequent ones  |