diff --git a/.gitignore b/.gitignore index e86d99b..7352309 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ __pycache__/ +.coverage decombinator.egg-info .pytest_cache .decombinatorenv diff --git a/src/decombinator/collapse.py b/src/decombinator/collapse.py index a4052e6..9be7f01 100644 --- a/src/decombinator/collapse.py +++ b/src/decombinator/collapse.py @@ -351,11 +351,11 @@ def check_umi_quality(qualstring: tuple[str], parameters: list[int]) -> bool: return number_below_min > parameters[1] or average_quality < parameters[2] -def are_seqs_equivalent(seq1, seq2, lev_percent_threshold): +def are_seqs_equivalent(seq1: str, seq2: str, lev_threshold_fraction: float): # Returns True if seqs can be considered the same, False otherwise # Definition of equivalent: - # levenshtein distance as a percentage of the shorter of the two seqs is <= threshold - threshold = len(min(seq1, seq2, key=len)) * lev_percent_threshold + # levenshtein distance as a fraction of the shorter of the two seqs is <= threshold + threshold = len(min(seq1, seq2, key=len)) * lev_threshold_fraction return polyleven.levenshtein(seq1, seq2) <= threshold @@ -482,7 +482,7 @@ def read_in_data( data, inputargs, barcode_quality_parameters, - lev_threshold, + lev_threshold_fraction, dont_count, opener, ): @@ -566,7 +566,7 @@ def read_in_data( if not inputargs["sampling_analysis"]: dcretc = "|".join([str(dcr), seq, seq_qualstring, seq_id]) else: - pre_collapse_barcode = line[8] + full_barcode_region = line[8] v_tail = line[10] dcretc = "|".join( [ @@ -574,7 +574,9 @@ def read_in_data( seq, seq_qualstring, seq_id, - pre_collapse_barcode, + barcode, + barcode_qualstring, + full_barcode_region, v_tail, ] ) @@ -590,54 +592,80 @@ def read_in_data( if barcode in barcode_lookup: - for index in barcode_lookup[barcode]: - if are_seqs_equivalent(index[1], seq, lev_threshold): + for barcode_index, barcode_protoseq in barcode_lookup[barcode]: + if are_seqs_equivalent( + barcode_protoseq, seq, lev_threshold_fraction + ): barcode_dcretc[ - barcode + "|" + str(index[0]) + "|" + index[1] + barcode + + "|" + + str(barcode_index) + + "|" + + barcode_protoseq ].append(dcretc) protodcretc_list = barcode_dcretc[ - barcode + "|" + str(index[0]) + "|" + index[1] + barcode + + "|" + + str(barcode_index) + + "|" + + barcode_protoseq ] seq_counter = coll.Counter( map(lambda x: x.split("|")[1], protodcretc_list) ) - protoseq = seq_counter.most_common(1)[0][ + new_protoseq = seq_counter.most_common(1)[0][ 0 ] # find most common sequence in group - if not index[1] == protoseq: + if barcode_protoseq != new_protoseq: # if there is a new protoseq, replace record with old protoseq # with identical record with updated protoseq barcode_dcretc[ - barcode + "|" + str(index[0]) + "|" + protoseq + barcode + + "|" + + str(barcode_index) + + "|" + + new_protoseq ] = barcode_dcretc[ - barcode + "|" + str(index[0]) + "|" + index[1] + barcode + + "|" + + str(barcode_index) + + "|" + + new_protoseq ] del barcode_dcretc[ - barcode + "|" + str(index[0]) + "|" + index[1] + barcode + + "|" + + str(barcode_index) + + "|" + + new_protoseq ] - barcode_lookup[barcode][index[0]] = [ - index[0], - protoseq, + barcode_lookup[barcode][barcode_index] = [ + barcode_index, + new_protoseq, ] - group_assigned = True - # if assigned to a group, stop and move onto next read - break + group_assigned = True + # if assigned to a group, stop and move onto next read + break if not group_assigned: # if no appropriate group found, create new group with correctly incremented index - barcode_lookup[barcode].append([index[0] + 1, seq]) + new_index = len(barcode_lookup[barcode]) + barcode_lookup[barcode].append([new_index, seq]) barcode_dcretc[ - "|".join([barcode, str(index[0] + 1), seq]) + "|".join([barcode, str(new_index), seq]) ].append(dcretc) group_assigned = True else: # if no identical barcode found, create new barcode group with index zero - barcode_lookup[barcode].append([0, seq]) - barcode_dcretc["|".join([barcode, "0", seq])].append(dcretc) + first_index = 0 + barcode_lookup[barcode].append([first_index, seq]) + barcode_dcretc["|".join([barcode, str(first_index), seq])].append( + dcretc + ) group_assigned = True counts["readdata_barcode_dcretc_keys"] = len(barcode_dcretc.keys()) @@ -712,7 +740,7 @@ def make_merge_groups( def make_clusters( merge_groups: sparse.coo_matrix, barcode_dcretc_list: list[tuple[str, list[str]]], - seq_threshold: int, + lev_threshold_fraction: float, ) -> coll.defaultdict[str, list[str]]: # Considers clusters as an undirected graph composed of disconnected subgraphs. # The nodes of the graph are the initial groups of barcode/protosequences. Edges between nodes @@ -728,15 +756,13 @@ def make_clusters( # initialise empty graph G = nx.Graph() - percent_seq_threshold = seq_threshold / 100.0 - for i, j in zip(merge_groups.row, merge_groups.col): protoseqs = [ barcode_dcretc_list[i][0].split("|")[2], barcode_dcretc_list[j][0].split("|")[2], ] if are_seqs_equivalent( - protoseqs[0], protoseqs[1], percent_seq_threshold + protoseqs[0], protoseqs[1], lev_threshold_fraction ): G.add_edge(i, j) n_merged_UMIs += 1 @@ -769,33 +795,48 @@ def make_clusters( return clusters -def write_clusters(clusters): - # create directory to store cluster data without overwriting exiting directories - dirname = "clusters" +def write_clusters( + clusters: coll.defaultdict[str, list[str]], + inputargs: dict[str, typing.Union[str, bool, int]], +) -> None: + # create file to store cluster data without overwriting exiting files + chain: str = inputargs["chain"] + filename = "clusters_" + chain + ftype = ".psv.gz" count = 1 - while os.path.isdir(dirname): - dirname = "clusters" + str(count) + while os.path.isfile(filename + ftype): + filename = filename + str(count) count += 1 - os.mkdir(dirname) + filename = filename + ftype print( - " Writing clusters to directory: ", os.path.abspath(dirname), "..." + " Writing clusters to directory: ", os.path.abspath(filename), "..." ) - # write data of each cluster to a separate file and store in clusters directory - for k in clusters: - with open( - dirname + os.sep + "|".join(k.split("|")[:2]) + ".txt", "w" - ) as ofile: - for j in clusters[k]: - print(j, file=ofile) - return 1 + header_names = [ + "umi_id", + "dcr", + "inter_tag", + "inter_tag_qual", + "read_id", + "umi", + "umi_qual", + "full_oligo", + "v_tail", + ] + # write data of each cluster to file + with gzip.open(filename, "wt") as cluster_file: + cluster_file.write("|".join(header_names) + "\n") + for cluster_id, dcretc_list in clusters.items(): + cluster_name = ":".join(cluster_id.split("|")[:2]) + for dcretc in dcretc_list: + cluster_file.write(cluster_name + "|" + dcretc + "\n") def cluster_UMIs( barcode_dcretc: coll.defaultdict[str, list[str]], inputargs: dict[str, typing.Union[str, bool, int]], barcode_threshold: int, - seq_threshold: int, + lev_threshold_fraction: float, dont_count: bool, ) -> coll.defaultdict[str, list[str]]: # input data of form: {'barcode1|index|protoseq': [dcretc1, dcretc2,...], 'barcode2|index|protoseq|: [dcretc1, dcretc2,...], ...} @@ -818,7 +859,9 @@ def cluster_UMIs( ) print(" ", "comparing TCR sequences of similar UMIs...") - clusters = make_clusters(matches, barcode_dcretc_list, seq_threshold) + clusters = make_clusters( + matches, barcode_dcretc_list, lev_threshold_fraction + ) print( " ", @@ -832,7 +875,7 @@ def cluster_UMIs( # dump clusters to separate files if desired if inputargs["writeclusters"]: - write_clusters(clusters) + write_clusters(clusters, inputargs) return clusters @@ -841,7 +884,7 @@ def collapsinate( data, inputargs, barcode_quality_parameters, - lev_threshold, + lev_threshold_fraction, barcode_distance_threshold, outpath, file_id, @@ -854,7 +897,7 @@ def collapsinate( data, inputargs, barcode_quality_parameters, - lev_threshold, + lev_threshold_fraction, dont_count, opener, ) @@ -864,7 +907,7 @@ def collapsinate( barcode_dcretc, inputargs, barcode_distance_threshold, - lev_threshold, + lev_threshold_fraction, dont_count, ) @@ -942,8 +985,8 @@ def collapsinator(inputargs: dict, data: list = None) -> list: inputargs["avgQthreshold"], ] - ## this is the percentage lev distance that is allowed to determine whether two sequences are equivalent - lev_threshold = inputargs["percentlevdist"] + ## this is the fractional lev distance that is allowed to determine whether two sequences are equivalent + lev_threshold_fraction = inputargs["percentlevdist"] / 100 ## this is the number of barcode edits that are allowed to call two barcodes equivalent barcode_distance_threshold = inputargs["bcthreshold"] @@ -963,7 +1006,7 @@ def collapsinator(inputargs: dict, data: list = None) -> list: data, inputargs, barcode_quality_parameters, - lev_threshold, + lev_threshold_fraction, barcode_distance_threshold, outpath, file_id, diff --git a/tests/resources/dcr_TINY_1_beta.freq b/tests/resources/dcr_TINY_1_beta.freq index fa04d9a..1803482 100644 --- a/tests/resources/dcr_TINY_1_beta.freq +++ b/tests/resources/dcr_TINY_1_beta.freq @@ -1,5 +1,6 @@ -15, 10, 4, 1, CTACCCCCGCGGGGAC, 2, 3 +15, 10, 4, 1, CTACCCCCGCGGGGAC, 2, 2 15, 10, 4, 1, CTACCCCCGCAAAGAC, 1, 1 +15, 10, 3, 1, CGGGGACCTACCCCC, 1, 1 43, 0, 5, 6, GGAGGGACAG, 1, 2 15, 6, 3, 9, CCTAGCGGAATACTCCTACAC, 1, 1 9, 6, 4, 0, CTCACGGGGGGTT, 1, 1 diff --git a/tests/resources/dcr_TINY_1_beta.tsv b/tests/resources/dcr_TINY_1_beta.tsv index 88e78a7..bb6181b 100644 --- a/tests/resources/dcr_TINY_1_beta.tsv +++ b/tests/resources/dcr_TINY_1_beta.tsv @@ -1,37 +1,38 @@ sequence_id v_call d_call j_call junction_aa duplicate_count sequence junction decombinator_id rev_comp productive sequence_aa cdr1_aa cdr2_aa vj_in_frame stop_codon conserved_c conserved_f sequence_alignment germline_alignment v_cigar d_cigar j_cigar av_UMI_cluster_size -1 TRBV20-1 TRBJ2-5 CSATTPAGTQETQYF 2 GGTGCTGTCGTCTCTCAACATCCGAGCTGGGTTATCTGTAAGAGTGGAACCTCTGTGAAGATCGAGTGCCGTTCCCTGGACTTTCAGGCCACAACTATGTTTTGGTATCGTCAGTTCCCGAAACAGAGTCTCATGCTGATGGCAACTTCCAATGAGGGCTCCAAGGCCACATACGAGCAAGGCGTCGAGAAGGACAAGTTTCTCATCAACCATGCAAGCCTGACCTTGTCCACTCTGACAGTGACCAGTGCCCATCCTGAAGACAGCAGCTTCTACATCTGCAGTGCTACTACCCCCGCGGGGACCCAAGAGACCCAGTACTTCGGGCCAGGCACGCGGCTCCTGGTGCTCG TGCAGTGCTACTACCCCCGCGGGGACCCAAGAGACCCAGTACTTC 15, 10, 4, 1, CTACCCCCGCGGGGAC F T GAVVSQHPSWVICKSGTSVKIECRSLDFQATTMFWYRQFPKQSLMLMATSNEGSKATYEQGVEKDKFLINHASLTLSTLTVTSAHPEDSSFYICSATTPAGTQETQYFGPGTRLLVL DFQATT SNEGSKA T F T T 3 +1 TRBV20-1 TRBJ2-5 CSATTPAGTQETQYF 2 GGTGCTGTCGTCTCTCAACATCCGAGCTGGGTTATCTGTAAGAGTGGAACCTCTGTGAAGATCGAGTGCCGTTCCCTGGACTTTCAGGCCACAACTATGTTTTGGTATCGTCAGTTCCCGAAACAGAGTCTCATGCTGATGGCAACTTCCAATGAGGGCTCCAAGGCCACATACGAGCAAGGCGTCGAGAAGGACAAGTTTCTCATCAACCATGCAAGCCTGACCTTGTCCACTCTGACAGTGACCAGTGCCCATCCTGAAGACAGCAGCTTCTACATCTGCAGTGCTACTACCCCCGCGGGGACCCAAGAGACCCAGTACTTCGGGCCAGGCACGCGGCTCCTGGTGCTCG TGCAGTGCTACTACCCCCGCGGGGACCCAAGAGACCCAGTACTTC 15, 10, 4, 1, CTACCCCCGCGGGGAC F T GAVVSQHPSWVICKSGTSVKIECRSLDFQATTMFWYRQFPKQSLMLMATSNEGSKATYEQGVEKDKFLINHASLTLSTLTVTSAHPEDSSFYICSATTPAGTQETQYFGPGTRLLVL DFQATT SNEGSKA T F T T 2 2 TRBV20-1 TRBJ2-5 CSATTPAKTQETQYF 1 GGTGCTGTCGTCTCTCAACATCCGAGCTGGGTTATCTGTAAGAGTGGAACCTCTGTGAAGATCGAGTGCCGTTCCCTGGACTTTCAGGCCACAACTATGTTTTGGTATCGTCAGTTCCCGAAACAGAGTCTCATGCTGATGGCAACTTCCAATGAGGGCTCCAAGGCCACATACGAGCAAGGCGTCGAGAAGGACAAGTTTCTCATCAACCATGCAAGCCTGACCTTGTCCACTCTGACAGTGACCAGTGCCCATCCTGAAGACAGCAGCTTCTACATCTGCAGTGCTACTACCCCCGCAAAGACCCAAGAGACCCAGTACTTCGGGCCAGGCACGCGGCTCCTGGTGCTCG TGCAGTGCTACTACCCCCGCAAAGACCCAAGAGACCCAGTACTTC 15, 10, 4, 1, CTACCCCCGCAAAGAC F T GAVVSQHPSWVICKSGTSVKIECRSLDFQATTMFWYRQFPKQSLMLMATSNEGSKATYEQGVEKDKFLINHASLTLSTLTVTSAHPEDSSFYICSATTPAKTQETQYFGPGTRLLVL DFQATT SNEGSKA T F T T 1 -3 TRBV7-9 TRBJ1-1 CASSGGTAEAFF 1 GATACTGGAGTCTCCCAGAACCCCAGACACAAGATCACAAAGAGGGGACAGAATGTAACTTTCAGGTGTGATCCAATTTCTGAACACAACCGCCTTTATTGGTACCGACAGACCCTGGGGCAGGGCCCAGAGTTTCTGACTTACTTCCAGAATGAAGCTCAACTAGAAAAATCAAGGCTGCTCAGTGATCGGTTCTCTGCAGAGAGGCCTAAGGGATCTTTCTCCACCTTGGAGATCCAGCGCACAGAGCAGGGGGACTCGGCCATGTATCTCTGTGCCAGCAGCGGAGGGACAGCTGAAGCTTTCTTTGGACAAGGCACCAGACTCACAGTTGTAG TGTGCCAGCAGCGGAGGGACAGCTGAAGCTTTCTTT 43, 0, 5, 6, GGAGGGACAG F T DTGVSQNPRHKITKRGQNVTFRCDPISEHNRLYWYRQTLGQGPEFLTYFQNEAQLEKSRLLSDRFSAERPKGSFSTLEIQRTEQGDSAMYLCASSGGTAEAFFGQGTRLTVV SEHNR FQNEAQ T F T T 2 -4 TRBV20-1 TRBJ2-1 CSASLAEYSYTEQFF 1 GGTGCTGTCGTCTCTCAACATCCGAGCTGGGTTATCTGTAAGAGTGGAACCTCTGTGAAGATCGAGTGCCGTTCCCTGGACTTTCAGGCCACAACTATGTTTTGGTATCGTCAGTTCCCGAAACAGAGTCTCATGCTGATGGCAACTTCCAATGAGGGCTCCAAGGCCACATACGAGCAAGGCGTCGAGAAGGACAAGTTTCTCATCAACCATGCAAGCCTGACCTTGTCCACTCTGACAGTGACCAGTGCCCATCCTGAAGACAGCAGCTTCTACATCTGCAGTGCTAGCCTAGCGGAATACTCCTACACTGAGCAGTTCTTCGGGCCAGGGACACGGCTCACCGTGCTAG TGCAGTGCTAGCCTAGCGGAATACTCCTACACTGAGCAGTTCTTC 15, 6, 3, 9, CCTAGCGGAATACTCCTACAC F T GAVVSQHPSWVICKSGTSVKIECRSLDFQATTMFWYRQFPKQSLMLMATSNEGSKATYEQGVEKDKFLINHASLTLSTLTVTSAHPEDSSFYICSASLAEYSYTEQFFGPGTRLTVL DFQATT SNEGSKA T F T T 1 -5 TRBV14 TRBJ2-1 CASSPHGGFSYNEQFF 1 GAAGCTGGAGTTACTCAGTTCCCCAGCCACAGCGTAATAGAGAAGGGCCAGACTGTGACTCTGAGATGTGACCCAATTTCTGGACATGATAATCTTTATTGGTATCGACGTGTTATGGGAAAAGAAATAAAATTTCTGTTACATTTTGTGAAAGAGTCTAAACAGGATGAGTCCGGTATGCCCAACAATCGATTCTTAGCTGAAAGGACTGGAGGGACGTATTCTACTCTGAAGGTGCAGCCTGCAGAACTGGAGGATTCTGGAGTTTATTTCTGTGCCAGCAGCCCTCACGGGGGGTTCTCCTACAATGAGCAGTTCTTCGGGCCAGGGACACGGCTCACCGTGCTAG TGTGCCAGCAGCCCTCACGGGGGGTTCTCCTACAATGAGCAGTTCTTC 9, 6, 4, 0, CTCACGGGGGGTT F T EAGVTQFPSHSVIEKGQTVTLRCDPISGHDNLYWYRRVMGKEIKFLLHFVKESKQDESGMPNNRFLAERTGGTYSTLKVQPAELEDSGVYFCASSPHGGFSYNEQFFGPGTRLTVL SGHDN FVKESK T F T T 1 -6 TRBV10-1 TRBJ2-2 1 GATGCTGAAATCACCCAGAGCCCAAGACACAAGATCACAGAGACAGGAAGGCAGGTGACCTTGGCGTGTCACCAGACTTGGAACCACAACAATATGTTCTGGTATCGACAAGACCTGGGACATGGGCTGAGGCTGATCCATTACTCATATGGTGTTCAAGACACTAACAAAGGAGAAGTCTCAGATGGCTACAGTGTCTCTAGATCAAACACAGAGGACCTCCCCCTCACTCTGGAGTCTGCTGCCTCCTCCCAGACATCTGTATATTTCTGCGCCAGCAGCCGGGACTAGCGGGTGAGGGGAGCTGTTTTTTGGAGAAGGCTCTAGGCTGACCGTACTGG 0, 7, 6, 8, CCGGGACTAGCGGGTGA F F DAEITQSPRHKITETGRQVTLACHQTWNHNNMFWYRQDLGHGLRLIHYSYGVQDTNKGEVSDGYSVSRSNTEDLPLTLESAASSQTSVYFCASSRD*RVRGAVFWRRL*ADRT F T T F 1 -7 TRBV5-1 TRBJ2-7 CASSLEPGVSYEQYF 1 AAGGCTGGAGTCACTCAAACTCCAAGATATCTGATCAAAACGAGAGGACAGCAAGTGACACTGAGCTGCTCCCCTATCTCTGGGCATAGGAGTGTATCCTGGTACCAACAGACCCCAGGACAGGGCCTTCAGTTCCTCTTTGAATACTTCAGTGAGACACAGAGAAACAAAGGAAACTTCCCTGGTCGATTCTCAGGGCGCCAGTTCTCTAACTCTCGCTCTGAGATGAATGTGAGCACCTTGGAGCTGGGGGACTCGGCCCTTTATCTTTGCGCCAGCAGCTTGGAGCCAGGGGTCTCCTACGAGCAGTACTTCGGGCCGGGCACCAGGCTCACGGTCACAG TGCGCCAGCAGCTTGGAGCCAGGGGTCTCCTACGAGCAGTACTTC 26, 12, 0, 0, AGCCAGGGGT F T KAGVTQTPRYLIKTRGQQVTLSCSPISGHRSVSWYQQTPGQGLQFLFEYFSETQRNKGNFPGRFSGRQFSNSRSEMNVSTLELGDSALYLCASSLEPGVSYEQYFGPGTRLTVT SGHRS YFSETQ T F T T 1 -8 TRBV4-2 TRBJ2-3 CASSPSGTFSTDTQYF 1 ACGGGAGTTACGCAGACACCAAGACACCTGGTCATGGGAATGACAAATAAGAAGTCTTTGAAATGTGAACAACATCTGGGGCATAACGCTATGTATTGGTACAAGCAAAGTGCTAAGAAGCCACTGGAGCTCATGTTTGTCTACAACTTTAAAGAACAGACTGAAAACAACAGTGTGCCAAGTCGCTTCTCACCTGAATGCCCCAACAGCTCTCACTTATTCCTTCACCTACACACCCTGCAGCCAGAAGACTCGGCCCTGTATCTCTGTGCCAGCAGTCCTAGCGGCACCTTTTCCACAGATACGCAGTATTTTGGCCCAGGCACCCGGCTGACAGTGCTCG TGTGCCAGCAGTCCTAGCGGCACCTTTTCCACAGATACGCAGTATTTT 24, 8, 6, 2, TCCTAGCGGCACCTTTTC F T TGVTQTPRHLVMGMTNKKSLKCEQHLGHNAMYWYKQSAKKPLELMFVYNFKEQTENNSVPSRFSPECPNSSHLFLHLHTLQPEDSALYLCASSPSGTFSTDTQYFGPGTRLTVL LGHNA YNFKEQ T F T T 1 -9 TRBV29-1 TRBJ2-7 CSVERGSSYEQYF 1 AGTGCTGTCATCTCTCAAAAGCCAAGCAGGGATATCTGTCAACGTGGAACCTCCCTGACGATCCAGTGTCAAGTCGATAGCCAAGTCACCATGATGTTCTGGTACCGTCAGCAACCTGGACAGAGCCTGACACTGATCGCAACTGCAAATCAGGGCTCTGAGGCCACATATGAGAGTGGATTTGTCATTGACAAGTTTCCCATCAGCCGCCCAAACCTAACATTCTCAACTCTGACTGTGAGCAACATGAGCCCTGAAGACAGCAGCATATATCTCTGCAGCGTTGAAAGGGGCAGCTCCTACGAGCAGTACTTCGGGCCGGGCACCAGGCTCACGGTCACAG TGCAGCGTTGAAAGGGGCAGCTCCTACGAGCAGTACTTC 20, 12, 2, 0, AGGGGCAG F T SAVISQKPSRDICQRGTSLTIQCQVDSQVTMMFWYRQQPGQSLTLIATANQGSEATYESGFVIDKFPISRPNLTFSTLTVSNMSPEDSSIYLCSVERGSSYEQYFGPGTRLTVT SQVTM ANQGSEA T F T T 1 -10 TRBV5-4 TRBJ2-6 CASSTDAGANVLTF 1 GAGACTGGAGTCACCCAAAGTCCCACACACCTGATCAAAACGAGAGGACAGCAAGTGACTCTGAGATGCTCTTCTCAGTCTGGGCACAACACTGTGTCCTGGTACCAACAGGCCCTGGGTCAGGGGCCCCAGTTTATCTTTCAGTATTATAGGGAGGAAGAGAATGGCAGAGGAAACTTCCCTCCTAGATTCTCAGGTCTCCAGTTCCCTAATTATAGCTCTGAGCTGAATGTGAACGCCTTGGAGCTGGACGACTCGGCCCTGTATCTCTGTGCCAGCAGCACCGATGCTGGGGCCAACGTCCTGACTTTCGGGGCCGGCAGCAGGCTGACCGTGCTGG TGTGCCAGCAGCACCGATGCTGGGGCCAACGTCCTGACTTTC 27, 11, 4, 2, ACCGATG F T ETGVTQSPTHLIKTRGQQVTLRCSSQSGHNTVSWYQQALGQGPQFIFQYYREEENGRGNFPPRFSGLQFPNYSSELNVNALELDDSALYLCASSTDAGANVLTFGAGSRLTVL SGHNT YYREEE T F T T 1 -11 TRBV6-1 TRBJ2-7 CASTETTSGSYEQYF 1 AATGCTGGTGTCACTCAGACCCCAAAATTCCAGGTCCTGAAGACAGGACAGAGCATGACACTGCAGTGTGCCCAGGATATGAACCATAACTCCATGTACTGGTATCGACAAGACCCAGGCATGGGACTGAGGCTGATTTATTACTCAGCTTCTGAGGGTACCACTGACAAAGGAGAAGTCCCCAATGGCTACAATGTCTCCAGATTAAACAAACGGGAGTTCTCGCTCAGGCTGGAGTCGGCTGCTCCCTCCCAGACATCTGTGTACTTCTGTGCCAGCACCGAGACGACTAGCGGGAGCTACGAGCAGTACTTCGGGCCGGGCACCAGGCTCACGGTCACAG TGTGCCAGCACCGAGACGACTAGCGGGAGCTACGAGCAGTACTTC 31, 12, 7, 3, CCGAGACGACTAGCGGGAG F T NAGVTQTPKFQVLKTGQSMTLQCAQDMNHNSMYWYRQDPGMGLRLIYYSASEGTTDKGEVPNGYNVSRLNKREFSLRLESAAPSQTSVYFCASTETTSGSYEQYFGPGTRLTVT MNHNS SASEGT T F T T 1 -12 TRBV2 TRBJ2-5 CASKRTGGKPSETQYF 1 GAACCTGAAGTCACCCAGACTCCCAGCCATCAGGTCACACAGATGGGACAGGAAGTGATCTTGCGCTGTGTCCCCATCTCTAATCACTTATACTTCTATTGGTACAGACAAATCTTGGGGCAGAAAGTCGAGTTTCTGGTTTCCTTTTATAATAATGAAATCTCAGAGAAGTCTGAAATATTCGATGATCAATTCTCAGTTGAAAGGCCTGATGGATCAAATTTCACTCTGAAGATCCGGTCCACAAAGCTGGAGGACTCAGCCATGTACTTCTGTGCCAGCAAACGGACGGGAGGGAAGCCGAGCGAGACCCAGTACTTCGGGCCAGGCACGCGGCTCCTGGTGCTCG TGTGCCAGCAAACGGACGGGAGGGAAGCCGAGCGAGACCCAGTACTTC 14, 10, 7, 5, AACGGACGGGAGGGAAGCCGAGC F T EPEVTQTPSHQVTQMGQEVILRCVPISNHLYFYWYRQILGQKVEFLVSFYNNEISEKSEIFDDQFSVERPDGSNFTLKIRSTKLEDSAMYFCASKRTGGKPSETQYFGPGTRLLVL SNHLY FYNNEI T F T T 1 -13 TRBV30 TRBJ1-1 CAWEQGATEAFF 1 TCTCAGACTATTCATCAATGGCCAGCGACCCTGGTGCAGCCTGTGGGCAGCCCGCTCTCTCTGGAGTGCACTGTGGAGGGAACATCAAACCCCAACCTATACTGGTACCGACAGGCTGCAGGCAGGGGCCTCCAGCTGCTCTTCTACTCCGTTGGTATTGGCCAGATCAGCTCTGAGGTGCCCCAGAATCTCTCAGCCTCCAGACCCCAGGACCGGCAGTTCATCCTGAGTTCTAAGAAGCTCCTTCTCAGTGACTCTGGCTTCTATCTCTGTGCCTGGGAACAGGGGGCCACTGAAGCTTTCTTTGGACAAGGCACCAGACTCACAGTTGTAG TGTGCCTGGGAACAGGGGGCCACTGAAGCTTTCTTT 22, 0, 5, 4, GAACAGGGGGC F T SQTIHQWPATLVQPVGSPLSLECTVEGTSNPNLYWYRQAAGRGLQLLFYSVGIGQISSEVPQNLSASRPQDRQFILSSKKLLLSDSGFYLCAWEQGATEAFFGQGTRLTVV GTSNPN SVGIG T F T T 1 -14 TRBV11-2 TRBJ2-1 CASSLDGQRVFNYNEQFF 1 GAAGCTGGAGTTGCCCAGTCTCCCAGATATAAGATTATAGAGAAAAGGCAGAGTGTGGCTTTTTGGTGCAATCCTATATCTGGCCATGCTACCCTTTACTGGTACCAGCAGATCCTGGGACAGGGCCCAAAGCTTCTGATTCAGTTTCAGAATAACGGTGTAGTGGATGATTCACAGTTGCCTAAGGATCGATTTTCTGCAGAGAGGCTCAAAGGAGTAGACTCCACTCTCAAGATCCAGCCTGCAAAGCTTGAGGACTCGGCCGTGTATCTCTGTGCCAGCAGCTTAGATGGGCAGCGGGTGTTTAATTACAATGAGCAGTTCTTCGGGCCAGGGACACGGCTCACCGTGCTAG TGTGCCAGCAGCTTAGATGGGCAGCGGGTGTTTAATTACAATGAGCAGTTCTTC 4, 6, 0, 4, TGGGCAGCGGGTGTTTAAT F T EAGVAQSPRYKIIEKRQSVAFWCNPISGHATLYWYQQILGQGPKLLIQFQNNGVVDDSQLPKDRFSAERLKGVDSTLKIQPAKLEDSAVYLCASSLDGQRVFNYNEQFFGPGTRLTVL SGHAT FQNNGV T F T T 1 -15 TRBV7-2 TRBJ1-4 CASSLIWGTAKELFF 1 GGAGCTGGAGTCTCCCAGTCCCCCAGTAACAAGGTCACAGAGAAGGGAAAGGATGTAGAGCTCAGGTGTGATCCAATTTCAGGTCATACTGCCCTTTACTGGTACCGACAGAGCCTGGGGCAGGGCCTGGAGTTTTTAATTTACTTCCAAGGCAACAGTGCACCAGACAAATCAGGGCTGCCCAGTGATCGCTTCTCTGCAGAGAGGACTGGGGGATCCGTCTCCACTCTGACGATCCAGCGCACACAGCAGGAGGACTCGGCCGTGTATCTCTGTGCCAGCAGCTTAATCTGGGGGACAGCAAAGGAACTGTTTTTTGGCAGTGGAACCCAGCTCTCTGTCTTGG TGTGCCAGCAGCTTAATCTGGGGGACAGCAAAGGAACTGTTTTTT 37, 3, 2, 12, ATCTGGGGGACAGCAAAGG F T GAGVSQSPSNKVTEKGKDVELRCDPISGHTALYWYRQSLGQGLEFLIYFQGNSAPDKSGLPSDRFSAERTGGSVSTLTIQRTQQEDSAVYLCASSLIWGTAKELFFGSGTQLSVL SGHTA FQGNSA T F T T 1 -16 TRBV29-1 TRBJ2-1 CSVEAAGGREQFF 1 AGTGCTGTCATCTCTCAAAAGCCAAGCAGGGATATCTGTCAACGTGGAACCTCCCTGACGATCCAGTGTCAAGTCGATAGCCAAGTCACCATGATGTTCTGGTACCGTCAGCAACCTGGACAGAGCCTGACACTGATCGCAACTGCAAATCAGGGCTCTGAGGCCACATATGAGAGTGGATTTGTCATTGACAAGTTTCCCATCAGCCGCCCAAACCTAACATTCTCAACTCTGACTGTGAGCAACATGAGCCCTGAAGACAGCAGCATATATCTCTGCAGCGTTGAAGCCGCAGGGGGCCGCGAGCAGTTCTTCGGGCCAGGGACACGGCTCACCGTGCTAG TGCAGCGTTGAAGCCGCAGGGGGCCGCGAGCAGTTCTTC 20, 6, 1, 10, CCGCAGGGGGCCGC F T SAVISQKPSRDICQRGTSLTIQCQVDSQVTMMFWYRQQPGQSLTLIATANQGSEATYESGFVIDKFPISRPNLTFSTLTVSNMSPEDSSIYLCSVEAAGGREQFFGPGTRLTVL SQVTM ANQGSEA T F T T 1 -17 TRBV7-9 TRBJ2-3 CASSLGRTSSTDTQYF 1 GATACTGGAGTCTCCCAGAACCCCAGACACAAGATCACAAAGAGGGGACAGAATGTAACTTTCAGGTGTGATCCAATTTCTGAACACAACCGCCTTTATTGGTACCGACAGACCCTGGGGCAGGGCCCAGAGTTTCTGACTTACTTCCAGAATGAAGCTCAACTAGAAAAATCAAGGCTGCTCAGTGATCGGTTCTCTGCAGAGAGGCCTAAGGGATCTTTCTCCACCTTGGAGATCCAGCGCACAGAGCAGGGGGACTCGGCCATGTATCTCTGTGCCAGCAGCTTGGGGAGGACTAGCAGCACAGATACGCAGTATTTTGGCCCAGGCACCCGGCTGACAGTGCTCG TGTGCCAGCAGCTTGGGGAGGACTAGCAGCACAGATACGCAGTATTTT 43, 8, 3, 0, GGGGAGGACTAGC F T DTGVSQNPRHKITKRGQNVTFRCDPISEHNRLYWYRQTLGQGPEFLTYFQNEAQLEKSRLLSDRFSAERPKGSFSTLEIQRTEQGDSAMYLCASSLGRTSSTDTQYFGPGTRLTVL SEHNR FQNEAQ T F T T 2 -18 TRBV29-1 TRBJ2-6 CSVALQAGANVLTF 1 AGTGCTGTCATCTCTCAAAAGCCAAGCAGGGATATCTGTCAACGTGGAACCTCCCTGACGATCCAGTGTCAAGTCGATAGCCAAGTCACCATGATGTTCTGGTACCGTCAGCAACCTGGACAGAGCCTGACACTGATCGCAACTGCAAATCAGGGCTCTGAGGCCACATATGAGAGTGGATTTGTCATTGACAAGTTTCCCATCAGCCGCCCAAACCTAACATTCTCAACTCTGACTGTGAGCAACATGAGCCCTGAAGACAGCAGCATATATCTCTGCAGCGTTGCTCTCCAGGCTGGGGCCAACGTCCTGACTTTCGGGGCCGGCAGCAGGCTGACCGTGCTGG TGCAGCGTTGCTCTCCAGGCTGGGGCCAACGTCCTGACTTTC 20, 11, 4, 2, CTCTCCAGG F T SAVISQKPSRDICQRGTSLTIQCQVDSQVTMMFWYRQQPGQSLTLIATANQGSEATYESGFVIDKFPISRPNLTFSTLTVSNMSPEDSSIYLCSVALQAGANVLTFGAGSRLTVL SQVTM ANQGSEA T F T T 2 -19 TRBV20-1 TRBJ2-7 CSARLAGDYEQYF 1 GGTGCTGTCGTCTCTCAACATCCGAGCTGGGTTATCTGTAAGAGTGGAACCTCTGTGAAGATCGAGTGCCGTTCCCTGGACTTTCAGGCCACAACTATGTTTTGGTATCGTCAGTTCCCGAAACAGAGTCTCATGCTGATGGCAACTTCCAATGAGGGCTCCAAGGCCACATACGAGCAAGGCGTCGAGAAGGACAAGTTTCTCATCAACCATGCAAGCCTGACCTTGTCCACTCTGACAGTGACCAGTGCCCATCCTGAAGACAGCAGCTTCTACATCTGCAGTGCTCGACTAGCGGGAGACTACGAGCAGTACTTCGGGCCGGGCACCAGGCTCACGGTCACAG TGCAGTGCTCGACTAGCGGGAGACTACGAGCAGTACTTC 15, 12, 5, 3, CGACTAGCGGGAGA F T GAVVSQHPSWVICKSGTSVKIECRSLDFQATTMFWYRQFPKQSLMLMATSNEGSKATYEQGVEKDKFLINHASLTLSTLTVTSAHPEDSSFYICSARLAGDYEQYFGPGTRLTVT DFQATT SNEGSKA T F T T 1 -20 TRBV6-5 TRBJ2-1 CASSPGTGAYNEQFF 1 AATGCTGGTGTCACTCAGACCCCAAAATTCCAGGTCCTGAAGACAGGACAGAGCATGACACTGCAGTGTGCCCAGGATATGAACCATGAATACATGTCCTGGTATCGACAAGACCCAGGCATGGGGCTGAGGCTGATTCATTACTCAGTTGGTGCTGGTATCACTGACCAAGGAGAAGTCCCCAATGGCTACAATGTCTCCAGATCAACCACAGAGGATTTCCCGCTCAGGCTGCTGTCGGCTGCTCCCTCCCAGACATCTGTGTACTTCTGTGCCAGCAGTCCCGGGACAGGAGCCTACAATGAGCAGTTCTTCGGGCCAGGGACACGGCTCACCGTGCTAG TGTGCCAGCAGTCCCGGGACAGGAGCCTACAATGAGCAGTTCTTC 33, 6, 5, 2, CCCGGGACAGGAG F T NAGVTQTPKFQVLKTGQSMTLQCAQDMNHEYMSWYRQDPGMGLRLIHYSVGAGITDQGEVPNGYNVSRSTTEDFPLRLLSAAPSQTSVYFCASSPGTGAYNEQFFGPGTRLTVL MNHEY SVGAGI T F T T 1 -21 TRBV9 TRBJ1-4 CASSPRAATNEKLFF 1 GATTCTGGAGTCACACAAACCCCAAAGCACCTGATCACAGCAACTGGACAGCGAGTGACGCTGAGATGCTCCCCTAGGTCTGGAGACCTCTCTGTGTACTGGTACCAACAGAGCCTGGACCAGGGCCTCCAGTTCCTCATTCAGTATTATAATGGAGAAGAGAGAGCAAAAGGAAACATTCTTGAACGATTCTCCGCACAACAGTTCCCTGACTTGCACTCTGAACTAAACCTGAGCTCTCTGGAGCTGGGGGACTCAGCTTTGTATTTCTGTGCCAGCAGCCCCCGCGCGGCAACTAATGAAAAACTGTTTTTTGGCAGTGGAACCCAGCTCTCTGTCTTGG TGTGCCAGCAGCCCCCGCGCGGCAACTAATGAAAAACTGTTTTTT 44, 3, 4, 0, CCCCGCGCGG F T DSGVTQTPKHLITATGQRVTLRCSPRSGDLSVYWYQQSLDQGLQFLIQYYNGEERAKGNILERFSAQQFPDLHSELNLSSLELGDSALYFCASSPRAATNEKLFFGSGTQLSVL SGDLS YYNGEE T F T T 1 -22 TRBV5-6 TRBJ2-1 CASSLGLAGHNEQFF 1 GACGCTGGAGTCACCCAAAGTCCCACACACCTGATCAAAACGAGAGGACAGCAAGTGACTCTGAGATGCTCTCCTAAGTCTGGGCATGACACTGTGTCCTGGTACCAACAGGCCCTGGGTCAGGGGCCCCAGTTTATCTTTCAGTATTATGAGGAGGAAGAGAGACAGAGAGGCAACTTCCCTGATCGATTCTCAGGTCACCAGTTCCCTAACTATAGCTCTGAGCTGAATGTGAACGCCTTGTTGCTGGGGGACTCGGCCCTCTATCTCTGTGCCAGCAGCTTGGGTCTAGCGGGACACAATGAGCAGTTCTTCGGGCCAGGGACACGGCTCACCGTGCTAG TGTGCCAGCAGCTTGGGTCTAGCGGGACACAATGAGCAGTTCTTC 29, 6, 0, 5, GTCTAGCGGGAC F T DAGVTQSPTHLIKTRGQQVTLRCSPKSGHDTVSWYQQALGQGPQFIFQYYEEEERQRGNFPDRFSGHQFPNYSSELNVNALLLGDSALYLCASSLGLAGHNEQFFGPGTRLTVL SGHDT YYEEEE T F T T 1 -23 TRBV2 TRBJ2-2 CASSEVTSRELFF 1 GAACCTGAAGTCACCCAGACTCCCAGCCATCAGGTCACACAGATGGGACAGGAAGTGATCTTGCGCTGTGTCCCCATCTCTAATCACTTATACTTCTATTGGTACAGACAAATCTTGGGGCAGAAAGTCGAGTTTCTGGTTTCCTTTTATAATAATGAAATCTCAGAGAAGTCTGAAATATTCGATGATCAATTCTCAGTTGAAAGGCCTGATGGATCAAATTTCACTCTGAAGATCCGGTCCACAAAGCTGGAGGACTCAGCCATGTACTTCTGTGCCAGCAGTGAGGTGACTAGCCGGGAGCTGTTTTTTGGAGAAGGCTCTAGGCTGACCGTACTGG TGTGCCAGCAGTGAGGTGACTAGCCGGGAGCTGTTTTTT 14, 7, 3, 9, GGTGACTAGCC F T EPEVTQTPSHQVTQMGQEVILRCVPISNHLYFYWYRQILGQKVEFLVSFYNNEISEKSEIFDDQFSVERPDGSNFTLKIRSTKLEDSAMYFCASSEVTSRELFFGEGSRLTVL SNHLY FYNNEI T F T T 1 -24 TRBV5-1 TRBJ2-1 CASSPGLAPRNEQFF 1 AAGGCTGGAGTCACTCAAACTCCAAGATATCTGATCAAAACGAGAGGACAGCAAGTGACACTGAGCTGCTCCCCTATCTCTGGGCATAGGAGTGTATCCTGGTACCAACAGACCCCAGGACAGGGCCTTCAGTTCCTCTTTGAATACTTCAGTGAGACACAGAGAAACAAAGGAAACTTCCCTGGTCGATTCTCAGGGCGCCAGTTCTCTAACTCTCGCTCTGAGATGAATGTGAGCACCTTGGAGCTGGGGGACTCGGCCCTTTATCTTTGCGCCAGCAGCCCAGGACTAGCACCGAGGAATGAGCAGTTCTTCGGGCCAGGGACACGGCTCACCGTGCTAG TGCGCCAGCAGCCCAGGACTAGCACCGAGGAATGAGCAGTTCTTC 26, 6, 4, 7, CCAGGACTAGCACCGAGG F T KAGVTQTPRYLIKTRGQQVTLSCSPISGHRSVSWYQQTPGQGLQFLFEYFSETQRNKGNFPGRFSGRQFSNSRSEMNVSTLELGDSALYLCASSPGLAPRNEQFFGPGTRLTVL SGHRS YFSETQ T F T T 1 -25 TRBV20-1 TRBJ1-5 CSANLGADSNQPQHF 1 GGTGCTGTCGTCTCTCAACATCCGAGCTGGGTTATCTGTAAGAGTGGAACCTCTGTGAAGATCGAGTGCCGTTCCCTGGACTTTCAGGCCACAACTATGTTTTGGTATCGTCAGTTCCCGAAACAGAGTCTCATGCTGATGGCAACTTCCAATGAGGGCTCCAAGGCCACATACGAGCAAGGCGTCGAGAAGGACAAGTTTCTCATCAACCATGCAAGCCTGACCTTGTCCACTCTGACAGTGACCAGTGCCCATCCTGAAGACAGCAGCTTCTACATCTGCAGTGCTAATCTAGGGGCCGATAGCAATCAGCCCCAGCATTTTGGTGATGGGACTCGACTCTCCATCCTAG TGCAGTGCTAATCTAGGGGCCGATAGCAATCAGCCCCAGCATTTT 15, 4, 4, 0, ATCTAGGGGCCGA F T GAVVSQHPSWVICKSGTSVKIECRSLDFQATTMFWYRQFPKQSLMLMATSNEGSKATYEQGVEKDKFLINHASLTLSTLTVTSAHPEDSSFYICSANLGADSNQPQHFGDGTRLSIL DFQATT SNEGSKA T F T T 1 -26 TRBV14 TRBJ2-5 CASSSTGAQETQYF 1 GAAGCTGGAGTTACTCAGTTCCCCAGCCACAGCGTAATAGAGAAGGGCCAGACTGTGACTCTGAGATGTGACCCAATTTCTGGACATGATAATCTTTATTGGTATCGACGTGTTATGGGAAAAGAAATAAAATTTCTGTTACATTTTGTGAAAGAGTCTAAACAGGATGAGTCCGGTATGCCCAACAATCGATTCTTAGCTGAAAGGACTGGAGGGACGTATTCTACTCTGAAGGTGCAGCCTGCAGAACTGGAGGATTCTGGAGTTTATTTCTGTGCCAGCAGCTCGACCGGGGCGCAAGAGACCCAGTACTTCGGGCCAGGCACGCGGCTCCTGGTGCTCG TGTGCCAGCAGCTCGACCGGGGCGCAAGAGACCCAGTACTTC 9, 10, 5, 2, TCGACCGGGGCG F T EAGVTQFPSHSVIEKGQTVTLRCDPISGHDNLYWYRRVMGKEIKFLLHFVKESKQDESGMPNNRFLAERTGGTYSTLKVQPAELEDSGVYFCASSSTGAQETQYFGPGTRLLVL SGHDN FVKESK T F T T 1 -27 TRBV3-1 TRBJ2-7 CASTLSGSSYEQYF 1 ACAGCTGTTTCCCAGACTCCAAAATACCTGGTCACACAGATGGGAAACGACAAGTCCATTAAATGTGAACAAAATCTGGGCCATGATACTATGTATTGGTATAAACAGGACTCTAAGAAATTTCTGAAGATAATGTTTAGCTACAATAATAAGGAGCTCATTATAAATGAAACAGTTCCAAATCGCTTCTCACCTAAATCTCCAGACAAAGCTCACTTAAATCTTCACATCAATTCCCTGGAGCTTGGTGACTCTGCTGTGTATTTCTGTGCCAGTACCCTTAGCGGGAGCTCCTACGAGCAGTACTTCGGGCCGGGCACCAGGCTCACGGTCACAG TGTGCCAGTACCCTTAGCGGGAGCTCCTACGAGCAGTACTTC 21, 12, 9, 0, TACCCTTAGCGGGAG F T TAVSQTPKYLVTQMGNDKSIKCEQNLGHDTMYWYKQDSKKFLKIMFSYNNKELIINETVPNRFSPKSPDKAHLNLHINSLELGDSAVYFCASTLSGSSYEQYFGPGTRLTVT LGHDT YNNKEL T F T T 1 -28 TRBV7-9 TRBJ2-5 CASSLGAMQQETQYF 1 GATACTGGAGTCTCCCAGAACCCCAGACACAAGATCACAAAGAGGGGACAGAATGTAACTTTCAGGTGTGATCCAATTTCTGAACACAACCGCCTTTATTGGTACCGACAGACCCTGGGGCAGGGCCCAGAGTTTCTGACTTACTTCCAGAATGAAGCTCAACTAGAAAAATCAAGGCTGCTCAGTGATCGGTTCTCTGCAGAGAGGCCTAAGGGATCTTTCTCCACCTTGGAGATCCAGCGCACAGAGCAGGGGGACTCGGCCATGTATCTCTGTGCCAGCAGCTTAGGCGCCATGCAACAAGAGACCCAGTACTTCGGGCCAGGCACGCGGCTCCTGGTGCTCG TGTGCCAGCAGCTTAGGCGCCATGCAACAAGAGACCCAGTACTTC 43, 10, 1, 2, GCGCCATGCAA F T DTGVSQNPRHKITKRGQNVTFRCDPISEHNRLYWYRQTLGQGPEFLTYFQNEAQLEKSRLLSDRFSAERPKGSFSTLEIQRTEQGDSAMYLCASSLGAMQQETQYFGPGTRLLVL SEHNR FQNEAQ T F T T 1 -29 TRBV9 TRBJ1-1 CASSVRESNTEAFF 1 GATTCTGGAGTCACACAAACCCCAAAGCACCTGATCACAGCAACTGGACAGCGAGTGACGCTGAGATGCTCCCCTAGGTCTGGAGACCTCTCTGTGTACTGGTACCAACAGAGCCTGGACCAGGGCCTCCAGTTCCTCATTCAGTATTATAATGGAGAAGAGAGAGCAAAAGGAAACATTCTTGAACGATTCTCCGCACAACAGTTCCCTGACTTGCACTCTGAACTAAACCTGAGCTCTCTGGAGCTGGGGGACTCAGCTTTGTATTTCTGTGCCAGCAGCGTCAGGGAATCGAACACTGAAGCTTTCTTTGGACAAGGCACCAGACTCACAGTTGTAG TGTGCCAGCAGCGTCAGGGAATCGAACACTGAAGCTTTCTTT 44, 0, 2, 1, CAGGGAATC F T DSGVTQTPKHLITATGQRVTLRCSPRSGDLSVYWYQQSLDQGLQFLIQYYNGEERAKGNILERFSAQQFPDLHSELNLSSLELGDSALYFCASSVRESNTEAFFGQGTRLTVV SGDLS YYNGEE T F T T 1 -30 TRBV10-3 TRBJ2-5 CAISESRFGHTQETQYF 1 GATGCTGGAATCACCCAGAGCCCAAGACACAAGGTCACAGAGACAGGAACACCAGTGACTCTGAGATGTCACCAGACTGAGAACCACCGCTATATGTACTGGTATCGACAAGACCCGGGGCATGGGCTGAGGCTGATCCATTACTCATATGGTGTTAAAGATACTGACAAAGGAGAAGTCTCAGATGGCTATAGTGTCTCTAGATCAAAGACAGAGGATTTCCTCCTCACTCTGGAGTCCGCTACCAGCTCCCAGACATCTGTGTACTTCTGTGCCATCAGTGAGTCGAGGTTTGGACACACCCAAGAGACCCAGTACTTCGGGCCAGGCACGCGGCTCCTGGTGCTCG TGTGCCATCAGTGAGTCGAGGTTTGGACACACCCAAGAGACCCAGTACTTC 2, 10, 0, 1, GAGGTTTGGACACAC F T DAGITQSPRHKVTETGTPVTLRCHQTENHRYMYWYRQDPGHGLRLIHYSYGVKDTDKGEVSDGYSVSRSKTEDFLLTLESATSSQTSVYFCAISESRFGHTQETQYFGPGTRLLVL ENHRY SYGVKD T F T T 1 -31 TRBV6-1 TRBJ2-7 CASSESGRVYEQYF 1 AATGCTGGTGTCACTCAGACCCCAAAATTCCAGGTCCTGAAGACAGGACAGAGCATGACACTGCAGTGTGCCCAGGATATGAACCATAACTCCATGTACTGGTATCGACAAGACCCAGGCATGGGACTGAGGCTGATTTATTACTCAGCTTCTGAGGGTACCACTGACAAAGGAGAAGTCCCCAATGGCTACAATGTCTCCAGATTAAACAAACGGGAGTTCTCGCTCAGGCTGGAGTCGGCTGCTCCCTCCCAGACATCTGTGTACTTCTGTGCCAGCAGTGAGAGCGGGAGGGTCTACGAGCAGTACTTCGGGCCGGGCACCAGGCTCACGGTCACAG TGTGCCAGCAGTGAGAGCGGGAGGGTCTACGAGCAGTACTTC 31, 12, 3, 3, GAGCGGGAGGGT F T NAGVTQTPKFQVLKTGQSMTLQCAQDMNHNSMYWYRQDPGMGLRLIYYSASEGTTDKGEVPNGYNVSRLNKREFSLRLESAAPSQTSVYFCASSESGRVYEQYFGPGTRLTVT MNHNS SASEGT T F T T 1 -32 TRBV28 TRBJ2-1 CASGLADYNEQFF 1 GATGTGAAAGTAACCCAGAGCTCGAGATATCTAGTCAAAAGGACGGGAGAGAAAGTTTTTCTGGAATGTGTCCAGGATATGGACCATGAAAATATGTTCTGGTATCGACAAGACCCAGGTCTGGGGCTACGGCTGATCTATTTCTCATATGATGTTAAAATGAAAGAAAAAGGAGATATTCCTGAGGGGTACAGTGTCTCTAGAGAGAAGAAGGAGCGCTTCTCCCTGATTCTGGAGTCCGCCAGCACCAACCAGACATCTATGTACCTCTGTGCCAGTGGACTAGCGGACTACAATGAGCAGTTCTTCGGGCCAGGGACACGGCTCACCGTGCTAG TGTGCCAGTGGACTAGCGGACTACAATGAGCAGTTCTTC 19, 6, 9, 3, TGGACTAGCGGA F T DVKVTQSSRYLVKRTGEKVFLECVQDMDHENMFWYRQDPGLGLRLIYFSYDVKMKEKGDIPEGYSVSREKKERFSLILESASTNQTSMYLCASGLADYNEQFFGPGTRLTVL MDHEN SYDVKM T F T T 1 -33 TRBV12-4 TRBJ2-1 CASSLGLLNEQFF 1 GATGCTGGAGTTATCCAGTCACCCCGGCACGAGGTGACAGAGATGGGACAAGAAGTGACTCTGAGATGTAAACCAATTTCAGGACACGACTACCTTTTCTGGTACAGACAGACCATGATGCGGGGACTGGAGTTGCTCATTTACTTTAACAACAACGTTCCGATAGATGATTCAGGGATGCCCGAGGATCGATTCTCAGCTAAGATGCCTAATGCATCATTCTCCACTCTGAAGATCCAGCCCTCAGAACCCAGGGACTCAGCTGTGTACTTCTGTGCCAGCAGTTTAGGTCTGCTTAATGAGCAGTTCTTCGGGCCAGGGACACGGCTCACCGTGCTAG TGTGCCAGCAGTTTAGGTCTGCTTAATGAGCAGTTCTTC 6, 6, 1, 7, GTCTGCTT F T DAGVIQSPRHEVTEMGQEVTLRCKPISGHDYLFWYRQTMMRGLELLIYFNNNVPIDDSGMPEDRFSAKMPNASFSTLKIQPSEPRDSAVYFCASSLGLLNEQFFGPGTRLTVL SGHDY FNNNVP T F T T 1 -34 TRBV28 TRBJ2-7 CASSELTGRGEQYF 1 GATGTGAAAGTAACCCAGAGCTCGAGATATCTAGTCAAAAGGACGGGAGAGAAAGTTTTTCTGGAATGTGTCCAGGATATGGACCATGAAAATATGTTCTGGTATCGACAAGACCCAGGTCTGGGGCTACGGCTGATCTATTTCTCATATGATGTTAAAATGAAAGAAAAAGGAGATATTCCTGAGGGGTACAGTGTCTCTAGAGAGAAGAAGGAGCGCTTCTCCCTGATTCTGGAGTCCGCCAGCACCAACCAGACATCTATGTACCTCTGTGCCAGCAGTGAACTTACGGGGCGCGGCGAGCAGTACTTCGGGCCGGGCACCAGGCTCACGGTCACAG TGTGCCAGCAGTGAACTTACGGGGCGCGGCGAGCAGTACTTC 19, 12, 5, 6, GAACTTACGGGGCGCGG F T DVKVTQSSRYLVKRTGEKVFLECVQDMDHENMFWYRQDPGLGLRLIYFSYDVKMKEKGDIPEGYSVSREKKERFSLILESASTNQTSMYLCASSELTGRGEQYFGPGTRLTVT MDHEN SYDVKM T F T T 1 -35 TRBV7-6 TRBJ2-1 CASSQTMSGSQAHEQFF 1 GGTGCTGGAGTCTCCCAGTCTCCCAGGTACAAAGTCACAAAGAGGGGACAGGATGTAGCTCTCAGGTGTGATCCAATTTCGGGTCATGTATCCCTTTATTGGTACCGACAGGCCCTGGGGCAGGGCCCAGAGTTTCTGACTTACTTCAATTATGAAGCCCAACAAGACAAATCAGGGCTGCCCAATGATCGGTTCTCTGCAGAGAGGCCTGAGGGATCCATCTCCACTCTGACGATCCAGCGCACAGAGCAGCGGGACTCGGCCATGTATCGCTGTGCCAGCAGCCAAACCATGAGTGGCTCACAGGCCCATGAGCAGTTCTTCGGGCCAGGGACACGGCTCACCGTGCTAG TGTGCCAGCAGCCAAACCATGAGTGGCTCACAGGCCCATGAGCAGTTCTTC 40, 6, 5, 8, CAAACCATGAGTGGCTCACAGGCCC F T GAGVSQSPRYKVTKRGQDVALRCDPISGHVSLYWYRQALGQGPEFLTYFNYEAQQDKSGLPNDRFSAERPEGSISTLTIQRTEQRDSAMYRCASSQTMSGSQAHEQFFGPGTRLTVL SGHVS FNYEAQ T F T T 1 -36 TRBV25-1 TRBJ2-5 CASSEWGGGQETQYF 1 GAAGCTGACATCTACCAGACCCCAAGATACCTTGTTATAGGGACAGGAAAGAAGATCACTCTGGAATGTTCTCAAACCATGGGCCATGACAAAATGTACTGGTATCAACAAGATCCAGGAATGGAACTACACCTCATCCACTATTCCTATGGAGTTAATTCCACAGAGAAGGGAGATCTTTCCTCTGAGTCAACAGTCTCCAGAATAAGGACGGAGCATTTTCCCCTGACCCTGGAGTCTGCCAGGCCCTCACATACCTCTCAGTACCTCTGTGCCAGCAGTGAATGGGGAGGGGGCCAGGAGACCCAGTACTTCGGGCCAGGCACGCGGCTCCTGGTGCTCG TGTGCCAGCAGTGAATGGGGAGGGGGCCAGGAGACCCAGTACTTC 17, 10, 1, 5, GGGGAGGGGGCCAG F T EADIYQTPRYLVIGTGKKITLECSQTMGHDKMYWYQQDPGMELHLIHYSYGVNSTEKGDLSSESTVSRIRTEHFPLTLESARPSHTSQYLCASSEWGGGQETQYFGPGTRLLVL MGHDK SYGVNS T F T T 1 +3 TRBV20-1 TRBJ2-5 CSASGDLPPQETQYF 1 GGTGCTGTCGTCTCTCAACATCCGAGCTGGGTTATCTGTAAGAGTGGAACCTCTGTGAAGATCGAGTGCCGTTCCCTGGACTTTCAGGCCACAACTATGTTTTGGTATCGTCAGTTCCCGAAACAGAGTCTCATGCTGATGGCAACTTCCAATGAGGGCTCCAAGGCCACATACGAGCAAGGCGTCGAGAAGGACAAGTTTCTCATCAACCATGCAAGCCTGACCTTGTCCACTCTGACAGTGACCAGTGCCCATCCTGAAGACAGCAGCTTCTACATCTGCAGTGCTAGCGGGGACCTACCCCCCCAAGAGACCCAGTACTTCGGGCCAGGCACGCGGCTCCTGGTGCTCG TGCAGTGCTAGCGGGGACCTACCCCCCCAAGAGACCCAGTACTTC 15, 10, 3, 1, CGGGGACCTACCCCC F T GAVVSQHPSWVICKSGTSVKIECRSLDFQATTMFWYRQFPKQSLMLMATSNEGSKATYEQGVEKDKFLINHASLTLSTLTVTSAHPEDSSFYICSASGDLPPQETQYFGPGTRLLVL DFQATT SNEGSKA T F T T 1 +4 TRBV7-9 TRBJ1-1 CASSGGTAEAFF 1 GATACTGGAGTCTCCCAGAACCCCAGACACAAGATCACAAAGAGGGGACAGAATGTAACTTTCAGGTGTGATCCAATTTCTGAACACAACCGCCTTTATTGGTACCGACAGACCCTGGGGCAGGGCCCAGAGTTTCTGACTTACTTCCAGAATGAAGCTCAACTAGAAAAATCAAGGCTGCTCAGTGATCGGTTCTCTGCAGAGAGGCCTAAGGGATCTTTCTCCACCTTGGAGATCCAGCGCACAGAGCAGGGGGACTCGGCCATGTATCTCTGTGCCAGCAGCGGAGGGACAGCTGAAGCTTTCTTTGGACAAGGCACCAGACTCACAGTTGTAG TGTGCCAGCAGCGGAGGGACAGCTGAAGCTTTCTTT 43, 0, 5, 6, GGAGGGACAG F T DTGVSQNPRHKITKRGQNVTFRCDPISEHNRLYWYRQTLGQGPEFLTYFQNEAQLEKSRLLSDRFSAERPKGSFSTLEIQRTEQGDSAMYLCASSGGTAEAFFGQGTRLTVV SEHNR FQNEAQ T F T T 2 +5 TRBV20-1 TRBJ2-1 CSASLAEYSYTEQFF 1 GGTGCTGTCGTCTCTCAACATCCGAGCTGGGTTATCTGTAAGAGTGGAACCTCTGTGAAGATCGAGTGCCGTTCCCTGGACTTTCAGGCCACAACTATGTTTTGGTATCGTCAGTTCCCGAAACAGAGTCTCATGCTGATGGCAACTTCCAATGAGGGCTCCAAGGCCACATACGAGCAAGGCGTCGAGAAGGACAAGTTTCTCATCAACCATGCAAGCCTGACCTTGTCCACTCTGACAGTGACCAGTGCCCATCCTGAAGACAGCAGCTTCTACATCTGCAGTGCTAGCCTAGCGGAATACTCCTACACTGAGCAGTTCTTCGGGCCAGGGACACGGCTCACCGTGCTAG TGCAGTGCTAGCCTAGCGGAATACTCCTACACTGAGCAGTTCTTC 15, 6, 3, 9, CCTAGCGGAATACTCCTACAC F T GAVVSQHPSWVICKSGTSVKIECRSLDFQATTMFWYRQFPKQSLMLMATSNEGSKATYEQGVEKDKFLINHASLTLSTLTVTSAHPEDSSFYICSASLAEYSYTEQFFGPGTRLTVL DFQATT SNEGSKA T F T T 1 +6 TRBV14 TRBJ2-1 CASSPHGGFSYNEQFF 1 GAAGCTGGAGTTACTCAGTTCCCCAGCCACAGCGTAATAGAGAAGGGCCAGACTGTGACTCTGAGATGTGACCCAATTTCTGGACATGATAATCTTTATTGGTATCGACGTGTTATGGGAAAAGAAATAAAATTTCTGTTACATTTTGTGAAAGAGTCTAAACAGGATGAGTCCGGTATGCCCAACAATCGATTCTTAGCTGAAAGGACTGGAGGGACGTATTCTACTCTGAAGGTGCAGCCTGCAGAACTGGAGGATTCTGGAGTTTATTTCTGTGCCAGCAGCCCTCACGGGGGGTTCTCCTACAATGAGCAGTTCTTCGGGCCAGGGACACGGCTCACCGTGCTAG TGTGCCAGCAGCCCTCACGGGGGGTTCTCCTACAATGAGCAGTTCTTC 9, 6, 4, 0, CTCACGGGGGGTT F T EAGVTQFPSHSVIEKGQTVTLRCDPISGHDNLYWYRRVMGKEIKFLLHFVKESKQDESGMPNNRFLAERTGGTYSTLKVQPAELEDSGVYFCASSPHGGFSYNEQFFGPGTRLTVL SGHDN FVKESK T F T T 1 +7 TRBV10-1 TRBJ2-2 1 GATGCTGAAATCACCCAGAGCCCAAGACACAAGATCACAGAGACAGGAAGGCAGGTGACCTTGGCGTGTCACCAGACTTGGAACCACAACAATATGTTCTGGTATCGACAAGACCTGGGACATGGGCTGAGGCTGATCCATTACTCATATGGTGTTCAAGACACTAACAAAGGAGAAGTCTCAGATGGCTACAGTGTCTCTAGATCAAACACAGAGGACCTCCCCCTCACTCTGGAGTCTGCTGCCTCCTCCCAGACATCTGTATATTTCTGCGCCAGCAGCCGGGACTAGCGGGTGAGGGGAGCTGTTTTTTGGAGAAGGCTCTAGGCTGACCGTACTGG 0, 7, 6, 8, CCGGGACTAGCGGGTGA F F DAEITQSPRHKITETGRQVTLACHQTWNHNNMFWYRQDLGHGLRLIHYSYGVQDTNKGEVSDGYSVSRSNTEDLPLTLESAASSQTSVYFCASSRD*RVRGAVFWRRL*ADRT F T T F 1 +8 TRBV5-1 TRBJ2-7 CASSLEPGVSYEQYF 1 AAGGCTGGAGTCACTCAAACTCCAAGATATCTGATCAAAACGAGAGGACAGCAAGTGACACTGAGCTGCTCCCCTATCTCTGGGCATAGGAGTGTATCCTGGTACCAACAGACCCCAGGACAGGGCCTTCAGTTCCTCTTTGAATACTTCAGTGAGACACAGAGAAACAAAGGAAACTTCCCTGGTCGATTCTCAGGGCGCCAGTTCTCTAACTCTCGCTCTGAGATGAATGTGAGCACCTTGGAGCTGGGGGACTCGGCCCTTTATCTTTGCGCCAGCAGCTTGGAGCCAGGGGTCTCCTACGAGCAGTACTTCGGGCCGGGCACCAGGCTCACGGTCACAG TGCGCCAGCAGCTTGGAGCCAGGGGTCTCCTACGAGCAGTACTTC 26, 12, 0, 0, AGCCAGGGGT F T KAGVTQTPRYLIKTRGQQVTLSCSPISGHRSVSWYQQTPGQGLQFLFEYFSETQRNKGNFPGRFSGRQFSNSRSEMNVSTLELGDSALYLCASSLEPGVSYEQYFGPGTRLTVT SGHRS YFSETQ T F T T 1 +9 TRBV4-2 TRBJ2-3 CASSPSGTFSTDTQYF 1 ACGGGAGTTACGCAGACACCAAGACACCTGGTCATGGGAATGACAAATAAGAAGTCTTTGAAATGTGAACAACATCTGGGGCATAACGCTATGTATTGGTACAAGCAAAGTGCTAAGAAGCCACTGGAGCTCATGTTTGTCTACAACTTTAAAGAACAGACTGAAAACAACAGTGTGCCAAGTCGCTTCTCACCTGAATGCCCCAACAGCTCTCACTTATTCCTTCACCTACACACCCTGCAGCCAGAAGACTCGGCCCTGTATCTCTGTGCCAGCAGTCCTAGCGGCACCTTTTCCACAGATACGCAGTATTTTGGCCCAGGCACCCGGCTGACAGTGCTCG TGTGCCAGCAGTCCTAGCGGCACCTTTTCCACAGATACGCAGTATTTT 24, 8, 6, 2, TCCTAGCGGCACCTTTTC F T TGVTQTPRHLVMGMTNKKSLKCEQHLGHNAMYWYKQSAKKPLELMFVYNFKEQTENNSVPSRFSPECPNSSHLFLHLHTLQPEDSALYLCASSPSGTFSTDTQYFGPGTRLTVL LGHNA YNFKEQ T F T T 1 +10 TRBV29-1 TRBJ2-7 CSVERGSSYEQYF 1 AGTGCTGTCATCTCTCAAAAGCCAAGCAGGGATATCTGTCAACGTGGAACCTCCCTGACGATCCAGTGTCAAGTCGATAGCCAAGTCACCATGATGTTCTGGTACCGTCAGCAACCTGGACAGAGCCTGACACTGATCGCAACTGCAAATCAGGGCTCTGAGGCCACATATGAGAGTGGATTTGTCATTGACAAGTTTCCCATCAGCCGCCCAAACCTAACATTCTCAACTCTGACTGTGAGCAACATGAGCCCTGAAGACAGCAGCATATATCTCTGCAGCGTTGAAAGGGGCAGCTCCTACGAGCAGTACTTCGGGCCGGGCACCAGGCTCACGGTCACAG TGCAGCGTTGAAAGGGGCAGCTCCTACGAGCAGTACTTC 20, 12, 2, 0, AGGGGCAG F T SAVISQKPSRDICQRGTSLTIQCQVDSQVTMMFWYRQQPGQSLTLIATANQGSEATYESGFVIDKFPISRPNLTFSTLTVSNMSPEDSSIYLCSVERGSSYEQYFGPGTRLTVT SQVTM ANQGSEA T F T T 1 +11 TRBV5-4 TRBJ2-6 CASSTDAGANVLTF 1 GAGACTGGAGTCACCCAAAGTCCCACACACCTGATCAAAACGAGAGGACAGCAAGTGACTCTGAGATGCTCTTCTCAGTCTGGGCACAACACTGTGTCCTGGTACCAACAGGCCCTGGGTCAGGGGCCCCAGTTTATCTTTCAGTATTATAGGGAGGAAGAGAATGGCAGAGGAAACTTCCCTCCTAGATTCTCAGGTCTCCAGTTCCCTAATTATAGCTCTGAGCTGAATGTGAACGCCTTGGAGCTGGACGACTCGGCCCTGTATCTCTGTGCCAGCAGCACCGATGCTGGGGCCAACGTCCTGACTTTCGGGGCCGGCAGCAGGCTGACCGTGCTGG TGTGCCAGCAGCACCGATGCTGGGGCCAACGTCCTGACTTTC 27, 11, 4, 2, ACCGATG F T ETGVTQSPTHLIKTRGQQVTLRCSSQSGHNTVSWYQQALGQGPQFIFQYYREEENGRGNFPPRFSGLQFPNYSSELNVNALELDDSALYLCASSTDAGANVLTFGAGSRLTVL SGHNT YYREEE T F T T 1 +12 TRBV6-1 TRBJ2-7 CASTETTSGSYEQYF 1 AATGCTGGTGTCACTCAGACCCCAAAATTCCAGGTCCTGAAGACAGGACAGAGCATGACACTGCAGTGTGCCCAGGATATGAACCATAACTCCATGTACTGGTATCGACAAGACCCAGGCATGGGACTGAGGCTGATTTATTACTCAGCTTCTGAGGGTACCACTGACAAAGGAGAAGTCCCCAATGGCTACAATGTCTCCAGATTAAACAAACGGGAGTTCTCGCTCAGGCTGGAGTCGGCTGCTCCCTCCCAGACATCTGTGTACTTCTGTGCCAGCACCGAGACGACTAGCGGGAGCTACGAGCAGTACTTCGGGCCGGGCACCAGGCTCACGGTCACAG TGTGCCAGCACCGAGACGACTAGCGGGAGCTACGAGCAGTACTTC 31, 12, 7, 3, CCGAGACGACTAGCGGGAG F T NAGVTQTPKFQVLKTGQSMTLQCAQDMNHNSMYWYRQDPGMGLRLIYYSASEGTTDKGEVPNGYNVSRLNKREFSLRLESAAPSQTSVYFCASTETTSGSYEQYFGPGTRLTVT MNHNS SASEGT T F T T 1 +13 TRBV2 TRBJ2-5 CASKRTGGKPSETQYF 1 GAACCTGAAGTCACCCAGACTCCCAGCCATCAGGTCACACAGATGGGACAGGAAGTGATCTTGCGCTGTGTCCCCATCTCTAATCACTTATACTTCTATTGGTACAGACAAATCTTGGGGCAGAAAGTCGAGTTTCTGGTTTCCTTTTATAATAATGAAATCTCAGAGAAGTCTGAAATATTCGATGATCAATTCTCAGTTGAAAGGCCTGATGGATCAAATTTCACTCTGAAGATCCGGTCCACAAAGCTGGAGGACTCAGCCATGTACTTCTGTGCCAGCAAACGGACGGGAGGGAAGCCGAGCGAGACCCAGTACTTCGGGCCAGGCACGCGGCTCCTGGTGCTCG TGTGCCAGCAAACGGACGGGAGGGAAGCCGAGCGAGACCCAGTACTTC 14, 10, 7, 5, AACGGACGGGAGGGAAGCCGAGC F T EPEVTQTPSHQVTQMGQEVILRCVPISNHLYFYWYRQILGQKVEFLVSFYNNEISEKSEIFDDQFSVERPDGSNFTLKIRSTKLEDSAMYFCASKRTGGKPSETQYFGPGTRLLVL SNHLY FYNNEI T F T T 1 +14 TRBV30 TRBJ1-1 CAWEQGATEAFF 1 TCTCAGACTATTCATCAATGGCCAGCGACCCTGGTGCAGCCTGTGGGCAGCCCGCTCTCTCTGGAGTGCACTGTGGAGGGAACATCAAACCCCAACCTATACTGGTACCGACAGGCTGCAGGCAGGGGCCTCCAGCTGCTCTTCTACTCCGTTGGTATTGGCCAGATCAGCTCTGAGGTGCCCCAGAATCTCTCAGCCTCCAGACCCCAGGACCGGCAGTTCATCCTGAGTTCTAAGAAGCTCCTTCTCAGTGACTCTGGCTTCTATCTCTGTGCCTGGGAACAGGGGGCCACTGAAGCTTTCTTTGGACAAGGCACCAGACTCACAGTTGTAG TGTGCCTGGGAACAGGGGGCCACTGAAGCTTTCTTT 22, 0, 5, 4, GAACAGGGGGC F T SQTIHQWPATLVQPVGSPLSLECTVEGTSNPNLYWYRQAAGRGLQLLFYSVGIGQISSEVPQNLSASRPQDRQFILSSKKLLLSDSGFYLCAWEQGATEAFFGQGTRLTVV GTSNPN SVGIG T F T T 1 +15 TRBV11-2 TRBJ2-1 CASSLDGQRVFNYNEQFF 1 GAAGCTGGAGTTGCCCAGTCTCCCAGATATAAGATTATAGAGAAAAGGCAGAGTGTGGCTTTTTGGTGCAATCCTATATCTGGCCATGCTACCCTTTACTGGTACCAGCAGATCCTGGGACAGGGCCCAAAGCTTCTGATTCAGTTTCAGAATAACGGTGTAGTGGATGATTCACAGTTGCCTAAGGATCGATTTTCTGCAGAGAGGCTCAAAGGAGTAGACTCCACTCTCAAGATCCAGCCTGCAAAGCTTGAGGACTCGGCCGTGTATCTCTGTGCCAGCAGCTTAGATGGGCAGCGGGTGTTTAATTACAATGAGCAGTTCTTCGGGCCAGGGACACGGCTCACCGTGCTAG TGTGCCAGCAGCTTAGATGGGCAGCGGGTGTTTAATTACAATGAGCAGTTCTTC 4, 6, 0, 4, TGGGCAGCGGGTGTTTAAT F T EAGVAQSPRYKIIEKRQSVAFWCNPISGHATLYWYQQILGQGPKLLIQFQNNGVVDDSQLPKDRFSAERLKGVDSTLKIQPAKLEDSAVYLCASSLDGQRVFNYNEQFFGPGTRLTVL SGHAT FQNNGV T F T T 1 +16 TRBV7-2 TRBJ1-4 CASSLIWGTAKELFF 1 GGAGCTGGAGTCTCCCAGTCCCCCAGTAACAAGGTCACAGAGAAGGGAAAGGATGTAGAGCTCAGGTGTGATCCAATTTCAGGTCATACTGCCCTTTACTGGTACCGACAGAGCCTGGGGCAGGGCCTGGAGTTTTTAATTTACTTCCAAGGCAACAGTGCACCAGACAAATCAGGGCTGCCCAGTGATCGCTTCTCTGCAGAGAGGACTGGGGGATCCGTCTCCACTCTGACGATCCAGCGCACACAGCAGGAGGACTCGGCCGTGTATCTCTGTGCCAGCAGCTTAATCTGGGGGACAGCAAAGGAACTGTTTTTTGGCAGTGGAACCCAGCTCTCTGTCTTGG TGTGCCAGCAGCTTAATCTGGGGGACAGCAAAGGAACTGTTTTTT 37, 3, 2, 12, ATCTGGGGGACAGCAAAGG F T GAGVSQSPSNKVTEKGKDVELRCDPISGHTALYWYRQSLGQGLEFLIYFQGNSAPDKSGLPSDRFSAERTGGSVSTLTIQRTQQEDSAVYLCASSLIWGTAKELFFGSGTQLSVL SGHTA FQGNSA T F T T 1 +17 TRBV29-1 TRBJ2-1 CSVEAAGGREQFF 1 AGTGCTGTCATCTCTCAAAAGCCAAGCAGGGATATCTGTCAACGTGGAACCTCCCTGACGATCCAGTGTCAAGTCGATAGCCAAGTCACCATGATGTTCTGGTACCGTCAGCAACCTGGACAGAGCCTGACACTGATCGCAACTGCAAATCAGGGCTCTGAGGCCACATATGAGAGTGGATTTGTCATTGACAAGTTTCCCATCAGCCGCCCAAACCTAACATTCTCAACTCTGACTGTGAGCAACATGAGCCCTGAAGACAGCAGCATATATCTCTGCAGCGTTGAAGCCGCAGGGGGCCGCGAGCAGTTCTTCGGGCCAGGGACACGGCTCACCGTGCTAG TGCAGCGTTGAAGCCGCAGGGGGCCGCGAGCAGTTCTTC 20, 6, 1, 10, CCGCAGGGGGCCGC F T SAVISQKPSRDICQRGTSLTIQCQVDSQVTMMFWYRQQPGQSLTLIATANQGSEATYESGFVIDKFPISRPNLTFSTLTVSNMSPEDSSIYLCSVEAAGGREQFFGPGTRLTVL SQVTM ANQGSEA T F T T 1 +18 TRBV7-9 TRBJ2-3 CASSLGRTSSTDTQYF 1 GATACTGGAGTCTCCCAGAACCCCAGACACAAGATCACAAAGAGGGGACAGAATGTAACTTTCAGGTGTGATCCAATTTCTGAACACAACCGCCTTTATTGGTACCGACAGACCCTGGGGCAGGGCCCAGAGTTTCTGACTTACTTCCAGAATGAAGCTCAACTAGAAAAATCAAGGCTGCTCAGTGATCGGTTCTCTGCAGAGAGGCCTAAGGGATCTTTCTCCACCTTGGAGATCCAGCGCACAGAGCAGGGGGACTCGGCCATGTATCTCTGTGCCAGCAGCTTGGGGAGGACTAGCAGCACAGATACGCAGTATTTTGGCCCAGGCACCCGGCTGACAGTGCTCG TGTGCCAGCAGCTTGGGGAGGACTAGCAGCACAGATACGCAGTATTTT 43, 8, 3, 0, GGGGAGGACTAGC F T DTGVSQNPRHKITKRGQNVTFRCDPISEHNRLYWYRQTLGQGPEFLTYFQNEAQLEKSRLLSDRFSAERPKGSFSTLEIQRTEQGDSAMYLCASSLGRTSSTDTQYFGPGTRLTVL SEHNR FQNEAQ T F T T 2 +19 TRBV29-1 TRBJ2-6 CSVALQAGANVLTF 1 AGTGCTGTCATCTCTCAAAAGCCAAGCAGGGATATCTGTCAACGTGGAACCTCCCTGACGATCCAGTGTCAAGTCGATAGCCAAGTCACCATGATGTTCTGGTACCGTCAGCAACCTGGACAGAGCCTGACACTGATCGCAACTGCAAATCAGGGCTCTGAGGCCACATATGAGAGTGGATTTGTCATTGACAAGTTTCCCATCAGCCGCCCAAACCTAACATTCTCAACTCTGACTGTGAGCAACATGAGCCCTGAAGACAGCAGCATATATCTCTGCAGCGTTGCTCTCCAGGCTGGGGCCAACGTCCTGACTTTCGGGGCCGGCAGCAGGCTGACCGTGCTGG TGCAGCGTTGCTCTCCAGGCTGGGGCCAACGTCCTGACTTTC 20, 11, 4, 2, CTCTCCAGG F T SAVISQKPSRDICQRGTSLTIQCQVDSQVTMMFWYRQQPGQSLTLIATANQGSEATYESGFVIDKFPISRPNLTFSTLTVSNMSPEDSSIYLCSVALQAGANVLTFGAGSRLTVL SQVTM ANQGSEA T F T T 2 +20 TRBV20-1 TRBJ2-7 CSARLAGDYEQYF 1 GGTGCTGTCGTCTCTCAACATCCGAGCTGGGTTATCTGTAAGAGTGGAACCTCTGTGAAGATCGAGTGCCGTTCCCTGGACTTTCAGGCCACAACTATGTTTTGGTATCGTCAGTTCCCGAAACAGAGTCTCATGCTGATGGCAACTTCCAATGAGGGCTCCAAGGCCACATACGAGCAAGGCGTCGAGAAGGACAAGTTTCTCATCAACCATGCAAGCCTGACCTTGTCCACTCTGACAGTGACCAGTGCCCATCCTGAAGACAGCAGCTTCTACATCTGCAGTGCTCGACTAGCGGGAGACTACGAGCAGTACTTCGGGCCGGGCACCAGGCTCACGGTCACAG TGCAGTGCTCGACTAGCGGGAGACTACGAGCAGTACTTC 15, 12, 5, 3, CGACTAGCGGGAGA F T GAVVSQHPSWVICKSGTSVKIECRSLDFQATTMFWYRQFPKQSLMLMATSNEGSKATYEQGVEKDKFLINHASLTLSTLTVTSAHPEDSSFYICSARLAGDYEQYFGPGTRLTVT DFQATT SNEGSKA T F T T 1 +21 TRBV6-5 TRBJ2-1 CASSPGTGAYNEQFF 1 AATGCTGGTGTCACTCAGACCCCAAAATTCCAGGTCCTGAAGACAGGACAGAGCATGACACTGCAGTGTGCCCAGGATATGAACCATGAATACATGTCCTGGTATCGACAAGACCCAGGCATGGGGCTGAGGCTGATTCATTACTCAGTTGGTGCTGGTATCACTGACCAAGGAGAAGTCCCCAATGGCTACAATGTCTCCAGATCAACCACAGAGGATTTCCCGCTCAGGCTGCTGTCGGCTGCTCCCTCCCAGACATCTGTGTACTTCTGTGCCAGCAGTCCCGGGACAGGAGCCTACAATGAGCAGTTCTTCGGGCCAGGGACACGGCTCACCGTGCTAG TGTGCCAGCAGTCCCGGGACAGGAGCCTACAATGAGCAGTTCTTC 33, 6, 5, 2, CCCGGGACAGGAG F T NAGVTQTPKFQVLKTGQSMTLQCAQDMNHEYMSWYRQDPGMGLRLIHYSVGAGITDQGEVPNGYNVSRSTTEDFPLRLLSAAPSQTSVYFCASSPGTGAYNEQFFGPGTRLTVL MNHEY SVGAGI T F T T 1 +22 TRBV9 TRBJ1-4 CASSPRAATNEKLFF 1 GATTCTGGAGTCACACAAACCCCAAAGCACCTGATCACAGCAACTGGACAGCGAGTGACGCTGAGATGCTCCCCTAGGTCTGGAGACCTCTCTGTGTACTGGTACCAACAGAGCCTGGACCAGGGCCTCCAGTTCCTCATTCAGTATTATAATGGAGAAGAGAGAGCAAAAGGAAACATTCTTGAACGATTCTCCGCACAACAGTTCCCTGACTTGCACTCTGAACTAAACCTGAGCTCTCTGGAGCTGGGGGACTCAGCTTTGTATTTCTGTGCCAGCAGCCCCCGCGCGGCAACTAATGAAAAACTGTTTTTTGGCAGTGGAACCCAGCTCTCTGTCTTGG TGTGCCAGCAGCCCCCGCGCGGCAACTAATGAAAAACTGTTTTTT 44, 3, 4, 0, CCCCGCGCGG F T DSGVTQTPKHLITATGQRVTLRCSPRSGDLSVYWYQQSLDQGLQFLIQYYNGEERAKGNILERFSAQQFPDLHSELNLSSLELGDSALYFCASSPRAATNEKLFFGSGTQLSVL SGDLS YYNGEE T F T T 1 +23 TRBV5-6 TRBJ2-1 CASSLGLAGHNEQFF 1 GACGCTGGAGTCACCCAAAGTCCCACACACCTGATCAAAACGAGAGGACAGCAAGTGACTCTGAGATGCTCTCCTAAGTCTGGGCATGACACTGTGTCCTGGTACCAACAGGCCCTGGGTCAGGGGCCCCAGTTTATCTTTCAGTATTATGAGGAGGAAGAGAGACAGAGAGGCAACTTCCCTGATCGATTCTCAGGTCACCAGTTCCCTAACTATAGCTCTGAGCTGAATGTGAACGCCTTGTTGCTGGGGGACTCGGCCCTCTATCTCTGTGCCAGCAGCTTGGGTCTAGCGGGACACAATGAGCAGTTCTTCGGGCCAGGGACACGGCTCACCGTGCTAG TGTGCCAGCAGCTTGGGTCTAGCGGGACACAATGAGCAGTTCTTC 29, 6, 0, 5, GTCTAGCGGGAC F T DAGVTQSPTHLIKTRGQQVTLRCSPKSGHDTVSWYQQALGQGPQFIFQYYEEEERQRGNFPDRFSGHQFPNYSSELNVNALLLGDSALYLCASSLGLAGHNEQFFGPGTRLTVL SGHDT YYEEEE T F T T 1 +24 TRBV2 TRBJ2-2 CASSEVTSRELFF 1 GAACCTGAAGTCACCCAGACTCCCAGCCATCAGGTCACACAGATGGGACAGGAAGTGATCTTGCGCTGTGTCCCCATCTCTAATCACTTATACTTCTATTGGTACAGACAAATCTTGGGGCAGAAAGTCGAGTTTCTGGTTTCCTTTTATAATAATGAAATCTCAGAGAAGTCTGAAATATTCGATGATCAATTCTCAGTTGAAAGGCCTGATGGATCAAATTTCACTCTGAAGATCCGGTCCACAAAGCTGGAGGACTCAGCCATGTACTTCTGTGCCAGCAGTGAGGTGACTAGCCGGGAGCTGTTTTTTGGAGAAGGCTCTAGGCTGACCGTACTGG TGTGCCAGCAGTGAGGTGACTAGCCGGGAGCTGTTTTTT 14, 7, 3, 9, GGTGACTAGCC F T EPEVTQTPSHQVTQMGQEVILRCVPISNHLYFYWYRQILGQKVEFLVSFYNNEISEKSEIFDDQFSVERPDGSNFTLKIRSTKLEDSAMYFCASSEVTSRELFFGEGSRLTVL SNHLY FYNNEI T F T T 1 +25 TRBV5-1 TRBJ2-1 CASSPGLAPRNEQFF 1 AAGGCTGGAGTCACTCAAACTCCAAGATATCTGATCAAAACGAGAGGACAGCAAGTGACACTGAGCTGCTCCCCTATCTCTGGGCATAGGAGTGTATCCTGGTACCAACAGACCCCAGGACAGGGCCTTCAGTTCCTCTTTGAATACTTCAGTGAGACACAGAGAAACAAAGGAAACTTCCCTGGTCGATTCTCAGGGCGCCAGTTCTCTAACTCTCGCTCTGAGATGAATGTGAGCACCTTGGAGCTGGGGGACTCGGCCCTTTATCTTTGCGCCAGCAGCCCAGGACTAGCACCGAGGAATGAGCAGTTCTTCGGGCCAGGGACACGGCTCACCGTGCTAG TGCGCCAGCAGCCCAGGACTAGCACCGAGGAATGAGCAGTTCTTC 26, 6, 4, 7, CCAGGACTAGCACCGAGG F T KAGVTQTPRYLIKTRGQQVTLSCSPISGHRSVSWYQQTPGQGLQFLFEYFSETQRNKGNFPGRFSGRQFSNSRSEMNVSTLELGDSALYLCASSPGLAPRNEQFFGPGTRLTVL SGHRS YFSETQ T F T T 1 +26 TRBV20-1 TRBJ1-5 CSANLGADSNQPQHF 1 GGTGCTGTCGTCTCTCAACATCCGAGCTGGGTTATCTGTAAGAGTGGAACCTCTGTGAAGATCGAGTGCCGTTCCCTGGACTTTCAGGCCACAACTATGTTTTGGTATCGTCAGTTCCCGAAACAGAGTCTCATGCTGATGGCAACTTCCAATGAGGGCTCCAAGGCCACATACGAGCAAGGCGTCGAGAAGGACAAGTTTCTCATCAACCATGCAAGCCTGACCTTGTCCACTCTGACAGTGACCAGTGCCCATCCTGAAGACAGCAGCTTCTACATCTGCAGTGCTAATCTAGGGGCCGATAGCAATCAGCCCCAGCATTTTGGTGATGGGACTCGACTCTCCATCCTAG TGCAGTGCTAATCTAGGGGCCGATAGCAATCAGCCCCAGCATTTT 15, 4, 4, 0, ATCTAGGGGCCGA F T GAVVSQHPSWVICKSGTSVKIECRSLDFQATTMFWYRQFPKQSLMLMATSNEGSKATYEQGVEKDKFLINHASLTLSTLTVTSAHPEDSSFYICSANLGADSNQPQHFGDGTRLSIL DFQATT SNEGSKA T F T T 1 +27 TRBV14 TRBJ2-5 CASSSTGAQETQYF 1 GAAGCTGGAGTTACTCAGTTCCCCAGCCACAGCGTAATAGAGAAGGGCCAGACTGTGACTCTGAGATGTGACCCAATTTCTGGACATGATAATCTTTATTGGTATCGACGTGTTATGGGAAAAGAAATAAAATTTCTGTTACATTTTGTGAAAGAGTCTAAACAGGATGAGTCCGGTATGCCCAACAATCGATTCTTAGCTGAAAGGACTGGAGGGACGTATTCTACTCTGAAGGTGCAGCCTGCAGAACTGGAGGATTCTGGAGTTTATTTCTGTGCCAGCAGCTCGACCGGGGCGCAAGAGACCCAGTACTTCGGGCCAGGCACGCGGCTCCTGGTGCTCG TGTGCCAGCAGCTCGACCGGGGCGCAAGAGACCCAGTACTTC 9, 10, 5, 2, TCGACCGGGGCG F T EAGVTQFPSHSVIEKGQTVTLRCDPISGHDNLYWYRRVMGKEIKFLLHFVKESKQDESGMPNNRFLAERTGGTYSTLKVQPAELEDSGVYFCASSSTGAQETQYFGPGTRLLVL SGHDN FVKESK T F T T 1 +28 TRBV3-1 TRBJ2-7 CASTLSGSSYEQYF 1 ACAGCTGTTTCCCAGACTCCAAAATACCTGGTCACACAGATGGGAAACGACAAGTCCATTAAATGTGAACAAAATCTGGGCCATGATACTATGTATTGGTATAAACAGGACTCTAAGAAATTTCTGAAGATAATGTTTAGCTACAATAATAAGGAGCTCATTATAAATGAAACAGTTCCAAATCGCTTCTCACCTAAATCTCCAGACAAAGCTCACTTAAATCTTCACATCAATTCCCTGGAGCTTGGTGACTCTGCTGTGTATTTCTGTGCCAGTACCCTTAGCGGGAGCTCCTACGAGCAGTACTTCGGGCCGGGCACCAGGCTCACGGTCACAG TGTGCCAGTACCCTTAGCGGGAGCTCCTACGAGCAGTACTTC 21, 12, 9, 0, TACCCTTAGCGGGAG F T TAVSQTPKYLVTQMGNDKSIKCEQNLGHDTMYWYKQDSKKFLKIMFSYNNKELIINETVPNRFSPKSPDKAHLNLHINSLELGDSAVYFCASTLSGSSYEQYFGPGTRLTVT LGHDT YNNKEL T F T T 1 +29 TRBV7-9 TRBJ2-5 CASSLGAMQQETQYF 1 GATACTGGAGTCTCCCAGAACCCCAGACACAAGATCACAAAGAGGGGACAGAATGTAACTTTCAGGTGTGATCCAATTTCTGAACACAACCGCCTTTATTGGTACCGACAGACCCTGGGGCAGGGCCCAGAGTTTCTGACTTACTTCCAGAATGAAGCTCAACTAGAAAAATCAAGGCTGCTCAGTGATCGGTTCTCTGCAGAGAGGCCTAAGGGATCTTTCTCCACCTTGGAGATCCAGCGCACAGAGCAGGGGGACTCGGCCATGTATCTCTGTGCCAGCAGCTTAGGCGCCATGCAACAAGAGACCCAGTACTTCGGGCCAGGCACGCGGCTCCTGGTGCTCG TGTGCCAGCAGCTTAGGCGCCATGCAACAAGAGACCCAGTACTTC 43, 10, 1, 2, GCGCCATGCAA F T DTGVSQNPRHKITKRGQNVTFRCDPISEHNRLYWYRQTLGQGPEFLTYFQNEAQLEKSRLLSDRFSAERPKGSFSTLEIQRTEQGDSAMYLCASSLGAMQQETQYFGPGTRLLVL SEHNR FQNEAQ T F T T 1 +30 TRBV9 TRBJ1-1 CASSVRESNTEAFF 1 GATTCTGGAGTCACACAAACCCCAAAGCACCTGATCACAGCAACTGGACAGCGAGTGACGCTGAGATGCTCCCCTAGGTCTGGAGACCTCTCTGTGTACTGGTACCAACAGAGCCTGGACCAGGGCCTCCAGTTCCTCATTCAGTATTATAATGGAGAAGAGAGAGCAAAAGGAAACATTCTTGAACGATTCTCCGCACAACAGTTCCCTGACTTGCACTCTGAACTAAACCTGAGCTCTCTGGAGCTGGGGGACTCAGCTTTGTATTTCTGTGCCAGCAGCGTCAGGGAATCGAACACTGAAGCTTTCTTTGGACAAGGCACCAGACTCACAGTTGTAG TGTGCCAGCAGCGTCAGGGAATCGAACACTGAAGCTTTCTTT 44, 0, 2, 1, CAGGGAATC F T DSGVTQTPKHLITATGQRVTLRCSPRSGDLSVYWYQQSLDQGLQFLIQYYNGEERAKGNILERFSAQQFPDLHSELNLSSLELGDSALYFCASSVRESNTEAFFGQGTRLTVV SGDLS YYNGEE T F T T 1 +31 TRBV10-3 TRBJ2-5 CAISESRFGHTQETQYF 1 GATGCTGGAATCACCCAGAGCCCAAGACACAAGGTCACAGAGACAGGAACACCAGTGACTCTGAGATGTCACCAGACTGAGAACCACCGCTATATGTACTGGTATCGACAAGACCCGGGGCATGGGCTGAGGCTGATCCATTACTCATATGGTGTTAAAGATACTGACAAAGGAGAAGTCTCAGATGGCTATAGTGTCTCTAGATCAAAGACAGAGGATTTCCTCCTCACTCTGGAGTCCGCTACCAGCTCCCAGACATCTGTGTACTTCTGTGCCATCAGTGAGTCGAGGTTTGGACACACCCAAGAGACCCAGTACTTCGGGCCAGGCACGCGGCTCCTGGTGCTCG TGTGCCATCAGTGAGTCGAGGTTTGGACACACCCAAGAGACCCAGTACTTC 2, 10, 0, 1, GAGGTTTGGACACAC F T DAGITQSPRHKVTETGTPVTLRCHQTENHRYMYWYRQDPGHGLRLIHYSYGVKDTDKGEVSDGYSVSRSKTEDFLLTLESATSSQTSVYFCAISESRFGHTQETQYFGPGTRLLVL ENHRY SYGVKD T F T T 1 +32 TRBV6-1 TRBJ2-7 CASSESGRVYEQYF 1 AATGCTGGTGTCACTCAGACCCCAAAATTCCAGGTCCTGAAGACAGGACAGAGCATGACACTGCAGTGTGCCCAGGATATGAACCATAACTCCATGTACTGGTATCGACAAGACCCAGGCATGGGACTGAGGCTGATTTATTACTCAGCTTCTGAGGGTACCACTGACAAAGGAGAAGTCCCCAATGGCTACAATGTCTCCAGATTAAACAAACGGGAGTTCTCGCTCAGGCTGGAGTCGGCTGCTCCCTCCCAGACATCTGTGTACTTCTGTGCCAGCAGTGAGAGCGGGAGGGTCTACGAGCAGTACTTCGGGCCGGGCACCAGGCTCACGGTCACAG TGTGCCAGCAGTGAGAGCGGGAGGGTCTACGAGCAGTACTTC 31, 12, 3, 3, GAGCGGGAGGGT F T NAGVTQTPKFQVLKTGQSMTLQCAQDMNHNSMYWYRQDPGMGLRLIYYSASEGTTDKGEVPNGYNVSRLNKREFSLRLESAAPSQTSVYFCASSESGRVYEQYFGPGTRLTVT MNHNS SASEGT T F T T 1 +33 TRBV28 TRBJ2-1 CASGLADYNEQFF 1 GATGTGAAAGTAACCCAGAGCTCGAGATATCTAGTCAAAAGGACGGGAGAGAAAGTTTTTCTGGAATGTGTCCAGGATATGGACCATGAAAATATGTTCTGGTATCGACAAGACCCAGGTCTGGGGCTACGGCTGATCTATTTCTCATATGATGTTAAAATGAAAGAAAAAGGAGATATTCCTGAGGGGTACAGTGTCTCTAGAGAGAAGAAGGAGCGCTTCTCCCTGATTCTGGAGTCCGCCAGCACCAACCAGACATCTATGTACCTCTGTGCCAGTGGACTAGCGGACTACAATGAGCAGTTCTTCGGGCCAGGGACACGGCTCACCGTGCTAG TGTGCCAGTGGACTAGCGGACTACAATGAGCAGTTCTTC 19, 6, 9, 3, TGGACTAGCGGA F T DVKVTQSSRYLVKRTGEKVFLECVQDMDHENMFWYRQDPGLGLRLIYFSYDVKMKEKGDIPEGYSVSREKKERFSLILESASTNQTSMYLCASGLADYNEQFFGPGTRLTVL MDHEN SYDVKM T F T T 1 +34 TRBV12-4 TRBJ2-1 CASSLGLLNEQFF 1 GATGCTGGAGTTATCCAGTCACCCCGGCACGAGGTGACAGAGATGGGACAAGAAGTGACTCTGAGATGTAAACCAATTTCAGGACACGACTACCTTTTCTGGTACAGACAGACCATGATGCGGGGACTGGAGTTGCTCATTTACTTTAACAACAACGTTCCGATAGATGATTCAGGGATGCCCGAGGATCGATTCTCAGCTAAGATGCCTAATGCATCATTCTCCACTCTGAAGATCCAGCCCTCAGAACCCAGGGACTCAGCTGTGTACTTCTGTGCCAGCAGTTTAGGTCTGCTTAATGAGCAGTTCTTCGGGCCAGGGACACGGCTCACCGTGCTAG TGTGCCAGCAGTTTAGGTCTGCTTAATGAGCAGTTCTTC 6, 6, 1, 7, GTCTGCTT F T DAGVIQSPRHEVTEMGQEVTLRCKPISGHDYLFWYRQTMMRGLELLIYFNNNVPIDDSGMPEDRFSAKMPNASFSTLKIQPSEPRDSAVYFCASSLGLLNEQFFGPGTRLTVL SGHDY FNNNVP T F T T 1 +35 TRBV28 TRBJ2-7 CASSELTGRGEQYF 1 GATGTGAAAGTAACCCAGAGCTCGAGATATCTAGTCAAAAGGACGGGAGAGAAAGTTTTTCTGGAATGTGTCCAGGATATGGACCATGAAAATATGTTCTGGTATCGACAAGACCCAGGTCTGGGGCTACGGCTGATCTATTTCTCATATGATGTTAAAATGAAAGAAAAAGGAGATATTCCTGAGGGGTACAGTGTCTCTAGAGAGAAGAAGGAGCGCTTCTCCCTGATTCTGGAGTCCGCCAGCACCAACCAGACATCTATGTACCTCTGTGCCAGCAGTGAACTTACGGGGCGCGGCGAGCAGTACTTCGGGCCGGGCACCAGGCTCACGGTCACAG TGTGCCAGCAGTGAACTTACGGGGCGCGGCGAGCAGTACTTC 19, 12, 5, 6, GAACTTACGGGGCGCGG F T DVKVTQSSRYLVKRTGEKVFLECVQDMDHENMFWYRQDPGLGLRLIYFSYDVKMKEKGDIPEGYSVSREKKERFSLILESASTNQTSMYLCASSELTGRGEQYFGPGTRLTVT MDHEN SYDVKM T F T T 1 +36 TRBV7-6 TRBJ2-1 CASSQTMSGSQAHEQFF 1 GGTGCTGGAGTCTCCCAGTCTCCCAGGTACAAAGTCACAAAGAGGGGACAGGATGTAGCTCTCAGGTGTGATCCAATTTCGGGTCATGTATCCCTTTATTGGTACCGACAGGCCCTGGGGCAGGGCCCAGAGTTTCTGACTTACTTCAATTATGAAGCCCAACAAGACAAATCAGGGCTGCCCAATGATCGGTTCTCTGCAGAGAGGCCTGAGGGATCCATCTCCACTCTGACGATCCAGCGCACAGAGCAGCGGGACTCGGCCATGTATCGCTGTGCCAGCAGCCAAACCATGAGTGGCTCACAGGCCCATGAGCAGTTCTTCGGGCCAGGGACACGGCTCACCGTGCTAG TGTGCCAGCAGCCAAACCATGAGTGGCTCACAGGCCCATGAGCAGTTCTTC 40, 6, 5, 8, CAAACCATGAGTGGCTCACAGGCCC F T GAGVSQSPRYKVTKRGQDVALRCDPISGHVSLYWYRQALGQGPEFLTYFNYEAQQDKSGLPNDRFSAERPEGSISTLTIQRTEQRDSAMYRCASSQTMSGSQAHEQFFGPGTRLTVL SGHVS FNYEAQ T F T T 1 +37 TRBV25-1 TRBJ2-5 CASSEWGGGQETQYF 1 GAAGCTGACATCTACCAGACCCCAAGATACCTTGTTATAGGGACAGGAAAGAAGATCACTCTGGAATGTTCTCAAACCATGGGCCATGACAAAATGTACTGGTATCAACAAGATCCAGGAATGGAACTACACCTCATCCACTATTCCTATGGAGTTAATTCCACAGAGAAGGGAGATCTTTCCTCTGAGTCAACAGTCTCCAGAATAAGGACGGAGCATTTTCCCCTGACCCTGGAGTCTGCCAGGCCCTCACATACCTCTCAGTACCTCTGTGCCAGCAGTGAATGGGGAGGGGGCCAGGAGACCCAGTACTTCGGGCCAGGCACGCGGCTCCTGGTGCTCG TGTGCCAGCAGTGAATGGGGAGGGGGCCAGGAGACCCAGTACTTC 17, 10, 1, 5, GGGGAGGGGGCCAG F T EADIYQTPRYLVIGTGKKITLECSQTMGHDKMYWYQQDPGMELHLIHYSYGVNSTEKGDLSSESTVSRIRTEHFPLTLESARPSHTSQYLCASSEWGGGQETQYFGPGTRLLVL MGHDK SYGVNS T F T T 1 diff --git a/tests/test_cli.py b/tests/test_cli.py index 5918c52..7df5d6f 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,4 +1,3 @@ -from decombinator import pipeline, io import pytest import pathlib import os diff --git a/tests/test_collapse.py b/tests/test_collapse.py index c43e633..5f66b10 100644 --- a/tests/test_collapse.py +++ b/tests/test_collapse.py @@ -1,5 +1,6 @@ import collections as coll import pathlib +import typing import pytest @@ -41,7 +42,7 @@ def test_create_cluster_structures(self, barcode_dcretc_list): def test_merge_order(self, barcode_dcretc_list): clusters = collapse.cluster_UMIs( - barcode_dcretc_list, {"writeclusters": False}, 2, 25, True + barcode_dcretc_list, {"writeclusters": False}, 2, 0.25, True ) assert clusters == { @@ -199,12 +200,20 @@ class TestReadInData: collapse.counts = coll.Counter() @pytest.fixture - def blank_input(self): + def blank_input(self) -> list[None]: return [] @pytest.fixture - def pipe_args(self): - return {"command": "pipeline"} + def pipe_args(self) -> dict[str, typing.Union[str, int]]: + return { + "command": "pipeline", + "lenthreshold": 130, + "minbcQ": 20, + "bcQbelowmin": 1, + "avgQthreshold": 30, + "oligo": "M13", + "sampling_analysis": False, + } def test_no_dcr(self, blank_input, pipe_args): with pytest.raises(ValueError): @@ -212,6 +221,106 @@ def test_no_dcr(self, blank_input, pipe_args): blank_input, pipe_args, None, None, None, None ) + @pytest.fixture + def valid_input(self) -> list[list[str]]: + return [ + [ + "15", + "4", + "1", + "7", + "CCCCCAGGGGGCTC", + "LH00409:259:22JJCFLT4:8:1149:38410:17375", + "ATCCTGAAGACAGCAGCTTCTACATCTGCAGTGCTAGAGCCCCCAGGGGGCTCCAGCCCCAGCATTTTGGTGATGGGACTCGACTC", + "IIIIIIIIIIIIIII-II-IIIIIIIIIIIIIIIIIIIIIIIIIIIII-IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII", + "GTCGTGACTGGGAAAACCCTGGCACCCGGTCGTGATCTGACT", + "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII", + "GACACAACTCTCCCCAGAGAAGGTGGTGTGA", + ], + [ # Final base of junction is changed, should still collapse + "15", + "4", + "1", + "7", + "CCCCCAGGGGGCTG", + "LH00409:259:22JJCFLT4:8:1149:38410:17375", + "ATCCTGAAGACAGCAGCTTCTACATCTGCAGTGCTAGAGCCCCCAGGGGGCTGCAGCCCCAGCATTTTGGTGATGGGACTCGACTC", + "IIIIIIIIIIIIIII-II-IIIIIIIIIIIIIIIIIIIIIIIIIIIII-IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII", + "GTCGTGACTGGGAAAACCCTGGCACCCGGTCGTGATCTGACT", + "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII", + "GACACAACTCTCCCCAGAGAAGGTGGTGTGA", + ], + [ # Same TCR but with different junction region, making TCR 20% different + "15", + "4", + "1", + "7", + "AAAAAAAAAAAAAA", + "LH00409:259:22JJCFLT4:8:1149:38410:17375", + "ATCCTGAAGACAGCAGCTTCTACATCTGCAGTGCTAGAGAAAAAAAAAAAAAACAGCCCCAGCATTTTGGTGATGGGACTCGACTC", + "IIIIIIIIIIIIIII-II-IIIIIIIIIIIIIIIIIIIIIIIIIIIII-IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII", + "GTCGTGACTGGGAAAACCCTGGCACCCGGTCGTGATCTGACT", + "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII", + "GACACAACTCTCCCCAGAGAAGGTGGTGTGA", + ], + ] + + @pytest.fixture + def barcode_quality_parameters( + self, pipe_args: dict[str, int] + ) -> list[int]: + return [ + pipe_args["minbcQ"], + pipe_args["bcQbelowmin"], + pipe_args["avgQthreshold"], + ] + + def test_barcode_collision( + self, + valid_input: list[list[str]], + pipe_args: dict[str, typing.Union[str, int]], + barcode_quality_parameters: list[int], + ): + barcode_dcretc = collapse.read_in_data( + valid_input, + pipe_args, + barcode_quality_parameters, + lev_threshold_fraction=0.1, + dont_count=False, + opener=open, + ) + assert barcode_dcretc == { + "CACCCGCTGACT|0|ATCCTGAAGACAGCAGCTTCTACATCTGCAGTGCTAGAGCCCCCAGGGGGCTCCAGCCCCAGCATTTTGGTGATGGGACTCGACTC": [ + "['15', '4', '1', '7', 'CCCCCAGGGGGCTC']|ATCCTGAAGACAGCAGCTTCTACATCTGCAGTGCTAGAGCCCCCAGGGGGCTCCAGCCCCAGCATTTTGGTGATGGGACTCGACTC|IIIIIIIIIIIIIII-II-IIIIIIIIIIIIIIIIIIIIIIIIIIIII-IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII|LH00409:259:22JJCFLT4:8:1149:38410:17375", + "['15', '4', '1', '7', 'CCCCCAGGGGGCTG']|ATCCTGAAGACAGCAGCTTCTACATCTGCAGTGCTAGAGCCCCCAGGGGGCTGCAGCCCCAGCATTTTGGTGATGGGACTCGACTC|IIIIIIIIIIIIIII-II-IIIIIIIIIIIIIIIIIIIIIIIIIIIII-IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII|LH00409:259:22JJCFLT4:8:1149:38410:17375", + ], + "CACCCGCTGACT|1|ATCCTGAAGACAGCAGCTTCTACATCTGCAGTGCTAGAGAAAAAAAAAAAAAACAGCCCCAGCATTTTGGTGATGGGACTCGACTC": [ + "['15', '4', '1', '7', 'AAAAAAAAAAAAAA']|ATCCTGAAGACAGCAGCTTCTACATCTGCAGTGCTAGAGAAAAAAAAAAAAAACAGCCCCAGCATTTTGGTGATGGGACTCGACTC|IIIIIIIIIIIIIII-II-IIIIIIIIIIIIIIIIIIIIIIIIIIIII-IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII|LH00409:259:22JJCFLT4:8:1149:38410:17375" + ], + } + + def test_barcode_collision_no_tcr_check( + self, + valid_input: list[list[str]], + pipe_args: dict[str, typing.Union[str, int]], + barcode_quality_parameters: list[int], + ): + barcode_dcretc = collapse.read_in_data( + valid_input, + pipe_args, + barcode_quality_parameters, + lev_threshold_fraction=1, + dont_count=False, + opener=open, + ) + assert barcode_dcretc == { + "CACCCGCTGACT|0|ATCCTGAAGACAGCAGCTTCTACATCTGCAGTGCTAGAGCCCCCAGGGGGCTCCAGCCCCAGCATTTTGGTGATGGGACTCGACTC": [ + "['15', '4', '1', '7', 'CCCCCAGGGGGCTC']|ATCCTGAAGACAGCAGCTTCTACATCTGCAGTGCTAGAGCCCCCAGGGGGCTCCAGCCCCAGCATTTTGGTGATGGGACTCGACTC|IIIIIIIIIIIIIII-II-IIIIIIIIIIIIIIIIIIIIIIIIIIIII-IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII|LH00409:259:22JJCFLT4:8:1149:38410:17375", + "['15', '4', '1', '7', 'CCCCCAGGGGGCTG']|ATCCTGAAGACAGCAGCTTCTACATCTGCAGTGCTAGAGCCCCCAGGGGGCTGCAGCCCCAGCATTTTGGTGATGGGACTCGACTC|IIIIIIIIIIIIIII-II-IIIIIIIIIIIIIIIIIIIIIIIIIIIII-IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII|LH00409:259:22JJCFLT4:8:1149:38410:17375", + "['15', '4', '1', '7', 'AAAAAAAAAAAAAA']|ATCCTGAAGACAGCAGCTTCTACATCTGCAGTGCTAGAGAAAAAAAAAAAAAACAGCCCCAGCATTTTGGTGATGGGACTCGACTC|IIIIIIIIIIIIIII-II-IIIIIIIIIIIIIIIIIIIIIIIIIIIII-IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII|LH00409:259:22JJCFLT4:8:1149:38410:17375", + ], + } + class TestCheckDcrFile: diff --git a/tests/test_subparsers.py b/tests/test_subparsers.py index 0a55d19..d5d4cbc 100644 --- a/tests/test_subparsers.py +++ b/tests/test_subparsers.py @@ -171,8 +171,6 @@ def test_tsv_output( output_data = f.read() # Perform comparison - print(output_data) - print(reference_data) assert output_data == reference_data, "Output does not match reference data" @@ -213,14 +211,12 @@ def test_log_output( with open(reference_log, "r") as f: comparison_label = "_".join(reference_log.name.split("_")[-2:]) - print(comparison_label) reference_log_lines = f.readlines()[ comparison_start[comparison_label] ] with open(output_log, "r") as f: comparison_label = "_".join(output_log.name.split("_")[-2:]) - print(comparison_label) output_log_lines = f.readlines()[comparison_start[comparison_label]] assert output_log_lines == reference_log_lines