From abfd72a03a4d9dbf99b60b35b14a722f363d9b14 Mon Sep 17 00:00:00 2001 From: Brooke Boatman Date: Sun, 30 Apr 2023 12:57:10 -0500 Subject: [PATCH] changes visualizer to plotly from angular app, finishes tsne. model now built --- magicDeckGenerator/lib/vectorize/main.py | 114 ++++++++++++++++++++++- 1 file changed, 109 insertions(+), 5 deletions(-) diff --git a/magicDeckGenerator/lib/vectorize/main.py b/magicDeckGenerator/lib/vectorize/main.py index 775cc46..b9a6f64 100644 --- a/magicDeckGenerator/lib/vectorize/main.py +++ b/magicDeckGenerator/lib/vectorize/main.py @@ -6,11 +6,15 @@ import sys import pickle from rake_nltk import Rake -import re +from sklearn.manifold import TSNE +import numpy as np +import plotly.express as px card_path = './models/scryfall-default-cards.json' doc2vec_path = "./models/card_doc2vec" card_vec_path = './models/card_vecs.p' +full_card_path = './models/full_cards.p' +graph_point_path = './models/graphPoints.csv' color_identities = {'': 0, 'C': 0, 'R': 1, 'U': 2, 'G': 3, 'B': 4, 'W': 5, 'RU': 6, 'GR': 7, 'BR': 8, @@ -21,6 +25,16 @@ 'BGRU': 26, 'GRUW': 27, 'BRUW': 28, 'BGRW': 29, 'BGUW': 30, 'BGRUW': 31} +hex_colors = { + 0: 'grey', + 1: 'red', + 2: 'blue', + 3: 'green', + 4: 'black', + 5: 'white', + 10: 'orange' +} + def load_cards(): url = 'https://api.scryfall.com/bulk-data/oracle-cards' @@ -55,6 +69,7 @@ def get_card_rarity(card_data): def convert_str_val_to_num(value): + # 20 is the highest conceivable size of weird cards if value == "*": return 20.0 elif value == "1+*" or value == "2+*": @@ -102,6 +117,37 @@ def format_card_data(card_json, card_types, card_rarity): print(e, card['name']) +def set_card_color(card): + color = card['color_identity'] + if color > 5: + color = 10 + return { + 'name': card['name'], + 'color': str(color), + 'x': card['x'], + 'y': card['y'], + 'text': card['text'] + } + + +def create_csv(processed_card_data): + card_mapping = [set_card_color(card) + for card in processed_card_data[:100]] + + fig = 
px.scatter(card_mapping, x='x', y='y', + color='color', + color_discrete_map={ + '0': 'grey', + '1': 'red', + '2': 'blue', + '3': 'green', + '4': 'black', + '5': 'white', + '10': 'orange' + }, hover_data=['name', 'text']) + + fig.show() + def build_doc2vec_model(cards): corpus = ''.join([card['text'].replace( "//", " ") + " \n" for card in cards]) @@ -110,7 +156,7 @@ def build_doc2vec_model(cards): tags = [] phrases = r.get_ranked_phrases_with_scores() phrases.reverse() - for tag in phrases[:150000]: + for tag in phrases[:100000]: if tag[1] not in tags and tag[0] > 1.0: tags.append(tag[1]) documents = [TaggedDocument(doc['text'].split(" "), tags) @@ -126,23 +172,64 @@ def generate_text_vector(card): return vector -def printProgressBar(iteration, total, prefix='', suffix='', decimals=1, length=100, fill='█', printEnd="\r"): +def print_progress_bar(iteration, total, prefix='', suffix='', decimals=1, length=100, fill='█', printEnd="\r"): percent = ("{0:." + str(decimals) + "f}").format(100 * (iteration / float(total))) filledLength = int(length * iteration // total) bar = fill * filledLength + '-' * (length - filledLength) print(f'\r{prefix} |{bar}| {percent}% {suffix}', end=printEnd) - # Print New Line on Complete if iteration == total: print() + +def get_card_array(card): + rarity = card['rarity'] + type = card['card_type'] + toughness = card.get('toughness', -1) + power = card.get('power', -1) + identity = card['color_identity'] + text_vec = card['text_vec'] + if (toughness is None): + toughness = -1 + if (power is None): + power = -1 + arr = [rarity, type, toughness, power, identity] + return [y for x in [arr, text_vec] for y in x] + + +def project_cards(cards): + print('Doing TSNE') + alg = TSNE(n_components=2) + card_data = [get_card_array(x) for x in cards] + np_card_data = np.array(card_data) + return alg.fit_transform(np_card_data) + + if __name__ == "__main__": commands = sys.argv + if 'show' in commands: + full_cards = pickle.load(open(full_card_path, 
"rb")) + vec_cards = pickle.load(open(card_vec_path, "rb")) + filtered = list( + filter(lambda card: card['text_vec'] is not None, vec_cards)) + print(len(full_cards)) + print(full_cards[28655]) + print(len(filtered)) + print(filtered[15803]) + + exit() + + if 'draw' in commands: + full_cards = pickle.load(open(full_card_path, "rb")) + create_csv(full_cards) + exit() + if 'build' in commands: # load_cards() print("Is building") data = get_card_json() + #TODO: Filter cards with no text - they are art cards and are not playable card_types = get_card_type_list(data) card_rarity = get_card_rarity(data) @@ -158,6 +245,7 @@ def printProgressBar(iteration, total, prefix='', suffix='', decimals=1, length= if ('build' in commands): print('Building Model') build_doc2vec_model(formatted_cards) + text_vec_cards = formatted_cards # Wipe old vectors if we rebuild model print('Generating text vectors') process_count = 0 @@ -165,12 +253,28 @@ def printProgressBar(iteration, total, prefix='', suffix='', decimals=1, length= if text_vec_cards[x]['text_vec'] is None: vector = generate_text_vector(formatted_cards[x]) formatted_cards[x]['text_vec'] = vector + else: + formatted_cards[x] = text_vec_cards[x] if (process_count % 100 == 1): - printProgressBar(prefix=' Progress:', iteration=process_count/100, total=len(formatted_cards)/100) + print_progress_bar( + prefix=' Progress:', iteration=process_count/100, total=len(formatted_cards)/100) pickle.dump(formatted_cards, open(card_vec_path, "wb")) process_count += 1 + if (process_count == len(formatted_cards)): + print_progress_bar( + prefix=' Progress:', iteration=process_count/100, total=len(formatted_cards)/100) + if process_count > 0: # If we processed any, do one extra dump for good measure to capture those missed in the % check pickle.dump(formatted_cards, open(card_vec_path, "wb")) + + tsne_values = project_cards(formatted_cards) + formatted_cards = list(formatted_cards) + for i, c in enumerate(tsne_values): + x, y = c.tolist() + 
formatted_cards[i]['x'] = x + formatted_cards[i]['y'] = y + + pickle.dump(formatted_cards, open(full_card_path, "wb"))