Bboatman · Bboatman · Apr 30, 2023
diff --git a/magicDeckGenerator/lib/vectorize/main.py b/magicDeckGenerator/lib/vectorize/main.py
@@ -6,11 +6,15 @@
 import sys
 import pickle
 from rake_nltk import Rake
-import re
+from sklearn.manifold import TSNE
+import numpy as np
+import plotly.express as px
 
 card_path = './models/scryfall-default-cards.json'
 doc2vec_path = "./models/card_doc2vec"
 card_vec_path = './models/card_vecs.p'
+full_card_path = './models/full_cards.p'
+graph_point_path = './models/graphPoints.csv'
 
 color_identities = {'': 0, 'C': 0, 'R': 1, 'U': 2, 'G': 3,
                     'B': 4, 'W': 5, 'RU': 6, 'GR': 7, 'BR': 8,
@@ -21,6 +25,16 @@
                     'BGRU': 26, 'GRUW': 27, 'BRUW': 28, 'BGRW': 29,
                     'BGUW': 30, 'BGRUW': 31}
 
+hex_colors = {
+    0: 'grey',
+    1: 'red',
+    2: 'blue',
+    3: 'green',
+    4: 'black',
+    5: 'white',
+    10: 'orange'
+}
+
 
 def load_cards():
     url = 'https://api.scryfall.com/bulk-data/oracle-cards'
@@ -55,6 +69,7 @@ def get_card_rarity(card_data):
 
 
 def convert_str_val_to_num(value):
+    # 20 is the highest concievable size of weird cards
     if value == "*":
         return 20.0
     elif value == "1+*" or value == "2+*":
@@ -102,6 +117,37 @@ def format_card_data(card_json, card_types, card_rarity):
         print(e, card['name'])
 
 
+def set_card_color(card):
+    color = card['color_identity']
+    if color > 5:
+        color = 10
+    return {
+        'name': card['name'],
+        'color': str(color),
+        'x': card['x'],
+        'y': card['y'],
+        'text': card['text']
+    }
+
+
+def create_csv(processed_card_data):
+    card_mapping = [set_card_color(card)
+                    for card in processed_card_data[:100]]
+
+    fig = px.scatter(card_mapping, x='x', y='y',
+                     color='color',
+                     color_discrete_map={
+                         '0': 'grey',
+                         '1': 'red',
+                         '2': 'blue',
+                         '3': 'green',
+                         '4': 'black',
+                         '5': 'white',
+                         '10': 'orange'
+                     }, hover_data=['name', 'text'])
+
+    fig.show()
+
 def build_doc2vec_model(cards):
     corpus = ''.join([card['text'].replace(
         "//", " ") + " \n" for card in cards])
@@ -110,7 +156,7 @@ def build_doc2vec_model(cards):
     tags = []
     phrases = r.get_ranked_phrases_with_scores()
     phrases.reverse()
-    for tag in phrases[:150000]:
+    for tag in phrases[:100000]:
         if tag[1] not in tags and tag[0] > 1.0:
             tags.append(tag[1])
     documents = [TaggedDocument(doc['text'].split(" "), tags)
@@ -126,23 +172,64 @@ def generate_text_vector(card):
     return vector
 
 
-def printProgressBar(iteration, total, prefix='', suffix='', decimals=1, length=100, fill='█', printEnd="\r"):
+def print_progress_bar(iteration, total, prefix='', suffix='', decimals=1, length=100, fill='█', printEnd="\r"):
     percent = ("{0:." + str(decimals) + "f}").format(100 *
                                                      (iteration / float(total)))
     filledLength = int(length * iteration // total)
     bar = fill * filledLength + '-' * (length - filledLength)
     print(f'\r{prefix} |{bar}| {percent}% {suffix}', end=printEnd)
-    # Print New Line on Complete
     if iteration == total:
         print()
 
+
+def get_card_array(card):
+    rarity = card['rarity']
+    type = card['card_type']
+    toughness = card.get('toughness', -1)
+    power = card.get('power', -1)
+    identity = card['color_identity']
+    text_vec = card['text_vec']
+    if (toughness is None):
+        toughness = -1
+    if (power is None):
+        power = -1
+    arr = [rarity, type, toughness, power, identity]
+    return [y for x in [arr, text_vec] for y in x]
+
+
+def project_cards(cards):
+    print('Doing TSNE')
+    alg = TSNE(n_components=2)
+    card_data = [get_card_array(x) for x in cards]
+    np_card_data = np.array(card_data)
+    return alg.fit_transform(np_card_data)
+
+
 if __name__ == "__main__":
     commands = sys.argv
+    if 'show' in commands:
+        full_cards = pickle.load(open(full_card_path, "rb"))
+        vec_cards = pickle.load(open(card_vec_path, "rb"))
+        filtered = list(
+            filter(lambda card: card['text_vec'] is not None, vec_cards))
+        print(len(full_cards))
+        print(full_cards[28655])
+        print(len(filtered))
+        print(filtered[15803])
+
+        exit()
+
+    if 'draw' in commands:
+        full_cards = pickle.load(open(full_card_path, "rb"))
+        create_csv(full_cards)
+        exit()
+
     if 'build' in commands:
         # load_cards()
         print("Is building")
 
     data = get_card_json()
+    #TODO: Filter cards with no text - they are art cards and are not playable
     card_types = get_card_type_list(data)
     card_rarity = get_card_rarity(data)
 
@@ -158,19 +245,36 @@ def printProgressBar(iteration, total, prefix='', suffix='', decimals=1, length=
     if ('build' in commands):
         print('Building Model')
         build_doc2vec_model(formatted_cards)
+        text_vec_cards = formatted_cards  # Wipe old vectors if we rebuild model
 
     print('Generating text vectors')
     process_count = 0
     for x in range(len(formatted_cards)):
         if text_vec_cards[x]['text_vec'] is None:
             vector = generate_text_vector(formatted_cards[x])
             formatted_cards[x]['text_vec'] = vector
+        else:
+            formatted_cards[x] = text_vec_cards[x]
         if (process_count % 100 == 1):
-            printProgressBar(prefix=' Progress:', iteration=process_count/100, total=len(formatted_cards)/100)
+            print_progress_bar(
+                prefix=' Progress:', iteration=process_count/100, total=len(formatted_cards)/100)
             pickle.dump(formatted_cards, open(card_vec_path, "wb"))
 
         process_count += 1
+        if (process_count == len(formatted_cards)):
+            print_progress_bar(
+                prefix=' Progress:', iteration=process_count/100, total=len(formatted_cards)/100)
+
 
     if process_count > 0:
         # If we processed any, do one extra dump for good measure to capture those missed in the % check
         pickle.dump(formatted_cards, open(card_vec_path, "wb"))
+
+    tsne_values = project_cards(formatted_cards)
+    formatted_cards = list(formatted_cards)
+    for i, c in enumerate(tsne_values):
+        x, y = c.tolist()
+        formatted_cards[i]['x'] = x
+        formatted_cards[i]['y'] = y
+
+    pickle.dump(formatted_cards, open(full_card_path, "wb"))