From e67b4f974542314d6b98014aa2a33487159224e0 Mon Sep 17 00:00:00 2001 From: Qisheng Li Date: Tue, 28 Jun 2016 11:28:43 -0500 Subject: [PATCH 1/6] config change --- cartograph/Config.py | 4 ++-- cartograph/MapStyler.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cartograph/Config.py b/cartograph/Config.py index 5cf5582..147f683 100644 --- a/cartograph/Config.py +++ b/cartograph/Config.py @@ -18,9 +18,9 @@ def __init__(self): self.NUM_CLUSTERS = 15 # number of clusters to generate from K-means self.TSNE_THETA = 0.5 # lower values = more accurate maps, but take (much) longer self.TSNE_PCA_DIMENSIONS = None # None indicates not to use PCA first - self.PERCENTAGE_WATER = 0.05 + self.PERCENTAGE_WATER = 0.1 - self.COLORWHEEl = ["#795548", "#FF5722", "#FFC107", "#CDDC39", "#4CAF50", "#009688", "#00BCD4", "#2196F3", "#3F51B5", "#673AB7", "#22375a", "#4bfb29", "#2e2e2e", "#cc6733", "#00deaf"] + self.COLORWHEEL = ["#795548", "#FF5722", "#FFC107", "#CDDC39", "#4CAF50", "#009688", "#00BCD4", "#2196F3", "#3F51B5", "#673AB7", "#22375a", "#4bfb29", "#2e2e2e", "#cc6733", "#00deaf"] # ========== BorderFactory ========== self.MIN_NUM_IN_CLUSTER = 30 # eliminates noise diff --git a/cartograph/MapStyler.py b/cartograph/MapStyler.py index 68e171d..6bd68c8 100644 --- a/cartograph/MapStyler.py +++ b/cartograph/MapStyler.py @@ -63,7 +63,7 @@ def generateSinglePolygonStyle(filename, opacity, color, gamma=1): # ===== Generate Map File ===== def generateCountryPolygonStyle(filename, opacity, clusterIds): - colorWheel = Config.COLORWHEEL + colorWheel = config.COLORWHEEL s = mapnik.Style() for i, c in enumerate(clusterIds): r = mapnik.Rule() @@ -77,7 +77,7 @@ def generateCountryPolygonStyle(filename, opacity, clusterIds): def generateContourPolygonStyle(opacity, numContours, gamma=1): - colorWheel = Config.COLORWHEEL + colorWheel = config.COLORWHEEL s = mapnik.Style() for i in range(config.NUM_CLUSTERS): r = mapnik.Rule() From 342fd34609490f960e16683b8f972cac7be83c36 Mon Sep 17 00:00:00 2001 From: Qisheng Li Date: Wed, 6 Jul 2016 14:06:17 -0500 Subject: [PATCH 2/6] interpolation --- cartograph/Config.py | 37 +++++++++++++++-------- knn.py | 70 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+), 13 deletions(-) create mode 100644 knn.py diff --git a/cartograph/Config.py b/cartograph/Config.py index 29fbe90..f04ee63 100644 --- a/cartograph/Config.py +++ b/cartograph/Config.py @@ -4,18 +4,21 @@ class Config: def __init__(self): self.FILE_NAME_WIKIBRAIN_VECS = "./data/labdata/vecs.tsv" self.FILE_NAME_WIKIBRAIN_NAMES = "./data/labdata/names.tsv" - self.FILE_NAME_NUMBERED_VECS = "./data/labdata/numberedVecsFull.tsv" - self.FILE_NAME_NUMBERED_NAMES = "./data/labdata/numberedNamesFull.tsv" - self.FILE_NAME_ARTICLE_COORDINATES = "./data/labdata/tsne_cache_full.tsv" - self.FILE_NAME_WATER_AND_ARTICLES = "./data/tsv/water_and_article_coordinates.tsv" - self.FILE_NAME_WATER_CLUSTERS = "./data/tsv/clusters_with_water_pts.tsv" - self.FILE_NAME_NUMBERED_CLUSTERS = "./data/tsv/numberedClusters.tsv" - self.FILE_NAME_KEEP = "./data/tsv/keep.tsv" - self.FILE_NAME_POPULARITY = "./data/labdata/article_pageview_full.tsv" - self.FILE_NAME_NUMBERED_POPULARITY = "./data/tsv/popularity_with_id.tsv" + + self.FILE_NAME_NUMBERED_VECS = "./data/labdata/interest_vecs.tsv" + self.FILE_NAME_NUMBERED_NAMES = "./data/labdata/interest_names.tsv" + + self.FILE_NAME_ARTICLE_COORDINATES = "./data/interpolation/interest_interpolated_coordinates.tsv" + #self.FILE_NAME_ARTICLE_COORDINATES = "./data/labdata/tsne_cache_interest_interpolated.tsv" + self.FILE_NAME_WATER_AND_ARTICLES = "./data/tsv/water_and_article_coordinates_interest.tsv" + self.FILE_NAME_WATER_CLUSTERS = "./data/tsv/clusters_with_water_pts_interest.tsv" + self.FILE_NAME_NUMBERED_CLUSTERS = "./data/tsv/numberedClusters_interest.tsv" + self.FILE_NAME_KEEP = "./data/tsv/keep_interest.tsv" + self.FILE_NAME_POPULARITY = "./data/labdata/interest_pageview.tsv" + self.FILE_NAME_NUMBERED_POPULARITY = "./data/tsv/popularity_with_id_interest.tsv" self.FILE_NAME_SCALE_DENOMINATORS = "./data/labdata/scale_denominators.tsv" - self.NUM_CLUSTERS = 15 # number of clusters to generate from K-means + self.NUM_CLUSTERS = 4 # number of clusters to generate from K-means self.TSNE_THETA = 0.5 # lower values = more accurate maps, but take (much) longer self.TSNE_PCA_DIMENSIONS = None # None indicates not to use PCA first self.PERCENTAGE_WATER = 0.1 @@ -24,14 +27,16 @@ def __init__(self): # ========== BorderFactory ========== self.MIN_NUM_IN_CLUSTER = 30 # eliminates noise - self.BLUR_RADIUS = 5 # defines size of neighborhood for blurring + self.BLUR_RADIUS = 4 # defines size of neighborhood for blurring # ========== mapGenerator ========== self._localTiles = "./data/tiles/" self._serverTiles = "/var/www/html/tiles/" self.DIRECTORY_NAME_TILES = self._localTiles - self.FILE_NAME_REGION_NAMES = "./data/labdata/top_categories_full.tsv" - self.FILE_NAME_IMGNAME = "./data/images/world" + + self.FILE_NAME_REGION_NAMES = "./data/labdata/top_categories_interest.tsv" + self.FILE_NAME_IMGNAME = "./data/images/world_interest" + self.FILE_NAME_IMGDOT = "./data/labdata/blackDot.png" self.FILE_NAME_COUNTRIES = "./data/geojson/countries.geojson" self.FILE_NAME_CONTOUR_DATA = "./data/geojson/contourData.geojson" @@ -41,6 +46,12 @@ def __init__(self): self.FILE_NAME_TOP_TITLES = "./data/geojson/top_100_articles.geojson" + # ========== Interpolation ========== + self.FILE_NAME_MORE_VECS = "./data/labdata/interest_vecs.tsv" + self.FILE_NAME_MORE_NAMES = "./data/labdata/interest_names.tsv" + self.FILE_NAME_MORE_COORDINATES = "./data/interpolation/interest_interpolated_coordinates.tsv" + + __config = Config() diff --git a/knn.py b/knn.py new file mode 100644 index 0000000..50d8125 --- /dev/null +++ b/knn.py @@ -0,0 +1,70 @@ +import luigi +import os +import cartograph +import numpy as np +from sklearn.neighbors import KDTree +from sklearn.metrics.pairwise import cosine_similarity +from pprint import pprint + +from cartograph import Config +from cartograph import Util +config = Config.BAD_GET_CONFIG() + + + + +peopleDict = Util.read_features(config.FILE_NAME_NUMBERED_VECS, #config people + config.FILE_NAME_NUMBERED_NAMES, + config.FILE_NAME_ARTICLE_COORDINATES) +peopleKeys = list(peopleDict.keys()) +peopleVectors = np.array([peopleDict[vID]["vector"] for vID in peopleKeys]) +peopleNames = [peopleDict[nID]["name"] for nID in peopleKeys] +x = [float(peopleDict[fID]["x"]) for fID in peopleKeys] +y = [float(peopleDict[fID]["y"]) for fID in peopleKeys] + + + +interestDict = Util.read_features(config.FILE_NAME_MORE_VECS, + config.FILE_NAME_MORE_NAMES) +interestKeys = list(interestDict.keys()) +interestVectors = np.array([interestDict[vID]["vector"] for vID in interestKeys]) +interestNames = [interestDict[nID]["name"] for nID in interestKeys] + + +kdt = KDTree(peopleVectors, leaf_size=30) + + +x_lst = [] +y_lst = [] +knn_dict = {} +for i in range(len(interestVectors)): + dist, ind = kdt.query([interestVectors[i]], k=5) + temp_x_lst=[] + temp_y_lst=[] + temp_name_lst = [] + weights = [] + for j in ind[0]: + temp_x_lst.append(x[j]) + temp_y_lst.append(y[j]) + temp_name_lst.append(peopleNames[j]) + weights.append(float(cosine_similarity([interestVectors[i]], [peopleVectors[j]]))) #assume cosine similarity >0 + #print weights + #weights = [1,1,1,1,1] + x_lst.append(np.average(temp_x_lst, axis = 0, weights = weights)) + y_lst.append(np.average(temp_y_lst, axis = 0, weights = weights)) + knn_dict[interestNames[i]] = temp_name_lst + + +Util.write_tsv(config.FILE_NAME_MORE_COORDINATES, + ("index", "x", "y"), interestKeys, x_lst, y_lst) + + +#pprint(knn_dict) + + + + + + + +#write coordinates to FILE_NAME_MORE_COORDINATES \ No newline at end of file From 55559ca3925947aaa9cbd63e2c2304016b106c90 Mon Sep 17 00:00:00 2001 From: Qisheng Li Date: Wed, 6 Jul 2016 15:38:24 -0500 Subject: [PATCH 3/6] fix bug --- cartograph/MapStyler.py | 118 ++-------------------------------------- 1 file changed, 6 insertions(+), 112 deletions(-) diff --git a/cartograph/MapStyler.py b/cartograph/MapStyler.py index 1d2057b..49265ef 100644 --- a/cartograph/MapStyler.py +++ b/cartograph/MapStyler.py @@ -3,12 +3,12 @@ class MapStyler: - def __init__(self, config): - self.numClusters = config.getint("PreprocessingConstants", "num_clusters") - self.colorWheel = config.get("MapData", "colorwheel")[2:-2].split("', '") - self.width = config.getint("MapConstants", "map_width") - self.height = config.getint("MapConstants", "map_height") + def __init__(self, numClusters, colorWheel, width=800, height=600): + self.numClusters = numClusters self.m = None + self.width = width + self.height = height + self.colorWheel = colorWheel d = 3000000 self.extents = mapnik.Box2d(-d, -d, d, d) @@ -23,28 +23,6 @@ def makeMap(self, contourFilename, countryFilename, clusterIds): for feat in jsContour['features']: numContours[feat['properties']['clusterNum']] += 1 -<<<<<<< HEAD - self.m.append_style("countries", generateCountryPolygonStyle(countryFilename, 1.0, clusterIds)) - self.m.layers.append(generateLayer(countryFilename, "countries", ["countries"])) - - styles = generateContourPolygonStyle(1.0, numContours, clusterIds) - sNames = [] - for i, s in enumerate(styles): - name = "contour" + str(i) - self.m.append_style(name, s) - sNames.append(name) - self.m.layers.append(generateLayer(contourFilename, "contour", sNames)) - - self.m.append_style("outline", generateLineStyle("#999999", 1.0, '3,3')) - self.m.layers.append(generateLayer(countryFilename, "outline", ["outline"])) - - # self.m.append_style("outline", generateLineStyle("#999999", 1.0, '3,3')) - # self.m.layers.append(generateLayer(contourFilename, "outline", ["outline"])) - - #extent = mapnik.Box2d(-180.0, -180.0, 90.0, 90.0) - #print(extent) - #self.m.zoom_to_box(self.extents) -======= self.m.append_style("countries", self.generateCountryPolygonStyle(countryFilename, .20, clusterIds)) @@ -66,7 +44,6 @@ def makeMap(self, contourFilename, countryFilename, clusterIds): # extent = mapnik.Box2d(-180.0, -180.0, 90.0, 90.0) # print(extent) # self.m.zoom_to_box(self.extents) ->>>>>>> 4db0d38a3ac41fe540000c1d3695c7b6080c0fdd self.m.zoom_all() # print(self.m.envelope()) @@ -83,97 +60,15 @@ def saveImage(self, mapFilename, imgFilename): self.m.zoom_all() mapnik.render_to_file(self.m, imgFilename) -<<<<<<< HEAD - -""" -TODO: move the functions below into the above class -""" - - -def generateSinglePolygonStyle(filename, opacity, color, gamma=1): - s = mapnik.Style() - r = mapnik.Rule() - symbolizer = mapnik.PolygonSymbolizer() - symbolizer.fill = mapnik.Color('steelblue') - symbolizer.fill_opacity = opacity - symbolizer.gamma = gamma - r.symbols.append(symbolizer) - s.rules.append(r) - return s - - -# ===== Generate Map File ===== -def generateCountryPolygonStyle(filename, opacity, clusterIds): - babyColors = ["#fef7f8", "#76e696", "#ca6dec", "#ade095", "#aba5f8", - "#c4ff0c", "#d9c8ff", "#00d833", "#fec3ff", "#d6e200", - "#d5d6ff", "#ff9942", "#2678ff", "#ffaf98", "#46a2fd", - "#ff2b3b", "#02fac8", "#ff9ae3", "#b5e3c4", "#ff30e7"] - - s = mapnik.Style() - for i, c in enumerate(clusterIds): - r = mapnik.Rule() - symbolizer = mapnik.PolygonSymbolizer() - symbolizer.fill = mapnik.Color(babyColors[i]) -======= def generateSinglePolygonStyle(self, filename, opacity, color, gamma=1): s = mapnik.Style() r = mapnik.Rule() symbolizer = mapnik.PolygonSymbolizer() symbolizer.fill = mapnik.Color('steelblue') ->>>>>>> 4db0d38a3ac41fe540000c1d3695c7b6080c0fdd symbolizer.fill_opacity = opacity symbolizer.gamma = gamma r.symbols.append(symbolizer) s.rules.append(r) -<<<<<<< HEAD - return s - - -def generateContourPolygonStyle(opacity, numContours, clusterIds, gamma=1): - colorWheel = config.COLORWHEEL - color = ["#f19daa", "#26cf58", "#a51cd7", "#70c946", "#5346f1", - "#7da400", "#9561ff", "#00711b", "#fd5cff", "#757b00", - "#6e76ff", "#da6500", "#0048be", "#ff6031", "#026fdc", - "#c3000f"] - styles = [] - for i in range(config.NUM_CLUSTERS): - for j in range(numContours[i]): - s = mapnik.Style() - r = mapnik.Rule() - symbolizer = mapnik.PolygonSymbolizer() - l = color[i] - symbolizer.fill = mapnik.Color(colorWheel[l][j]) - symbolizer.fill_opacity = opacity - symbolizer.gamma = gamma - r.symbols.append(symbolizer) - r.filter = mapnik.Expression('[identity].match("' + str(j) + str(i) + '")') - s.rules.append(r) - styles.append(s) - return styles - - -def generateLineStyle(color, opacity, dash=None): - s = mapnik.Style() - r = mapnik.Rule() - symbolizer = mapnik.LineSymbolizer() - symbolizer.stroke = mapnik.Color(color) - symbolizer.stroke_opacity = opacity - if dash: - symbolizer.stroke_dasharray = dash - r.symbols.append(symbolizer) - s.rules.append(r) - return s - - -def generateLayer(jsonFile, name, styleNames): - ds = mapnik.GeoJSON(file=jsonFile) - layer = mapnik.Layer(name) - layer.datasource = ds - for s in styleNames: - layer.styles.append(s) - layer.srs = '+init=epsg:4236' - return layer -======= return s def generateCountryPolygonStyle(self, filename, opacity, clusterIds): @@ -219,5 +114,4 @@ def generateLayer(self, jsonFile, name, styleName): layer.datasource = ds layer.styles.append(styleName) layer.srs = '+init=epsg:4236' - return layer ->>>>>>> 4db0d38a3ac41fe540000c1d3695c7b6080c0fdd + return layer \ No newline at end of file From 889b2c2f01361dd9c2fa3516f9db0c902537a1b5 Mon Sep 17 00:00:00 2001 From: Qisheng Li Date: Thu, 7 Jul 2016 10:17:57 -0500 Subject: [PATCH 4/6] knn --- knn.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/knn.py b/knn.py index 50d8125..c9d9603 100644 --- a/knn.py +++ b/knn.py @@ -12,7 +12,6 @@ - peopleDict = Util.read_features(config.FILE_NAME_NUMBERED_VECS, #config people config.FILE_NAME_NUMBERED_NAMES, config.FILE_NAME_ARTICLE_COORDINATES) @@ -36,7 +35,7 @@ x_lst = [] y_lst = [] -knn_dict = {} +#knn_dict = {} for i in range(len(interestVectors)): dist, ind = kdt.query([interestVectors[i]], k=5) temp_x_lst=[] @@ -47,12 +46,12 @@ temp_x_lst.append(x[j]) temp_y_lst.append(y[j]) temp_name_lst.append(peopleNames[j]) - weights.append(float(cosine_similarity([interestVectors[i]], [peopleVectors[j]]))) #assume cosine similarity >0 + weights.append(float(cosine_similarity([interestVectors[i]], [peopleVectors[j]]))) #cosine similarity >0 #print weights #weights = [1,1,1,1,1] x_lst.append(np.average(temp_x_lst, axis = 0, weights = weights)) y_lst.append(np.average(temp_y_lst, axis = 0, weights = weights)) - knn_dict[interestNames[i]] = temp_name_lst + #knn_dict[interestNames[i]] = temp_name_lst Util.write_tsv(config.FILE_NAME_MORE_COORDINATES, @@ -61,10 +60,3 @@ #pprint(knn_dict) - - - - - - -#write coordinates to FILE_NAME_MORE_COORDINATES \ No newline at end of file From 8075e83f267492652c34a0cf041f0a7a077e0871 Mon Sep 17 00:00:00 2001 From: Qisheng Li Date: Thu, 7 Jul 2016 12:00:05 -0500 Subject: [PATCH 5/6] upate directory --- .gitignore | 6 ++++++ conf.txt | 1 - workflow.py | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) delete mode 100644 conf.txt diff --git a/.gitignore b/.gitignore index 3744002..652dd86 100644 --- a/.gitignore +++ b/.gitignore @@ -6,8 +6,12 @@ /data/geojson /data/tiles /data/labdata.key +/data/macademia_people map.xml +*.geojson +*.geoJSON + .DS_Store /data/.DS_Store /install/.DS_Store @@ -15,3 +19,5 @@ map.xml __pycache__ *.pyc + +conf.txt \ No newline at end of file diff --git a/conf.txt b/conf.txt deleted file mode 100644 index e4b31eb..0000000 --- a/conf.txt +++ /dev/null @@ -1 +0,0 @@ -[PreprocessingConstants] diff --git a/workflow.py b/workflow.py index 4d6e33d..c38681d 100644 --- a/workflow.py +++ b/workflow.py @@ -187,7 +187,7 @@ def run(self): name = featureDict[featureID]["name"] popularityList.append(nameDict[name]) - Util.write_tsv(config.FILE_NAME_NUMBERED_POPULARITY, + Util.write_tsv(config.get("PreprocessingFiles", "popularity_with_id"), ("id", "popularity"), idList, popularityList) From f032219e9bf71a0e606d8c1f35e807c7b8eac264 Mon Sep 17 00:00:00 2001 From: Jaco Date: Thu, 7 Jul 2016 14:35:12 -0500 Subject: [PATCH 6/6] Fixed bad use of config in t-SNE things in workflow. --- .gitignore | 4 ---- workflow.py | 3 +-- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index efa75d5..5797177 100644 --- a/.gitignore +++ b/.gitignore @@ -20,8 +20,4 @@ map.xml __pycache__ *.pyc -<<<<<<< HEAD conf.txt -======= -conf.txt ->>>>>>> 3eb884c2b023e0f719358b1e506adb67f28de185 diff --git a/workflow.py b/workflow.py index a5347ca..e7eb191 100644 --- a/workflow.py +++ b/workflow.py @@ -287,7 +287,7 @@ def run(self): X = [float(points[k]['x']) for k in keys] Y = [float(points[k]['y']) for k in keys] maxVal = max(abs(v) for v in X + Y) - scaling = config.MAX_COORDINATE / maxVal + scaling = config.get("MapConstants", "max_coordinate") / maxVal X = [x * scaling for x in X] Y = [y * scaling for y in Y] Util.write_tsv(config.get("PreprocessingFiles", @@ -434,7 +434,6 @@ def run(self): range(1, len(regionList) + 1), regionList) - class CreateContours(MTimeMixin, luigi.Task): ''' Make contours based on density of points inside the map