-
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
executable file
·146 lines (127 loc) · 4.61 KB
/
main.py
File metadata and controls
executable file
·146 lines (127 loc) · 4.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#! /usr/bin/env python
from getopt import getopt, GetoptError
from pandas import read_csv
import os
import sys
import sqlite3
from rdflib import Graph
def castForType (mapping, val, rowVal):
    """Convert val to the Python type named by the mapping's column type.

    mapping -- indexable whose item 2 is the column type name:
               "integer", "real" or "text".
    val     -- the raw value to convert.
    rowVal  -- not used by the conversion; kept in the signature so
               existing three-argument callers keep working.

    Returns int (val), float (val) or str (val) for the three supported
    type names.  Raises ValueError (an Exception subclass) naming the
    offending type for anything else; errors raised by int () / float ()
    on unparsable values propagate unchanged.
    """
    colType = mapping[2]
    if colType == "integer":
        return int (val)
    if colType == "real":
        return float (val)
    if colType == "text":
        return str (val)
    # A message-less Exception () gave no hint which mapping was wrong.
    raise ValueError ("unsupported column type: %r" % (colType,))
def getDefaultForType (mapping):
    """Return the placeholder value for a column that has no data.

    mapping -- indexable whose item 2 is the column type name.

    Returns 0 for "integer", 0.0 for "real" and "" for "text".
    Raises ValueError (an Exception subclass) naming the offending type
    for any other type name.
    """
    colType = mapping[2]
    if colType == "integer":
        return 0
    if colType == "real":
        return 0.0
    if colType == "text":
        return ""
    raise ValueError ("unsupported column type: %r" % (colType,))
# TODO hashing offers superior amortized asymptotic complexity
#def getMapping (icol, mappings):
# for ic, ocoln, ocolt in mappings:
# #print icol, ic, ocoln, ocolt
# if icol == ic: return ocoln
# #raise Exception ()
# return None
#def getMapping (rowVal, mappings):
# for mapping in mappings:
def getMapping (rowVals, mapping):
    """Resolve one column mapping against the triples of a single row.

    rowVals -- iterable of indexable triples; item 1 is compared with
               the mapping's item 0, item 0 and item 2 are returned /
               converted on a match.
    mapping -- indexable of (key to match, output column name, type name).

    Returns a 3-tuple (item 0 of the matching triple, column name,
    converted value) for the first triple that matches.  When nothing
    matches, returns (None, column name, default value for the type).
    """
    wanted, column = mapping[0], mapping[1]
    # Only the first matching triple counts; later duplicates are ignored.
    hit = next ((triple for triple in rowVals if triple[1] == wanted), None)
    if hit is None:
        return None, column, getDefaultForType (mapping)
    return hit[0], column, castForType (mapping, hit[2], hit)
def getMappings (rowVals, mappings):
    """Resolve every column mapping against the triples of a single row.

    rowVals  -- the triples belonging to one row; it is scanned once per
                mapping, so it must be re-iterable (a list, not a
                one-shot iterator).
    mappings -- iterable of column mappings, each passed to getMapping.

    Returns a list with one getMapping result per mapping, in the order
    of mappings.  A list comprehension is used instead of map () so the
    result is a real list on Python 3 as well as Python 2.
    """
    return [getMapping (rowVals, mapping) for mapping in mappings]
def _quoteIdentifier (name):
    """Return name as a double-quoted SQLite identifier, doubling embedded quotes."""
    return '"%s"' % str (name).replace ('"', '""')


def io (inputfile, mapfile, outputfile):
    """Load the RDF graph in inputfile and write one table row per subject.

    inputfile  -- path handed to rdflib's Graph.load.
    mapfile    -- optional path of a header-less, whitespace-delimited
                  file read with pandas; each record is used as a
                  (predicate, column name, column type) mapping.  When
                  falsy, every predicate found in the graph becomes a
                  "text" column named after the predicate itself.
    outputfile -- path of the SQLite database to create or extend.

    The table "inspections" is created if it does not exist, then one
    row is inserted per distinct subject; a column whose predicate is
    absent for a subject gets the type's default value.  The schema and
    every inserted row are printed to stdout.

    Raises ValueError when there are no column mappings at all (an
    empty graph without a mapfile, or an empty mapfile), since a table
    cannot be created without columns.
    """
    g = Graph ()
    g.load (inputfile)
    gRows = [(str (subj), str (pred), str (obj)) for subj, pred, obj in g]
    subj_set = set (t[0] for t in gRows) # row IDs
    if mapfile:
        mappings = [tuple (x) for x in read_csv (
            mapfile, header=None, delim_whitespace=True).to_records (
            index=False)]
    # TODO pred_set is an awful default value for the Sqlite column names
    else:
        pred_set = set (t[1] for t in gRows) # col names
        # list () so the mappings can be traversed repeatedly on Python 3 too.
        mappings = list (zip (pred_set, pred_set, ["text"] * len (pred_set)))
    if not mappings:
        raise ValueError ("no column mappings: nothing to write to %s"
                          % outputfile)

    # Column names are quoted because they cannot be bound as parameters
    # and the default names are predicate URIs, which are not valid bare
    # SQL identifiers.
    colNames = [_quoteIdentifier (m[1]) for m in mappings]
    schema = ','.join ("%s %s" % (quoted, m[2])
                       for quoted, m in zip (colNames, mappings))
    wildcards = ','.join (['?'] * len (mappings))
    table_insert_string = "INSERT INTO inspections(%s) VALUES (%s)" % (
        ','.join (colNames), wildcards)

    # Group the mapped triples by subject in a single pass instead of
    # rescanning every triple (and rebuilding the predicate list) for
    # each subject.  Triple order within a subject is preserved.
    mappedPreds = set (m[0] for m in mappings)
    rowsBySubject = {}
    for triple in gRows:
        if triple[1] in mappedPreds:
            rowsBySubject.setdefault (triple[0], []).append (triple)

    con = sqlite3.connect (outputfile)
    try:
        # "with con" commits on success and rolls back on error, so no
        # explicit commit is needed; it does not close the connection.
        with con:
            cur = con.cursor ()
            print (schema)
            cur.execute ("CREATE TABLE IF NOT EXISTS inspections(%s)" % schema)
            for rowID in subj_set:
                rowVals = rowsBySubject.get (rowID, [])
                # TODO ensure correct order
                preds = [m[2] for m in getMappings (rowVals, mappings)]
                print (preds)
                cur.execute (table_insert_string, preds)
    finally:
        con.close ()
def getUsage (argv):
    """Build the two-line usage text, with argv[0] as the program name.

    Returns both supported invocation forms joined by a newline, with
    no trailing newline.
    """
    prog = argv[0]
    templates = (
        "%s -i <inputfile.rdf> [-c <map.cnf>] -o <outputfile.sql>",
        "%s -i <inputfile.rdf> [-c <map.cnf>] [-e <of_ext>]",
    )
    return '\n'.join (template % prog for template in templates)
def printUsage (argv):
    """Write the usage text for argv[0], followed by a newline, to stdout."""
    sys.stdout.write (getUsage (argv) + '\n')
def parseArgs (argv):
    """Parse the command line into (inputfile, mapfile, outputfile).

    argv -- full argument vector; argv[0] is the program name.

    Options: -h/--help, -i/--if <input>, -o/--of <output>,
    -e/--ofe <output extension, default "sql">, -c/--conf <map file>.

    When no output file is given it is derived from the input path by
    replacing its extension with the output extension.  When no map
    file is given, "<input without extension>.cnf" is used if that file
    exists; otherwise mapfile stays None.

    Returns the 3-tuple, or None when help was requested (usage is
    printed).  Raises Exception with a descriptive message when the
    options are invalid (usage is printed first) or when no input file
    was supplied.
    """
    inputfile, mapfile, outputfile = None, None, None
    ofnameext = "sql"
    mapfilenameext = "cnf"
    try:
        opts, _ = getopt (argv[1:], "hi:o:e:c:",
                          ["help", "if=", "of=", "ofe=", "conf="])
    except GetoptError as err:
        printUsage (argv)
        raise Exception ("invalid command line: %s" % err)
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            printUsage (argv)
            return # None
        if opt in ("-i", "--if"): inputfile = arg
        elif opt in ("-o", "--of"): outputfile = arg
        elif opt in ("-e", "--ofe"): ofnameext = arg
        elif opt in ("-c", "--conf"): mapfile = arg
        else:
            # getopt only yields the options declared above, so this is
            # a programming error.  The former "assert false" named an
            # undefined variable and vanished under -O.
            raise AssertionError ("unhandled option: %s" % opt)
    if not inputfile:
        raise Exception ("missing required input file (-i/--if)")
    ifname, ifnameext = os.path.splitext (inputfile)
    if not outputfile:
        outputfile = "%s.%s" % (ifname, ofnameext)
    if not mapfile:
        mapfilename = "%s.%s" % (ifname, mapfilenameext)
        if os.path.isfile (mapfilename): mapfile = mapfilename
    return inputfile, mapfile, outputfile
def main (argv):
    """Parse argv and, unless parsing yielded nothing, run the conversion.

    parseArgs returns a falsy value when only help was requested; in
    that case nothing else happens.
    """
    parsed = parseArgs (argv)
    if parsed:
        # parsed is (inputfile, mapfile, outputfile), the order io expects.
        io (*parsed)
# Script entry point: run the conversion, then exit with status 0
# (main returns None, and sys.exit (None) means success).
if __name__ == '__main__':
    sys.exit (main (sys.argv))