1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
38:
39:
40:
41:
42:
43:
44:
45:
46:
47:
48:
49:
50:
51:
52:
53:
54:
55:
56:
57:
58:
59:
60:
61:
62:
63:
64:
65:
66:
67:
68:
69:
70:
71:
72:
73:
74:
75:
76:
77:
78:
79:
80:
81:
82:
83:
84:
85:
86:
87:
88:
89:
90:
91:
92:
93:
94:
95:
96:
97:
98:
99:
100:
101:
102:
103:
104:
105:
106:
107:
108:
109:
110:
111:
112:
113:
114:
115:
116:
117:
118:
119:
120:
121:
122:
123:
124:
125:
126:
127:
128:
129:
130:
131:
132:
133:
134:
135:
136:
137:
138:
139:
140:
141:
142:
143:
144:
145:
146:
147:
148:
149:
150:
151:
152:
153:
154:
155:
156:
157:
158:
159:
160:
161:
162:
163:
164:
165:
166:
167:
168:
169:
170:
171:
172:
173:
174:
175:
176:
177:
178:
179:
180:
181:
182:
183:
184:
185:
186:
187:
188:
189:
190:
191:
192:
193:
194:
195:
196:
197:
198:
199:
200:
201:
202:
203:
204:
205:
206:
207:
208:
209:
210:
211:
212:
213:
214:
215:
216:
217:
218:
219:
220:
221:
222:
223:
224:
225:
226:
227:
228:
229:
230:
231:
232:
233:
234:
235:
236:
237:
238:
239:
240:
241:
242:
243:
244:
245:
246:
247:
248:
249:
250:
251:
252:
253:
254:
255:
256:
257:
258:
259:
260:
261:
262:
263:
264:
265:
266:
267:
268:
269:
270:
271:
272:
273:
274:
275:
276:
277:
278:
279:
280:
281:
282:
283:
284:
285:
286:
287:
288:
289:
290:
291:
292:
293:
294:
295:
296:
297:
298:
|
import csv
inputFname = 'N-x-N-2.csv'
class Point:
def __init__(self, compoundId, index):
lst = compoundId.split('.')
assert len(lst) == 2
self.id = int(lst[0])
self.pref = int(lst[1])
self.index = index
def __str__(self):
'''Human readable representation of the point.'''
return str(self.id) + '.' + str(self.pref)
def cmpPoints(a, b):
if a.pref < b.pref:
return -1
elif a.pref > b.pref:
return +1
assert a.pref == b.pref
if a.id < b.id:
return -1
elif a.id > b.id:
return +1
else:
return 0
def buildMatrices(fname, distance):
fin = open(fname, 'rb')
reader = csv.reader(fin)
# Get the line with point identifications, build the point-object list.
idlst = reader.next()
idlst = idlst[1:]
points = [ Point(e, n) for n, e in enumerate(idlst) ]
mN = [] # the numeric matrix as the list of lists
mB = [] # the boolean matrix as the list of lists
for row in reader:
lstN = [] # next row of the numeric matrix
lstB = [] # next row of the boolean matrix
for e in row[1:]:
if e == '':
n = 0.0
else:
n = float(e)
lstN.append(n)
lstB.append(n <= distance) # also the reflexivity
mN.append(lstN)
mB.append(lstB)
fin.close()
return mN, mB, points
def subMatrix(mSrc, points):
# The mSrc is full matrix (can be both boolean or numeric).
# Consider only the rows and columns indexed by the points.
# The list of points is subset of the full list of points.
idxLst = [ p.index for p in points ]
m = [] # the matrix as the list of lists
for rowIdx in idxLst:
lst = [] # row reduced to points columns
row = mSrc[rowIdx]
for colIdx in idxLst:
lst.append(mSrc[rowIdx][colIdx])
m.append(lst)
return m
def copyMatrix(src):
dst = [] # the matrix as the list of lists
for row in src:
dst.append(row[:]) # append copy of the list
return dst
def transitiveClosure(m):
'''Transitive closure of the boolean matrix (in situ).'''
n = len(m)
rlst = range(n)
changed = True
while changed:
changed = False
for k in rlst:
for i in rlst:
for j in rlst:
value = m[i][j] or (m[i][k] and m[k][j])
if value != m[i][j]:
m[i][j] = value
changed = True
return m
def boolMatrixToFile(m, f):
# f ... text file open for writing
for row in m:
lst = []
for e in row:
if e:
lst.append('@')
else:
lst.append('-')
f.write(''.join(lst) + '\n')
def separateSinglesFromIslands(m, points):
# The points have to be related to bool matrix m.
# The m is expected to be the full matrix.
singles = [] # list of single point islands
others = [] # list of points that are parts of islands (indices)
for idx, row in enumerate(m):
singleDetected = True # optimistic initialization
for i, v in enumerate(row):
if v and idx != i: # if related to another point
singleDetected = False # cannot be a single
break # shortcut evaluation
if singleDetected:
singles.append(points[idx])
else:
others.append(points[idx])
return singles, others
def getIslands(m, points):
# The points list must be related to the boolean matrix m.
# The m is expected to contain the transitive closure.
processed = set() # set of processed point indices
islands = [] # list of lists (one list, one island of more points)
for idx, row in enumerate(m):
if idx in processed:
continue # already processed point (part of some island)
lst = [] # init -- the island with no points
for i, v in enumerate(row):
if v:
lst.append(points[i]) # point of the island
processed.add(i) # the point processed
if len(lst) > 1:
islands.append(lst) # another island
return islands
def singlesToFile(points, fname):
fout = open(fname, 'w')
lst = [ str(p) for p in points ]
fout.write('Singles: %s\n\n' % ', '.join(lst))
fout.close()
def islandInfoStr(n, isle):
lst = [ str(p) for p in isle ]
return 'Island %s: %s' % (n, ', '.join(lst))
def islandsToFile(islands, fname):
fout = open(fname, 'w')
for n, isle in enumerate(islands):
fout.write(islandInfoStr(n, isle))
fout.write('\n')
fout.close()
def subsolutionGenerator(points, m):
# This is the generator that recursively searches
# for the subsolutions related to the starting points.
# and returns (yields) them.
# The m must be full boolean matrix that can be indexed
# by pt.index.
# Quit the recursion if there is nothing to process.
# Return the empty list of subsolutions in such case.
if len(points) == 0:
yield []
return
# Get the list of startpoints. They have all the same
# preference as the first point (the list was sorted earlier).
pt = points[0]
pref = pt.pref
startLst = [ p for p in points if p.pref == pref ]
# If the startpoint "consumes" another startpoint in its
# subsolutions, then the consumed points cannot be used as
# starting points as they would produce the non-unique subsolutions.
# The detection will be done through the set of consumed
# starting points. This holds only on the same level of
# preference, i.e. for this recursive level.
consumed = set()
# Now start the recursive solution from each of the non-consumed
# starting points.
for startPt in startLst:
if startPt.id in consumed:
continue # skip the consumed starting point
# The starting point was not consumed but it is going to be
# consumed in this loop. It must not be a candidate for the
# recursive call.
consumed.add(startPt.id)
# Get the next candidates and recursively compute
# subsolutions of the candidates. Then yield the concatenation
# of the startpoint with the subsolutions of the candidates.
# The non-candidates are simply thrown away, because they cannot
# be part of the subsolution with the given starting point.
# The order of candicates must be preserved.
candidates = []
for pt in points:
if pt.id in consumed:
continue # the consumed point cannot be the candidate
if not m[startPt.index][pt.index]: # if the points do not touch...
candidates.append(pt)
# Get the subsolutions of the candidates recursively
# and concatenate it with the starting point. If the subsolution
# contains any of the starting points, they must be added to the
# consumed set. The detection is done via the same preference
# as the stating point has.
for subLst in subsolutionGenerator(candidates, m):
for p in subLst:
if p.pref == startPt.pref:
consumed.add(p.id) # starting point consumed by the subsolution
else:
break # the other points cannot have the same pref
# Yield the next subsolution.
yield [ startPt ] + subLst
def islandSubsolutions(n, isle, mBFull):
# n is just a number to identify the island in the output.
# isle is list of points.
# mBFull is the full boolean matrix.
fname = 'isle%02d.txt' % n
fout = open(fname, 'w')
# Display the non-reduced island.
fout.write(islandInfoStr(n, isle))
fout.write('\n')
# Display the boolean matrix for the island.
m = subMatrix(mBFull, isle)
boolMatrixToFile(m, fout)
fout.write('\n')
# Now, get the subsolutions for the island. The points
# must be sorted to process the points with higher preference
# (i.e. smaller .n) earlier.
isle.sort(cmpPoints)
for x, subLst in enumerate(subsolutionGenerator(isle, mBFull)):
fout.write('Subsolution %d: ' % x)
idLst = [ str(pt) for pt in subLst ]
fout.write(', '.join(idLst))
fout.write('\n')
fout.close()
if __name__ == '__main__':
distance = 16100
mN, mB, points = buildMatrices(inputFname, distance)
singles, others = separateSinglesFromIslands(mB, points)
singlesToFile(singles, 'singles.txt')
m1 = subMatrix(mB, others)
m2 = transitiveClosure(copyMatrix(m1))
islands = getIslands(m2, others)
islandsToFile(islands, 'islands.txt')
for n, isle in enumerate(islands):
islandSubsolutions(n, isle, mB)
|