修訂 | e35f47f183c1106e612fb39c68506c718322d8de (tree) |
---|---|
時間 | 2009-11-27 02:16:07 |
作者 | lorenzo |
Committer | lorenzo |
I introduced numpy to load/save files and changed the function that hashes the
data (I make sure that in A--B contacts, A and B are sorted).
However, this sorting was probably already done elsewhere (the new results are
equal to the old ones).
@@ -354,6 +354,9 @@ | ||
354 | 354 | # my_hash(arr) >> 32 to get back the 1st argument |
355 | 355 | # my_hash(arr) & 0xFFFFFFFF to get back the second argument |
356 | 356 | |
357 | + arr=s.sort(arr) #maybe this is not needed at all; the rows of sliced data (which are arr) | |
358 | + #are already sorted out elsewhere. | |
359 | + | |
357 | 360 | return (arr[0] << 32) | arr[1] |
358 | 361 | |
359 | 362 |
@@ -378,6 +381,8 @@ | ||
378 | 381 | #then, instead of looking for couples of interacting tags, I will simply look for repeated |
379 | 382 | #occurrences of a number |
380 | 383 | |
384 | + n.savetxt("hashed_edge_list.dat",hash_and_time, fmt="%d") | |
385 | + | |
381 | 386 | return hash_and_time |
382 | 387 | |
383 | 388 | def detect_binary_contacts_among_same_two_tags(hash_and_time,\ |
@@ -457,11 +462,11 @@ | ||
457 | 462 | #sliced_interactions=p.load("sliced_dynamics.dat") |
458 | 463 | #sliced_interactions=p.load("edge_list.dat") |
459 | 464 | |
460 | -t_ini=1240092590 # 86400 #i.e. one day in seconds | |
465 | +t_ini=1257894000 # 86400 #i.e. one day in seconds | |
461 | 466 | |
462 | 467 | number_intervals=1 |
463 | 468 | interval_duration= -86400 #i.e. 1 day #604800 #i.e. one week in seconds |
464 | -t_end=-1246492820 #i.e. I choose when to end the time grid | |
469 | +t_end=1257894000+86400 #i.e. I choose when to end the time grid | |
465 | 470 | #whereas with a negative value of time_end everything is as it used to be. |
466 | 471 | |
467 | 472 |
@@ -3,7 +3,7 @@ | ||
3 | 3 | import pylab as p |
4 | 4 | import numpy as n |
5 | 5 | import sys |
6 | -import string | |
6 | + | |
7 | 7 | |
8 | 8 | |
9 | 9 | def contact_duration_and_interval_single_couple(time_list, delta_slice): |
@@ -13,8 +13,7 @@ | ||
13 | 13 | print "The chosen couple does not exist hence no analysis can be performed on it" |
14 | 14 | return |
15 | 15 | |
16 | - # delta_slice=delta_slice.astype("uint64") #I do not need floating point arithmetic | |
17 | - #(maybe I do not need this conversion at all, but it should not do any harm) | |
16 | + delta_slice=int(delta_slice) #I do not need floating point arithmetic | |
18 | 17 | |
19 | 18 | time_list=(time_list-time_list[0])/delta_slice |
20 | 19 | gaps=s.diff(time_list) #a bit more efficient than the line above |
@@ -149,7 +148,7 @@ | ||
149 | 148 | |
150 | 149 | |
151 | 150 | |
152 | - # delta_slice=delta_slice.astype("uint64") #I do not need floating point arithmetic | |
151 | + delta_slice=int(delta_slice) #I do not need floating point arithmetic | |
153 | 152 | |
154 | 153 | single_tag_no_rep=(single_tag_no_rep-single_tag_no_rep[0])/delta_slice |
155 | 154 | gaps=s.diff(single_tag_no_rep) #a bit more efficient than the line above |
@@ -276,57 +275,46 @@ | ||
276 | 275 | |
277 | 276 | #overall_gaps=overall_gaps[s.where(overall_gaps !=0)] |
278 | 277 | #overall_duration=overall_duration[s.where(overall_duration !=0)] |
279 | - p.save("many_tags_contact_interval_distr2.dat", overall_gaps , fmt='%d') | |
280 | - p.save("many_tags_contact_duration_distr2.dat", overall_duration , fmt='%d') | |
278 | + n.savetxt("many_tags_contact_interval_distr2.dat", overall_gaps , fmt='%d') | |
279 | + n.savetxt("many_tags_contact_duration_distr2.dat", overall_duration , fmt='%d') | |
281 | 280 | |
282 | 281 | return overall_duration, overall_gaps |
283 | 282 | |
283 | +# def my_hash(arr): | |
284 | +# my_hash=hash((arr[0], arr[1])) | |
285 | + | |
286 | +# return my_hash | |
287 | + | |
288 | + | |
284 | 289 | def my_hash(arr): #this will operate on a similar function which |
285 | 290 | #has already combined tag_id and bootcount (both taking 16 bites each) |
286 | 291 | |
287 | 292 | # my_hash(arr) >> 32 to get back the 1st argument |
288 | 293 | # my_hash(arr) & 0xFFFFFFFF to get back the second argument |
289 | 294 | |
295 | + arr=s.sort(arr) #maybe this is not needed at all; the rows of sliced data (which are arr) | |
296 | + #are already sorted out elsewhere. | |
297 | + | |
290 | 298 | return (arr[0] << 32) | arr[1] |
291 | 299 | |
292 | 300 | |
301 | + | |
293 | 302 | def couple_hash_table(sliced_data): |
294 | - hash_list=s.zeros(2*len(sliced_data)).reshape((len(sliced_data),2)).astype("int64") | |
295 | - | |
296 | - print "len(sliced_data) is, ",len(sliced_data) | |
297 | - | |
298 | - for m in xrange(len(sliced_data)): | |
299 | - # temp_arr=sliced_data[m,1:3 ] | |
300 | - # my_val=my_hash(temp_arr) | |
301 | - hash_list[m,0]=sliced_data[m, 0] | |
302 | - hash_list[m,1]=my_hash(sliced_data[m,1:3 ]) | |
303 | + hash_list=s.arange(len(sliced_data)) | |
303 | 304 | |
304 | - # if (m==3000): | |
305 | - | |
306 | - | |
307 | - # print "m is, ", m | |
308 | - # print "my_val is, ", my_val | |
309 | - # print "temp_arr is, ", temp_arr | |
310 | - # print "hence the position in emacs is, ", m+1 | |
311 | - # print "sliced_data[i,1 ], sliced_data[i,2 ] are, ", sliced_data[m,1 ], sliced_data[m,2 ] | |
312 | - # print "my_hash(sliced_data[i,1:3 ] is, ", my_hash(sliced_data[m,1:3 ]) | |
313 | - # print "my_hash(temp_arr) is, ", my_hash(temp_arr) | |
314 | - # print "hash_list[i,1] is,", hash_list[m,1] | |
315 | - | |
305 | + for i in xrange(len(sliced_data)): | |
306 | + hash_list[i]=my_hash(sliced_data[i,1:3 ]) | |
307 | + | |
316 | 308 | |
317 | - # hash_and_time=s.transpose(s.vstack((sliced_data[:, 0],hash_list))).astype("uint64") | |
309 | + hash_and_time=s.transpose(s.vstack((sliced_data[:, 0],hash_list))) | |
318 | 310 | |
319 | - hash_list=hash_list.astype("int64") | |
320 | - | |
321 | - p.save("hash_and_time.dat", hash_list,fmt='%d') | |
311 | + n.savetxt("hash_and_time.dat", hash_and_time,fmt='%d') | |
322 | 312 | |
323 | 313 | #the aim of this function is to identify each reported binary contact with a single number. |
324 | 314 | #then, instead of looking for couples of interacting tags, I will simply look for repeated |
325 | 315 | #occurrences of a number |
326 | 316 | |
327 | - print "end of rehashing" | |
328 | - | |
329 | - return hash_list | |
317 | + return hash_and_time | |
330 | 318 | |
331 | 319 | def detect_binary_contacts_among_same_two_tags(hash_and_time, delta_slice): |
332 | 320 |
@@ -363,26 +351,28 @@ | ||
363 | 351 | chosen_couple_contact_interval)) |
364 | 352 | |
365 | 353 | |
366 | - p.save("couple_interaction_duration_times.dat", couple_interaction_duration,fmt='%d') | |
354 | + n.savetxt("couple_interaction_duration_times.dat", couple_interaction_duration,fmt='%d') | |
367 | 355 | |
368 | - p.save("couple_interaction_interval_times.dat", couple_interaction_interval,fmt='%d') | |
356 | + n.savetxt("couple_interaction_interval_times.dat", couple_interaction_interval,fmt='%d') | |
369 | 357 | |
370 | 358 | |
371 | 359 | return couple_interaction_duration |
372 | 360 | |
373 | 361 | |
374 | -def remove_self_loops_and_save(slice_lump): | |
362 | +def remove_self_loops_and_save(data_file): | |
363 | + slice_lump=n.loadtxt(data_file) | |
364 | + slice_lump=slice_lump.astype("int") | |
375 | 365 | #Now get rid of the self-loops |
376 | - sel=s.where(slice_lump[:,1]!=slice_lump[:,2])[0] | |
366 | + sel=s.where(slice_lump[:,1]!=slice_lump[:,2]) | |
377 | 367 | |
378 | 368 | |
379 | 369 | |
380 | - slice_lump_no_loops=slice_lump[sel,:] #again, careful since this can be tricky! | |
370 | + slice_lump_no_loops=slice_lump[sel,:][0] #again, careful since this can be tricky! | |
381 | 371 | |
382 | - p.save("sliced_dynamics.dat",slice_lump_no_loops, fmt='%d') | |
372 | + n.savetxt("sliced_dynamics.dat",slice_lump_no_loops, fmt='%d') | |
383 | 373 | |
384 | 374 | |
385 | - return (slice_lump_no_loops.astype("int64")) | |
375 | + return 0 | |
386 | 376 | |
387 | 377 | |
388 | 378 |
@@ -395,24 +385,14 @@ | ||
395 | 385 | #sliced_interactions=p.load("sliced_dynamics.dat") |
396 | 386 | #sliced_interactions=p.load("edge_list.dat") |
397 | 387 | |
398 | - | |
399 | -f = open(sys.argv[1]) | |
400 | -sliced_interactions = [map(int, string.split(line)) for line in f.readlines()] | |
401 | -f.close() | |
402 | - | |
403 | -sliced_interactions = s.array(sliced_interactions, dtype="int64") | |
388 | +for arg in sys.argv[1:]: | |
389 | + sliced_interactions=n.loadtxt(arg) | |
404 | 390 | |
405 | - | |
406 | - | |
407 | -# sliced_interactions=sliced_interactions.astype("int") | |
408 | - | |
409 | -sliced_interactions=remove_self_loops_and_save(sliced_interactions) | |
410 | - | |
411 | -hash_and_time=couple_hash_table(sliced_interactions) | |
412 | - | |
391 | +sliced_interactions=sliced_interactions.astype("int") | |
413 | 392 | |
414 | 393 | contact_duration_and_interval_many_tags(sliced_interactions, delta_slice) |
415 | 394 | |
395 | +hash_and_time=couple_hash_table(sliced_interactions) | |
416 | 396 | |
417 | 397 | |
418 | 398 | detect_binary_contacts_among_same_two_tags(hash_and_time, delta_slice) |