OSDN > 開發者 > larry77 > 工作室 > myprojects-hg-reborn > File Details
myprojects-hg-reborn
Fork

(Original repository, No fork origin)
File Info

修訂.	0ea264aff68f7ed684d29045d69242ebb498b40e
大小	8,440 bytes
時間	2010-09-14 07:18:41
作者	lorenzo
Log Message	I cleaned up a bit the code and removed unnecessary tests/calculations carried out on a single tag id.
Content

Export as raw format
import Data.Ord


import Data.List
-- The module imported above contains a lot of functions to manipulate lists
import Numeric.LinearAlgebra

import qualified Data.Set as Set  


main :: IO ()

main = do
   txt <- readFile "couple_interaction_duration_times_extended_1_.dat"

   let dat :: [[Integer]]

       dat = convert txt  -- Now dat is a list where every element is a row of the original data table
   
   let dat_col = transpose dat -- now every entry of data_col is a column of the original data table

   let tag_list = get_tag_list dat_col

   -- putStrLn "tag_list is, "
   -- print tag_list

   -- let tag_1080_pos = get_single_tag_story dat_col 1080

   -- putStrLn "positions of tag 1080, "
   -- print tag_1080_pos

   -- let t_1080= get_sel (dat_col!!1) tag_1080_pos

   -- writeFile "ini_1080.dat" t_1080

  

   -- putStrLn "the times of the begin of the contacts of tag 1080 are, "
   -- print (take 30 t_1080 )

   -- let t_1080_2 = single_tag_unique_contact_times dat_col 1080

   -- putStrLn "or also "
   -- print (take 30 t_1080_2 )


   -- save "1080_hs.dat" t_1080

   -- putStrLn "nub t_1080==nub (sort t_1080) is,"
   -- print (( t_1080)==( (sort t_1080) ))

   
   let unique_time_long = map  (single_tag_unique_contact_times dat_col) tag_list

   save_vector_flat "unique_time_long.dat" unique_time_long

   -- putStrLn "unique_time_long is,  "

   -- print (unique_time_long)

   -- let ctime = count t_1080

   -- putStrLn "the repeated times are, "
   -- print (take 10 ctime )


   -- let n_occ = count_occurrencies t_1080
   -- putStrLn "the occurrencies are, "
   -- print ( n_occ )


   -- let times_multi_contacts = find_repeated_times n_occ t_1080
   
   -- putStrLn "the times at which multiple contacts are established are, "
   -- print ( times_multi_contacts )

   
   -- let duration_times = get_single_tag_contact_durations dat_col tag_1080_pos

   -- putStrLn "contact durations are"
   -- print (take 30 duration_times )
   
   -- let summed_times = sum_corresponding_unique t_1080 duration_times

   -- putStrLn "the summed duration times for multiple contacts starting at the same time  are"
   -- print (take 30 summed_times )

   -- let summed_times_2 = single_tag_unique_contact_durations dat_col 1080
   -- putStrLn "or also"
   -- print (take 30 summed_times_2 )


   let summed_durations_long = map (single_tag_unique_contact_durations dat_col) tag_list  
   
   save_vector_flat "unique_contact_durations_long.dat" summed_durations_long

   let id_list_long = map (stick_tag_id dat_col) tag_list

   save_vector_flat "tag_id_long.dat" id_list_long


   -- It looks like the little script is really OK up to here!


   -- Now sort the times at which a contact begins in increasing order 

   let unique_time_long_ordered = map  (sort) unique_time_long

   save_vector_flat "unique_time_long_ordered.dat" unique_time_long_ordered




   let data_comb = zip unique_time_long  summed_durations_long

   -- let summed_durations_long_ordered =   genericIndex data_comb 2

   -- let ms = argsort_only (unique_time_long!!1)  (summed_durations_long!!1)


   let summed_durations_long_ordered = map  (argsort_tuple .  genericIndex data_comb) [0,1..length(data_comb)-1]

   save_vector_flat "unique_contact_durations_long_ordered.dat" summed_durations_long_ordered


   putStrLn "summed_durations_long_ordered is, "
   print (summed_durations_long_ordered )
   
   -- putStrLn "ms is, "
   -- print (ms)
   

   
   putStrLn "So far so good"



convert x = (map (map read . words) . lines) x

firstColumn xss = head (transpose xss)

nthColumn xss n  = (transpose xss) !! n  -- my first haskell function!

find_ij xss i j = (xss !! i) !! j -- for the case of this function, each entry of a list of lists (i.e. each sublist) is meant to be
                              --a row.

get_tag_list xss = nub $ (xss!!4) ++ (xss!!3)   -- this automatically removes duplicate entries

get_single_tag_story dat tag_id =  (nub ((findIndices (== tag_id) (dat!!3) ) ++ (findIndices (== tag_id) (dat!!4) )))

get_single_tag_contact_durations dat sel = get_sel (dat!!0) sel
                                                -- where sel= sort $ nub ((findIndices (== tag_id) (dat!!3) ) ++ (findIndices (== tag_id) (dat!!4) ))


get_single_tag_contact_durations_tag_id dat tag_id = get_sel (dat!!0) sel
                                                 where sel=  nub ((findIndices (== tag_id) (dat!!3) ) ++ (findIndices (== tag_id) (dat!!4) ))



single_tag_unique_contact_times dat_col tag_id  = nub $ get_sel (dat_col!!1) sel
                         where sel = (nub ((findIndices (== tag_id) (dat_col!!3) ) ++ (findIndices (== tag_id) (dat_col!!4) ))) 

single_tag_non_unique_contact_times dat_col tag_id  =  get_sel (dat_col!!1) sel
                                     where sel= nub ((findIndices (== tag_id) (dat_col!!3) ) ++ (findIndices (== tag_id) (dat_col!!4) )) 


stick_tag_id dat_col tag_id = take n [tag_id,tag_id..]
                              where n= length $ single_tag_unique_contact_times dat_col tag_id


-- The following function is quite crucial as it combines several different functions
-- and given the list of data (in column format) and a tag id, it returns the list of 
-- tag contact durations

single_tag_unique_contact_durations dat_col tag_id  = sum_corresponding_unique bs ms
                    where
                      bs = single_tag_non_unique_contact_times dat_col tag_id
                      ms = get_single_tag_contact_durations_tag_id dat_col tag_id
                               


find_repeated_times occurrences timelist  = get_sel (sort(nub timelist)) sel
                        where sel = findIndices (>1)  occurrences -- NB: to select the repeated elements in timelist I
                                                      -- need to make the timelist unique and to sort it (since 
                                    -- the function count_occurrencies to see how many time each unique entry of a list is repeated,
                                    --  the list gets sorted in increasing order.)



-- Now I want to sum the durations corresponding to repeated times




sum_corresponding_unique bs ms =  [sum [m | (b,m)<- zip bs ms, b == u] | u<- us]
                where us = nub (  bs)--   where ms is the list of the elements
                                  -- I want to sum for the corresponding repeated elements in bs.
                                       -- NB2: this just works, I do not need to sort anything


-- A function returning a list with the elements of ml in position sel
get_sel ml sel = map (genericIndex ml) (sort sel)  -- NB: sel has to be sorted in increasing order or there may be problems
-- and I also need genericIndex as !! does not work with Integer numbers (only with Int)

-- The one below is a more complicated function which probably  do not understand completely hence I do not use it

selection :: Integral a => [a] -> [b] -> [b]
selection sel = pick distances  -- NB: sel has to be increasing!
  where
    distances = zipWith (-) (sort sel) (0:(sort sel))
    pick [] _ = []
    pick (d:ds) xs = case genericDrop d xs of
                       [] -> []
                       ys@(y:_) -> y : pick ds ys




argsort bs ms = unzip . sortBy (comparing fst) $ zip bs ms -- where ms is the list which has to be sorted according to the
                                                 -- increasing order of bs. NB: this returns both bs sorted and ms argsorted accordingly
                                                 -- i.e. it returns a tuple with two elements


argsort_only bs ms = snd $ argsort bs ms -- this returns only the argsorted 


argsort_tuple list = argsort_only  list1 list2 --  as above, but to be applied to a tuple
                      where list1 = fst(list)
                            list2 = snd(list)



cumsum x = scanl1 (+) x -- see http://bit.ly/cagkw2 NB: scanl1 needs a function of two arguments, similar to foldl
                        -- see  scanl1 f [x1, x2, ...] == [x1, x1 `f` x2, ...]


count l = map (\l@(x:xs) -> (x,length l)) . group . sort $ l



count_occurrencies x = map length l
                       where l =  group.sort $ x

unique x = nub x


save filename zs = writeFile filename (show zs)

save_vector filename list = writeFile filename $ unlines (map show list)

save_vector_flat filename list = writeFile filename $ unlines (map show $ concat list)
myprojects-hg-reborn Fork

標籤

Frequently used words (click to add to your profile)

File Info

Content

myprojects-hg-reborn
Fork