Abstract:
This post introduces some useful hash-based data structures and helper subroutines.
Hashes
are widely used in Perl programming. The performance of a hash remains
stable even when storing millions of key-value pairs in some cases.
1.
Conversion between a hash and a text file:
#######
# Write a flat hash to a text file, one "key<sep>value" record per line.
#
# Arguments:
#   $hash_pointer - reference to the hash to export
#   $out_file     - path of the file to create (an existing file is truncated)
#   $sep          - string written between key and value; defaults to "="
#
# Records are written in sorted key order so the output is reproducible.
# Returns 1 on success; dies if the file cannot be opened or closed.
sub hash_to_file {
    my ($hash_pointer, $out_file, $sep) = @_;

    # Fall back to the default only when no separator was supplied, so that
    # a false-but-valid separator such as "0" is honoured.
    $sep = "=" unless defined $sep && length $sep;

    open my $OUT, ">", $out_file
        or die "can't open $out_file: $!\n";

    # Iterate through the reference directly; copying the whole hash first
    # would double memory use for large inputs.
    foreach my $key (sort keys %$hash_pointer) {
        print {$OUT} join("", $key, $sep, $hash_pointer->{$key}), "\n";
    }

    # Buffered write errors only surface when the handle is closed.
    close($OUT) or die "can't close $out_file: $!\n";
    return 1;
}
#########
# Read a delimited text file into a flat hash.
#
# Arguments:
#   $file        - path of the file to read
#   $sep         - field separator; defaults to "=".  It is interpolated
#                  into a regex for both the line filter and split(), so
#                  regex metacharacters in it are NOT taken literally.
#   $key_index   - zero-based field index used as the hash key (default 0)
#   $value_index - zero-based field index used as the value (default 1)
#
# Lines that do not contain the separator are ignored.  When the same key
# occurs on several lines, the values are concatenated with ",".
# A line whose key field is missing is skipped; a missing value field is
# stored as the empty string.
#
# Returns a reference to the hash.  If $file is not a regular file an empty
# hash is returned rather than an error.  Dies if the file cannot be opened.
sub file_to_hash {
    my ($file, $sep, $key_index, $value_index) = @_;

    # Use defined/length tests rather than truthiness: an explicit index of
    # 0 is a legitimate request (e.g. key in column 1, value in column 0).
    $sep         = '=' unless defined $sep         && length $sep;
    $key_index   = 0   unless defined $key_index   && length $key_index;
    $value_index = 1   unless defined $value_index && length $value_index;

    my %hash;
    return \%hash unless -f $file;

    open my $INFO, "<", $file
        or die "can't open $file: $!\n";

    while (my $line = <$INFO>) {
        chomp $line;

        # Only lines that actually contain the separator carry a record.
        next unless $line =~ /$sep/;

        my @items = split /$sep/, $line;
        my $key   = $items[$key_index];
        next unless defined $key;

        # split() drops trailing empty fields, so "key=" yields no value.
        my $value = $items[$value_index];
        $value = '' unless defined $value;

        if (exists $hash{$key}) {
            $hash{$key} .= ',' . $value;
        }
        else {
            $hash{$key} = $value;
        }
    }
    close($INFO);

    return \%hash;
}
2.
Conversion between a nested hash and a text file:
#############################
# Export a hash with two levels into a text file.
# Write a two-level hash ($h{row}{col} = value) to a text file as a table.
#
# Arguments:
#   $hash2_pointer   - reference to the two-level hash
#   $statistics_file - path of the file to create (truncated if it exists)
#   $sep             - column separator; defaults to a tab
#
# The first output line is a header: the literal word "names" followed by
# the sorted union of all second-level keys.  Each following line holds a
# first-level key (sorted) and its values in header order; cells that do
# not exist for a row are written as 0.
#
# Returns 1 on success; dies if the file cannot be opened or closed.
sub hash2_to_file {
    my ($hash2_pointer, $statistics_file, $sep) = @_;

    # A defined/length test keeps a false-but-valid separator such as "0".
    $sep = "\t" unless defined $sep && length $sep;

    my @row_names = sort keys %$hash2_pointer;

    # Collect the distinct second-level keys.  The inner hash is explicitly
    # dereferenced (keys on a bare scalar is a compile error on Perl 5.24+)
    # and "|| {}" avoids autovivifying rows in the caller's data.
    my %seen;
    my @col_names = grep { !$seen{$_}++ }
                    map  { keys %{ $hash2_pointer->{$_} || {} } } @row_names;
    @col_names = sort @col_names;

    open my $OUT, ">", $statistics_file
        or die "can't open $statistics_file: $!\n";

    print {$OUT} join($sep, 'names', @col_names), "\n";
    foreach my $row_name (@row_names) {
        my $row   = $hash2_pointer->{$row_name} || {};
        my @cells = map { exists $row->{$_} ? $row->{$_} : 0 } @col_names;
        print {$OUT} join($sep, $row_name, @cells), "\n";
    }

    # Buffered write errors only surface when the handle is closed.
    close($OUT) or die "can't close $statistics_file: $!\n";
    return 1;
}
#########
##########################
# Read a file into a hash with two levels.
# Read a delimited table from a text file into a two-level hash.
#
# Arguments:
#   $file - path of the file to read
#   $sep  - column separator; defaults to a tab.  It is interpolated into
#           the split() regex, so regex metacharacters are NOT literal.
#
# The first line is the header; its fields from the second onward name the
# columns.  On every later line the first field is the row name and field
# $i is stored as $hash2{row_name}{column_name_i}.  Fields beyond the end
# of the header have no column name and are skipped.  A row with no cells
# creates no entry.
#
# Returns a reference to the hash (empty when the file is empty).
# Dies if the file cannot be opened.
sub file_to_hash2 {
    my ($file, $sep) = @_;

    # A defined/length test keeps a false-but-valid separator such as "0".
    $sep = "\t" unless defined $sep && length $sep;

    my %hash2;
    open my $IN, "<", $file
        or die "can't open $file: $!\n";

    # An empty file has no header line; there is nothing to parse.
    my $header = <$IN>;
    if (!defined $header) {
        close($IN);
        return \%hash2;
    }
    chomp $header;
    my @col_names = split /$sep/, $header;

    while (my $line = <$IN>) {
        chomp $line;
        my @items    = split /$sep/, $line;
        my $row_name = $items[0];

        # Index 0 is the row name, so the cells start at index 1.
        for my $i (1 .. $#items) {
            my $col_name = $col_names[$i];
            next unless defined $col_name;
            $hash2{$row_name}->{$col_name} = $items[$i];
        }
    }
    close($IN);

    return \%hash2;
}
No comments:
Post a Comment