Thursday, December 17, 2015

Perl: Hash operations


Abstract: This post introduces some useful operations on Perl hash data structures.


Hashes are widely used in Perl programming. In some cases, hash performance remains stable even when storing millions of key-value pairs.

1. Conversion between a hash and a text file:
#######
# hash_to_file($hash_pointer, $out_file, $sep)
#
# Write a flat hash to a text file, one "key<sep>value" line per entry,
# with the keys in sorted (string) order.
#
#   $hash_pointer - reference to the hash to export
#   $out_file     - path of the file to create or overwrite
#   $sep          - string written between key and value; defaults to "="
#                   when undefined or empty
#
# Dies if the file cannot be opened or closed. Returns 1 on success.
sub hash_to_file {
    my ($hash_pointer, $out_file, $sep) = @_;

    # Fall back to the default only for a missing or empty separator, so a
    # separator of "0" is honoured instead of being treated as false.
    $sep = '=' unless defined $sep && length $sep;

    open my $OUT, '>', $out_file
        or die "can't open $out_file: $!\n";

    foreach my $key (sort keys %{$hash_pointer}) {
        print {$OUT} join('', $key, $sep, $hash_pointer->{$key}), "\n";
    }

    # Buffered write errors only surface when the handle is closed.
    close $OUT
        or die "can't close $out_file: $!\n";

    return 1;
}
#########
# file_to_hash($file, $sep, $key_index, $value_index)
#
# Read a delimited text file into a flat hash and return a reference to it.
#
#   $file        - path of the file to read
#   $sep         - field separator; it is interpolated into a regular
#                  expression, so regex metacharacters keep their special
#                  meaning. Defaults to "=" when undefined or empty.
#   $key_index   - 0-based field index used as the hash key (default 0)
#   $value_index - 0-based field index used as the value (default 1)
#
# Lines that do not contain the separator are skipped. When a key occurs
# more than once, its values are joined with ",". If $file is not an
# existing regular file, a reference to an empty hash is returned.
# Dies if the file exists but cannot be opened.
sub file_to_hash {
    my ($file, $sep, $key_index, $value_index) = @_;

    $sep       = '=' unless defined $sep && length $sep;
    $key_index = 0   unless $key_index;

    # Test for "missing" rather than "false" so that column 0 can be
    # selected as the value column.
    $value_index = 1 unless defined $value_index && length $value_index;

    my %hash;

    # A missing file is not treated as an error: the result is just empty.
    return \%hash unless -f $file;

    open my $INFO, '<', $file
        or die "can't open $file: $!\n";

    my $sep_re = qr/$sep/;

    LINE:
    while (my $line = <$INFO>) {
        chomp $line;

        # Only lines that actually contain the separator contribute.
        next LINE unless $line =~ /$sep_re/;

        my @items = split /$sep_re/, $line;
        my ($key, $value) = @items[ $key_index, $value_index ];

        if (exists $hash{$key}) {
            $hash{$key} .= ',' . $value;
        }
        else {
            $hash{$key} = $value;
        }
    }
    close $INFO;

    return \%hash;
}

2. Conversion between a nested hash and a text file:
#############################
#export the hash with two levels into text file
# hash2_to_file($hash2_pointer, $statistics_file, $sep)
#
# Write a two-level hash ($h->{row}{col} = value) to a text file as a table.
# The first line is a header: the literal word "names" followed by every
# second-level key found in any row, sorted. Each following line holds one
# first-level key (sorted) and its values in header order; a cell that does
# not exist in the hash is written as 0.
#
#   $hash2_pointer   - reference to the two-level hash
#   $statistics_file - path of the file to create or overwrite
#   $sep             - column separator; defaults to a tab when undefined
#                      or empty
#
# Dies if the file cannot be opened or closed. Returns 1 on success.
sub hash2_to_file {
    my ($hash2_pointer, $statistics_file, $sep) = @_;

    $sep = "\t" unless defined $sep && length $sep;

    my @row_names = sort keys %{$hash2_pointer};

    # Collect the union of second-level keys. The inner hash reference is
    # dereferenced explicitly: calling keys() on a scalar expression is a
    # fatal error on Perl 5.24 and later. A plain hash does the
    # de-duplication, so no extra module is needed.
    my %is_col;
    foreach my $row_name (@row_names) {
        $is_col{$_} = 1 for keys %{ $hash2_pointer->{$row_name} };
    }
    my @col_names = sort keys %is_col;

    open my $OUT, '>', $statistics_file
        or die "can't open $statistics_file: $!\n";

    print {$OUT} join($sep, 'names', @col_names), "\n";

    foreach my $row_name (@row_names) {
        my $row   = $hash2_pointer->{$row_name};
        my @cells = map { exists $row->{$_} ? $row->{$_} : 0 } @col_names;
        print {$OUT} join($sep, $row_name, @cells), "\n";
    }

    # Buffered write errors only surface when the handle is closed.
    close $OUT
        or die "can't close $statistics_file: $!\n";

    return 1;
}
#########
##########################
#read file as the hash with two levels
# file_to_hash2($file, $sep)
#
# Read a delimited table from a text file into a two-level hash and return
# a reference to it. The first line is taken as the header; in every later
# line the first field is the row name and field i (i >= 1) is stored as
# $hash2{row_name}{header field i}.
#
#   $file - path of the file to read
#   $sep  - column separator; it is interpolated into a regular expression.
#           Defaults to a tab when undefined or empty.
#
# An empty file yields a reference to an empty hash.
# Dies if the file cannot be opened.
sub file_to_hash2 {
    my ($file, $sep) = @_;

    $sep = "\t" unless defined $sep && length $sep;

    my %hash2;

    open my $IN, '<', $file
        or die "can't open $file: $!\n";

    my $header = <$IN>;

    # An empty file has no header line; return early rather than feeding
    # undef to chomp and split.
    if (!defined $header) {
        close $IN;
        return \%hash2;
    }
    chomp $header;

    my $sep_re    = qr/$sep/;
    my @col_names = split /$sep_re/, $header;

    while (my $line = <$IN>) {
        chomp $line;
        my @items    = split /$sep_re/, $line;
        my $row_name = $items[0];

        # Field 0 is the row name, so data columns start at index 1 and
        # line up with the header fields at the same index.
        for my $i (1 .. $#items) {
            $hash2{$row_name}{ $col_names[$i] } = $items[$i];
        }
    }
    close $IN;

    return \%hash2;
}

No comments:

Post a Comment