#!/usr/local/bin/jperl
########################################################
# SSM 95 occupational-history data analysis program
# 1996.09.04
#
# 1. Reads a fixed-column record layout from the DATA section at the
#    end of this file (one "NAME FROM-TO" or "NAME COLUMN" statement per
#    line; a line containing "/" closes the current card).
# 2. Reads raw records from <> (files named on the command line, or
#    STDIN); every record consists of $ncard consecutive lines.
# 3. Validates each record, recodes missing-value markers, and appends
#    the record to $BINOUT with pack template 's*'.
#
# Requires a Perl 5.6+ based interpreter (strict, warnings, lexicals).
########################################################

use strict;
use warnings;

my $start = time;

########################################################
# Configuration
########################################################
my $BINOUT = 'SSM95A.BIN';

# Codes written to the binary file in place of missing-value markers.
my %MISS = (
    'DKNA'  => -32767,
    'OUT'   => -32766,
    'BLANK' => -32765,
    'GHOST' => -32764,
    'YET'   => -32763,
);

# Largest code in %MISS; not referenced anywhere else in this script.
my $missmax = -32763;

print "\n[miss]\n";
foreach my $label ( sort { $MISS{$a} <=> $MISS{$b} } keys %MISS ) {
    print "$label\t$MISS{$label}\n";
}

########################################################
# Read the layout statements from the DATA section.
########################################################
print "\n[var]\n";

my @name;        # variable names, in record order
my @template;    # unpack template for each card
my @nvar;        # number of variables on each card
my %vars_of_width = ( 1 => [], 2 => [], 3 => [], 4 => [] );
my $nvar    = 0; # total number of variables
my $ncard   = 0; # number of cards closed by a "/" line so far
my $pointer = 0; # columns consumed so far on the current card

while ( my $line = <DATA> ) {

    # Blank lines are ignored.
    next if $line =~ /^\s*$/;
    chomp $line;

    if ( $line =~ /^\s*(\S+)\s+([^-]+)-?(.*)$/ ) {
        my ( $var, $from, $to ) = ( $1, $2, $3 );

        # Fields must be contiguous: each one starts right after the
        # previous one (columns in the layout are 1-based).
        ( $from - 1 == $pointer )
          || die("Pointer $pointer unmach: $line");

        # "NAME 6" is a single column, "NAME 1-3" spans 3 columns.
        my $width = ( '' eq $to ) ? 1 : 1 + $to - $from;

        $name[ $nvar++ ] = $var;
        $pointer += $width;
        $template[$ncard] .= "a$width";
        print "$var\t$ncard\t$from-$to\n";

        exists $vars_of_width{$width} || die("Width over 4: $line");
        push @{ $vars_of_width{$width} }, $var;
        $nvar[$ncard]++;
    }
    elsif ( $line =~ m#/# ) {

        # End of card: restart the column counter for the next card.
        $pointer = 0;
        ++$ncard;
    }
    else {
        die("Wrong Statement: $line");
    }
}

# Without at least one closed card the record loop below would read
# nothing and could never reach end of input.
$ncard > 0 || die("No card layout found in DATA section.");

# Map each variable name to its position in a record.
my %index_of;
@index_of{@name} = ( 0 .. $#name );

print "\n[input]\n";
print "nvar\t$nvar\n";
print "ncard\t$ncard\n";
for my $c ( 0 .. $ncard - 1 ) {
    print "nvar of Card $c\t$nvar[$c]\n";
}
for my $i ( 0 .. $#template ) {
    print "template$i\t$template[$i]\n";
}
for my $width ( 1 .. 4 ) {
    print "w$width\t", join( ' ', @{ $vars_of_width{$width} } ), "\n";
}

########################################################
# Process the raw data.
########################################################
open( my $binout, '>', $BINOUT )
  or die("Cannot open file $BINOUT: $!");
binmode($binout);

my $N = 0;       # number of records processed
my @values;      # field values of the current record

MAINLOOP:
while (1) {
    ++$N;
    print STDERR "$N\r";    # progress indicator

    # Collect the fields of all cards belonging to this record.
    @values = ();
    for my $card_no ( 0 .. $ncard - 1 ) {
        my $card = <>;
        defined $card
          || die("Record $N is ended at CARD $card_no.");
        chomp $card;
        push @values, unpack( $template[$card_no], $card );
    }

    # Check: no value may consist of digits followed by spaces.
    foreach my $value (@values) {
        $value =~ /\d +$/ && die("Value end with space ($N).");
    }

    # Check the bookkeeping fields of every card; cards are numbered
    # from 1 in the variable names (IDT1, IDT2, ...).
    for ( my $i = 1 ; defined $index_of{"IDT$i"} ; ++$i ) {
        ( $values[ $index_of{'IDT1'} ] eq $values[ $index_of{"IDT$i"} ] )
          || die("Wrong IDT$i ($N:$i)");
        ( $values[ $index_of{'IDP1'} ] eq $values[ $index_of{"IDP$i"} ] )
          || die("Wrong IDP$i ($N:$i)");
        ( 1 == $values[ $index_of{"KIND$i"} ] )
          || die("KIND$i is not 1: line $. ($N:$i)");
        ( ' ' eq $values[ $index_of{"BLANK$i"} ] )
          || die("BLANK$i is not BLANK: line $. ($N:$i)");
        ( $i == $values[ $index_of{"CARD$i"} ] )
          || die("Wrong: CARD$i has wrong value: line $. ($N:$i)");
    }
    ( ' ' eq $values[ $index_of{"null"} ] )
      || die("null is not BLANK: line $. ($N)");

    # Recode missing-value markers. For a field of width w the markers
    # are w nines (DKNA), that number minus one (OUT), or all spaces
    # (BLANK). The blank test comes first so that blank fields are never
    # compared numerically.
    for my $width ( 1 .. 4 ) {
        my $dkna = '9' x $width;    # 9, 99, 999, 9999
        my $out  = $dkna - 1;       # 8, 98, 998, 9998
        for my $var ( @{ $vars_of_width{$width} } ) {
            my $slot = \$values[ $index_of{$var} ];
            if    ( $$slot =~ /\A +\z/ ) { $$slot = $MISS{'BLANK'}; }
            elsif ( $$slot == $dkna )    { $$slot = $MISS{'DKNA'}; }
            elsif ( $$slot == $out )     { $$slot = $MISS{'OUT'}; }
        }
    }

    # After recoding, the filler fields must carry the BLANK code.
    for ( my $i = 1 ; defined $index_of{"IDT$i"} ; ++$i ) {
        ( $MISS{'BLANK'} == $values[ $index_of{"BLANK$i"} ] )
          || die("BLANK$i is not BLANK: line $. ($N:$i)");
    }
    ( $MISS{'BLANK'} == $values[ $index_of{"null"} ] )
      || die("null is not BLANK: line $. ($N)");

    print {$binout} pack( 's*', @values );

    last MAINLOOP if eof();
}

# Buffered write errors only surface when the handle is closed.
close($binout) or die("Cannot close file $BINOUT: $!");

print "\n[output]\n";
print "template\ts*";
print "\nrecordsize\t", 2 * @values;
print "\nN\t",          $N;
print "\nnvar\t",       $nvar;
print "\nfilename\t",   $BINOUT;
my $filesize = 2 * $N * @values;
print "\nfilesize\t", $filesize;
print "\n\nsec\t",    time() - $start;

$nvar == @values
  || die( "Nvar ($nvar) and recordsize/2 ("
      . scalar(@values)
      . ") missmatch." );

# The original script had this size check commented out; it stays disabled.
# my $realfilesize = ( -s $BINOUT );
# ( $filesize == $realfilesize )
#   || die("Filesize of $BINOUT ($filesize:$realfilesize) missmatch.");

# Everything after __END__ is read through <DATA> as the record layout.
# NOTE(review): the layout is cut short in this copy -- its last line is
# a "rest omitted" placeholder, which the parser above rejects with
# "Wrong Statement". Restore the full variable list (with a "/" line
# closing every card) before running.
__END__
IDT1 1-3
IDP1 4-5
KIND1 6
BLANK1 7
CARD1 8
.....以下略