# Extract the number of workers and contractual cash earnings from
# the original tables ("maikin genhyou" in Japanese) of Monthly Labour Survey.
# Excel files are available from
# https://www.e-stat.go.jp/stat-search/files?tstat=000001011791
# 2021/08/15 - 2021/09/06
# Created by TANAKA Sigeto
# URL: http://tsigeto.info/maikin/maikin-monthly.pl.txt
# Option:
# -all (without restriction of file name pattern)
# -na (print lines with '-' or '*' to STDERR)
# Command-line switches; both are off unless given.
%Option = ( all => 0, na => 0 );
# Recognise '-all' and '-na' in a single pass.  The loop variable aliases the
# elements of @ARGV, so a recognised switch is blanked in place ...
for my $arg (@ARGV) {
    $Option{all} = 1 if $arg =~ s/^\-all$//;
    $Option{na}  = 1 if $arg =~ s/^\-na$//;
}
# ... and the blanked entries are dropped, leaving only the file names.
@ARGV = grep { $_ ne '' } @ARGV;
# Output format: tab-separated fields (also for interpolated lists),
# with a newline appended by every print.
$" = "\t";
$, = "\t";
$\ = "\n";
# Establishment size (workers in an establishment): maps the size code found
# in the first column of a table row to the lower bound of the size class.
%Class = ();
$Class{'T'} = 0;    # All sizes
# Codes:  1 = 1000 and over, 3 = 500-999, 5 = 100-499, 7 = 30-99, 9 = 5-29
@Class{ 1, 3, 5, 7, 9 } = ( 1000, 500, 100, 30, 5 );
# Header line of the output table:
# yyyymm is the survey year-month; e0 and e1 are the numbers of workers.
print 'file', 'line', 'yyyymm', 'size', 'e0', 'e1', 'wage', 'industry';
# Read every input file named on the command line and print one record for
# each (industry, establishment size) combination found in it.
#
# Output columns match the header line: file name without ".txt", line number,
# survey year-month, size class, e0, e1, wage, industry code.
# Reads the globals %Class (size code => size class) and %Option; remembers the
# combinations already printed in the global %Done.
# Dies if an input file cannot be opened.
FILE: foreach my $path (@ARGV) {
    # Three-argument open, so a file name is never interpreted as a mode or a
    # pipe.  The lexical handle is closed automatically when a file is skipped.
    open( my $fh, '<', $path ) or die("Cannot open file $path: $!\n");

    my $Filename = $path;
    $Filename =~ s/\.txt$//;

    # Derive the survey year-month (yyyymm) from the file name.
    my $Ym = '';
    if ( $Filename =~ /hon\-mks(\d\d\d\d\d\d)/ ) {
        $Ym = $1;
    }
    elsif ( $Filename =~ /mks190_(\d\d\d\d\d\d)/ ) {
        $Ym = $1;
    }
    elsif ( $Filename =~ /(sai)?(\d\d)(\d\d)mks/ ) {
        # Two-digit year + 1988 gives the four-digit year
        # (presumably a Heisei-era year; confirm against the file naming).
        my ( $yy, $mm ) = ( $2, $3 );
        $Ym = ( 1988 + $yy ) . $mm;
    }

    # Filename pattern is restricted unless the option '-all' was specified
    next FILE if '' eq $Ym && !$Option{all};

    my $Line = 0;     # 1-based line number within the current file
    my $Ind  = '';    # industry code of the section being read

    # Read the file line by line.  The condition must contain the read: an
    # empty "while ()" is always true and would loop forever without input.
  LINE: while ( my $raw = <$fh> ) {
        ++$Line;

        # Split into tab-separated cells; strip surrounding quotes and
        # whitespace (including the line end) and thousands separators.
        my @field = split /\t/, $raw;
        foreach my $cell (@field) {
            $cell =~ s/^[\"\s]*//;
            $cell =~ s/[\"\s]*$//;
            $cell =~ s/(\d),(\d)/$1$2/g;
        }
        my $code   = defined $field[0] ? $field[0] : '';
        my $second = defined $field[1] ? $field[1] : '';

        # Industry: a row with only an industry code ('TL' or one starting
        # with C-R) in the first cell opens a new industry section.
        if ( ( $code eq 'TL' || $code =~ /^[C-R]/ ) && $second eq '' ) {
            $Ind = $code;
            next LINE;
        }

        # Establishment size: skip rows whose first cell is not a size code.
        my $class = $Class{$code};
        next LINE if !defined $class || $class eq '';

        # Only the first row for each file/industry/size is used.
        next LINE if $Done{$Filename}{$Ind}{$class};

        # Number of workers
        my ( $e0, $e1 ) = @field[ 3, 6 ];

        # Wage
        my $wage = $field[13];

        # Missing values: a lone '-' or '*' is replaced by an empty string.
        # The loop variable aliases $e0, $e1 and $wage, so they are changed.
        my $na = 0;
        foreach my $value ( $e0, $e1, $wage ) {
            ++$na if defined $value && $value =~ s/^[\-\*]$//;
        }
        if ( $Option{na} && $na ) {
            # The raw line is appended unchanged (it keeps its own line end).
            print STDERR $Filename, $Line, $Ym, $class, $e0, $e1, $wage, $Ind,
              '||' . $raw;
        }

        print $Filename, $Line, $Ym, $class, $e0, $e1, $wage, $Ind;
        ++$Done{$Filename}{$Ind}{$class};
    }

    close $fh;
}