# R script to extract figures for scheduled cash earnings from the Monthly Labour Survey's long-term data
# URL: http://tsigeto.info/maikin/maikin-longterm-earnings-sche.r.txt
# 2021/12/31 - 2023/11/26
# Created by TANAKA Sigeto
# for Figures 1 and 2 in https://remcat.hatenadiary.jp/entry/20220102/rev2019
# Result: http://tsigeto.info/maikin/maikin-earningsdiff-2004-2020.txt
# See also: https://remcat.hatenadiary.jp/entry/20220102/rev2019#appendix
# CSV files were downloaded from the Portal Site of Official Statistics of Japan (e-Stat),
# long-term accumulated data of the Monthly Labour Survey: National Survey
# from https://www.e-stat.go.jp/stat-search/files?tstat=000001011791 (2021-12-31).
# To explore the changes due to the recalculation of the data in 2019,
# the script below compares the new and old files, focusing on scheduled cash earnings
# for every month (for all industries, all establishment sizes, all worker types).
# Preparation: If the Japanese characters in the first column or the first row cause errors when R reads the files, delete those characters beforehand.
# Input CSV files, the "new" and "old" data files that this script compares.
# The paths carry no directory part, so they are looked up in the working directory.
datafile.new <- "hon-maikin-k-jissu.csv"
datafile.old <- "juu-maikin-k-jissu.csv"
# Read one long-term data CSV file and return its monthly earnings series.
#
# Args:
#   datafile: Path to a CSV file readable by read.csv(). Columns are
#             addressed by position: 2 = year, 3 = month, 4 = industry,
#             5 = establishment size, 6 = worker type, 8 = earnings figure.
# Returns:
#   A numeric vector holding the column-8 values of the selected rows,
#   named with "yyyymm" strings (year followed by the zero-padded month).
#
# NOTE(review): the file header calls the extracted figure "scheduled cash
# earnings", whereas the original inline comment here said "contract cash
# earnings" -- confirm which item column 8 actually holds.
read.earnings <- function ( datafile ) {
  # Force text columns to be read as character on every R version. Before
  # R 4.0 the default turned them into factors, and as.numeric() on a factor
  # returns the internal level codes instead of the values in the file.
  raw <- read.csv( datafile , stringsAsFactors = FALSE )

  keep <-
    substring( raw[[ 3 ]] , 1, 2 ) != "CY" &  # Each month
    substring( raw[[ 4 ]] , 1, 2 ) == "TL" &  # All industries
    substring( raw[[ 5 ]] , 1, 1 ) == "T"  &  # All sizes
    raw[[ 6 ]] == 0                           # Both full- and part-time

  # which() drops rows whose condition evaluates to NA, as subset() does.
  monthly <- raw[ which( keep ) , , drop = FALSE ]

  # Year and month joined into a "yyyymm" label with a two-digit month.
  yyyymm <- sprintf( "%s%02d" , monthly[[ 2 ]] , as.numeric( monthly[[ 3 ]] ) )

  r <- as.numeric( monthly[[ 8 ]] )
  names(r) <- yyyymm
  r
}
# Read the monthly series from the new and the old data file.
earnings.new <- read.earnings( datafile.new )
earnings.old <- read.earnings( datafile.old )

# One row per month of the old series; the new series is matched to it by
# its "yyyymm" name.
earnings <- data.frame(
  yyyymm = as.numeric( names( earnings.old ) ),
  new    = earnings.new[ names( earnings.old ) ],
  old    = earnings.old
)

# Percentage difference of the new figure relative to the old one.
earnings[[ "pct" ]] <- with( earnings , 100 * ( new / old - 1 ) )
earnings.diff <- subset( earnings, 0