#================================================================= # Purpose: Generate consistency table for all series pairs specified in an # input map table. Series information is read from at least one # "TSA downloaded" group summary file and up to four can be specified # on input. It's up to the user to ensure that the input group summary # files are consistent with the group content of the input map table. # If not, a warning message is produced. The consistency table is # written to a file in HTML format in the specified output directory # and can be optionally displayed in a browser by setting a flag on # the command-line. Repeated runs of this script will result # in the output HTML being over-written. # # Required: It is assumed that you have a version of Perl installed from # the ABS software portal. # # Examples: For all command line arguments and options, see the tutorial # and examples by executing this script at the DOS command prompt # with no input arguments, e.g. simply type "consistency.pl". # #================================================================= # Force all variables to be declared. use strict; # Import module for command-line inputs. use Getopt::Std; my (%options, $iam, $version, $lastmod, $who, $lineh, $infile, $outdir, $groupsumfile1, $groupsumfile2, $groupsumfile3, $groupsumfile4, $outhtmlfile, $spawnviewer, $args, @maplines, @gslines1, @gslines2, @gslines3, @gslines4, $line, @fields, $i, @cols, @series1, @series2, $pair, @gpname, $k, @garray1, @garray2, $skipcount, @rlines, $finalpaircount, @period1, @period2, $j1, $j2, @tma1, @tma2, @sma1, @sma2, @method1, @method2, @model1, @model2, @td1, @td2, $mvtd1, $mvtd2, $wts1, $wts2, $n, @tb1, @tb2, $value, $tbcnt, @lgex1, @lgex2, $lgexcnt, $sbcnt, $nn, @sb1, @sb2, @ep1, @ep2, @numdata1, @numdata2, @data1, @data2, @datatype1, @datatype2, @ssperiod1, @ssyear1, @seperiod1, @seyear1, @ssperiod2, @ssyear2, @seperiod2, @seyear2, @matcheds1, @matcheds2, @moves1, @moves2, @absmoves1, @absmoves2, @timestring, $countstring, $diagnosticssumm, $datatypeflag, $templine, @pubtma1, @pubtma2, @pubewp1, @pubewp2, $Rfailureflag, $identicalflag, $grpname1, $grpname2, $tempseries, $test1result, $test1title, $numsuccessdatatypematch, $badparmatch, $epcnt, @fd1, @fd2, $epsplit, $mvttimeplotfilehtml, $mvtvsmvtplotfilehtml, $ccfplotfilehtml, $acfplotfilehtml, $acfcointplotfilehtml, @asperiod1, @asyear1, @aeperiod1, @aeyear1, @asperiod2, @asyear2, @aeperiod2, @aeyear2, @usperiod1, @usyear1, @ueperiod1, @ueyear1, @usperiod2, @usyear2, @ueperiod2, @ueyear2, @ao1, @ao2, $aocnt, @valar, @arima1, @arima2, $arcnt, @F2B1, @F2B2, @F2I1, @F2I2, @splitmeas, @x11value, $diagouthtmlfile, $addpriors, $computediags, $failedtests, $grp1, $grp2, $s1, $s2, $found1, $found2); #----------------------------------------------------------------- # Specify command-line inputs. %options = (); getopts("i:o:a:b:c:d:h:s:p:t:",\%options); # Print software description. $iam = 'consistency.pl'; $version = '5.3'; $lastmod = 'last modified 13-02-2007'; $who = 'Frank Masci'; if (!%options) { print "_________________________________________________________________________\n"; print "$iam, Version $version\: $lastmod by $who\n"; while (defined ($lineh = )) { if ($lineh =~ /RRR/) { next; } $lineh =~ s/^#\s(.*)/$1/; if (! $lineh) { $lineh = " \n"; } print "$lineh"; } print "_________________________________________________________________________\n"; exit 0; } #----------------------------------------------------------------- # Define variables from command-line inputs. $infile = $options{'i'} || die "\n*** Input map table file " . "not given (-i ); quitting...\n"; $outdir = $options{'o'} || die "\n*** Output directory not given " . "(-o ); quitting...\n"; $groupsumfile1 = $options{'a'} || 'null'; $groupsumfile2 = $options{'b'} || 'null'; $groupsumfile3 = $options{'c'} || 'null'; $groupsumfile4 = $options{'d'} || 'null'; $outhtmlfile = $options{'h'} || die "\n*** Output HTML filename not given " . "(-h ); quitting...\n"; $spawnviewer = $options{'s'} || 0; $addpriors = $options{'p'} || 0; $computediags = $options{'t'} || 0; #----------------------------------------------------------------- # Check input validity, file existence. if( !(-e $infile) ) { die "\n*** Input map table file doesn't exist, " . "please check; quitting...\n"; } if( !($outdir eq ".") && !($outdir =~ m/:/) ) { die "\n*** Format of specified output directory not allowed; can " . "only be \".\" or full directory path; quitting...\n"; } if( !(-e $outdir) ) { print "\n*** Specified output directory doesn't exist; I'm creating " . "it for you...\n"; mkdir("$outdir",0777) || die "***\nCannot mkdir $outdir; quitting...\n"; } if( ($groupsumfile1 eq 'null') && ($groupsumfile2 eq 'null') && ($groupsumfile3 eq 'null') && ($groupsumfile4 eq 'null') ) { die "\n*** No group summary files (from TSA-download) were specified " . "on input; you must specify at least one of these; quitting...\n"; } if( ($groupsumfile1 ne 'null') && (!(-e $groupsumfile1)) ) { die "\n*** Group summary file \#1 was specified but it doesn't exist, " . "please check; quitting...\n"; } if( ($groupsumfile2 ne 'null') && (!(-e $groupsumfile2)) ) { die "\n*** Group summary file \#2 was specified but it doesn't exist, " . "please check; quitting...\n"; } if( ($groupsumfile3 ne 'null') && (!(-e $groupsumfile3)) ) { die "\n*** Group summary file \#3 was specified but it doesn't exist, " . "please check; quitting...\n"; } if( ($groupsumfile4 ne 'null') && (!(-e $groupsumfile4)) ) { die "\n*** Group summary file \#4 was specified but it doesn't exist, " . "please check; quitting...\n"; } # Strip any existing path from $outhtmlfile and prepend new path: $outdir # Also check for .html extension and append one if doesn't exist. @fields = split(/\\/, $outhtmlfile); $outhtmlfile = $fields[$#fields]; if( !($outhtmlfile =~ /\.html$/) ) { print "\n*** Specified output HTML file did not have a \".html\" " . "extension; I'm adding one for you...\n"; $outhtmlfile = $outhtmlfile . '.html'; } #Windows: $outhtmlfile = $outdir . '\\\\' . $outhtmlfile; #Unix: #$outhtmlfile = $outdir . '/' . $outhtmlfile; #----------------------------------------------------------------- # Storing map and group summary file information. if( !open(MAP, "<$infile") ) { die "\n*** Couldn't open $infile file for reading, quitting...\n"; } else { @maplines = ; $maplines[0] =~ s/^\s+|\s+$//g; $maplines[(($#maplines)/2)] =~ s/^\s+|\s+$//g; $maplines[$#maplines] =~ s/^\s+|\s+$//g; if( !$maplines[0] && !$maplines[$#maplines] && !$maplines[(($#maplines)/2)] ) { die "\n*** Map file \"$infile\" contains no lines; quitting...\n"; } } close(MAP); # initialise group names for each input group summary file. for($k = 0; $k < 4; $k++) { $gpname[$k] = 0; } if( $groupsumfile1 ne 'null' ) { if( !open(IN1, "<$groupsumfile1") ) { die "\n*** Couldn't open $groupsumfile1 file for reading, quitting...\n"; } else { @gslines1 = ; $gslines1[0] =~ s/^\s+|\s+$//g; if(!$gslines1[0]) { print "\n*** Warning: $groupsumfile1 contains no lines; continuing...\n"; } # Store official group name from 3rd element in this array. ($gpname[0] = $gslines1[2]) =~ s/GROUP-NAME|\s+//g; } close(IN1); } if( $groupsumfile2 ne 'null' ) { if( !open(IN2, "<$groupsumfile2") ) { die "\n*** Couldn't open $groupsumfile2 file for reading, quitting...\n"; } else { @gslines2 = ; $gslines2[0] =~ s/^\s+|\s+$//g; if(!$gslines2[0]) { print "\n*** Warning: $groupsumfile2 contains no lines; continuing...\n"; } # Store official group name from 3rd element in this array. ($gpname[1] = $gslines2[2]) =~ s/GROUP-NAME|\s+//g; } close(IN2); } if( $groupsumfile3 ne 'null' ) { if( !open(IN3, "<$groupsumfile3") ) { die "\n*** Couldn't open $groupsumfile3 file for reading, quitting...\n"; } else { @gslines3 = ; $gslines3[0] =~ s/^\s+|\s+$//g; if(!$gslines3[0]) { print "\n*** Warning: $groupsumfile3 contains no lines; continuing...\n"; } # Store official group name from 3rd element in this array. ($gpname[2] = $gslines3[2]) =~ s/GROUP-NAME|\s+//g; } close(IN3); } if( $groupsumfile4 ne 'null' ) { if( !open(IN4, "<$groupsumfile4") ) { die "\n*** Couldn't open $groupsumfile4 file for reading, quitting...\n"; } else { @gslines4 = ; $gslines4[0] =~ s/^\s+|\s+$//g; if(!$gslines4[0]) { print "\n*** Warning: $groupsumfile4 contains no lines; continuing...\n"; } # Store official group name from 3rd element in this array. ($gpname[3] = $gslines4[2]) =~ s/GROUP-NAME|\s+//g; } close(IN4); } #----------------------------------------------------------------- # For each series pair in map table, extract and store prior factors, # processing information and associated data. $i = 0; $skipcount = 0; foreach $pair (@maplines) { if( !($pair =~ m/[a-zA-Z0-9_]/) ) { next; } else { chomp($pair); } @cols = split(/,/, $pair); if( $#cols != 5 ) { die "\n*** Error: row in input mapfile does not have six comma delimited " . "fields, e.g:\n \"ownergrp1,grp1,series1,ownergrp2,grp2,series2\"; " . "quitting...\n\n"; } else { ($grp1 = $cols[1]) =~ s/^\s+|\s+$//g; ($grp2 = $cols[4]) =~ s/^\s+|\s+$//g; ($s1 = $cols[2]) =~ s/^\s+|\s+$//g; ($s2 = $cols[5]) =~ s/^\s+|\s+$//g; $series1[$i] = $grp1 . '.' . $s1; # format: "group1.series_name1" $series2[$i] = $grp2 . '.' . $s2; # format: "group2.series_name2" } # First assign appropriate group summary file to each "group.series_name": $found1 = 0; @garray1 =(); if( $gpname[0] && ($series1[$i] =~ m/$gpname[0]/) ) { @garray1 = @gslines1; $grpname1 = $gpname[0]; $found1 = 1; } if($gpname[1] && ($series1[$i] =~ m/$gpname[1]/)) { @garray1 = @gslines2; $grpname1 = $gpname[1]; $found1 = 1; } if($gpname[2] && ($series1[$i] =~ m/$gpname[2]/)) { @garray1 = @gslines3; $grpname1 = $gpname[2]; $found1 = 1; } if($gpname[3] && ($series1[$i] =~ m/$gpname[3]/)) { @garray1 = @gslines4; $grpname1 = $gpname[3]; $found1 = 1; } if( !$found1 ) { $skipcount++; next; } $found2 = 0; @garray2 =(); if( $gpname[0] && ($series2[$i] =~ m/$gpname[0]/) ) { @garray2 = @gslines1; $grpname2 = $gpname[0]; $found2 = 1; } if($gpname[1] && ($series2[$i] =~ m/$gpname[1]/)) { @garray2 = @gslines2; $grpname2 = $gpname[1]; $found2 = 1; } if($gpname[2] && ($series2[$i] =~ m/$gpname[2]/)) { @garray2 = @gslines3; $grpname2 = $gpname[2]; $found2 = 1; } if($gpname[3] && ($series2[$i] =~ m/$gpname[3]/)) { @garray2 = @gslines4; $grpname2 = $gpname[3]; $found2 = 1; } if( !$found2 ) { $skipcount++; next; } # For given @garray1 and @garray2, store specific information for series1, series2. $j1 = 0; foreach $line (@garray1) { chomp($line); ($templine = $line) =~ s/SERIES-NAME//; $templine =~ s/^\s+|\s+$//g; ($tempseries = $series1[$i]) =~ s/$grpname1//; $tempseries =~ s/\.//; if( ($line =~ m/SERIES-NAME/) && ($tempseries eq $templine) ) { ($period1[$i] = $garray1[$j1 + 1]) =~ s/FREQUENCY|\s+//g; if( $period1[$i] eq "QUARTERLY" ) {$period1[$i] = "QUART";} if( $period1[$i] eq "MONTHLY" ) {$period1[$i] = "MONTH";} ($ssperiod1[$i] = $garray1[$j1 + 2]) =~ s/SSPERIOD|\s+//g; ($ssyear1[$i] = $garray1[$j1 + 3]) =~ s/SSYEAR|\s+//g; ($seperiod1[$i] = $garray1[$j1 + 4]) =~ s/SEPERIOD|\s+//g; ($seyear1[$i] = $garray1[$j1 + 5]) =~ s/SEYEAR|\s+//g; ($asperiod1[$i] = $garray1[$j1 + 6]) =~ s/ASPERIOD|\s+//g; ($asyear1[$i] = $garray1[$j1 + 7]) =~ s/ASYEAR|\s+//g; ($aeperiod1[$i] = $garray1[$j1 + 8]) =~ s/AEPERIOD|\s+//g; ($aeyear1[$i] = $garray1[$j1 + 9]) =~ s/AEYEAR|\s+//g; ($usperiod1[$i] = $garray1[$j1 + 10]) =~ s/USPERIOD|\s+//g; ($usyear1[$i] = $garray1[$j1 + 11]) =~ s/USYEAR|\s+//g; ($ueperiod1[$i] = $garray1[$j1 + 12]) =~ s/UEPERIOD|\s+//g; ($ueyear1[$i] = $garray1[$j1 + 13]) =~ s/UEYEAR|\s+//g; if( !($garray1[$j1 + 6] =~ m/ASPERIOD/) && !($garray1[$j1 + 10] =~ m/USPERIOD/) ) { die "\n\n*** Error: your group summary files were not created " . "using the test version of TSA-download (version 2.6 or above); " . "quitting...\n\n" } ($numdata1[$i] = $garray1[$j1 + 14]) =~ s/DATA-ELEMENTS|\s+//g; ($method1[$i] = $garray1[$j1 + 29]) =~ s/IS_ADJUSTED:|\s+//g; if( $method1[$i] == 1 ) { $method1[$i] = 'I'; } else { $method1[$i] = 'D'; } ($model1[$i] = $garray1[$j1 + 19]) =~ s/MULADD|\s+//g; if( $model1[$i] == 0 ) { $model1[$i] = 'M'; } elsif( $model1[$i] == 1 ) { $model1[$i] = 'A'; } else { $model1[$i] = 'P'; } ($tma1[$i] = $garray1[$j1 + 15]) =~ s/TMA|\s+//g; ($sma1[$i] = $garray1[$j1 + 16]) =~ s/SMA|\s+//g; $sma1[$i] = '3x' . $sma1[$i]; ($pubtma1[$i] = $garray1[$j1 + 17]) =~ s/PUB_TMA|\s+//g; ($pubewp1[$i] = $garray1[$j1 + 18]) =~ s/PUB_EWP|\s+//g; $pubewp1[$i] =~ s/000$//; ($mvtd1 = $garray1[$j1 + 24]) =~ s/MVTD|\s+//g; ($wts1 = $garray1[$j1 + 27]) =~ s/DWTS_USED|\s+//g; if( ($mvtd1==1) || ($wts1==1) ) { $td1[$i] = 'Yes'; } else { $td1[$i] = 'No'; } # Search for and store trend breaks if exist: $tb1[$i] = ""; $value = ""; $tbcnt = 0; for($n=$j1+30; $n<=$#garray1; $n++) { if($garray1[$n] =~ m/TBRK/) { ($value = $garray1[$n]) =~ s/TBRK/
\*/g; @valar = split(/-/,$value); $value = $valar[0]; $tb1[$i] = $tb1[$i] . $value; $tbcnt++; } else { last; } } if($tb1[$i] eq "") { $tb1[$i] = "none"; } # Search for and store large extremes if exist: $lgex1[$i] = ""; $value = ""; $lgexcnt = 0; for($n=$j1+30+$tbcnt; $n<=$#garray1; $n++) { if($garray1[$n] =~ m/LGEX/) { ($value = $garray1[$n]) =~ s/LGEX/
\*/g; @valar = split(/-/,$value); $value = $valar[0]; $lgex1[$i] = $lgex1[$i] . $value; $lgexcnt++; } else { last; } } if($lgex1[$i] eq "") { $lgex1[$i] = "none"; } # Search for and store Additive Outliers (AOs) if exist: $ao1[$i] = ""; $value = ""; $aocnt = 0; for($n=$j1+30+$tbcnt+$lgexcnt; $n<=$#garray1; $n++) { if($garray1[$n] =~ m/AO/) { ($value = $garray1[$n]) =~ s/AO/
\*/g; @valar = split(/-/,$value); $value = $valar[0]; $ao1[$i] = $ao1[$i] . $value; $aocnt++; } else { last; } } if($ao1[$i] eq "") { $ao1[$i] = "none"; } # Search for and store seasonal breaks if exist: $sb1[$i] = ""; $value = ""; $sbcnt = 0; $countstring = 0; for($n=$j1+30+$tbcnt+$lgexcnt+$aocnt; $n<=$#garray1; $n++) { if($garray1[$n] =~ m/START_SBRK/) { $countstring = $countstring + 2; STARTSBRK: for($nn=$n+1; $nn<=$#garray1; $nn++) { if( !($garray1[$nn] =~ m/END_SBRK/) ) { $value = "
\* " . $garray1[$nn]; @valar = split(/-/,$value); $value = $valar[0]; $sb1[$i] = $sb1[$i] . $value; $sbcnt++; } elsif( ($garray1[$nn] =~ m/END_SBRK/) && ($garray1[$nn+1] =~ m/START_SBRK/) ) { $countstring = $countstring + 2; $n = $nn+1; goto STARTSBRK; } else { last; } } } else { last; } } if($sb1[$i] eq "") { $sb1[$i] = "none"; } # Following accounts for the START_SBRK and # END_SBRK additional keywords from above. if($sbcnt) { $sbcnt = $sbcnt + $countstring; } # Check to see if "START_EASTPROX" keyword exists # for EP effect. if( $garray1[$j1+30+$tbcnt+$lgexcnt+$aocnt+$sbcnt] =~ m/START_EASTPROX/ ) { $ep1[$i] = 'Yes'; $epcnt = 8; # if split year exists, there are more lines to count: ($epsplit = $garray1[$j1+30+$tbcnt+$lgexcnt+$aocnt+$sbcnt+1]) =~ s/SPLIT_YEAR|\s+//g; if($epsplit != 0) { $epcnt = 13; } } else { $ep1[$i] = 'No'; $epcnt = 0; } # Check to see if "START_FDPROX" keyword exists # for Father's Day proximity effect. if( $garray1[$j1+30+$tbcnt+$lgexcnt+$aocnt+$sbcnt+$epcnt] =~ m/START_FDPROX/ ) { $fd1[$i] = 'Yes'; } else { $fd1[$i] = 'No'; } # Store ARIMA model orders if exist. for($n=$j1+30+$tbcnt+$lgexcnt; $n<=$#garray1; $n++) { if($garray1[$n] =~ m/ARIMA_MODEL_/) { $arcnt = 0; for($nn=$n; $nn<=$n+5; $nn++) { ($arima1[$i][$arcnt] = $garray1[$nn]) =~ s/\D|\s+//g; $arcnt++; } last; } } if( !(defined $arima1[$i][0]) ) { $arima1[$i][0] = "none"; } # Store F2B summary measures if exist. for($n=$j1+30+$tbcnt+$lgexcnt; $n<=$#garray1; $n++) { if($garray1[$n] =~ m/F2B_/) { $arcnt = 0; for($nn=$n; $nn<=$n+6; $nn++) { ($F2B1[$i][$arcnt] = $garray1[$nn]) =~ s/F2B_//g; $F2B1[$i][$arcnt] =~ s/\s+/=/; @splitmeas = split(/=/,$F2B1[$i][$arcnt]); $x11value[$arcnt] = $splitmeas[1]; $arcnt++; } last; } } if( ($x11value[0]==0) && ($x11value[1]==0) && ($x11value[2]==0) && ($x11value[3]==0) && ($x11value[4]==0) && ($x11value[5]==0) ) { $F2B1[$i][0] = "none"; } # Store F2I summary measures if exist. for($n=$j1+30+$tbcnt+$lgexcnt; $n<=$#garray1; $n++) { if($garray1[$n] =~ m/F2I_/) { $arcnt = 0; for($nn=$n; $nn<=$n+7; $nn++) { ($F2I1[$i][$arcnt] = $garray1[$nn]) =~ s/F2I_//g; $F2I1[$i][$arcnt] =~ s/\s+/=/; @splitmeas = split(/=/,$F2I1[$i][$arcnt]); $x11value[$arcnt] = $splitmeas[1]; $arcnt++; } last; } } if( ($x11value[0]==0) && ($x11value[1]==0) && ($x11value[2]==0) && ($x11value[3]==0) && ($x11value[4]==0) && ($x11value[6]==0) ) { $F2I1[$i][0] = "none"; } # Store series data: can be either "DATA ORIGINAL", # "DATA ADJUSTED", or "DATA TREND": # $datatype1[$i] = 0, 1, 2 respectively. # Assign dataspans too for SA and Trend TSA-downloads. $data1[$i] = ""; $value = ""; for($n=$j1+30+$tbcnt+$lgexcnt; $n<=$#garray1; $n++) { if($garray1[$n] =~ m/DATA ORIGINAL/) { $datatype1[$i] = 0; for($nn=$n+1; $nn<=$n+$numdata1[$i]; $nn++) { $value = $garray1[$nn] . "\_"; $data1[$i] = $data1[$i] . $value; } last; } elsif($garray1[$n] =~ m/DATA ADJUSTED/) { # Redefine series span = to update span if non-zero, # else = analysis span if non-zero, else abort: if( $usperiod1[$i] ) { $ssperiod1[$i] = $usperiod1[$i]; $ssyear1[$i] = $usyear1[$i]; $seperiod1[$i] = $ueperiod1[$i]; $seyear1[$i] = $ueyear1[$i]; } elsif( ($usperiod1[$i]==0) && $asperiod1[$i] ) { $ssperiod1[$i] = $asperiod1[$i]; $ssyear1[$i] = $asyear1[$i]; $seperiod1[$i] = $aeperiod1[$i]; $seyear1[$i] = $aeyear1[$i]; } else { die "\n*** Analysis and Update spans for series " . "\"$series1[$i]\" both zero; cannot continue; " . "quitting...\n"; } $datatype1[$i] = 1; for($nn=$n+1; $nn<=$n+$numdata1[$i]; $nn++) { $value = $garray1[$nn] . "\_"; $data1[$i] = $data1[$i] . $value; } last; } elsif($garray1[$n] =~ m/DATA TREND/) { # Redefine series span = to update span if non-zero, # else = analysis span if non-zero, else abort: if( $usperiod1[$i] ) { $ssperiod1[$i] = $usperiod1[$i]; $ssyear1[$i] = $usyear1[$i]; $seperiod1[$i] = $ueperiod1[$i]; $seyear1[$i] = $ueyear1[$i]; } elsif( ($usperiod1[$i]==0) && $asperiod1[$i] ) { $ssperiod1[$i] = $asperiod1[$i]; $ssyear1[$i] = $asyear1[$i]; $seperiod1[$i] = $aeperiod1[$i]; $seyear1[$i] = $aeyear1[$i]; } else { die "\n*** Analysis and Update spans for series " . "\"$series1[$i]\" both zero; cannot continue; " . "quitting...\n"; } $datatype1[$i] = 2; for($nn=$n+1; $nn<=$n+$numdata1[$i]; $nn++) { $value = $garray1[$nn] . "\_"; $data1[$i] = $data1[$i] . $value; } last; } } last; } $j1++; } $j2 = 0; foreach $line (@garray2) { chomp($line); ($templine = $line) =~ s/SERIES-NAME//; $templine =~ s/^\s+|\s+$//g; ($tempseries = $series2[$i]) =~ s/$grpname2//; $tempseries =~ s/\.//; if( ($line =~ m/SERIES-NAME/) && ($tempseries eq $templine) ) { ($period2[$i] = $garray2[$j2 + 1]) =~ s/FREQUENCY|\s+//g; if( $period2[$i] eq "QUARTERLY" ) {$period2[$i] = "QUART";} if( $period2[$i] eq "MONTHLY" ) {$period2[$i] = "MONTH";} ($ssperiod2[$i] = $garray2[$j2 + 2]) =~ s/SSPERIOD|\s+//g; ($ssyear2[$i] = $garray2[$j2 + 3]) =~ s/SSYEAR|\s+//g; ($seperiod2[$i] = $garray2[$j2 + 4]) =~ s/SEPERIOD|\s+//g; ($seyear2[$i] = $garray2[$j2 + 5]) =~ s/SEYEAR|\s+//g; ($asperiod2[$i] = $garray2[$j2 + 6]) =~ s/ASPERIOD|\s+//g; ($asyear2[$i] = $garray2[$j2 + 7]) =~ s/ASYEAR|\s+//g; ($aeperiod2[$i] = $garray2[$j2 + 8]) =~ s/AEPERIOD|\s+//g; ($aeyear2[$i] = $garray2[$j2 + 9]) =~ s/AEYEAR|\s+//g; ($usperiod2[$i] = $garray2[$j2 + 10]) =~ s/USPERIOD|\s+//g; ($usyear2[$i] = $garray2[$j2 + 11]) =~ s/USYEAR|\s+//g; ($ueperiod2[$i] = $garray2[$j2 + 12]) =~ s/UEPERIOD|\s+//g; ($ueyear2[$i] = $garray2[$j2 + 13]) =~ s/UEYEAR|\s+//g; if( !($garray2[$j2 + 6] =~ m/ASPERIOD/) && !($garray2[$j2 + 10] =~ m/USPERIOD/) ) { die "\n\n*** Error: your group summary files were not created " . "using the test version of TSA-download (version 2.6 or above); " . "quitting...\n\n" } ($numdata2[$i] = $garray2[$j2 + 14]) =~ s/DATA-ELEMENTS|\s+//g; ($method2[$i] = $garray2[$j2 + 29]) =~ s/IS_ADJUSTED:|\s+//g; if( $method2[$i] == 1 ) { $method2[$i] = 'I'; } else { $method2[$i] = 'D'; } ($model2[$i] = $garray2[$j2 + 19]) =~ s/MULADD|\s+//g; if( $model2[$i] == 0 ) { $model2[$i] = 'M'; } elsif( $model2[$i] == 1 ) { $model2[$i] = 'A'; } else { $model2[$i] = 'P'; } ($tma2[$i] = $garray2[$j2 + 15]) =~ s/TMA|\s+//g; ($sma2[$i] = $garray2[$j2 + 16]) =~ s/SMA|\s+//g; $sma2[$i] = '3x' . $sma2[$i]; ($pubtma2[$i] = $garray2[$j2 + 17]) =~ s/PUB_TMA|\s+//g; ($pubewp2[$i] = $garray2[$j2 + 18]) =~ s/PUB_EWP|\s+//g; $pubewp2[$i] =~ s/000$//; ($mvtd2 = $garray2[$j2 + 24]) =~ s/MVTD|\s+//g; ($wts2 = $garray2[$j2 + 27]) =~ s/DWTS_USED|\s+//g; if( ($mvtd2==1) || ($wts2==1) ) { $td2[$i] = 'Yes'; } else { $td2[$i] = 'No'; } # Search for and store trend breaks if exist: $tb2[$i] = ""; $value = ""; $tbcnt = 0; for($n=$j2+30; $n<=$#garray2; $n++) { if($garray2[$n] =~ m/TBRK/) { ($value = $garray2[$n]) =~ s/TBRK/
\*/g; @valar = split(/-/,$value); $value = $valar[0]; $tb2[$i] = $tb2[$i] . $value; $tbcnt++; } else { last; } } if($tb2[$i] eq "") { $tb2[$i] = "none"; } # Search for and store large extremes if exist: $lgex2[$i] = ""; $value = ""; $lgexcnt = 0; for($n=$j2+30+$tbcnt; $n<=$#garray2; $n++) { if($garray2[$n] =~ m/LGEX/) { ($value = $garray2[$n]) =~ s/LGEX/
\*/g; @valar = split(/-/,$value); $value = $valar[0]; $lgex2[$i] = $lgex2[$i] . $value; $lgexcnt++; } else { last; } } if($lgex2[$i] eq "") { $lgex2[$i] = "none"; } # Search for and store Additive Outliers (AOs) if exist: $ao2[$i] = ""; $value = ""; $aocnt = 0; for($n=$j2+30+$tbcnt+$lgexcnt; $n<=$#garray2; $n++) { if($garray2[$n] =~ m/AO/) { ($value = $garray2[$n]) =~ s/AO/
\*/g; @valar = split(/-/,$value); $value = $valar[0]; $ao2[$i] = $ao2[$i] . $value; $aocnt++; } else { last; } } if($ao2[$i] eq "") { $ao2[$i] = "none"; } # Search for and store seasonal breaks if exist: $sb2[$i] = ""; $value = ""; $sbcnt = 0; $countstring = 0; for($n=$j2+30+$tbcnt+$lgexcnt+$aocnt; $n<=$#garray2; $n++) { if($garray2[$n] =~ m/START_SBRK/) { $countstring = $countstring + 2; STARTSBRK: for($nn=$n+1; $nn<=$#garray2; $nn++) { if( !($garray2[$nn] =~ m/END_SBRK/) ) { $value = "
\* " . $garray2[$nn]; @valar = split(/-/,$value); $value = $valar[0]; $sb2[$i] = $sb2[$i] . $value; $sbcnt++; } elsif( ($garray2[$nn] =~ m/END_SBRK/) && ($garray2[$nn+1] =~ m/START_SBRK/) ) { $countstring = $countstring + 2; $n = $nn+1; goto STARTSBRK; } else { last; } } } else { last; } } if($sb2[$i] eq "") { $sb2[$i] = "none"; } # Following accounts for the START_SBRK and # END_SBRK additional keywords from above. if($sbcnt) { $sbcnt = $sbcnt + $countstring; } # Check to see if "START_EASTPROX" keyword exists # for EP effect. if( $garray2[$j2+30+$tbcnt+$lgexcnt+$aocnt+$sbcnt] =~ m/START_EASTPROX/ ) { $ep2[$i] = 'Yes'; $epcnt = 8; # if split year exists, there are more lines to count: ($epsplit = $garray2[$j2+30+$tbcnt+$lgexcnt+$aocnt+$sbcnt+1]) =~ s/SPLIT_YEAR|\s+//g; if($epsplit != 0) { $epcnt = 13; } } else { $ep2[$i] = 'No'; $epcnt = 0; } # Check to see if "START_FDPROX" keyword exists # for Father's Day proximity effect. if( $garray2[$j2+30+$tbcnt+$lgexcnt+$aocnt+$sbcnt+$epcnt] =~ m/START_FDPROX/ ) { $fd2[$i] = 'Yes'; } else { $fd2[$i] = 'No'; } # Store ARIMA model orders if exist. for($n=$j2+30+$tbcnt+$lgexcnt; $n<=$#garray2; $n++) { if($garray2[$n] =~ m/ARIMA_MODEL_/) { $arcnt = 0; for($nn=$n; $nn<=$n+5; $nn++) { ($arima2[$i][$arcnt] = $garray2[$nn]) =~ s/\D|\s+//g; $arcnt++; } last; } } if( !(defined $arima2[$i][0]) ) { $arima2[$i][0] = "none"; } # Store F2B summary measures if exist. for($n=$j2+30+$tbcnt+$lgexcnt; $n<=$#garray2; $n++) { if($garray2[$n] =~ m/F2B_/) { $arcnt = 0; for($nn=$n; $nn<=$n+6; $nn++) { ($F2B2[$i][$arcnt] = $garray2[$nn]) =~ s/F2B_//g; $F2B2[$i][$arcnt] =~ s/\s+/=/; @splitmeas = split(/=/,$F2B2[$i][$arcnt]); $x11value[$arcnt] = $splitmeas[1]; $arcnt++; } last; } } if( ($x11value[0]==0) && ($x11value[1]==0) && ($x11value[2]==0) && ($x11value[3]==0) && ($x11value[4]==0) && ($x11value[5]==0) ) { $F2B2[$i][0] = "none"; } # Store F2I summary measures if exist. for($n=$j2+30+$tbcnt+$lgexcnt; $n<=$#garray2; $n++) { if($garray2[$n] =~ m/F2I_/) { $arcnt = 0; for($nn=$n; $nn<=$n+7; $nn++) { ($F2I2[$i][$arcnt] = $garray2[$nn]) =~ s/F2I_//g; $F2I2[$i][$arcnt] =~ s/\s+/=/; @splitmeas = split(/=/,$F2I2[$i][$arcnt]); $x11value[$arcnt] = $splitmeas[1]; $arcnt++; } last; } } if( ($x11value[0]==0) && ($x11value[1]==0) && ($x11value[2]==0) && ($x11value[3]==0) && ($x11value[4]==0) && ($x11value[5]==0) ) { $F2I2[$i][0] = "none"; } # Store series data: can be either "DATA ORIGINAL", # "DATA ADJUSTED", or "DATA TREND": # $datatype1[$i] = 0, 1, 2 respectively. # Assign dataspans too for SA and Trend TSA-downloads. $data2[$i] = ""; $value = ""; for($n=$j2+30+$tbcnt+$lgexcnt; $n<=$#garray2; $n++) { if($garray2[$n] =~ m/DATA ORIGINAL/) { $datatype2[$i] = 0; for($nn=$n+1; $nn<=$n+$numdata2[$i]; $nn++) { $value = $garray2[$nn] . "\_"; $data2[$i] = $data2[$i] . $value; } last; } elsif($garray2[$n] =~ m/DATA ADJUSTED/) { # Redefine series span = to update span if non-zero, # else = analysis span if non-zero, else abort: if( $usperiod2[$i] ) { $ssperiod2[$i] = $usperiod2[$i]; $ssyear2[$i] = $usyear2[$i]; $seperiod2[$i] = $ueperiod2[$i]; $seyear2[$i] = $ueyear2[$i]; } elsif( ($usperiod2[$i]==0) && $asperiod2[$i] ) { $ssperiod2[$i] = $asperiod2[$i]; $ssyear2[$i] = $asyear2[$i]; $seperiod2[$i] = $aeperiod2[$i]; $seyear2[$i] = $aeyear2[$i]; } else { die "\n*** Analysis and Update spans for series " . "\"$series2[$i]\" both zero; cannot continue; " . "quitting...\n"; } $datatype2[$i] = 1; for($nn=$n+1; $nn<=$n+$numdata2[$i]; $nn++) { $value = $garray2[$nn] . "\_"; $data2[$i] = $data2[$i] . $value; } last; } elsif($garray2[$n] =~ m/DATA TREND/) { # Redefine series span = to update span if non-zero, # else = analysis span if non-zero, else abort: if( $usperiod2[$i] ) { $ssperiod2[$i] = $usperiod2[$i]; $ssyear2[$i] = $usyear2[$i]; $seperiod2[$i] = $ueperiod2[$i]; $seyear2[$i] = $ueyear2[$i]; } elsif( ($usperiod2[$i]==0) && $asperiod2[$i] ) { $ssperiod2[$i] = $asperiod2[$i]; $ssyear2[$i] = $asyear2[$i]; $seperiod2[$i] = $aeperiod2[$i]; $seyear2[$i] = $aeyear2[$i]; } else { die "\n*** Analysis and Update spans for series " . "\"$series2[$i]\" both zero; cannot continue; " . "quitting...\n"; } $datatype2[$i] = 2; for($nn=$n+1; $nn<=$n+$numdata2[$i]; $nn++) { $value = $garray2[$nn] . "\_"; $data2[$i] = $data2[$i] . $value; } last; } } last; } $j2++; } # Check to see if series were actually present in specific group summary files. if( !(defined $period1[$i]) || !(defined $period2[$i]) ) { $skipcount++; next; } $i++; } $finalpaircount = $i; #----------------------------------------------------------------- # Report number of series pairs skipped to standard output. print "\nExecuting consistency.pl; version $version\n"; if( $skipcount ) { print "\n*** Warning: number of series pairs skipped from input " . "map table since unable to find matching group information " . " = $skipcount\n"; } else { print "\nYou're lucky! All series pairs from the input map table had " . "matching group information\n"; } #----------------------------------------------------------------- # Write consistency table results to output html file. if( !open(OUT, ">$outhtmlfile") ) { die "\n*** Couldn't open $outhtmlfile file for writing, quitting...\n"; } else { print "\nWriting consistency table to $outhtmlfile ...\n"; # Set up html page: print OUT "\n"; print OUT "\n"; print OUT "
Consistency Summary

\n"; print OUT "
Legend"; print OUT "
I = Indirectly adjusted\n"; print OUT "
D = Directly adjusted\n"; print OUT "
M = Multiplicative\n"; print OUT "
A = Additive\n"; print OUT "
P = Pseudo-Additive\n"; print OUT "
TMA = X11 Trend Moving Average filter length\n"; print OUT "
PTMA = Published Trend Moving Average filter length\n"; print OUT "
PEWP = Published End Weight Parameter\n"; print OUT "
SMA = Seasonal Moving Average filter length\n"; print OUT "
TD = Trading Day\n"; print OUT "
EP = Easter Proximity\n"; print OUT "
FD = Father's Day Proximity

\n"; print OUT "* YELLOW row = \"Series 1\" that is paired with \"Series 2\" in subsequent BLUE row\n"; print OUT "
* BLUE row = \"Series 2\" that is paired with \"Series 1\" in previous YELLOW row

\n"; # Set up table header: print OUT "\n"; print OUT "\n"; print OUT "\n"; print OUT "\n"; print OUT "\n"; print OUT "\n"; print OUT "\n"; print OUT "\n"; print OUT "\n"; print OUT "\n"; print OUT "\n"; print OUT "\n"; print OUT "\n"; print OUT "\n"; print OUT "\n"; print OUT "\n"; print OUT "\n"; if( $addpriors ) { print OUT "\n"; print OUT "\n"; print OUT "\n"; print OUT "\n"; } # Following variable used to count number of successful datatype matches, # hence number of times "Rcommands.txt" file is generated so can rewind # to start of DATA block each time. $numsuccessdatatypematch = 1; for($i = 0; $i < $finalpaircount; $i++) { $k = $i + 1; #----------------------------------------------------------------- # Compute consistency diagnostics if "-t 1" was specified; return # plots and test summaries using call to "&Diagnostics()" and write # all into a html file for a given series pair $k. This html is # linked to second series of pair in consistency table output. if( $computediags ) { # If periodicities same, check and summarise in diagnostics output # if any of the _main_ processing parameters were inconsistent: $test1title = "* TEST 1: CONSISTENCY (SANITY) CHECK BETWEEN MAIN " . "PARAMETERS IN ABOVE TABLE:"; if( $period1[$i] eq $period2[$i] ) { $badparmatch = "null"; # Note, the "method" setting is only checked for inconsistency # if either series is "not adjusted". The "not adjusted" setting # is not supported by TSA-download, so following block will never # be satisfied. Must wait for SEASABS version to pick this up. if( ($method1[$i] ne $method2[$i]) && (($method1[$i] eq "not adjusted") || ($method2[$i] eq "not adjusted")) ) { $badparmatch = "Method"; } if( $model1[$i] ne $model2[$i] ) { $badparmatch = $badparmatch . ', ' . "Model"; } if( $tma1[$i] != $tma2[$i] ) { $badparmatch = $badparmatch . ', ' . "X11 TMA"; } if( $pubtma1[$i] != $pubtma2[$i] ) { $badparmatch = $badparmatch . ', ' . "PUB_TMA"; } if( $pubewp1[$i] != $pubewp2[$i] ) { $badparmatch = $badparmatch . ', ' . "PUB_EWP"; } if( $sma1[$i] ne $sma2[$i] ) { $badparmatch = $badparmatch . ', ' . "SMA"; } if( $td1[$i] ne $td2[$i] ) { $badparmatch = $badparmatch . ', ' . "Trading day"; } if( $ep1[$i] ne $ep2[$i] ) { $badparmatch = $badparmatch . ', ' . "Easter"; } if( $fd1[$i] ne $fd2[$i] ) { $badparmatch = $badparmatch . ', ' . "Father's Day"; } if( $badparmatch =~ m/null, / ) { $badparmatch =~ s/null, //; $test1result = "*** The \"$badparmatch\" parameter settings are " . "not consistent for this series pair. Please review."; } else { $test1result = "*** All main processing parameters for " . "this series pair are consistent (i.e. \"Model\", " . "\"TMA\", \"PUB_TMA\", \"PUB_EWP\", " . "\"SMA\", \"Trading day\", \"Easter\" and " . "\"Father's Day\")."; } } else { $test1result = "*** Periodicities are not the same: inconsistencies " . "are expected in processing parameters and settings. " . "Please check all are appropriate."; } ($mvttimeplotfilehtml, $mvtvsmvtplotfilehtml, $ccfplotfilehtml, $acfplotfilehtml, $acfcointplotfilehtml, $diagnosticssumm, $datatypeflag, $Rfailureflag, $identicalflag) = &Diagnostics($period1[$i], $period2[$i], $model1[$i], $model2[$i], $numdata1[$i], $numdata2[$i], $datatype1[$i], $datatype2[$i], $data1[$i], $data2[$i], $ssperiod1[$i], $ssperiod2[$i], $seperiod1[$i], $seperiod2[$i], $ssyear1[$i], $ssyear2[$i], $seyear1[$i], $seyear2[$i], $outdir, $k, $numsuccessdatatypematch); #Windows: $diagouthtmlfile = $outdir . '\\\\' . 'diagnostics_pair_' . $k . '.html'; #Unix: #$diagouthtmlfile = $outdir . '/' . 'diagnostics_pair_' . $k . '.html'; open(HTMLDIAGOUT, ">$diagouthtmlfile"); # set up diagnostics html page: print HTMLDIAGOUT "\n"; print HTMLDIAGOUT "\n"; print HTMLDIAGOUT "
Test Diagnostics

"; # Write diagnostics to html file only if datatypes were consistent, or if # there were no "R" execution failures, or, if movements not identical. if( $datatypeflag == 0 ) { printf HTMLDIAGOUT "
%s
" . "
%s
" . "
*** No diagnostics computed; datatypes of input " . "group summary files are inconsistent. " . "Please check.
\n", $test1title, $test1result; } elsif( $Rfailureflag ) { printf HTMLDIAGOUT "
%s
" . "
%s
" . "
*** No diagnostics computed since \"R-code\" " . "execution failed for this series pair.
\n", $test1title, $test1result; $numsuccessdatatypematch++; } elsif( $identicalflag ) { printf HTMLDIAGOUT "
%s
" . "
%s

%s\n", $test1title, $test1result, $diagnosticssumm; $numsuccessdatatypematch++; } else { printf HTMLDIAGOUT "
" . "* TEMPORAL MOVEMENT PLOTS...
" . "

" . "
%s
" . "
%s
" . "
%s
\n", $mvttimeplotfilehtml, $test1title, $test1result, $diagnosticssumm; $numsuccessdatatypematch++; } close(HTMLDIAGOUT); } #----------------------------------------------------------------- # Construct "failed tests" string by parsing above diagnostic strings # and checking for existence of key lines. $failedtests = "null"; # TEST 1: if( $test1result =~ m/parameter settings are not consistent/ ) { $failedtests = $failedtests . '1, '; } # TEST 2: if( $diagnosticssumm =~ m/SERIES MAY NOT BE RELATED/ ) { $failedtests = $failedtests . '2, '; } # TEST 3: if( ($diagnosticssumm =~ m/DIFFERENCE IN MOVEMENTS SIGNIFICANTLY DIFFERENT/) || ($diagnosticssumm =~ m/SLOPE SIGNIFICANTLY DIFFERENT FROM ONE/) || ($diagnosticssumm =~ m/INTERCEPT SIGNIFICANTLY DIFFERENT FROM ZERO/) ) { $failedtests = $failedtests . '3, '; } # TEST 4: if( $diagnosticssumm =~ m/ALL OUTLIERS:/ ) { $failedtests = $failedtests . '4, '; } # TEST 5: if( $diagnosticssumm =~ m/SYSTEMATIC INCONSISTENCY EXISTS AT ABOVE/ ) { $failedtests = $failedtests . '5, '; } # TEST 6: if( ($diagnosticssumm =~ m/WARNING: series pair not integrated with same order/) || ($diagnosticssumm =~ m/series pair is not cointegrating/) || ($diagnosticssumm =~ m/Series pair not cointegrating/) ) { $failedtests = $failedtests . '6, '; } if( $failedtests eq "null" ) { $failedtests = "Diagnostic tests (no failures)"; } else { $failedtests =~ s/null//; $failedtests =~ s/,\s+$//; $failedtests = "Diagnostic tests failed: " . $failedtests; } #----------------------------------------------------------------- # Consistency table data: if first series of pair is repeated in # input map table, write only once in output html. if( ($i > 0) && ($series1[$i] eq $series1[$i-1]) ) { goto WRITESERIES2; } print OUT "
\n"; print OUT "\n"; print OUT "\n"; print OUT "\n"; print OUT "\n"; print OUT "\n"; print OUT "\n"; print OUT "\n"; print OUT "\n"; print OUT "\n"; print OUT "\n"; print OUT "\n"; print OUT "\n"; if( !(defined $arima1[$i][1]) ) { print OUT "\n"; } else { print OUT "\n"; } if( $F2B1[$i][0] eq "none" ) { print OUT "\n"; } else { print OUT "\n"; } if( $F2I1[$i][0] eq "none" ) { print OUT "\n"; } else { print OUT "\n"; } if( $addpriors ) { print OUT "\n"; print OUT "\n"; print OUT "\n"; print OUT "\n"; } WRITESERIES2: if( $computediags ) { printf OUT "\n", $series2[$i], $diagouthtmlfile, $failedtests; } else { print OUT "\n"; } print OUT "\n"; print OUT "\n"; print OUT "\n"; print OUT "\n"; print OUT "\n"; print OUT "\n"; print OUT "\n"; print OUT "\n"; print OUT "\n"; print OUT "\n"; print OUT "\n"; print OUT "\n"; if( !(defined $arima2[$i][1]) ) { print OUT "\n"; } else { print OUT "\n"; } if( $F2B2[$i][0] eq "none" ) { print OUT "\n"; } else { print OUT "\n"; } if( $F2I2[$i][0] eq "none" ) { print OUT "\n"; } else { print OUT "\n"; } if( $addpriors ) { print OUT "\n"; print OUT "\n"; print OUT "\n"; print OUT "\n"; } print "......................Wrote series pair $k\......................\n"; } print OUT "
Group.SeriesAnalysis Span.Update Span...PeriodMethodModelTMAPTMAPEWPSMATDEPFDARIMA(pdq)(PDQ)F2B Measures..........F2I Measures..........Trend Breaks....Seasonal Breaks..Large Extremes..Additive Outliers..
$series1[$i]$asperiod1[$i]/$asyear1[$i]-$aeperiod1[$i]/$aeyear1[$i]$usperiod1[$i]/$usyear1[$i]-$ueperiod1[$i]/$ueyear1[$i]$period1[$i]$method1[$i]$model1[$i]$tma1[$i]$pubtma1[$i]$pubewp1[$i]$sma1[$i]$td1[$i]$ep1[$i]$fd1[$i]$arima1[$i][0] " . "($arima1[$i][0],$arima1[$i][1],$arima1[$i][2]) " . "($arima1[$i][3],$arima1[$i][4],$arima1[$i][5])$F2B1[$i][0] " . "$F2B1[$i][0]
$F2B1[$i][1]
$F2B1[$i][2] " . "
$F2B1[$i][3]
$F2B1[$i][4]
$F2I1[$i][0] " . "Stable Seas: $F2I1[$i][0]
Stable Seas: $F2I1[$i][1] " . "
Stable Seas: $F2I1[$i][2]
Stable Seas: $F2I1[$i][3] " . "
Stable Seas: $F2I1[$i][4]
Stable Seas: $F2I1[$i][5] " . "
Moving Seas: $F2I1[$i][6]
Moving Seas: $F2I1[$i][7]
$tb1[$i]$sb1[$i]$lgex1[$i]$ao1[$i]

%s

\n " . "
%s

$series2[$i]$asperiod2[$i]/$asyear2[$i]-$aeperiod2[$i]/$aeyear2[$i]$usperiod2[$i]/$usyear2[$i]-$ueperiod2[$i]/$ueyear2[$i]$period2[$i]$method2[$i]$model2[$i]$tma2[$i]$pubtma2[$i]$pubewp2[$i]$sma2[$i]$td2[$i]$ep2[$i]$fd2[$i]$arima2[$i][0] " . "($arima2[$i][0],$arima2[$i][1],$arima2[$i][2]) " . "($arima2[$i][3],$arima2[$i][4],$arima2[$i][5])$F2B2[$i][0] " . "$F2B2[$i][0]
$F2B2[$i][1]
$F2B2[$i][2] " . "
$F2B2[$i][3]
$F2B2[$i][4]
$F2I2[$i][0] " . "Stable Seas: $F2I2[$i][0]
Stable Seas: $F2I2[$i][1] " . "
Stable Seas: $F2I2[$i][2]
Stable Seas: $F2I2[$i][3] " . "
Stable Seas: $F2I2[$i][4]
Stable Seas: $F2I2[$i][5] " . "
Moving Seas: $F2I2[$i][6]
Moving Seas: $F2I2[$i][7]
$tb2[$i]$sb2[$i]$lgex2[$i]$ao2[$i]
\n"; print OUT "
Made using consistency.pl; version $version\n"; print OUT "
Copyright (C) 2006 Australian Bureau of Statistics\n"; } close(OUT); #----------------------------------------------------------------- # If SpawnBrowserFlag is set, then display results directly on user's screen: if( $spawnviewer ) { print "\nSpawning browser on $outhtmlfile; standby...\n\n"; #Windows: if( system(" \"$outhtmlfile\" ") ) { #Unix: #if( system(" open $outhtmlfile ") ) { print "\n*** Warning: could not spawn browser on $outhtmlfile; " . "continuing...\n"; } } #--------------------------- Subroutines -------------------------- # Time-match series data and movements and compute diagnostics by # executing "R" batch file. sub Diagnostics { my($period1, $period2, $model1, $model2, $numdata1, $numdata2, $datatype1, $datatype2, $data1, $data2, $ssperiod1, $ssperiod2, $seperiod1, $seperiod2, $ssyear1, $ssyear2, $seyear1, $seyear2, $outdir, $k, $numsuccessdatatypematch) = @_; my($matcheds1, $matcheds2, $moves1, $moves2, $absmoves1, $absmoves2, $timestring, $args, $mvttimeplotfile, $mvtvsmvtplotfile, $ccfplotfile, $acfplotfile, $diagnosticssumm, $datatypeflag, $ii, @routlines, $routline, $skiplines, $linkline1, $linkline2, $linkline3, $linkline4, $Rfailureflag, $identicalflag, $linkline5, $acfcointplotfile, $mvtvsmvtplotfilehtml, $mvttimeplotfilehtml, $ccfplotfilehtml, $acfplotfilehtml, $acfcointplotfilehtml); $Rfailureflag = 0; $identicalflag = 0; if($datatype1 != $datatype2) { $datatypeflag = 0; $mvttimeplotfile = "null"; $mvtvsmvtplotfile = "null"; $ccfplotfile = "null"; $acfplotfile = "null"; $acfcointplotfile = "null"; $diagnosticssumm = "null"; } else { # if datattypes consistent. $datatypeflag = 1; #------------- # Compute diagnostics and plots in "R". First create arrays # of movement data by matching $data1 and $data2 at equal times. # Return arrays of series data, equal time movements and timestrings. ($matcheds1, $matcheds2, $moves1, $moves2, $absmoves1, $absmoves2, $timestring) = &MatchTimeSeries($period1, $period2, $numdata1, $numdata2, $data1, $data2, $ssperiod1, $ssperiod2, $seperiod1, $seperiod2, $ssyear1, $ssyear2, $seyear1, $seyear2); #------------- # Reformat $outdir for windows. $mvttimeplotfile = $outdir . '\\\\' . 'mvt_vs_time_pair_' . $k . '.png'; $mvtvsmvtplotfile = $outdir . '\\\\' . 'mvt_vs_mvt_pair_' . $k . '.png'; $ccfplotfile = $outdir . '\\\\' . 'cross-correlation_pair_' . $k .'.png'; $acfplotfile = $outdir . '\\\\' . 'pattern_acf_pair_' . $k . '.png'; $acfcointplotfile = $outdir . '\\\\' . 'cointegration_acf_pair_' . $k . '.png'; if($outdir eq ".") { $mvttimeplotfile =~ s/^.\\\\//; $mvtvsmvtplotfile =~ s/^.\\\\//; $ccfplotfile =~ s/^.\\\\//; $acfplotfile =~ s/^.\\\\//; $acfcointplotfile =~ s/^.\\\\//; } #------------- # Create "R" data input file. open(RDATA, ">Rinpdata.txt"); for($ii = 0; $ii <= $#timestring; $ii++){ print RDATA "$timestring[$ii] $moves1[$ii] $moves2[$ii] " . "$absmoves1[$ii] $absmoves2[$ii] $matcheds1[$ii] " . "$matcheds2[$ii] $model1 $model2\n"; } close(RDATA); #------------- # Create "R" input command batch file. &CreateRbatchfile($mvttimeplotfile, $mvtvsmvtplotfile, $ccfplotfile, $acfplotfile, $acfcointplotfile, $numsuccessdatatypematch); #------------- # Execute above batch file. $args = "\"S:\\R\\2.0.1\\rw2001\\bin\\Rterm.exe\" -q --no-restore " . "--no-save < Rcommands.txt > R.out HOME=\"$outdir\""; if( system(" \"$args\" ") ) { print "\n*** Warning: could not execute \"R\" to create plot for " . "series pair $k; continuing...\n"; $Rfailureflag = 1; } #Unix: #$args = "/usr/bin/R -q --no-restore --no-save -q --no-restore " . # "--no-save < Rcommands.txt > R.out HOME=\"$outdir\""; #if( system(" $args ") ) { # print "\n*** Warning: could not execute \"R\" to create plot for " . # "series pair $k; continuing...\n"; # $Rfailureflag = 1; #} #------------- # Store required diagnostic lines from R.out file into a single # html formatted string. if( !$Rfailureflag ) { $diagnosticssumm = ""; # skip some lines at top of R.out file: $skiplines = 5; open(ROUT, "; $ii = 0; foreach $routline (@routlines) { if($ii <= $skiplines || ($routline =~ m/\=\=\=\=\=/) ) { $ii++; next; } # lines starting with just single "*" => store as normal: if( ($routline =~ m/^\*/) && !($routline =~ m/^\*\*\*/) ) { chomp($routline); $routline = "\" . $routline; $diagnosticssumm = $diagnosticssumm . "$routline\n"; # lines starting with "***" => colour these red: } elsif( $routline =~ m/^\*\*\*/ ) { chomp($routline); $routline = "\\" . $routline . "\<\/font\>"; $diagnosticssumm = $diagnosticssumm . "$routline\n"; # retain blank lines: } elsif( !($routline =~ m/.{1,}/) ) { chomp($routline); $routline = "\" . $routline; $diagnosticssumm = $diagnosticssumm . "$routline\n"; } $ii++; } close(ROUT); # Create html wrappers to plot files so have better quality when displayed: ($mvttimeplotfilehtml = $mvttimeplotfile) =~ s/\.png/\.html/; ($mvtvsmvtplotfilehtml = $mvtvsmvtplotfile) =~ s/\.png/\.html/; ($ccfplotfilehtml = $ccfplotfile) =~ s/\.png/\.html/; ($acfplotfilehtml = $acfplotfile) =~ s/\.png/\.html/; ($acfcointplotfilehtml = $acfcointplotfile) =~ s/\.png/\.html/; open(HTMLPLOT, ">$mvttimeplotfilehtml"); print HTMLPLOT "\\\<\/BODY\>\n"; close(HTMLPLOT); open(HTMLPLOT, ">$mvtvsmvtplotfilehtml"); print HTMLPLOT "\\\<\/BODY\>\n"; close(HTMLPLOT); open(HTMLPLOT, ">$ccfplotfilehtml"); print HTMLPLOT "\\\<\/BODY\>\n"; close(HTMLPLOT); open(HTMLPLOT, ">$acfplotfilehtml"); print HTMLPLOT "\\\<\/BODY\>\n"; close(HTMLPLOT); open(HTMLPLOT, ">$acfcointplotfilehtml"); print HTMLPLOT "\\\<\/BODY\>\n"; close(HTMLPLOT); # Make some key lines hyperlinks to 'html-wrapped' plots: $linkline1 = '\* CROSS-CORRELATION FOR "RELATEDNESS" \(OR WEAK CONSISTENCY\) CHECK:'; $linkline2 = '\* EQUAL-MOVEMENT MAGNITUDE TESTS \(ON OUTLIER-CORRECTED MOVEMENTS\):'; $linkline3 = '\* MOVEMENT VS MOVEMENT OUTLIER TEST:'; $linkline4 = '\* TEST FOR SIGNIFICANT TEMPORAL PATTERN IN MOVEMENT DIFFERENCES \(M1 - M2\):'; $linkline5 = '\* UNIT ROOT & COINTEGRATION TESTS:'; $diagnosticssumm =~ s/$linkline1/\\ \* TEST 2: $linkline1 PLOTS...\<\/font\><\/b>\<\/a\>/; $diagnosticssumm =~ s/$linkline2/\\ \* TEST 3: $linkline2 PLOTS...\<\/font\><\/b>\<\/a\>/; $diagnosticssumm =~ s/$linkline3/\\ \* TEST 4: $linkline3 PLOTS...\<\/font\><\/b>\<\/a\>/; $diagnosticssumm =~ s/$linkline4/\\ \* TEST 5: $linkline4 PLOTS...\<\/font\><\/b>\<\/a\>/; $diagnosticssumm =~ s/$linkline5/\\ \* TEST 6: $linkline5 PLOTS...\<\/font\><\/b>\<\/a\>/; $diagnosticssumm =~ s/\\\*//g; $diagnosticssumm =~ s/\\\(/\(/g; $diagnosticssumm =~ s/\\\)/\)/g; # If movements were identical. if( $diagnosticssumm =~ m/two series appear identical/) { $identicalflag = 1; } } #------------- # Clean up. unlink("Rinpdata.txt","Rcommands.txt","R.out"); } return($mvttimeplotfilehtml, $mvtvsmvtplotfilehtml, $ccfplotfilehtml, $acfplotfilehtml, $acfcointplotfilehtml, $diagnosticssumm, $datatypeflag, $Rfailureflag, $identicalflag); } #----------------------------------------------------------------- # Match two series data strings: $data1 and $data2 at equal times. # Return arrays of matched data values. sub MatchTimeSeries { my($period1, $period2, $numdata1, $numdata2, $data1, $data2, $ssperiod1, $ssperiod2, $seperiod1, $seperiod2, $ssyear1, $ssyear2, $seyear1, $seyear2) = @_; my(@svals1, @svals2, $val, $m, $p, $q, @month1, @month2, @year1, @year2, $divisor1, $divisor2, @qsvals1, @qsvals2, @qmonth1, @qmonth2, @qyear1, @qyear2, $x, $y, $z); # Initialise. @svals1=(); @svals2=(); @qsvals1=(); @qsvals2=(); @month1=(); @month2=(); @qmonth1=(); @qmonth2=(); @year1=(); @year2=(); @qyear1=(); @qyear2=(); @matcheds1=(); @matcheds2=(); @moves1=(); @moves2=(); @absmoves1=(); @absmoves2=(); @timestring=(); # Convert input data series strings into arrays. @svals1 = split(/\_/, $data1); @svals2 = split(/\_/, $data2); # convert input periods into month numbers and quarter numbers # for quarterly series. $ssperiod1 =~ s/^0//; $ssperiod2 =~ s/^0//; $seperiod1 =~ s/^0//; $seperiod2 =~ s/^0//; $divisor1 = 12; $divisor2 = 12; if($period1 eq 'QUART') { $divisor1 = 4; } if($period2 eq 'QUART') { $divisor2 = 4; } # Store series data / month or qtr / year into arrays. # series 1: $m = 0; $p = 0; $q = 0; foreach $val (@svals1) { $val =~ s/^\s+|\s+$//g; $svals1[$m] = $val; $month1[$m] = $ssperiod1 + $p; $year1[$m] = $ssyear1 + $q; # print " X $year1[$m] : $month1[$m] : $svals1[$m]\n"; if( (int($month1[$m]/$divisor1) == ($month1[$m]/$divisor1)) && (int($month1[$m]/$divisor1) > 0) ) { $ssperiod1 = 0; $p = 0; $q++; } $m++; $p++; } # series 2: $m = 0; $p = 0; $q = 0; foreach $val (@svals2) { $val =~ s/^\s+|\s+$//g; $svals2[$m] = $val; $month2[$m] = $ssperiod2 + $p; $year2[$m] = $ssyear2 + $q; # print " Y $year2[$m] : $month2[$m] : $svals2[$m]\n"; if( (int($month2[$m]/$divisor2) == ($month2[$m]/$divisor2)) && (int($month2[$m]/$divisor2) > 0) ) { $ssperiod2 = 0; $p = 0; $q++; } $m++; $p++; } # Convert any monthly series 1 to quarterly by summing data # from every consequetive three months. Store new data # in new arrays: @qsvals1, @qmonth1, @qyear1. if( ($period1 eq 'MONTH') && ($period2 eq 'QUART') ) { $q = 0; $m = 0; foreach $val (@svals1) { if( ($month1[$m] == 1) ) { if( (defined $svals1[$m]) && (defined $svals1[$m+1]) && (defined $svals1[$m+2]) ) { $qsvals1[$q] = $svals1[$m] + $svals1[$m+1] + $svals1[$m+2]; $qmonth1[$q] = 1; $qyear1[$q] = $year1[$m]; $q++; } else { last; } } elsif( ($month1[$m] == 4) ) { if( (defined $svals1[$m]) && (defined $svals1[$m+1]) && (defined $svals1[$m+2]) ) { $qsvals1[$q] = $svals1[$m] + $svals1[$m+1] + $svals1[$m+2]; $qmonth1[$q] = 2; $qyear1[$q] = $year1[$m]; $q++; } else { last; } } elsif( ($month1[$m] == 7) ) { if( (defined $svals1[$m]) && (defined $svals1[$m+1]) && (defined $svals1[$m+2]) ) { $qsvals1[$q] = $svals1[$m] + $svals1[$m+1] + $svals1[$m+2]; $qmonth1[$q] = 3; $qyear1[$q] = $year1[$m]; $q++; } else { last; } } elsif( ($month1[$m] == 10) ) { if( (defined $svals1[$m]) && (defined $svals1[$m+1]) && (defined $svals1[$m+2]) ) { $qsvals1[$q] = $svals1[$m] + $svals1[$m+1] + $svals1[$m+2]; $qmonth1[$q] = 4; $qyear1[$q] = $year1[$m]; $q++; } else { last; } } $m++; } } # Convert any monthly series 2 to quarterly by summing data # from every consequetive three months. Store new data # in new arrays: @qsvals2, @qmonth2, @qyear2. if( ($period2 eq 'MONTH') && ($period1 eq 'QUART') ) { $q = 0; $m = 0; foreach $val (@svals2) { if( ($month2[$m] == 1) ) { if( (defined $svals2[$m]) && (defined $svals2[$m+1]) && (defined $svals2[$m+2]) ) { $qsvals2[$q] = $svals2[$m] + $svals2[$m+1] + $svals2[$m+2]; $qmonth2[$q] = 1; $qyear2[$q] = $year2[$m]; $q++; } else { last; } } elsif( ($month2[$m] == 4) ) { if( (defined $svals2[$m]) && (defined $svals2[$m+1]) && (defined $svals2[$m+2]) ) { $qsvals2[$q] = $svals2[$m] + $svals2[$m+1] + $svals2[$m+2]; $qmonth2[$q] = 2; $qyear2[$q] = $year2[$m]; $q++; } else { last; } } elsif( ($month2[$m] == 7) ) { if( (defined $svals2[$m]) && (defined $svals2[$m+1]) && (defined $svals2[$m+2]) ) { $qsvals2[$q] = $svals2[$m] + $svals2[$m+1] + $svals2[$m+2]; $qmonth2[$q] = 3; $qyear2[$q] = $year2[$m]; $q++; } else { last; } } elsif( ($month2[$m] == 10) ) { if( (defined $svals2[$m]) && (defined $svals2[$m+1]) && (defined $svals2[$m+2]) ) { $qsvals2[$q] = $svals2[$m] + $svals2[$m+1] + $svals2[$m+2]; $qmonth2[$q] = 4; $qyear2[$q] = $year2[$m]; $q++; } else { last; } } $m++; } } # Time-match above series and store in new arrays returned below. # Only match monthly-to-monthly or quarterly-to-quarterly. if( ($period1 eq 'MONTH') && ($period2 eq 'QUART') ) { $x = 0; for($y = 0; $y <= $#qsvals1; $y++) { for($z = 0; $z <= $#svals2; $z++) { if( ($qyear1[$y] == $year2[$z]) && ($qmonth1[$y] == $month2[$z]) ) { $matcheds1[$x] = $qsvals1[$y]; $matcheds2[$x] = $svals2[$z]; $timestring[$x] = $qmonth1[$y] . '/' . $qyear1[$y]; $x++; last; } } } } elsif( ($period2 eq 'MONTH') && ($period1 eq 'QUART') ) { $x = 0; for($y = 0; $y <= $#qsvals2; $y++) { for($z = 0; $z <= $#svals1; $z++) { if( ($qyear2[$y] == $year1[$z]) && ($qmonth2[$y] == $month1[$z]) ) { $matcheds1[$x] = $svals1[$z]; $matcheds2[$x] = $qsvals2[$y]; $timestring[$x] = $qmonth2[$y] . '/' . $qyear2[$y]; $x++; last; } } } } else { # if either both quarterly or monthly. $x = 0; for($y = 0; $y <= $#svals2; $y++) { for($z = 0; $z <= $#svals1; $z++) { if( ($year2[$y] == $year1[$z]) && ($month2[$y] == $month1[$z]) ) { $matcheds1[$x] = $svals1[$z]; $matcheds2[$x] = $svals2[$y]; $timestring[$x] = $month2[$y] . '/' . $year2[$y]; $x++; last; } } } } # Ensure lengths of arrays are the same. if( $#matcheds1 != $#matcheds2 ) { print "*** Warning: number of elements in each time-matched " . "series are not the same! This is a pretty serious problem.\n"; } # Create point-to-point % movement arrays. # For cases where divisor S[t-1] = 0, set the movement to 99.999. for($x = 0; $x <= $#matcheds1; $x++) { $moves1[$x] = 99.999; $moves2[$x] = 99.999; $absmoves1[$x] = 99.999; $absmoves2[$x] = 99.999; if( $x > 0 ) { # since mvts for $x = 0 not defined (set to 99.999)! if( ($matcheds1[$x - 1] != 0) && ($matcheds2[$x - 1] != 0) ) { $moves1[$x] = (($matcheds1[$x] - $matcheds1[$x - 1]) / $matcheds1[$x - 1]) * 100.0; $moves2[$x] = (($matcheds2[$x] - $matcheds2[$x - 1]) / $matcheds2[$x - 1]) * 100.0; } $absmoves1[$x] = $matcheds1[$x] - $matcheds1[$x - 1]; $absmoves2[$x] = $matcheds2[$x] - $matcheds2[$x - 1]; } } return(\@matcheds1, \@matcheds2, \@moves1, \@moves2, \@absmoves1, \@absmoves2, \@timestring); } #----------------------------------------------------------------- # Create R-commands temporary batch file by reading in master copy # under __DATA__ below, replace output plot filenames with those # represented by variables. sub CreateRbatchfile { my($mvttimeplotfile, $mvtvsmvtplotfile, $ccfplotfile, $acfplotfile, $acfcointplotfile, $numsuccessdatatypematch) = @_; my($liner, $pos1, $pos2, $ii); $pos1 = tell(DATA); if( $numsuccessdatatypematch == 1 ) { @rlines = ; } $pos2 = tell(DATA); if( $numsuccessdatatypematch > 1 ) { seek(DATA, -($pos2 - $pos1), 0); } open(RIN, ">Rcommands.txt"); for($ii = 0; $ii < $#rlines; $ii++) { $liner = $rlines[$ii]; if( $liner =~ /RRR/ ) { chomp($liner); $liner =~ s/RRR//; $liner =~ s/mvt_vs_time\.png/$mvttimeplotfile/; $liner =~ s/mvt_vs_mvt\.png/$mvtvsmvtplotfile/; $liner =~ s/pattern_acf\.png/$acfplotfile/; $liner =~ s/ccf\.png/$ccfplotfile/; $liner =~ s/coint\.png/$acfcointplotfile/; print RIN "$liner\n"; } } close(RIN); return 0; } #---------------------END OF consistency.pl----------------------- __DATA__ RRR ####################### START R-CODE ######################### RRR RRR options(echo = FALSE) RRR library(MASS); RRR suppressWarnings(library(urca)); RRR RRR # Read in data file: RRR # times, mvt1, mvt2, absmvt1, absmvt2, series1, series2, model1, model2: RRR xydata <- scan("Rinpdata.txt", list("",0,0,0,0,0,0,"","")); RRR RRR #------------------------------------------------------------- RRR # Assign variables to columns in above input file: RRR RRR times_in <- xydata[[1]]; RRR x_in <- xydata[[2]]; RRR y_in <- xydata[[3]]; RRR absx_in <- xydata[[4]]; RRR absy_in <- xydata[[5]]; RRR serx_in <- xydata[[6]]; RRR sery_in <- xydata[[7]]; RRR model1 <- xydata[[8]]; RRR model2 <- xydata[[9]]; RRR RRR #------------------------------------------------------------- RRR # Filter out 99.999 mvt values which were assigned in perl script for RRR # cases when mvts could not be computed because of zero data values. RRR # New arrays are created. This is important since otherwise, rho RRR # estimates will be biased. RRR RRR times <- ""; RRR x <- 0; RRR y <- 0; RRR absx <- 0; RRR absy <- 0; RRR serx <- 0; RRR sery <- 0; RRR jj <- 1; RRR RRR for( ii in 1:length(times_in) ) { RRR if( (x_in[[ii]] != 99.999) && (y_in[[ii]] != 99.999) ) { RRR times[[jj]] <- times_in[[ii]]; RRR x[[jj]] <- x_in[[ii]]; RRR y[[jj]] <- y_in[[ii]]; RRR absx[[jj]] <- absx_in[[ii]]; RRR absy[[jj]] <- absy_in[[ii]]; RRR serx[[jj]] <- serx_in[[ii]]; RRR sery[[jj]] <- sery_in[[ii]]; RRR jj = jj + 1; RRR } RRR } RRR RRR rm(ii, jj, times_in, x_in, y_in, absx_in, absy_in, serx_in, sery_in); RRR RRR #------------------------------------------------------------- RRR # Check to see if movements in two series are exactly the same RRR # by: if frac(M1-M2 = 0) > maxidentfrac, then stop here since RRR # robust linear regression, MAD and sd measures below will be RRR # unreliable. RRR RRR maxidentfrac <- 98.5; # maximum tolerable fraction in % units. RRR mvtdiffcheck <- x - y; RRR RRR countzeros <- 0; RRR for( ii in 1:length(mvtdiffcheck) ) { RRR if( mvtdiffcheck[[ii]] == 0 ) { RRR countzeros = countzeros + 1; RRR } RRR } RRR RRR if( (100*(countzeros/length(mvtdiffcheck))) > maxidentfrac ) { RRR write(paste("\n*** Movements in two series appear identical. Fraction of all movement differences that are exactly zero (i.e. M1 - M2 = 0) is", round((100*(countzeros/length(mvtdiffcheck))),2), "%. Did not proceed to compute any diagnostics for this heuristic case!"), file=""); RRR q(); RRR } RRR RRR #------------------------------------------------------------- RRR # Robust linear regression using "rlm" function (outlier resistant). RRR # Try "MM" method first, if fails (from size of returned object RRR # containing just error message string), re-execute with "M" method. RRR # Otherwise let fail. RRR RRR lin <- try(suppressWarnings(rlm(y ~ x + 1, method="MM")), silent=TRUE); RRR if( length(lin) == 1 ) { RRR lin <- suppressWarnings(rlm(y ~ x + 1, method="M")); RRR } RRR RRR par <- coefficients(summary(lin)); RRR RRR # If large frac. of mvts same, then saving parameters may fail in cases RRR # where slope = exactly 1 or se's exactly 0. Therefore must guard against: RRR RRR intercept <- try(par[1,1], silent=TRUE); RRR if( length(grep("Error in", as.character(intercept))) ) { RRR intercept <- 0; RRR } RRR RRR slope <- try(par[2,1], silent=TRUE); RRR if( length(grep("Error in", as.character(slope))) ) { RRR slope <- 1; RRR } RRR RRR seint <- try(par[1,2], silent=TRUE); RRR if( length(grep("Error in", as.character(seint))) ) { RRR seint <- 0; RRR } RRR RRR seslope <- try(par[2,2], silent=TRUE); RRR if( length(grep("Error in", as.character(seslope))) ) { RRR seslope <- 0; RRR } RRR RRR tint <- abs((intercept - 0)/seint); ## H0: intercept=0 RRR tslope <- abs((slope - 1)/seslope); ## H0: slope=1 RRR # If se's happen to be zero due to large frac. of similar mvts: RRR if( seint == 0 ) { RRR tint <- 99999999; RRR } RRR if( seslope == 0 ) { RRR tslope <- 99999999; RRR } RRR pvalueint <- 2*pt(tint, (length(x)-2), lower.tail = FALSE); RRR pvalueslope <- 2*pt(tslope, (length(x)-2), lower.tail = FALSE); RRR RRR # residuals in "lin" object will be corrupted if there were errors RRR # in saving regression parameters above. Therefore, need to compute RRR # these explicitly. Otherwise, proceed as normal. RRR RRR if( (intercept == 0) || (slope == 1) || (seint == 0) || (seslope == 0) ) { RRR resid <- y - (slope*x + intercept); RRR } else { RRR resid <- residuals(lin); RRR } RRR rm(lin,par); RRR RRR slopeplotstring <- paste("slope =",round(slope,3),"+/-",round(seslope,3)); RRR intplotstring <- paste("intercept =",round(intercept,3),"+/-",round(seint,3)); RRR RRR #------------------------------------------------------------- RRR # Find outliers in residual distribution using MAD method with "threshold". RRR # Also store original data with outliers removed for robust cov. matrix. RRR RRR threshold <- 3.0; RRR MADvalue <- mad(resid); RRR # If MAD value happens to be zero (ie. large frac. of mvts same), then use sd. RRR if(MADvalue == 0) { RRR MADvalue <- sd(resid); RRR } RRR zscore <- ( resid - median(resid) ) / MADvalue; RRR RRR #initialise. RRR outtimes <- 0; RRR outx <- 0; RRR outy <- 0; RRR outzscore <- 0; RRR nooutx <- 0; RRR noouty <- 0; RRR RRR j <- 1; RRR k <- 1; RRR for( i in 1:length(zscore) ) { RRR if( abs(zscore[[i]]) > threshold ) { RRR outtimes[[j]] <- times[[i]]; RRR outx[[j]] <- x[[i]]; RRR outy[[j]] <- y[[i]]; RRR outzscore[[j]] <- zscore[[i]]; RRR j = j + 1; RRR } else { RRR nooutx[[k]] <- x[[i]]; RRR noouty[[k]] <- y[[i]]; RRR k = k + 1; RRR } RRR } RRR RRR numoutliers <- j - 1; RRR RRR outlierflag <- 0; RRR if( numoutliers > 0 ) { RRR outlierflag <- 1; RRR } RRR RRR outlabelplotstring <- paste("- crosses: >",threshold, "sigma outliers from robust linear fit"); RRR intervalplotstring <- paste("- green lines: ~", (100*round((pnorm(threshold) - pnorm(-threshold)),4)), "% (~", threshold, "sigma) confidence int."); RRR RRR #------------------------------------------------------------- RRR # Mean difference in movement magnitudes WITHOUT OUTLIERS: H0: = 0; RRR RRR mvtdiffs <- nooutx - noouty; RRR meandiff <- mean(mvtdiffs); RRR sediffs <- sd(mvtdiffs)/sqrt(length(mvtdiffs)); RRR meddiffs <- median(mvtdiffs); RRR tdiffs <- abs((meandiff - 0)/sediffs); RRR # if sd(mvtdiffs) above happens to be zero (ie. large frac. of mvts same): RRR if( sediffs == 0) { RRR tdiffs <- 99999999; RRR } RRR pvaluediffs <- 2*pt(tdiffs, (length(x)-1), lower.tail = FALSE); RRR RRR #------------------------------------------------------------- RRR # Using original data (with outliers), compute covariances/correlation RRR RRR covmatorig <- cov(cbind(x, y), use = "all.obs", method = "pearson"); RRR cormatorig <- cov2cor(covmatorig); RRR corout <- cormatorig[1,2]; RRR RRR #------------------------------------------------------------- RRR # Using good data (no outliers), compute robust (outlier corrected) covariances for their mahalanobis distance below. RRR RRR covmat <- cov(cbind(nooutx, noouty), use = "all.obs", method = "pearson"); RRR cormat <- cov2cor(covmat); RRR cornoout <- cormat[1,2]; RRR RRR #------------------------------------------------------------- RRR # 95% confidence intervals for population corrs corresponding to RRR # above measures using Fisher's Z-transformation. RRR RRR if( abs(corout) < 1) { RRR zcorout_min <- (0.5 * log((1+corout)/(1-corout))) - (1.96/sqrt(length(x)-3)); RRR zcorout_max <- (0.5 * log((1+corout)/(1-corout))) + (1.96/sqrt(length(x)-3)); RRR corout_min <- (exp(2*zcorout_min) - 1) / (exp(2*zcorout_min) + 1); RRR corout_max <- (exp(2*zcorout_max) - 1) / (exp(2*zcorout_max) + 1); RRR } else { RRR corout_min <- 1; RRR corout_max <- 1; RRR } RRR RRR if( abs(cornoout) < 1) { RRR zcornoout_min <- (0.5 * log((1+cornoout)/(1-cornoout))) - (1.96/sqrt(length(nooutx)-3)); RRR zcornoout_max <- (0.5 * log((1+cornoout)/(1-cornoout))) + (1.96/sqrt(length(nooutx)-3)); RRR cornoout_min <- (exp(2*zcornoout_min) - 1) / (exp(2*zcornoout_min) + 1); RRR cornoout_max <- (exp(2*zcornoout_max) - 1) / (exp(2*zcornoout_max) + 1); RRR } else { RRR cornoout_min <- 1; RRR cornoout_max <- 1; RRR } RRR RRR if( ((corout_min != 1) && (corout_max != 1)) && RRR (round(corout_min,3) != round(corout_max,3)) ) { RRR plotstring1 <- paste(round(corout_min,3),"<","CCF(0)","<",round(corout_max,3)); RRR } else if( (round(corout_min,3)) == (round(corout_max,3)) ) { RRR plotstring1 <- paste("CCF(0) =", round(corout_min,3)); RRR } else { RRR plotstring1 <- "CCF(0) = 1"; RRR } RRR RRR if( ((cornoout_min != 1) && (cornoout_max != 1)) && RRR (round(cornoout_min,3) != round(cornoout_max,3)) ) { RRR plotstring2 <- paste(round(cornoout_min,3),"<","CCF(0)","<",round(cornoout_max,3)); RRR } else if( (round(cornoout_min,3)) == (round(cornoout_max,3)) ) { RRR plotstring2 <- paste("CCF(0) =", round(cornoout_min,3)); RRR } else { RRR plotstring2 <- "CCF(0) = 1"; RRR } RRR RRR #------------------------------------------------------------- RRR # Cross-correlation (relatedness weak consistency) test results. RRR # test H0: corr = corr_observed; H1: corr = 0. RRR RRR write(paste("\n* CROSS-CORRELATION FOR \"RELATEDNESS\" (OR WEAK CONSISTENCY) CHECK:"), file=""); RRR write(paste("=================================================================="), file=""); RRR RRR write(paste("\n* CCF(0) WITH OUTLIERS =",corout), file=""); RRR write(paste("* 95% CL ON ABOVE:",corout_min,"<","CCF(0)","<",corout_max), file=""); RRR write(paste("* CCF(0) WITHOUT OUTLIERS =",cornoout), file=""); RRR write(paste("* 95% CL ON ABOVE:",cornoout_min,"<","CCF(0)","<",cornoout_max,"\n"), file=""); RRR RRR write(paste("* NUMBER OF MATCHED TIMEPOINTS FROM TWO SERIES = ", length(mvtdiffcheck)), file=""); RRR write(paste("* FRACTION OF MOVEMENTS THAT ARE EXACTLY EQUAL (M1 = M2) =", round((100*(countzeros/length(mvtdiffcheck))),2),"%\n"), file=""); RRR RRR if( ((corout_min <= 0) && (corout_max > 0)) || RRR ((corout_min < 0) && (corout_max >= 0)) ) { RRR write(paste("*** CROSS-CORRELATION COEFFICIENT (WITH OUTLIERS) CONSISTENT WITH ZERO AT >5% LEVEL => SERIES MAY NOT BE RELATED"), file=""); RRR } else { RRR write(paste("*** CROSS-CORRELATION COEFFICIENT (WITH OUTLIERS) SIGNIFICANTLY DIFFERENT FROM ZERO AT <5% LEVEL => SERIES ARE INDEED RELATED"), file=""); RRR } RRR RRR if( ((cornoout_min <= 0) && (cornoout_max > 0)) || RRR ((cornoout_min < 0) && (cornoout_max >= 0)) ) { RRR write(paste("*** CROSS-CORRELATION COEFFICIENT (WITHOUT OUTLIERS) CONSISTENT WITH ZERO AT >5% LEVEL => SERIES MAY NOT BE RELATED"), file=""); RRR } else { RRR write(paste("*** CROSS-CORRELATION COEFFICIENT (WITHOUT OUTLIERS) SIGNIFICANTLY DIFFERENT FROM ZERO AT <5% LEVEL => SERIES ARE INDEED RELATED"), file=""); RRR } RRR RRR #------------------------------------------------------------- RRR # Equal-movement magnitude test results from above. RRR RRR # P-values below represent probability of H0 occuring by chance. If small RRR # (say <5%) then reject H0's: slope significantly different from 1 or intercept RRR # significantly different from 0 and hence movement magnitudes inconsistent. RRR RRR write(paste("\n* EQUAL-MOVEMENT MAGNITUDE TESTS (ON OUTLIER-CORRECTED MOVEMENTS):"), file=""); RRR write(paste("=================================================================="), file=""); RRR RRR write(paste("\n* MEAN_DIFF: < M1-M2 > =",meandiff), file=""); RRR write(paste("* STANDERROR_DIFF (sigma[M1-M2]) =",sediffs), file=""); RRR write(paste("* MEDIAN_DIFF =",meddiffs), file=""); RRR write(paste("* T-VALUE_DIFF =",tdiffs), file=""); RRR write(paste("* P-VALUE_DIFF (H0: < M1-M2 >=0) =",pvaluediffs), file=""); RRR RRR write(paste("\n* SLOPE (OUTLIER RESISTENT) =",slope), file=""); RRR write(paste("* STANDERROR_SLOPE =",seslope), file=""); RRR write(paste("* T-VALUE_SLOPE =",tslope), file=""); RRR write(paste("* P-VALUE_SLOPE (H0: slope=1) =",pvalueslope), file=""); RRR write(paste("* INTERCEPT (OUTLIER RESISTENT) =",intercept), file=""); RRR write(paste("* STANDERROR_INT =",seint), file=""); RRR write(paste("* T-VALUE_INT =",tint), file=""); RRR write(paste("* P-VALUE_INT (H0: intercept=0) =",pvalueint,"\n"), file=""); RRR RRR if(pvaluediffs < 0.05) { RRR write(paste("*** MEAN DIFFERENCE IN MOVEMENTS SIGNIFICANTLY DIFFERENT FROM ZERO AT 5% LEVEL"), file=""); RRR } else { RRR write(paste("*** MEAN DIFFERENCE IN MOVEMENTS CONSISTENT WITH ZERO AT", round(100*pvaluediffs,2),"% LEVEL"), file=""); RRR } RRR RRR if(pvalueslope < 0.05) { RRR write(paste("*** FITTED SLOPE SIGNIFICANTLY DIFFERENT FROM ONE AT 5% LEVEL"), file=""); RRR } else { RRR write(paste("*** FITTED SLOPE CONSISTENT WITH ONE AT", round(100*pvalueslope,2),"% LEVEL"), file=""); RRR } RRR RRR if(pvalueint < 0.05) { RRR write(paste("*** FITTED INTERCEPT SIGNIFICANTLY DIFFERENT FROM ZERO AT 5% LEVEL"), file=""); RRR } else { RRR write(paste("*** FITTED INTERCEPT CONSISTENT WITH ZERO AT", round(100*pvalueint,2),"% LEVEL"), file=""); RRR } RRR RRR #------------------------------------------------------------- RRR # Mahalanobis distance measures and P-values for the outliers. RRR # (movement versus movement outlier test results) RRR RRR #not interested in elliptical CL anymore where sqrt(mahala) = joint sigma boundary: mahala <- mahalanobis(cbind(outx, outy), colMeans(cbind(nooutx, noouty)), covmat); RRR #pvalue <- pchisq(mahala,2,lower.tail=FALSE); RRR RRR # following assumes asymptotic normality in zscore MAD values. RRR pvalue <- 2*pnorm(abs(outzscore), lower.tail=FALSE); RRR RRR write(paste("\n* MOVEMENT VS MOVEMENT OUTLIER TEST:"), file=""); RRR write(paste("===================================="), file=""); RRR RRR if( outlierflag ) { RRR write(paste("\n*** NUMBER OF", ">", threshold, "SIGMA OUTLIERS DETECTED IN MOVEMENT VS MOVEMENT PLOT =", numoutliers,":\n"), file=""); RRR write(paste("* ALL OUTLIERS:"), file=""); RRR write(paste("* MNTH or QTR/YEAR | MVT1(%) | MVT2(%) | approx.\#SIGMA | P-VALUE:"), file=""); RRR write(paste("* ---------------------------------------------------------------"), file=""); RRR RRR for( i in 1:length(outx) ) { RRR formatstring <- sprintf("* %s | %4.2f | %4.2f | %4.2f | %3.3e", outtimes[[i]], outx[[i]], outy[[i]], outzscore[[i]], pvalue[[i]]); RRR write(formatstring,file=""); RRR } RRR RRR write(paste("* ---------------------------------------------------------------"), file=""); RRR } else { RRR write(paste("\n*** NO", ">", threshold, "SIGMA OUTLIERS DETECTED IN MOVEMENT VS MOVEMENT PLOT"), file=""); RRR } RRR RRR #------------------------------------------------------------- RRR # Test for significant temporal pattern in movement differences with magnitude >"thresholddiffs". RRR # H0: (P)ACF[i(t), lagh] = 0 versus H1: (P)ACF[i(t), lagh].ne.0. Rejection of H0 => inconsistency. RRR # i(t) = indicator variable. [e.g. bad seasonal adjustment for a month across series.] RRR RRR thresholddiffs <- 2.0; RRR maxlag <- 12; RRR xymvtdiffs <- x - y; RRR MADvaluediff <- mad(xymvtdiffs); RRR # If MAD value happens to be zero (ie. large frac. of mvts same), then use sd. RRR if(MADvaluediff == 0) { RRR MADvaluediff <- sd(xymvtdiffs); RRR } RRR zscorediff <- ( xymvtdiffs - median(xymvtdiffs) ) / MADvaluediff; RRR RRR #initialise. RRR indicat <- 0; RRR RRR j <- 1; RRR for( i in 1:length(zscorediff) ) { RRR if( abs(zscorediff[[i]]) > thresholddiffs ) { RRR indicat[[i]] <- 1; RRR j <- j + 1; RRR } else { RRR indicat[[i]] <- 0; RRR } RRR } RRR RRR numdiffsabovethres <- j - 1; RRR RRR diffthresplotstring <- paste("Test for temporal pattern in excess movement differences\n","i(t)=1 => abs(M1-M2) >",thresholddiffs,"sigma"); RRR RRR if(numdiffsabovethres) { RRR acfdiffs <- acf(indicat, lag.max = maxlag, type="correlation", plot = FALSE); RRR pacfdiffs <- pacf(indicat, lag.max = maxlag, plot = FALSE); RRR } RRR RRR #------------------------------------------------------------- RRR # Output results for significant temporal pattern in movement differences. RRR RRR write(paste("\n* TEST FOR SIGNIFICANT TEMPORAL PATTERN IN MOVEMENT DIFFERENCES (M1 - M2):"), file=""); RRR write(paste("=========================================================================="), file=""); RRR RRR write(paste("\n* MOVEMENT DIFFERENCE THRESHOLD =", thresholddiffs, "SIGMA"), file=""); RRR write(paste("* NUMBER OF |M1 - M2| VALUES ABOVE THRESHOLD =", numdiffsabovethres), file=""); RRR write(paste("* LAGS WHERE ACF & PACF[i(t)] ARE GREATER THAN 95% CL VALUE:"), file=""); RRR RRR found <- 0; RRR if(numdiffsabovethres) { RRR lagstring <- "*"; RRR for( i in 1:maxlag ) { RRR if( (acfdiffs$acf[i+1] > (1.96/sqrt(length(indicat)))) && RRR (pacfdiffs$acf[i] > (1.96/sqrt(length(indicat)))) && RRR (i > 1) ) { RRR lagstring <- paste(lagstring, "LAG", i, ";"); RRR found <- 1; RRR } RRR } RRR } RRR RRR if(!found) { RRR write("* NONE.", file=""); RRR write("*** => NO EVIDENCE FOR TEMPORAL PATTERN IN MOVEMENT DIFFERENCES; ALL OKAY", file=""); RRR } else { RRR write(lagstring, file=""); RRR write(paste("*** => REJECT \"H0: ACF & PACF[i(t)]=0\" OF NO PATTERN IN MOVEMENT DIFFERENCES"), file=""); RRR write(paste("*** => SYSTEMATIC INCONSISTENCY EXISTS AT ABOVE (LAG) PERIODICITIES (CHECK ACF PLOTS)"), file=""); RRR } RRR RRR #------------------------------------------------------------- RRR # Unit Root and Cointegration Tests. RRR RRR write(paste("\n* UNIT ROOT & COINTEGRATION TESTS:"), file=""); RRR write(paste("=================================="), file=""); RRR RRR #---------- RRR # UR test 1: Augmented Dickey-Fuller test on series 1 and 2: Ho: non-stationary. RRR RRR adf_serx <- ur.df(serx, type="drift", lags=2); RRR adf_sery <- ur.df(sery, type="drift", lags=2); RRR RRR adf_stat_serx <- adf_serx@teststat[1]; RRR adf_stat_sery <- adf_sery@teststat[1]; RRR adf_pvalue1 <- adf_serx@cval[1]; RRR adf_pvalue5 <- adf_serx@cval[3]; RRR adf_pvalue10 <- adf_serx@cval[5]; RRR RRR title <- "Augmented Dickey-Fuller Unit Root Test:"; RRR write(paste("\n*",title), file=""); RRR write(cat("* ",(rep("-",trunc(0.5*(nchar(title) + 2)))), sep="-"), file=""); RRR write(paste("* Series 1: ADF_STATISTIC =",round(adf_stat_serx,4)), file=""); RRR write(paste("* Series 2: ADF_STATISTIC =",round(adf_stat_sery,4)), file=""); RRR write(paste("* CRIT. VALUES (H0: integrated): 1% =",round(adf_pvalue1,4),"; 5% =",round(adf_pvalue5,4),"; 10% =",round(adf_pvalue10,4)), file=""); RRR RRR #---------- RRR # UR test 2: Phillips-Perron test on series 1 and 2: Ho: non-stationary. RRR RRR pp_serx <- ur.pp(serx, type="Z-alpha", model="constant", use.lag=2); RRR pp_sery <- ur.pp(sery, type="Z-alpha", model="constant", use.lag=2); RRR RRR pp_stat_serx <- pp_serx@teststat; RRR pp_stat_sery <- pp_sery@teststat; RRR pp_pvalue1 <- pp_serx@cval[1]; RRR pp_pvalue5 <- pp_serx@cval[2]; RRR pp_pvalue10 <- pp_serx@cval[3]; RRR RRR title <- "Phillips-Perron Unit Root Test:"; RRR write(paste("\n*",title), file=""); RRR write(cat("* ",(rep("-",trunc(0.5*(nchar(title) + 2)))), sep="-"), file=""); RRR write(paste("* Series 1: Z_STATISTIC =",round(pp_stat_serx,4)), file=""); RRR write(paste("* Series 2: Z_STATISTIC =",round(pp_stat_sery,4)), file=""); RRR write(paste("* CRIT. VALUES (H0: integrated): 1% =",round(pp_pvalue1,4),"; 5% =",round(pp_pvalue5,4),"; 10% =",round(pp_pvalue10,4)), file=""); RRR RRR #---------- RRR # Unit roots conclusions: RRR RRR integ_flag_serx <- 0; RRR integ_flag_sery <- 0; RRR RRR # Series 1: RRR if( (adf_stat_serx > adf_pvalue10) || (pp_stat_serx > pp_pvalue10) ) { RRR write(paste("\n*** According to either test, series 1 = integrated I(1) process [at >10% level]"),file=""); RRR integ_flag_serx <- 1; RRR } else if( (adf_stat_serx > adf_pvalue5) || (pp_stat_serx > pp_pvalue5) ) { RRR write(paste("\n*** According to either test, series 1 = integrated I(1) process [at >5% level]"),file=""); RRR integ_flag_serx <- 1; RRR } else if( (adf_stat_serx < adf_pvalue1) || (pp_stat_serx < pp_pvalue1) ) { RRR write(paste("\n*** According to either test, series 1 ~ I(0) process [at <1% level]"),file=""); RRR } else if( (adf_stat_serx < adf_pvalue5) || (pp_stat_serx < pp_pvalue5) ) { RRR write(paste("\n*** According to either test, series 1 ~ I(0) process [at <5% level]"),file=""); RRR } RRR RRR # Series 2: RRR if( (adf_stat_sery > adf_pvalue10) || (pp_stat_sery > pp_pvalue10) ) { RRR write(paste("*** According to either test, series 2 = integrated I(1) process [at >10% level]"),file=""); RRR integ_flag_sery <- 1; RRR } else if( (adf_stat_sery > adf_pvalue5) || (pp_stat_sery > pp_pvalue5) ) { RRR write(paste("*** According to either test, series 2 = integrated I(1) process [at >5% level]"),file=""); RRR integ_flag_sery <- 1; RRR } else if( (adf_stat_sery < adf_pvalue1) || (pp_stat_sery < pp_pvalue1) ) { RRR write(paste("*** According to either test, series 2 ~ I(0) process [at <1% level]"),file=""); RRR } else if( (adf_stat_sery < adf_pvalue5) || (pp_stat_sery < pp_pvalue5) ) { RRR write(paste("*** According to either test, series 2 ~ I(0) process [at <5% level]"),file=""); RRR } RRR RRR if( integ_flag_serx != integ_flag_sery ) { RRR write(paste("*** WARNING: series pair not integrated with same order:","[",integ_flag_serx,integ_flag_sery,"]"), file=""); RRR } RRR RRR #-------------------------------------------------------------------- RRR # Regress series 2 (Yt) on series 1 (Xt) if both have same integration order RRR # (0,0 included): Yt = B.Xt + A. Use "robust" linear regression using "rlm" RRR # function (outlier resistant). Try "MM" method first, if fails (from size of RRR # returned object containing just error message string), re-execute with "M" RRR # method. Otherwise let fail. For multiplicative decomposition models, take RRR # logs of Xt and Yt if both >0 to make variance stationary. Note that tests RRR # below only test for stationarity in mean of residuals not variance, so RRR # doesn't matter if don't make variance stationary! RRR RRR lserx <- serx; RRR xlabplot <- "Series 1"; RRR logflag1 <- 0; RRR if( length(grep("M", model1)) ) { RRR if( (min(serx)>0) || (max(serx)<0) ) { RRR lserx <- log(abs(serx)); RRR xlabplot <- "Log[Series 1]"; RRR logflag1 <- 1; RRR } RRR } RRR RRR lsery <- sery; RRR ylabplot <- "Series 2"; RRR logflag2 <- 0; RRR if( length(grep("M", model2)) ) { RRR if( (min(sery)>0) || (max(sery)<0) ) { RRR lsery <- log(abs(sery)); RRR ylabplot <- "Log[Series 2]"; RRR logflag2 <- 1; RRR } RRR } RRR RRR title_levelsplot <- "Series2 vs. Series1"; RRR if( logflag1 && logflag2 ) { RRR title_levelsplot <- "Log[Series2] vs. Log[Series1]"; RRR } else if( (logflag1==0) && (logflag2==1) ) { RRR title_levelsplot <- "Log[Series2] vs. Series1"; RRR } else if( (logflag1==1) && (logflag2==0) ) { RRR title_levelsplot <- "Series2 vs. Log[Series1]"; RRR } RRR RRR # Ensure first that series have same length before regressing! RRR samelength <- 1; RRR if( length(serx) != length(sery) ) { RRR write(paste("\n*** Series have different lengths; cannot proceed with regression and cointegration tests."), file=""); RRR samelength <- 0; RRR } RRR RRR # Create series of ratio Yt/Xt = series 2/series 1 for plotting. RRR if( samelength ) { RRR seriesratio <- sery / serx; RRR } RRR RRR if( (integ_flag_serx==integ_flag_sery) && samelength ) { RRR write(paste("*** Series have same integration order:","[",integ_flag_serx,integ_flag_sery,"]"), file=""); RRR write(paste("\n* Estimates of regression: \"S2 = b.S1 + a\":"), file=""); RRR RRR lin_co <- try(suppressWarnings(rlm(lsery ~ lserx + 1, method="MM")), silent=TRUE); RRR if( length(lin_co) == 1 ) { RRR lin_co <- suppressWarnings(rlm(lsery ~ lserx + 1, method="M")); RRR } RRR resid_co <- residuals(lin_co); RRR par_co <- coefficients(summary(lin_co)); RRR RRR # Parameter estimates may fail in cases where slope = exactly 1 or RRR # se's exactly 0. Therefore must guard against: RRR RRR intercept_co <- try(par_co[1,1], silent=TRUE); RRR if( length(grep("Error in", as.character(intercept_co))) ) { RRR intercept_co <- 0; RRR } RRR slope_co <- try(par_co[2,1], silent=TRUE); RRR if( length(grep("Error in", as.character(slope_co))) ) { RRR slope_co <- 1; RRR } RRR seint_co <- try(par_co[1,2], silent=TRUE); RRR if( length(grep("Error in", as.character(seint_co))) ) { RRR seint_co <- 0; RRR } RRR seslope_co <- try(par_co[2,2], silent=TRUE); RRR if( length(grep("Error in", as.character(seslope_co))) ) { RRR seslope_co <- 0; RRR } RRR RRR cor_co <- cor(lserx, lsery, use="all.obs", method="pearson"); RRR RRR write(paste("* SLOPE (OUTLIER RESISTENT) =",slope_co), file=""); RRR write(paste("* STANDERROR_SLOPE =",seslope_co), file=""); RRR write(paste("* INTERCEPT (OUTLIER RESISTENT) =",intercept_co), file=""); RRR write(paste("* STANDERROR_INT =",seint_co), file=""); RRR write(paste("* CROSS-CORRELATION COEFF. (WITH OUTLIERS) =",cor_co), file=""); RRR } RRR RRR #-------------------------------------------------------------------- RRR # Apply two cointegration tests if series had same length and both I(1). RRR RRR if( samelength ) { RRR if( integ_flag_serx && integ_flag_sery ) { RRR coint_flag_test1 <- 0; RRR coint_flag_test2 <- 0; RRR RRR #-------------------------------------------------------------------- RRR # Cointegration test 1: Augmented Engle-Granger (AEG) test on residuals RRR # of cointegrating regression: Ut = Yt - B.Xt - A. Use critical tau RRR # statistics for cointegration tests from MacKinnon, J. G., 1996. RRR RRR N <- length(serx); RRR RRR critval_0.01 <- -4.3266 + (-15.531/N) + (-34.03*(1/N)**2); RRR critval_0.05 <- -3.7809 + (-9.421/N) + (-15.06*(1/N)**2); RRR critval_0.10 <- -3.4959 + (-7.203/N) + (-4.01*(1/N)**2); RRR RRR title <- "Cointegration Test 1: Augmented Engle-Granger (AEG) test on residuals: \"u=S2-b.S1-a\":"; RRR write(paste("\n*",title), file=""); RRR write(cat("* ",(rep("-",trunc(0.5*(nchar(title) + 2)))), sep="-"), file=""); RRR RRR adf_resid_coint <- ur.df(resid_co, type="drift", lags=2); RRR adf_stat <- adf_resid_coint@teststat[1]; RRR RRR write(paste("* Residuals series [u=S2-b.S1-a] ADF_STATISTIC =",round(adf_stat,4)), file=""); RRR write(paste("* CRIT. VALUES (H0: u=\'non-stationary\'): 1% =",round(critval_0.01,4),"; 5% =",round(critval_0.05,4),"; 10% =",round(critval_0.10,4)), file=""); RRR RRR if( adf_stat < critval_0.01 ) { RRR write(paste("* Series pair is cointegrating [H0: no-cointegration rejected at <1% level]"),file=""); RRR coint_flag_test1 <- 1; RRR } else if( adf_stat < critval_0.05 ) { RRR write(paste("* Series pair is cointegrating [H0: no-cointegration rejected at <5% level]"),file=""); RRR coint_flag_test1 <- 1; RRR } else if( adf_stat > critval_0.10 ) { RRR write(paste("* Series pair is not cointegrating [H0: no-cointegration accepted at >10% level]"),file=""); RRR } else if( adf_stat > critval_0.05 ) { RRR write(paste("* Series pair is not cointegrating [H0: no-cointegration accepted at >5% level]"),file=""); RRR } RRR RRR #-------------------------------------------------------------------- RRR # Cointegration test 2: Johansen's VAR-ECM procedure for RRR # significantly non-zero eigenvalues in long run matrix. RRR RRR title <- "Cointegration Test 2: Johansen's VAR procedure for significantly non-zero eigenvalues:"; RRR write(paste("\n*",title), file=""); RRR write(cat("* ",(rep("-",trunc(0.5*(nchar(title) + 2)))), sep="-"), file=""); RRR RRR tsobject <- ts( cbind(lserx, lsery) ); RRR RRR # Trace test: RRR vecm_trace_coint <- ca.jo(tsobject, type="trace", constant=FALSE, K=2, spec="longrun", season=NULL, dumvar=NULL, ctable="A2"); RRR eigenvalues <- vecm_trace_coint@lambda; RRR tr_statistics <- vecm_trace_coint@teststat; RRR tr_cvals <- vecm_trace_coint@cval; RRR RRR write(paste("* With max. lag = 2 and linear trend included."), file=""); RRR write(paste("* Eigenvalue (e1) =", round(eigenvalues[[2]],8),";","Eigenvalue (e2) =",round(eigenvalues[[1]],8)), file=""); RRR RRR write(paste("\n* Trace statistic[e1 + e2] (H0: r=0 cointegrating vectors vs H1: r<=2) =", round(tr_statistics[[2]],4)), file=""); RRR write(paste("* CRIT. VALUES [r=0]: 1% =",round(tr_cvals[[6]],4),"; 5% =",round(tr_cvals[[4]],4),"; 10% =",round(tr_cvals[[2]],4)), file=""); RRR write(paste("* Trace statistic[e1] (H0: r<=1 cointegrating vector vs H1: r<=2) =", round(tr_statistics[[1]],4)), file=""); RRR write(paste("* CRIT. VALUES [r<=1]: 1% =",round(tr_cvals[[5]],4),"; 5% =",round(tr_cvals[[3]],4),"; 10% =",round(tr_cvals[[1]],4)), file=""); RRR RRR # Max. eigenvalue test: RRR vecm_eigen_coint <- ca.jo(tsobject, type="eigen", constant=FALSE, K=2, spec="longrun", season=NULL, dumvar=NULL, ctable="A2"); RRR max_statistics <- vecm_eigen_coint@teststat; RRR max_cvals <- vecm_eigen_coint@cval; RRR RRR write(paste("\n* Max. eigenvalue statistic[e2] (H0: r=0 cointegrating vectors vs H1: r=1) =", round(max_statistics[[2]],4)), file=""); RRR write(paste("* CRIT. VALUES [r=0]: 1% =",round(max_cvals[[6]],4),"; 5% =",round(max_cvals[[4]],4),"; 10% =",round(max_cvals[[2]],4)), file=""); RRR write(paste("* Max. eigenvalue statistic[e1] (H0: r=1 cointegrating vector vs H1: r=2) =", round(max_statistics[[1]],4)), file=""); RRR write(paste("* CRIT. VALUES [r=1]: 1% =",round(max_cvals[[5]],4),"; 5% =",round(max_cvals[[3]],4),"; 10% =",round(max_cvals[[1]],4)), file=""); RRR RRR if( (abs(tr_statistics[[2]]) < abs(tr_cvals[[1]])) && (abs(max_statistics[[2]]) < abs(max_cvals[[1]])) ) { RRR write(paste("\n* Series pair is not cointegrating [H0: 0 cointegrating vectors accepted at >10% level]"),file=""); RRR } else if( (abs(tr_statistics[[2]]) < abs(tr_cvals[[3]])) && (abs(max_statistics[[2]]) < abs(max_cvals[[3]])) ) { RRR write(paste("\n* Series pair is not cointegrating [H0: 0 cointegrating vectors accepted at >5% level]"),file=""); RRR } else if( (abs(tr_statistics[[1]]) < abs(tr_cvals[[1]])) && (abs(max_statistics[[1]]) < abs(max_cvals[[1]])) ) { RRR write(paste("\n* Series pair is cointegrating [H0: 1 cointegrating vector accepted at >10% level]"),file=""); RRR coint_flag_test2 <- 1; RRR } else if( (abs(tr_statistics[[1]]) < abs(tr_cvals[[3]])) && (abs(max_statistics[[1]]) < abs(max_cvals[[3]])) ) { RRR write(paste("\n* Series pair is cointegrating [H0: 1 cointegrating vector accepted at >5% level]"),file=""); RRR coint_flag_test2 <- 1; RRR } else if( (abs(tr_statistics[[1]]) > abs(tr_cvals[[5]])) && (abs(max_statistics[[1]]) > abs(max_cvals[[5]])) ) { RRR write(paste("\n* Series pair is not cointegrating [H0: 1 cointegrating vector rejected at <1% level]"),file=""); RRR } else if( (abs(tr_statistics[[1]]) > abs(tr_cvals[[3]])) && (abs(max_statistics[[1]]) > abs(max_cvals[[3]])) ) { RRR write(paste("\n* Series pair is not cointegrating [H0: 1 cointegrating vector rejected at <5% level]"),file=""); RRR } RRR RRR if( coint_flag_test1 || coint_flag_test2 ) { RRR write(paste("\n*** According to either AEG or Johansen tests, series pair is cointegrating\n"),file=""); RRR } else { RRR write(paste("\n*** According to both AEG and Johansen tests, series pair is not cointegrating\n"),file=""); RRR } RRR RRR } else { RRR write(paste("*** Series pair not cointegrating since components not integrated with same non-zero order"),file=""); RRR } RRR } RRR RRR #------------------------------------------------------------- RRR # Percentage movement and absolute movement versus time plots: RRR RRR shortspanlen <- 12; RRR if( length(x) <= shortspanlen ) { RRR shortspanlen <- length(x); RRR } RRR RRR fullspaninterval <- 9; RRR if( length(times) < fullspaninterval ) { RRR fullspaninterval <- length(times); RRR } RRR RRR #Unix: RRR #bitmap(file="mvt_vs_time.png", width=5, height=5, res=400) RRR png(file="mvt_vs_time.png", width = 750, height = 800, pointsize = 12, bg="transparent", res = 400); RRR RRR par(mfcol = c(2,2), mfg = c(1,1,2,2), mai = c(0.0,0.7,1.0,0.0)); RRR plot(x, main="Full span", xlab="", ylab="% Movement", col.main="red", cex.main=1.7, pch=" ", frame = TRUE, axes=FALSE, cex.lab=1.6, ylim=range(x,y)); RRR mtext("Series 1 (Blue) : Series 2 (Green)", side=3, line=1, padj=0, cex=1.3); RRR lines(x, col="blue",lty=1); RRR lines(y, col="green",lty=1); RRR points(length(x), x[length(x)], col="red", pch=16, cex=1.5); RRR points(length(y), y[length(y)], col="red", pch=16, cex=1.5); RRR abline(0, 0, col="black"); RRR axis(2, cex.axis=1.3); RRR RRR par(mfcol = c(2,2), mfg = c(1,2,2,2), mai = c(0.0,0.3,1.0,0.4)); RRR plot(x[(length(x)-(shortspanlen-1)):(length(x))], main="Short (recent) span", xlab="", ylab="", col.main="red", cex.main=1.7, pch=" ", frame = TRUE, axes=FALSE, cex.lab=1.6, ylim=range(x[(length(x)-(shortspanlen-1)):(length(x))],y[(length(y)-(shortspanlen-1)):(length(y))])); RRR lines(x[(length(x)-(shortspanlen-1)):(length(x))], col="blue",lty=1); RRR lines(y[(length(y)-(shortspanlen-1)):(length(y))], col="green",lty=1); RRR points(shortspanlen, x[length(x)], col="red", pch=16, cex=1.5); RRR points(shortspanlen, y[length(y)], col="red", pch=16, cex=1.5); RRR abline(0, 0, col="black"); RRR axis(2, cex.axis=1.3); RRR RRR par(mfcol = c(2,2), mfg = c(2,1,2,2), mai = c(1.0,0.7,0.0,0.0)); RRR plot(absx, xlab="(month or quarter) / year", ylab="First Difference", pch=" ", frame = TRUE, axes=FALSE, cex.lab=1.6, ylim=range(absx,absy)); RRR lines(absx, col="blue",lty=1); RRR lines(absy, col="green",lty=1); RRR points(length(absx), absx[length(absx)], col="red", pch=16, cex=1.5); RRR points(length(absy), absy[length(absy)], col="red", pch=16, cex=1.5); RRR abline(0, 0, col="black"); RRR axis(1, seq(1,length(absx),round((length(absx)/fullspaninterval),0)), labels=times[seq(1,length(absx),round((length(absx)/fullspaninterval),0))], las=1, cex.axis=1.3); RRR axis(2, cex.axis=1.3); RRR RRR par(mfcol = c(2,2), mfg = c(2,2,2,2), mai = c(1.0,0.3,0.0,0.4)); RRR plot(absx[(length(absx)-(shortspanlen-1)):(length(absx))], xlab="(month or quarter) / year", ylab="", pch=" ", frame = TRUE, axes=FALSE, cex.lab=1.6, ylim=range(absx[(length(absx)-(shortspanlen-1)):(length(absx))],absy[(length(absy)-(shortspanlen-1)):(length(absy))])); RRR lines(absx[(length(absx)-(shortspanlen-1)):(length(absx))], col="blue",lty=1); RRR lines(absy[(length(absy)-(shortspanlen-1)):(length(absy))], col="green",lty=1); RRR points(shortspanlen, absx[length(absx)], col="red", pch=16, cex=1.5); RRR points(shortspanlen, absy[length(absy)], col="red", pch=16, cex=1.5); RRR abline(0, 0, col="black"); RRR axis(1, seq(1,12,1), labels=times[(length(absx)-(shortspanlen-1)):(length(absx))], las=1, cex.axis=1.3); RRR axis(2, cex.axis=1.3); RRR RRR #------------------------------------------------------------- RRR # movement versus movement plot: RRR RRR #Unix: RRR #bitmap(file="mvt_vs_mvt.png", width=5, height=5, res=400) RRR png(file="mvt_vs_mvt.png", width = 750, height = 800, pointsize = 12, bg="transparent", res = 400); RRR par(mfcol = c(2,1), mai = c(0.7,1.1,0.7,1.1)); RRR RRR plot(x, y, main="Movements for series pair at equal times", xlab="% movement in series 1", ylab="% movement in series 2", pch=" ", cex.lab=1.6, cex.axis=1.3, cex.main=1.4) RRR points(x, y, col="red", pch=24, bg="red") RRR if( outlierflag ) { RRR points(outx, outy, pch=4, cex=2.5) RRR } RRR abline(0, 1, col="black") RRR abline(intercept, slope, col="blue", lty=2) RRR abline((intercept + (threshold*MADvalue)), slope, col="green", lwd=2) RRR abline((intercept - (threshold*MADvalue)), slope, col="green", lwd=2) RRR #data.ellipse(nooutx, noouty, levels=0.95, center.pch=1, center.cex=0, plot.points=FALSE, robust=TRUE, col="green", lwd=2) RRR plot(c(0,10),c(0,10),pch=" ",type="n", bty="n", axes=FALSE, lab=FALSE, xlab="", ylab=""); RRR text(-0.5, 10, "- red triangles: all time-matched movements", pos=4, cex=1.4, col="blue"); RRR text(-0.5, 8, outlabelplotstring, pos=4, cex=1.4, col="blue"); RRR text(-0.5, 6, intervalplotstring, pos=4, cex=1.4, col="blue"); RRR text(-0.5, 4, "- black line: line of equal movements", pos=4, cex=1.4, col="blue"); RRR text(-0.5, 2, "- blue dashed: robust linear fit (outliers excluded)", pos=4, cex=1.4, col="blue"); RRR text(2.5, 1, slopeplotstring, pos=4, cex=1.4, col="blue"); RRR text(2.5, 0, intplotstring, pos=4, cex=1.4, col="blue"); RRR RRR #------------------------------------------------------------- RRR # plots for testing temporal structure/pattern in movement differences. RRR RRR #Unix: RRR #bitmap(file="pattern_acf.png", width=5, height=5, res=400) RRR png(file="pattern_acf.png", width = 750, height = 800, pointsize = 12, bg="transparent", res = 400); RRR par(mfcol = c(3,1), mai = c(0.7,1.1,0.7,1.1)); RRR RRR plot(indicat, main=diffthresplotstring, xlab="(month or quarter) / year", ylab="indicator: i(t)", pch=" ", frame = TRUE, axes=FALSE, cex.lab=1.6, cex.main=1.3, ylim=c(-0.1, 1.1)); RRR lines(indicat, type="h", col="blue",lty=1); RRR points(indicat, col="red", pch=16, cex=1.2); RRR abline(0, 0, col="black"); RRR axis(1, seq(1,length(indicat),round((length(indicat)/fullspaninterval),0)), labels=times[seq(1,length(indicat),round((length(indicat)/fullspaninterval),0))], las=1, cex.axis=1.3); RRR axis(2, cex.axis=1.3); RRR RRR if(numdiffsabovethres) { RRR acf(indicat, lag.max = maxlag, plot = TRUE, main="Autocorrelation of indicator sequence", xlab="Lag h", ylab="ACF(h)", ci.type="white", type="correlation", cex.lab=1.6, cex.axis=1.3, cex.main=1.4); RRR pacf(indicat, lag.max = maxlag, plot = TRUE, main="Partial-autocorrelation of indicator sequence", xlab="Lag h", ylab="PACF(h)", ci.type="white", cex.lab=1.6, cex.axis=1.3, cex.main=1.4); RRR } RRR RRR #------------------------------------------------------------- RRR # cross-correlation plots: RRR RRR #Unix: RRR #bitmap(file="ccf.png", width=5, height=5, res=400) RRR png(file="ccf.png", width = 750, height = 800, pointsize = 12, bg="transparent", res = 400); RRR par(mfcol = c(2,1), mai = c(0.7,1.1,0.7,1.1)) RRR ccf(x, y, lag.max=3, plot = TRUE, ylim=c(-0.3,1), main="Cross-correlations in Movements (outliers included)", xlab="Lag h", ylab="CCF(h)", ci.type="white", type="correlation", cex.lab=1.6, cex.axis=1.3); RRR text(0.0,0.95,plotstring1, cex=1.4, col="blue"); RRR RRR ccf(nooutx, noouty, lag.max=3, plot = TRUE, ylim=c(-0.3,1), main="Cross-correlations in Movements (outliers removed)", xlab="Lag h", ylab="CCF(h)", ci.type="white", type="correlation", cex.lab=1.6, cex.axis=1.3); RRR text(0.0,0.95,plotstring2, cex=1.4, col="blue"); RRR RRR #------------------------------------------------------------- RRR # Plots of series levels and residuals from regression: RRR RRR #Unix: RRR #bitmap(file="coint.png", width=7, height=7, res=400) RRR png(file="coint.png", width = 750, height = 1600, pointsize = 12, bg="transparent", res = 400); RRR par(mfcol = c(5,1), mai = c(0.7,1.1,0.7,1.1)); RRR RRR if( samelength ) { RRR plot(serx, main="Series 1 (Blue) : Series 2 (Green)", xlab="(month or quarter) / year", ylab="Level", pch=" ", frame = TRUE, axes=FALSE, cex.lab=1.6, cex.main=1.4, ylim=range(serx,sery)); RRR lines(serx, col="blue",lty=1); RRR lines(sery, col="green",lty=1); RRR axis(1, seq(1,length(serx),round((length(serx)/fullspaninterval),0)), labels=times[seq(1,length(serx),round((length(serx)/fullspaninterval),0))], las=1, cex.axis=1.3); RRR axis(2, cex.axis=1.3); RRR plot(seriesratio, main="Ratio: Series 2 / Series 1", xlab="(month or quarter) / year", ylab="Ratio", pch=" ", frame = TRUE, axes=FALSE, cex.lab=1.6, cex.main=1.4); RRR lines(seriesratio, col="red",lty=1); RRR axis(1, seq(1,length(seriesratio),round((length(seriesratio)/fullspaninterval),0)), labels=times[seq(1,length(seriesratio),round((length(seriesratio)/fullspaninterval),0))], las=1, cex.axis=1.3); RRR axis(2, cex.axis=1.3); RRR plot(lserx, lsery, main=title_levelsplot, xlab=xlabplot, ylab=ylabplot, pch=" ", cex.lab=1.6, cex.axis=1.3, cex.main=1.4); RRR points(lserx, lsery, col="red", pch=24, bg="red"); RRR RRR # Plot slope/intecept line and residuals only if series integrated with same order: RRR if( integ_flag_serx==integ_flag_sery ) { RRR abline(intercept_co, slope_co, col="blue", lty=1); RRR plot(resid_co, main=paste("Residuals from Regression:\n",title_levelsplot), xlab="(month or quarter) / year", ylab="Residual", pch=" ", frame = TRUE, axes=FALSE, cex.lab=1.6, cex.main=1.4); RRR lines(resid_co, col="blue",lty=1); RRR abline(0, 0, col="black"); RRR axis(1, seq(1,length(resid_co),round((length(resid_co)/fullspaninterval),0)), labels=times[seq(1,length(resid_co),round((length(resid_co)/fullspaninterval),0))], las=1, cex.axis=1.3); RRR axis(2, cex.axis=1.3); RRR acf(resid_co, lag.max=maxlag, plot=TRUE, main="ACF of Residuals from Regression", xlab="Lag h", ylab="ACF(h)", ci.type="white", type="correlation", cex.lab=1.6, cex.axis=1.3, cex.main=1.4); RRR } else { RRR plot(c(0,10),c(0,10),pch=" ",type="n", bty="n", axes=FALSE, lab=FALSE, xlab="", ylab=""); RRR text(-0.5, 10, "Residuals from regression: 'S2 ~ S1' not", pos=4, cex=1.4, col="blue"); RRR text(-0.5, 8, "computed since series not integrated with", pos=4, cex=1.4, col="blue"); RRR text(-0.5, 6, "same order d (d >= 0) [see diagnostics].", pos=4, cex=1.4, col="blue"); RRR } RRR } else { RRR plot(c(0,10),c(0,10),pch=" ",type="n", bty="n", axes=FALSE, lab=FALSE, xlab="", ylab=""); RRR text(-0.5, 10, "Series 1 and 2 do not have same length!", pos=4, cex=1.4, col="blue"); RRR text(-0.5, 8, "Cannot generate plots.", pos=4, cex=1.4, col="blue"); RRR } RRR RRR q() RRR RRR ####################### END R-CODE ######################### Purpose: -------- Generate a consistency table for all series pairs specified in an input map table. Series information is read from at least one "TSA-downloaded" group summary file and up to four can be specified on input. The output is written to a file in HTML format in the specified output directory. Repeated runs of this script will result in the output HTML being over-written. Usage: ------ consistency.pl -i [Required; path allowed] -o [Required; path allowed] -a [Optional; path allowed] -b [Optional; path allowed] -c [Optional; path allowed] -d [Optional; path allowed] -h [Required; written to ] -s [Optional; 0='no'; 1='yes'; default=0] -p [Optional; 0='no'; 1='yes'; default=0] -t [Optional; 0='no'; 1='yes'; default=0] Example 1: ---------- This reads in a map table and two "TSA-downloaded" group summary files located in directories different from the directory in which you are executing "consistency.pl". Results are written to "results.html" in the directory specified by the "-o" flag: consistency.pl -i "S:\\data\\BOP\\Frank'sConsistency\\bopmap_short.txt" -o "S:\\data\\BOP\\Frank'sConsistency" -a "S:\\data\\BOP\\Frank'sConsistency\\SEASABS_BOPM_O_sa.dat" -b "S:\\data\\BOP\\Frank'sConsistency\\SEASABS_QSERVCR_O_sa.dat" -h results.html -s 1 -p 1 -t 1 Example 2: ---------- This is the same as above except that now the script is executed in the same (local) directory "." where all input files reside. Results are written to "results.html" in the same directory. consistency.pl -i bopmap_short.txt -o . -a SEASABS_BOPM_O_sa.dat -b SEASABS_QSERVCR_O_sa.dat -h results.html -s 1 -p 1 -t 1 * Omit "-s 1" if you do not want an automatically spawned browser. * Omit "-p 1" if you do not want priors included in the consistency table. * Omit "-t 1" if you do not want diagnostic tests reported.