/* SAS program for utilizing Proc Expand in treating missing observations and dealing with data of mixed frequencies. Here we are using part of the Lead Production Data and putting missing observations in it to illustrate the treatment of missing obervations. Also shown is the creation of a moving average from an original time series. */ /* Here is the original data. */ data leadprd; input date:monyy5. leadprod @@; format date monyy5.; title 'Lead Production Data'; title2 '(in tons)'; datalines; jan86 47400 feb86 41600 mar86 49400 apr86 40200 may86 40200 jun86 26500 jul86 16900 aug86 31300 sep86 27500 oct86 28600 nov86 28500 dec86 30100 jan87 38800 feb87 33600 mar87 36000 apr87 33200 may87 34200 jun87 29500 jul87 28800 aug87 30900 sep87 33400 oct87 37600 nov87 39500 dec87 36500 jan88 34300 feb88 35400 mar88 39100 apr88 34300 may88 36200 jun88 34800 jul88 31800 aug88 25300 sep88 32100 oct88 49600 nov88 40000 dec88 39300 jan89 41000 feb89 36000 mar89 39000 apr89 34000 may89 36200 jun89 35500 jul89 35700 aug89 38000 sep89 39300 oct89 34700 nov89 35300 dec89 32300 jan90 38500 feb90 37900 mar90 36900 apr90 38600 may90 36400 jun90 33300 jul90 34000 aug90 38000 sep90 37400 oct90 42300 nov90 36900 dec90 34800 jan91 33900 feb91 34000 mar91 37200 apr91 33300 may91 29800 jun91 24700 jul91 30800 aug91 31100 sep91 32400 oct91 32900 nov91 29100 dec91 31800 jan92 32100 feb92 30500 mar92 36800 apr92 30300 may92 29500 jun92 24700 jul92 27600 aug92 23800 sep92 21400 ; /* Here is the contrived data with a few missing observations. Missing data at Jan87, jun88, nov89, and apr91. */ data leadmissing; input date:monyy5. leadprod @@; format date monyy5.; title 'Lead Production Data with missing observations'; title2 '(in tons)'; datalines; jan86 47400 feb86 41600 mar86 49400 apr86 40200 may86 40200 jun86 26500 jul86 16900 aug86 31300 sep86 27500 oct86 28600 nov86 28500 dec86 30100 jan87 . feb87 33600 mar87 36000 apr87 33200 may87 34200 jun87 29500 jul87 28800 aug87 30900 sep87 33400 oct87 37600 nov87 39500 dec87 36500 jan88 34300 feb88 35400 mar88 39100 apr88 34300 may88 36200 jun88 . jul88 31800 aug88 25300 sep88 32100 oct88 49600 nov88 40000 dec88 39300 jan89 41000 feb89 36000 mar89 39000 apr89 34000 may89 36200 jun89 35500 jul89 35700 aug89 38000 sep89 39300 oct89 34700 nov89 . dec89 32300 jan90 38500 feb90 37900 mar90 36900 apr90 38600 may90 36400 jun90 33300 jul90 34000 aug90 38000 sep90 37400 oct90 42300 nov90 36900 dec90 34800 jan91 33900 feb91 34000 mar91 37200 apr91 . may91 29800 jun91 24700 jul91 30800 aug91 31100 sep91 32400 oct91 32900 nov91 29100 dec91 31800 jan92 32100 feb92 30500 mar92 36800 apr92 30300 may92 29500 jun92 24700 jul92 27600 aug92 23800 sep92 21400 ; /* First going to fill in the missing observations using the cubic spline interpolation. */ proc expand data=leadmissing out=leadtreated1 from=month; convert leadprod=leadt1/observed=total method=spline; id date; run; data leadtreated1; set leadtreated1; leadt1=round(leadt1); run; title 'Treat missing lead data using linear spline interpolation'; proc print data=leadtreated1; run; /* Now going to treat mining observations using cubic spline interpolation. */ proc expand data=leadmissing out=leadtreated2 from=month; convert leadprod=leadt2/observed=total method=join; id date; run; data leadtreated2; set leadtreated2; leadt2=round(leadt2); run; title 'Treat missing lead data using cubic spline interpolation'; proc print data=leadtreated2; run; /* Now let's take the linear spline interpolated data (leadtreated2) and convert its frequency from monthly to quarterly. */ proc expand data=leadtreated2 out=leadquarterly from=month to=qtr; convert leadt2=leadqtr / observed=(total,total); id date; run; data leadquarterly; set leadquarterly; leadqtr=round(leadqtr); run; title 'Converting Monthly lead production to quarterly production'; proc print data=leadquarterly; run; /* Quarterly data on profits in producing lead */ data profit; input date:YYQ4. profit @@; format date YYQ4.; title 'Profit in millions of dollars'; datalines; 86Q1 460 86Q2 375 86Q3 260 86Q4 315 87Q1 360 87Q2 340 87Q3 335 87Q4 380 88Q1 370 88Q2 364 88Q3 310 88Q4 440 89Q1 390 89Q2 355 98Q3 387 89Q4 325 90Q1 364 90Q2 390 90Q3 361 90Q4 374 91Q1 371 91Q2 290 91Q3 295 91Q4 303 92Q1 305 92Q2 280 92Q3 250 ; title 'Quarterly profits of the Lead Production Company'; proc print data=profit; run; /* Here is where we synchronize the data from the two sources. We use the merge statement. */ data combine; merge leadquarterly profit; run; title 'Combined Quarterly Data'; proc print data = combine; run; title 'Regression on combined quarterly data'; proc reg data = combine; model profit = leadqtr; run; /* Now let us create a 3-term moving average of the original lead data and plot both simultaneously. */ proc expand data=leadprd out=lead3ma from=month method=none; convert leadprod=lead3ma / transformout=(movave 3); id date; run; title '3-term moving average of lead production'; proc print data=lead3ma; run; goptions cback=white colors=(black) border reset=(axis symbol); axis1 offset=(1 cm) label=('Year') minor=none order=('01jan86'd to '01jan93'd by year); axis2 label=(angle=90 'Lead Production') order=(15000 to 50000 by 5000); symbol1 i=join color=red; symbol2 i=join color=green; legend1 position=(bottom center outside); title 'Plot of lead production and its 3-term (trailing) moving average'; proc gplot data=lead3ma; format date year4.; plot leadprod*date lead3ma*date / overlay haxis=axis1 vaxis=axis2 vminor=1 legend=legend1; run;