/* Session setup: listing layout and the two library references used by this program. */
options linesize=120   /* width of printed output lines */
        nocenter       /* left-align procedure output */
        nofmterr;      /* keep running when a variable's format cannot be found */
libname in "/var/tmp/scratch";   /* input library; in.allpy is read from here */
libname here '.';                /* current directory; here.wagedata is written here */
/* Build the person-year working sample from in.allpy.
   Row filters: years 1995-2005, marginal=0, trainee=0, parttime=0,
   age 22-38, edgroup in (1,2,4), gap=0.
   Derived variables: schooling (numeric value assigned per edgroup code --
   presumably years of schooling, confirm), logwage, jobnumber.
   Only the variables in the KEEP list are written to the output dataset. */
data zero;
  set in.allpy (where=(year>=1995 and year<=2005 and marginal=0 and trainee=0 and parttime=0));

  /* subsetting IFs: rows failing any condition are dropped here */
  if 22<=age<=38;
  if edgroup in (1,2,4);
  if gap=0;

  /* edgroup -> schooling.  The entries for 0 and 3 cannot be reached while
     the filter above keeps only edgroup 1, 2 and 4; they are retained so the
     full mapping stays in one place if that filter is ever relaxed. */
  select (edgroup);
    when (0) schooling=10.5;
    when (1) schooling=11;
    when (2) schooling=13;
    when (3) schooling=15;
    when (4) schooling=18;
    otherwise;
  end;

  /* log() of a zero or negative wage raises an invalid-argument message for
     every such row; set logwage to missing explicitly instead (same value the
     unguarded call would produce, without the log noise). */
  if dailywage>0 then logwage=log(dailywage);
  else logwage=.;

  /* The former "exp=age-schooling-6" assignment was removed: exp was not in
     the KEEP list, so it never reached the output, and the name shadows the
     EXP function. */

  jobnumber=njob;

  keep id year age female logwage schooling censor firmid jobnumber tenure;
run;
/* Diagnostics on the person-year sample: summary statistics by year, then
   the year-by-censor cross-tab and the schooling distribution (missing
   values are counted as a category). */
proc means data=work.zero;
  class year;
run;

proc freq data=work.zero;
  tables year*censor
         schooling
         / missing;
run;
/* Collapse work.zero to one row per id with per-person summaries.
   nwages counts non-missing logwage values (not rows). */
proc sql;
  create table work.pall as
    select id,
           min(age)        as minage,
           max(age)        as maxage,
           min(schooling)  as minschooling,
           max(schooling)  as maxschooling,
           avg(schooling)  as meanschooling,
           avg(logwage)    as meanlogwage,
           count(logwage)  as nwages,
           avg(censor)     as meancensor
      from work.zero
     group by id;
quit;
/* Diagnostics on the per-person summaries: overall statistics, the
   distribution of wage counts per person, and min-vs-max schooling
   (off-diagonal cells are people whose schooling value changes). */
proc means data=work.pall;
run;

proc freq data=work.pall;
  tables nwages
         minschooling*maxschooling
         / missing;
run;
/* Attach selected per-person summaries from work.pall to every person-year
   row of work.zero.  work.pall is grouped by id, so the left join does not
   multiply rows. */
proc sql;
  create table work.one as
    select z.*,
           p.nwages,
           p.minage,
           p.minschooling,
           p.maxschooling,
           p.meanschooling,
           p.meancensor
      from work.zero as z
           left join
           work.pall as p
           on z.id = p.id;
quit;
/* Remove the two intermediate tables, then order work.one by person and
   year; the LAGn-based reshape later in the program relies on this order. */
proc datasets library=work;
  delete zero pall;
run;
quit;

proc sort data=work.one;
  by id year;
run;
/* Keep only people with exactly 11 non-missing wages (nwages) and a single
   schooling value across all their rows (min = max); the helper schooling
   summaries are not written to work.two.  work.one is deleted afterwards. */
data work.two (drop=minschooling maxschooling meanschooling);
  set work.one;
  if nwages=11 and minschooling=maxschooling;
run;

proc datasets library=work;
  delete one;
run;
quit;
/* Eyeball the first 200 rows of the filtered long data, then summarise it
   by year.  The title2 is set inside the PROC MEANS step, i.e. after the
   PROC PRINT step has already ended. */
proc print data=work.two (obs=200);
  var id year jobnumber tenure schooling female censor logwage;
run;

proc means data=work.two;
  title2 'subset with 11 wages (every year 1995-2005)';
  class year;
run;
/* Reshape work.two from long (one row per person-year, ordered by id year)
   to wide (one row per person, taken from the year=2005 row).
   For k = 0..10 the value found k rows earlier is stored in w(11-k),
   j(11-k) and cen(11-k) for logwage, jobnumber and censor respectively;
   starttenure is tenure from 10 rows earlier.

   Each LAGn call owns a separate queue that only advances when the call
   executes, so every LAGn below must run on every input row -- keep them
   above the subsetting IF.

   The lag queues know nothing about person boundaries.  nwages=11 counts
   non-missing wages, not rows or distinct years, so an id with a duplicated
   year or an extra row with a missing wage would shift the lags and pull in
   another person's (or the wrong year's) values.  The run-length guard
   below outputs a person only when the 2005 row closes a run of at least 11
   consecutive yearly rows for the same id. */
data work.two;
  set work.two;

  /* log wages: w11 = current row, w1 = 10 rows back */
  w11=logwage;
  w10=lag(logwage);
  w9=lag2(logwage);
  w8=lag3(logwage);
  w7=lag4(logwage);
  w6=lag5(logwage);
  w5=lag6(logwage);
  w4=lag7(logwage);
  w3=lag8(logwage);
  w2=lag9(logwage);
  w1=lag10(logwage);

  /* job numbers, same layout */
  j11=jobnumber;
  j10=lag(jobnumber);
  j9=lag2(jobnumber);
  j8=lag3(jobnumber);
  j7=lag4(jobnumber);
  j6=lag5(jobnumber);
  j5=lag6(jobnumber);
  j4=lag7(jobnumber);
  j3=lag8(jobnumber);
  j2=lag9(jobnumber);
  j1=lag10(jobnumber);

  /* censoring indicators, same layout */
  cen11=censor;
  cen10=lag(censor);
  cen9=lag2(censor);
  cen8=lag3(censor);
  cen7=lag4(censor);
  cen6=lag5(censor);
  cen5=lag6(censor);
  cen4=lag7(censor);
  cen3=lag8(censor);
  cen2=lag9(censor);
  cen1=lag10(censor);

  starttenure=lag10(tenure);

  /* _run = number of consecutive rows, ending at the current one, that
     belong to the same id with year increasing by exactly 1 each row.
     The sum statement retains _run across rows (initial value 0). */
  _previd=lag(id);
  _prevyear=lag(year);
  if id ne _previd or _prevyear ne year-1 then _run=1;
  else _run+1;

  /* one output row per person: the 2005 row of a complete 1995-2005 run */
  if year=2005 and _run>=11;

  startage=minage;
  *if meancensor<.5;

  drop _previd _prevyear _run;
run;
/* Diagnostics on the wide, one-row-per-person data: summary statistics,
   distribution of the final job number, and correlations of starting age,
   schooling and the eleven log wages -- pooled, then separately by sex. */
proc means data=work.two;
  title2 'wide data - one obs per person';
run;

proc freq data=work.two;
  tables j11 / missing;
run;

/* pooled */
proc corr data=work.two;
  var startage schooling w1-w11;
run;

/* female=0 */
proc corr data=work.two;
  where female=0;
  title2 'men only';
  var startage schooling w1-w11;
run;

/* female=1 */
proc corr data=work.two;
  where female=1;
  title2 'women only';
  var startage schooling w1-w11;
run;
/* Write the permanent analysis file here.wagedata: the wide data with a
   sequential person identifier (newid = row number) in place of id, limited
   to the variables named in the KEEP= list.  The three procedures that
   follow re-check the saved file. */
data here.wagedata (keep=newid w1-w11 j1-j11 cen1-cen11 startage starttenure schooling female);
  set work.two;
  newid=_n_;
  startage=minage;
run;

proc means data=here.wagedata;
  title2 'test data ';
run;

proc freq data=here.wagedata;
  tables j11 / missing;
run;

proc corr data=here.wagedata;
  var schooling w1-w11;
run;
/* Eleven separate regressions, one per wage variable w1..w11, each on the
   same regressors: schooling, female and startage.
   No DATA= is given, so PROC REG reads the most recently created dataset --
   here.wagedata, assuming no other dataset is created in between.
   NOTE(review): no RUN;/QUIT; is visible in this part of the file; confirm
   the step is terminated further down, or add "run; quit;" after the last
   MODEL statement. */
proc reg;
model w1=schooling female startage;
model w2=schooling female startage;
model w3=schooling female startage;
model w4=schooling female startage;
model w5=schooling female startage;
model w6=schooling female startage;
model w7=schooling female startage;
model w8=schooling female startage;
model w9=schooling female startage;
model w10=schooling female startage;
model w11=schooling female startage;