Statistics/OpenStat/Research projects/Formatting rules and function for importing into R

From Wikiversity
Jump to navigation Jump to search

What is the easiest and most consistent way to define time-dependent datasets and import them into R?

Data structures such as the MAMEU example can be imported, e.g., with the following R script (click "edit" to preserve the formatting):

# Import a time-dependent dataset from a Wikiversity page into a data frame.
#
# The script downloads the "edit" view of the wiki page, reads the raw wiki
# text from the edit form's textarea and splits it at every "==". The pieces
# are then used by position:
#   piece 1      -> name of the dataset (after removing "=", newlines, "Data ")
#   piece 3      -> description, stored as comment() of the data frame
#   piece 5      -> description of the variable structure
#   piece 7      -> list of references
#   piece 8, 10, ... -> variable titles, each followed by one piece holding
#                       that variable's data lines
# Each data line is split at "|" into: year | value | source | comment parts.
#
# Result: a data frame with a `year` column (1..2100) and one numeric column
# per variable, assigned to a variable named after the dataset and printed.

library(XML)

wikiurl <- "http://en.wikiversity.org/w/index.php?title=Topic:Statistics:OpenStat:MAMEU&action=edit"

# readLines() closes a connection it opened itself but does not destroy it,
# so the connection is created explicitly and always closed afterwards.
wikicon <- url(wikiurl)
pagelines <- tryCatch(
  readLines(wikicon, encoding = "UTF-8"),
  finally = close(wikicon)
)
page <- htmlTreeParse(pagelines, asText = TRUE)

# NOTE(review): this path hard-codes the nesting of <div> elements around the
# edit form; it presumably has to be adapted whenever the page skin changes.
part <- (page$children$html$children$body$children$div$children$div$children$div["div", all = TRUE][2]$div$children)
textfeld <- part$form$children$textarea$children$text$value
if (is.null(textfeld) || length(textfeld) == 0) {
  stop("Could not locate the wiki text (edit textarea) in the downloaded page.",
       call. = FALSE)
}

# Split by main titles.
textfeld <- strsplit(as.character(textfeld), "==")[[1]]
if (length(textfeld) < 8) {
  stop("The wiki text contains no variable sections (found ",
       length(textfeld), " pieces, need at least 8).", call. = FALSE)
}

# Name of the dataset. Surrounding blanks are trimmed because the name is
# used literally as a variable name below.
dataname <- gsub("=", "", as.character(textfeld[1]))
dataname <- gsub("\n", "", dataname)
dataname <- gsub("Data ", "", dataname)
dataname <- trimws(dataname)
if (!nzchar(dataname)) {
  stop("Could not derive a dataset name from the first title.", call. = FALSE)
}
dataname

description <- textfeld[3]
description

# Build the data frame under a fixed local name and bind it to `dataname`
# at the end; no code has to be generated and parsed for that.
dataset <- data.frame(year = 1:2100)

# Setting the comment directly also works for descriptions that contain
# quotes or backslashes.
comment(dataset) <- description

# Description of the structure.
variablestructure <- textfeld[5]

# List of references.
references <- textfeld[7]

# Positions of the variable titles: every second piece from the 8th on.
counter <- seq(from = 8, to = length(textfeld), by = 2)

# For each variable: parse its data lines and fill the matching years.
for (count in counter) {
  # Column name: strip a leading "=", drop parentheses, and turn blanks and
  # slashes into underscores.
  variablename <- gsub("^ *= *", "", textfeld[count])
  variablename <- gsub("[()]", "", variablename)
  variablename <- gsub("[ /]", "_", variablename)
  print(variablename)
  if (!nzchar(variablename)) {
    warning("Skipping a variable section with an empty title.", call. = FALSE)
    next
  }

  vardatalist <- unlist(strsplit(textfeld[count + 1], "\n"))

  # Drop lines that carry no data: a lone "=", empty lines, lines made only
  # of dashes, and the NA produced when the last title has no data piece.
  # A logical mask is used because a negative index built from an empty
  # match set would drop every line.
  nodata <- is.na(vardatalist) |
    grepl("^=$", vardatalist) |
    grepl("^-*$", vardatalist)
  vardatalist <- vardatalist[!nodata]

  varlist <- strsplit(vardatalist, "\\|")

  # i-th "|"-separated field of every data line (NA where a line is shorter).
  field <- function(i) {
    vapply(varlist, function(parts) parts[i], character(1))
  }

  varrefyear <- as.numeric(gsub(" ", "", field(1), fixed = TRUE))
  vardata <- as.numeric(gsub(" ", "", field(2), fixed = TRUE))

  # Source and comment are parsed but not stored in the data frame yet.
  varsource <- field(3)
  varcomment <- paste(field(4), field(5))

  # Start from the existing column if this name was already filled, otherwise
  # from an all-NA column of full length.
  values <- dataset[[variablename]]
  if (is.null(values)) {
    values <- rep(NA_real_, nrow(dataset))
  }

  # Years that do not occur in `year` (or could not be parsed) are ignored;
  # for repeated years the last data line wins.
  rows <- match(varrefyear, dataset$year)
  known <- !is.na(rows)
  values[rows[known]] <- vardata[known]
  dataset[[variablename]] <- values
}

# Make the result available under the dataset's own name and show it.
assign(dataname, dataset)
print(dataset)