# Statistics/OpenStat/Research projects/Formatting rules and function for importing into R

What is the easiest and most consistent way to define time-dependent datasets and import them into R?

Data structures such as the MAMEU example can be imported, e.g., with the following R script (click edit to preserve the formatting):

```r
# Import a time-dependent dataset from a wiki page into an R data frame.
#
# The script downloads the *edit* view of the wiki page, pulls the raw wiki
# markup out of the edit form's <textarea>, and splits it at every "==".
# The resulting pieces are used as follows:
#   piece 1      name of the dataset (a leading "Data " is stripped)
#   piece 3      free-text description (stored as comment() of the data frame)
#   piece 5      description of the data structure
#   piece 7      list of references
#   piece 8, 10, ...  variable names
#   piece 9, 11, ...  data rows of the preceding variable, one per line:
#                     year | value | source | comment | comment
#
# Result: a data frame with one row per year (1..2100) and one numeric column
# per variable, assigned to a variable named after the dataset and printed.

library(XML)

wikiurl <- "http://en.wikiversity.org/w/index.php?title=Topic:Statistics:OpenStat:MAMEU&action=edit"

# Close the connection explicitly so it does not linger until garbage
# collection.
con <- url(wikiurl)
pagelines <- tryCatch(readLines(con, encoding = "UTF-8"), finally = close(con))
page <- htmlTreeParse(pagelines, asText = TRUE)

# NOTE(review): this positional walk through nested <div> elements depends on
# the exact page layout served by the wiki -- confirm it still matches.
pagebody <- page$children$html$children$body
content <- pagebody$children$div$children$div$children$div
part <- content["div", all = TRUE][2]$div$children
textfeld <- part$form$children$textarea$children$text$value

if (is.null(textfeld)) {
  stop(
    "Could not locate the edit <textarea> in the downloaded page.",
    call. = FALSE
  )
}

# Split by main titles.
textfeld <- strsplit(as.character(textfeld), "==")[[1]]

dataname <- gsub("=", "", textfeld[1])
dataname <- gsub("\n", "", dataname)
dataname <- trimws(gsub("Data ", "", dataname))
dataname

description <- textfeld[3]
description

# One row per year; variable columns are added in the loop below.
dataset <- data.frame(year = 1:2100)
comment(dataset) <- description

# Description of the structure.
variablestructure <- textfeld[5]

# List of references.
references <- textfeld[7]

# Pieces 8, 10, ... are variable names, each followed by its data piece.
# Stop at length - 1 so that every name visited has a data piece after it.
counter <- if (length(textfeld) > 8) {
  seq(from = 8, to = length(textfeld) - 1, by = 2)
} else {
  integer(0)
}

for (count in counter) {
  # Turn the heading text into a usable column name.
  variablename <- gsub("^ *= *", "", textfeld[count])
  variablename <- gsub(" ", "_", variablename)
  variablename <- gsub("[()]", "", variablename)
  variablename <- gsub("/", "_", variablename)
  print(variablename)

  # Drop the leftover "=" line of the heading, empty lines and "----" rules.
  # A logical mask is used because negative indexing with an empty index
  # vector would drop every row.
  vardatalist <- unlist(strsplit(textfeld[count + 1], "\n"))
  separator <- grepl("^=$", vardatalist) | grepl("^-*$", vardatalist)
  vardatalist <- vardatalist[!separator]
  if (length(vardatalist) == 0 || !nzchar(variablename)) {
    next
  }

  varlist <- strsplit(vardatalist, "\\|")
  # Extract the field at `position` from every row (NA when a row is shorter).
  field <- function(position) {
    vapply(varlist, function(entry) entry[position], character(1))
  }

  varrefyear <- as.numeric(gsub(" *", "", field(1)))
  vardata <- as.numeric(gsub(" *", "", field(2)))
  # Source and comment fields are parsed but not stored in the data frame.
  varsource <- field(3)
  varcomment <- paste(field(4), field(5))

  # Fill the column by year; rows whose year is not in the data frame are
  # ignored. An existing column of the same name is updated, not replaced.
  column <- if (variablename %in% names(dataset)) {
    dataset[[variablename]]
  } else {
    rep(NA_real_, nrow(dataset))
  }
  rows <- match(varrefyear, dataset$year)
  known <- !is.na(rows)
  column[rows[known]] <- vardata[known]
  dataset[[variablename]] <- column
}

# Publish the result under the dataset's own name, as announced on the page.
assign(dataname, dataset)
print(dataset)
```