The Art of R Programming

(WallPaper) #1

15 # how many Person records?
16 npr <- intextract(hrec,c(106,107))
17 if (npr > 0)
18 for (i in 1:npr) {
19 prec <- readLines(con,1) # get Person record
20 # make this person's row for the data frame
21 person <- makerow(serno,prec,flds)
22 # add it to the data frame
23 dtf <- rbind(dtf,person)
24 }
25 }
26 return(dtf)
27 }
28
# Build one person's row for the data frame.
#   srn: value stored under the "serno" entry of the row
#   pr:  the Person record string the fields are extracted from
#   fl:  named list; each element is passed to intextract() as the
#        character-position range of the field with that name
# Returns a list holding "serno" plus one integer entry per name in fl.
makerow <- function(srn, pr, fl) {
  row <- list()
  row$serno <- srn
  field_names <- names(fl)
  for (k in seq_along(field_names)) {
    key <- field_names[k]
    # pull the integer stored at this field's positions in the record
    row[[key]] <- intextract(pr, fl[[key]])
  }
  row
}
38
# Extract an integer field from the string s.
#   s:   string holding the record
#   rng: two-element vector; rng[1] is the first character position of
#        the field and rng[2] is the last
# Returns the substring s[rng[1]..rng[2]] converted with as.integer().
intextract <- function(s, rng) {
  as.integer(substr(s, start = rng[1], stop = rng[2]))
}


Let’s see how this works. At the beginning of extractpums(), we create an
empty data frame and set up the connection for the PUMS file read.

dtf <- data.frame() # data frame to be built
con <- file(pf,"r") # connection

The main body of the code then consists of a repeat loop.

repeat {
hrec <- readLines(con,1) # read Household record
if (length(hrec) == 0) break # end of file, leave loop
# get household serial number
serno <- intextract(hrec,c(2,8))
# how many Person records?
npr <- intextract(hrec,c(106,107))

Input/Output 241
Free download pdf