Binary files /tmp/tmpRlTXqJ/nXiTXaBNsj/addhazard-1.0.0/data/nwts2ph.rda and /tmp/tmpRlTXqJ/IsARiyUNQR/addhazard-1.1.0/data/nwts2ph.rda differ diff -Nru addhazard-1.0.0/debian/changelog addhazard-1.1.0/debian/changelog --- addhazard-1.0.0/debian/changelog 2017-03-26 03:05:24.000000000 +0000 +++ addhazard-1.1.0/debian/changelog 2017-03-26 03:05:24.000000000 +0000 @@ -1,13 +1,27 @@ -addhazard (1.0.0-1cran1ppa0trusty0) trusty; urgency=medium +addhazard (1.1.0-1cran1ppa0trusty0) trusty; urgency=medium - * Compilation for Ubuntu 14.04.4 LTS + * Compilation for Ubuntu 14.04.5 LTS + + -- Michael Rutter Sun, 26 Mar 2017 03:03:41 +0000 + +addhazard (1.1.0-1cran1) testing; urgency=low + + * cran2deb svn: 362M with DB version 1. + + -- cran2deb4ubuntu Sat, 25 Mar 2017 18:18:21 -0400 + + +addhazard (1.0.0-1cran2) testing; urgency=low + + * cran2deb svn: 362M with DB version 1. + + -- cran2deb4ubuntu Sun, 23 Oct 2016 02:16:47 -0400 - -- Michael Rutter Sun, 21 Aug 2016 20:20:55 +0000 addhazard (1.0.0-1cran1) testing; urgency=low * cran2deb svn: 362M with DB version 1. - -- cran2deb4ubuntu Sun, 21 Aug 2016 10:22:55 -0400 + -- cran2deb4ubuntu Sun, 21 Aug 2016 10:23:03 -0400 diff -Nru addhazard-1.0.0/debian/compat addhazard-1.1.0/debian/compat --- addhazard-1.0.0/debian/compat 1970-01-01 00:00:00.000000000 +0000 +++ addhazard-1.1.0/debian/compat 2017-03-26 03:05:24.000000000 +0000 @@ -0,0 +1 @@ +7 \ No newline at end of file diff -Nru addhazard-1.0.0/debian/control addhazard-1.1.0/debian/control --- addhazard-1.0.0/debian/control 2017-03-26 03:05:24.000000000 +0000 +++ addhazard-1.1.0/debian/control 2017-03-26 03:05:24.000000000 +0000 @@ -13,11 +13,13 @@ Description: GNU R package "Fit Additive Hazards Models for Survival Analysis" . - Contains tools to fit additive hazards model to random sampling, - two-phase sampling and two-phase sampling with auxiliary information. - This package provides regression parameter estimates and their - model-based and robust standard errors. It also offers tools to make - prediction of individual specific hazards. + Contains tools to fit the additive hazards model to data from a cohort, + random sampling, two-phase Bernoulli sampling and two-phase finite + population sampling, as well as calibration tool to incorporate phase + I auxiliary information into the two-phase data model fitting. This + package provides regression parameter estimates and their model-based + and robust standard errors. It also offers tools to make prediction of + individual specific hazards. . Author: Jie (Kate) Hu [aut, cre], Norman Breslow [aut], Gary Chan [aut] . diff -Nru addhazard-1.0.0/debian/copyright addhazard-1.1.0/debian/copyright --- addhazard-1.0.0/debian/copyright 2017-03-26 03:05:24.000000000 +0000 +++ addhazard-1.1.0/debian/copyright 2017-03-26 03:05:24.000000000 +0000 @@ -2,7 +2,7 @@ automatically using cran2deb4ubuntu by cran2deb4ubuntu . -The original GNU R package is Copyright (C) 2016 Jie (Kate) Hu [aut, +The original GNU R package is Copyright (C) 2017 Jie (Kate) Hu [aut, cre], Norman Breslow [aut], Gary Chan [aut] and possibly others. 
The original GNU R package is maintained by Jie (Kate) Hu diff -Nru addhazard-1.0.0/DESCRIPTION addhazard-1.1.0/DESCRIPTION --- addhazard-1.0.0/DESCRIPTION 2015-08-17 21:07:27.000000000 +0000 +++ addhazard-1.1.0/DESCRIPTION 2017-03-21 06:50:30.000000000 +0000 @@ -1,21 +1,27 @@ Package: addhazard Title: Fit Additive Hazards Models for Survival Analysis -Version: 1.0.0 +Version: 1.1.0 Authors@R: c(person("Jie (Kate)","Hu", role = c("aut", "cre"), email = "hujie0704@gmail.com"), - person("Norman","Breslow", role = "aut"), + person("Norman","Breslow", role = "aut"), person("Gary","Chan", role = "aut")) -Date: 2015-08-08 -Description: Contains tools to fit additive hazards model to random sampling, two-phase sampling and two-phase sampling with auxiliary information. This package provides regression parameter estimates and their model-based and robust standard errors. It also offers tools to make prediction of individual specific hazards. +Date: 2017-03-20 +Description: Contains tools to fit the additive hazards model to data from a cohort, + random sampling, two-phase Bernoulli sampling and two-phase finite population sampling, + as well as calibration tool to incorporate phase I auxiliary information into the + two-phase data model fitting. This package provides regression parameter estimates and + their model-based and robust standard errors. It also offers tools to make prediction of + individual specific hazards. LazyData: true -Depends: R (>= 3.1.2) +Depends: R (>= 3.3.1) Imports: ahaz, survival, rootSolve +RoxygenNote: 5.0.1 License: GPL-2 -Packaged: 2015-08-17 17:45:28 UTC; kate.hu +NeedsCompilation: no +Packaged: 2017-03-21 00:07:53 UTC; Kate Author: Jie (Kate) Hu [aut, cre], Norman Breslow [aut], Gary Chan [aut] Maintainer: Jie (Kate) Hu -NeedsCompilation: no Repository: CRAN -Date/Publication: 2015-08-17 23:07:27 +Date/Publication: 2017-03-21 06:50:30 UTC diff -Nru addhazard-1.0.0/man/ah.2ph.Rd addhazard-1.1.0/man/ah.2ph.Rd --- addhazard-1.0.0/man/ah.2ph.Rd 2015-08-17 17:44:58.000000000 +0000 +++ addhazard-1.1.0/man/ah.2ph.Rd 2017-03-21 00:05:35.000000000 +0000 @@ -1,82 +1,108 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/ah2phase.R \name{ah.2ph} \alias{ah.2ph} \title{Fit Additive Hazards Regression Models to Two-phase Sampling} \usage{ -ah.2ph(formula, data, R, Pi, robust = FALSE, calibration.variables = NULL) +ah.2ph(formula, data, R, Pi, ties, robust = FALSE, + calibration.variables = NULL, ...) } \arguments{ -\item{formula}{a formula object for the regression model of the form response ~ predictors. -The outcome is a survival object created by \code{\link[survival]{Surv}}.} +\item{formula}{a formula object for the regression model of the form +response ~ predictors. The outcome is a survival object created by \code{\link[survival]{Surv}}.} \item{data}{a data frame. Input dataset.} -\item{R}{a phase II membership indicator. A vector of values of 0 and 1. The subject is selected to phase II if R = 1.} +\item{R}{a phase II membership indicator. A vector of values of 0 and 1. +The subject is selected to phase II if R = 1.} \item{Pi}{the probability of a subject to be selected to the phase II subsample.} +\item{ties}{a logical variable. FALSE if there are no ties in the censored failure times.} + \item{robust}{a logical variable. Robust standard errors are provided if robust = TRUE.} -\item{calibration.variables}{a vector of some column names of the data. 
These are the variables available for every observation. -They are used to calibrate the weight assigned to each subject in order to improve estimation efficiency.} +\item{calibration.variables}{a vector of some column names of the data. +These are the variables available for every observation. They are used to +calibrate the weight assigned to each subject in order to improve estimation efficiency.} + +\item{...}{additional arguments to be passed to the low level regression fitting +functions.} } \value{ -An object of class "ah.2h" representing the fit. +An object of class 'ah.2ph' representing the fit. } \description{ -The function fits a semiparametric additive hazards model \deqn{ \lambda(t|Z=z) = \lambda_0(t) + \beta'z.} to -two-phase sampling data. The estimating procedures follow Hu (2014). +The function fits a semiparametric additive hazards model + \deqn{ \lambda(t|Z=z) = \lambda_0(t) + \beta'z.} to two-phase sampling data. + The estimating procedures follow Hu (2014). } \note{ -This function estimates both model-based and robust standard errors. It can be used to analyze -case-cohort studies. It allows subsampling among cases. It can incoporate the calibration procedure -and analyze the combined dataset of phase I and phase II samples. +This function estimates both model-based and robust standard errors. It can be +used to analyze case-cohort studies. It allows subsampling among cases. It can +incorporate the calibration procedure and analyze the combined dataset of phase +I and phase II samples. } \examples{ library(survival) -### load data -nwts <- nwtsco[1:100,] - -### create strata based on institutional histology and disease status -nwts$strt <- 1+nwts$instit -### add a stratum containing all (relapsed) cases -nwts$strt[nwts$relaps==1] <- 3 - -### assign phase II subsampling probabilities -### oversample unfavorable histology (instit =1) and cases -### Pi = 0.5 for instit =0, Pi =1 for instit =1 and relaps =1 -nwts$Pi<- 0.5 * (nwts$strt == 1) + 1 * (nwts$strt == 2) + 1 * (nwts$strt == 3) - -### generate phase II sampling indicators -N <- dim(nwts)[1] -nwts$in.ph2 <- rbinom(N, 1, nwts$Pi) - ### fit an additive hazards model to two-phase sampling data without calibration -fit1 <- ah.2ph(Surv(trel,relaps) ~ age + histol, data = nwts, R = in.ph2, Pi = Pi, - robust = FALSE, calibration.variables = NULL) +nwts2ph$trel <- nwts2ph$trel + runif(dim(nwts2ph)[1],0,1)*1e-8 +fit1 <- ah.2ph(Surv(trel,relaps) ~ age + histol, ties = FALSE, data = nwts2ph, R = in.ph2, Pi = Pi, + robust = FALSE, calibration.variables = NULL) summary(fit1) - ### fit an additive hazards model with calibration on age -fit2 <- ah.2ph(Surv(trel,relaps) ~ age + histol, data = nwts, R = in.ph2, Pi = Pi, - robust = FALSE, calibration.variables = age) +fit2 <- ah.2ph(Surv(trel,relaps) ~ age + histol, ties = FALSE, data = nwts2ph, R = in.ph2, Pi = Pi, + robust = FALSE, calibration.variables = "age") summary(fit2) ### calibrate on age squared ### note if users create a calibration variable, then ### the new variable should be added to the original data frame -nwts$age2 <- nwts$age^2 -fit3 <- ah.2ph(Surv(trel,relaps) ~ age + histol, data = nwts, R = in.ph2, Pi = Pi, - robust = FALSE, calibration.variables = age2) +nwts2ph$age2 <- nwts2ph$age^2 +fit3 <- ah.2ph(Surv(trel,relaps) ~ age + histol, ties = FALSE, data = nwts2ph, R = in.ph2, Pi = Pi, + robust = FALSE, calibration.variables = "age2") summary(fit3) + +############################################################################# +### When phase II samples are obtained 
by finite sampling ############# +############################################################################# + +### calculate the sample size for each stratum +### calculate the strata size +strt.size <- table(nwts2ph$strt) +ph2.strt.size <- table(subset(nwts2ph, in.ph2 == 1)$strt) +### fit an additive hazards model with finite stratified sampling +### calculate the sampling fractions +frac <- ph2.strt.size/strt.size +### treating the problem as Bernoulli sampling coupled with calibration on strata sizes +### using frac as the sampling probabilities +nwts2ph_by_FPSS <- nwts2ph +nwts2ph_by_FPSS$Pi <- NULL +for (i in 1:length(strt.size)){ + nwts2ph_by_FPSS$Pi[nwts2ph_by_FPSS$strt ==i] <- frac[i] +} + +### create strt indicators, which become our calibration variables +for (i in 1:length(strt.size)){ + nwts2ph_by_FPSS$strt_ind <- as.numeric(nwts2ph_by_FPSS$strt ==i) + names(nwts2ph_by_FPSS)[ncol(nwts2ph_by_FPSS)]= paste0("strt", i) +} +### fit an additive hazards model with finite sampling +fit4 <- ah.2ph(Surv(trel,relaps) ~ age + histol, + data = nwts2ph_by_FPSS, ties = FALSE, + R = in.ph2, Pi = Pi, + robust = FALSE, + calibration.variables = c("strt1","strt2","strt3")) +summary(fit4) } \references{ -Jie Hu (2014) A Z-estimation System for Two-phase Sampling with Applications to Additive Hazards Models and -Epidemiologic Studies. Dissertation, University of Washington. +Jie Hu (2014) A Z-estimation System for Two-phase Sampling with Applications to +Additive Hazards Models and Epidemiologic Studies. Dissertation, University of Washington. } \seealso{ -\code{\link{predict.ah.2ph}} for prediction based on fitted additive hazards model with two-phase sampling -and \code{\link{nwtsco}} for the description of nwtsco dataset. +\code{\link{predict.ah.2ph}} for prediction based on fitted additive +hazards model with two-phase sampling and \code{\link{nwtsco}} for the description +of nwtsco dataset. } diff -Nru addhazard-1.0.0/man/ah.Rd addhazard-1.1.0/man/ah.Rd --- addhazard-1.0.0/man/ah.Rd 2015-08-17 17:44:58.000000000 +0000 +++ addhazard-1.1.0/man/ah.Rd 2016-11-08 07:20:26.000000000 +0000 @@ -1,62 +1,69 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/ah.R \name{ah} \alias{ah} \title{Fit Additive Hazards Regression Models} \usage{ -ah(formula, data, robust, weights, ...) +ah(formula, data, robust, weights, ties, ...) } \arguments{ -\item{formula}{a formula object for the regression model of the form response ~ predictors. -The outcome is a survival object created by \code{\link[survival]{Surv}}.} +\item{formula}{a formula object for the regression model of the form +response ~ predictors. The outcome is a survival object created by +\code{\link[survival]{Surv}}.} \item{data}{a data frame. Input dataset.} -\item{robust}{a logical variable. Robust standard errors are provided if robust == TRUE.} +\item{robust}{a logical variable. Robust standard errors are provided if +robust == TRUE.} \item{weights}{a numeric vector. The weight of each observation.} -\item{...}{additional arguments to be passed to the low level regression fitting functions (see below).} +\item{ties}{a logical variable. FALSE if there are no ties in the censored failure times.} + +\item{...}{additional arguments to be passed to the low level regression +fitting functions.} } \value{ -An object of class "ah" representing the fit. +An object of class 'ah' representing the fit. 
} \description{ -Fit a semiparametric additive hazard model \deqn{ \lambda(t|Z=z) = \lambda_0(t) + \beta'z.} The -estimating procedures follow Lin & Ying (1994). +Fit a semiparametric additive hazard model +\deqn{ \lambda(t|Z=z) = \lambda_0(t) + \beta'z.} +The estimating procedures follow Lin & Ying (1994). } \note{ The response variable is a survival object. If there are ties in the - survival time, users need to break the ties first. The regression model can be univariate or multivariate. This - package is built upon the function \code{\link[ahaz]{ahaz}} by Anders Gorst-Rasmussen. + survival time, in the current version we recommend that users break ties by adding + a small random number to the survival time. An example is provided. The regression + model can be univariate or multivariate. This package is built upon the function + \code{\link[ahaz]{ahaz}} by Anders Gorst-Rasmussen. } \examples{ library(survival) ### using the first 100 rows in nwtsco to build an additive hazards model nwts<- nwtsco[1:100,] - ### fit the additive hazards model to the data ### the model-based standard errors are reported when setting robust = FALSE -fit1 <- ah(Surv(trel,relaps) ~ age + instit, data = nwts, robust = FALSE) +fit1 <- ah(Surv(trel,relaps) ~ age + instit, ties = FALSE, data = nwts, robust = FALSE) summary(fit1) - ### fit the additive hazards model to the data with robust standard errors -fit2 <- ah(Surv(trel,relaps) ~ age + instit, data = nwts, robust = TRUE) +fit2 <- ah(Surv(trel,relaps) ~ age + instit, ties = FALSE, data = nwts, robust = TRUE) summary(fit2) - ### when there are ties, break the ties first nwts_all <- nwtsco nwts_all$trel <- nwtsco$trel + runif(dim(nwts_all)[1],0,1)*1e-8 -fit3 <- ah(Surv(trel,relaps) ~ age + instit, data = nwts_all, robust = TRUE) +fit3 <- ah(Surv(trel,relaps) ~ age + instit, ties = FALSE, data = nwts_all, robust = TRUE) +summary(fit3) } \references{ -Lin, D.Y. & Ying, Z. (1994). Semiparametric analysis of the additive risk model. Biometrika; 81:61-71. +Lin, D.Y. & Ying, Z. (1994). Semiparametric analysis of the additive risk model. + Biometrika; 81:61-71. 
} \seealso{ -\code{\link{predict.ah}} for prediction based on fitted \code{\link{ah}} model, \code{\link{nwtsco}} for -the description of nwtsco dataset +\code{\link{predict.ah}} for prediction based on fitted + \code{\link{ah}} model, \code{\link{nwtsco}} for the description of nwtsco dataset } diff -Nru addhazard-1.0.0/man/nwts2ph.generate.Rd addhazard-1.1.0/man/nwts2ph.generate.Rd --- addhazard-1.0.0/man/nwts2ph.generate.Rd 1970-01-01 00:00:00.000000000 +0000 +++ addhazard-1.1.0/man/nwts2ph.generate.Rd 2017-03-20 17:36:35.000000000 +0000 @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/nwts2ph.generator.R +\name{nwts2ph.generate} +\alias{nwts2ph.generate} +\title{Generate the example dataset nwts2ph} +\usage{ +nwts2ph.generate(data, seed = 20) +} +\arguments{ +\item{data}{the full cohort data} + +\item{seed}{the random seed we use for generating this dataset} +} +\description{ +Generate the example dataset nwts2ph. +} + diff -Nru addhazard-1.0.0/man/nwts2ph.Rd addhazard-1.1.0/man/nwts2ph.Rd --- addhazard-1.0.0/man/nwts2ph.Rd 1970-01-01 00:00:00.000000000 +0000 +++ addhazard-1.1.0/man/nwts2ph.Rd 2017-03-20 17:21:54.000000000 +0000 @@ -0,0 +1,71 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/nwts2ph.R +\docType{data} +\name{nwts2ph} +\alias{nwts2ph} +\title{A hypothetical two-phase sampling dataset based on the \link{nwtsco} dataset from +the National Wilms Tumor Study (NWTS)} +\format{A data frame with 3915 rows and 15 variables: +\describe{ +We create a hypothetical two-phase sampling (stratified sampling) dataset. +In this dataset, only the subjects who have relapses and some of the controls have +their samples sent to the central laboratory for more accurate histology examination. + +Institutional histology is examined in the local hospital. It is usually available for +all the samples. Central histology is more expensive to obtain since the samples have to be +sent to the central laboratory and the work requires experienced lab scientists. +It is reasonable to assume only some samples were tested for central +histology. Following the two-phase sampling design, we selected subjects for central histology +measurements based on their outcomes and institutional histology results. 
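The selection scheme just described can be sketched in a few lines of R (an illustration adapted from the nwts2ph.generate() source shown later in this diff, not part of the .Rd file itself):

library(addhazard)
set.seed(20)
## stratum 1 = favorable institutional histology, 2 = unfavorable, 3 = relapse
strt <- 1 + nwtsco$instit
strt[nwtsco$relaps == 1] <- 3
## Bernoulli phase II selection, oversampling UH histology and relapses
Pi <- 0.5 * (strt == 1) + 0.9 * (strt == 2) + 0.9 * (strt == 3)
in.ph2 <- rbinom(nrow(nwtsco), 1, Pi)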
+ +\item{trel}{Time to relapse or last date seen (yr), continuous} +\item{tsur}{Time to death or last date seen (yr), continuous} +\item{relaps}{Indicator of relapse, + 0 = Alive no prior relapse when last seen, + 1 = Relapsed after trel years} +\item{dead}{Indicator of death, + 0 = Alive when last seen, + 1 = Died after tsur years} +\item{study}{NWTS study, + 3 = NWTS-3, + 4 = NWTS-4} +\item{stage}{Stage of disease, + 1=I, + 2=II, + 3=III, + 4=IV} +\item{histol}{Central Path histology, + 0 = Favorable (FH) and the subject is selected into the phase II subsample (in.ph2 = 1), + 1 = Unfavorable (UH) and the subject is selected into the phase II subsample (in.ph2 = 1), + NA = subject is NOT selected into the phase II subsample (in.ph2 = 0) } +\item{instit}{Institutional histology, + 0 = Favorable (FH), + 1 = Unfavorable (UH)} +\item{age}{Age at diagnosis (yr), continuous} +\item{yr}{Year of diagnosis, calendar year} +\item{specwgt}{Weight of tumor bearing specimen, in grams (continuous)} +\item{tumdiam}{Diameter of tumor, in centimeters (continuous)} +\item{strt}{Strata, + 1 = favorable institutional histology and no relapse, + 2 = unfavorable institutional histology and no relapse, + 3 = relapse} +\item{Pi}{Sampling probability, + 0.5 for strata = 1, + 0.9 for strata = 2, + 0.9 for strata = 3} +\item{in.ph2}{Phase II membership, + 1 = selected into the phase II subsample, + 0 = not selected into the phase II subsample} + }} +\source{ +This dataset was created based on the \link{nwtsco} dataset from the National Wilms Tumor Study (NWTS) +} +\usage{ +nwts2ph +} +\description{ +A hypothetical two-phase sampling dataset based on the \link{nwtsco} dataset from +the National Wilms Tumor Study (NWTS) +} +\keyword{datasets} + diff -Nru addhazard-1.0.0/man/nwtsco.Rd addhazard-1.1.0/man/nwtsco.Rd --- addhazard-1.0.0/man/nwtsco.Rd 2015-08-01 07:22:32.000000000 +0000 +++ addhazard-1.1.0/man/nwtsco.Rd 2017-03-20 17:21:54.000000000 +0000 @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/nwtsco.R \docType{data} \name{nwtsco} @@ -34,10 +34,9 @@ \item{tumdiam}{Diameter of tumor, in centimeters (continuous)} }} \source{ -\url{http://faculty.washington.edu/norm/datasets/nwts-share.txt} - Originally used by M. Kulich and D.Y. Lin: - Improving the efficiency of relative-risk estimation in case-cohort studies. J Amer Statis - Assoc 99:832-844, 2004. +Originally used by M. Kulich and D.Y. Lin: + Improving the efficiency of relative-risk estimation in case-cohort studies. + J Amer Statis Assoc 99:832-844, 2004. } \usage{ nwtsco diff -Nru addhazard-1.0.0/man/predict.ah.2ph.Rd addhazard-1.1.0/man/predict.ah.2ph.Rd --- addhazard-1.0.0/man/predict.ah.2ph.Rd 2015-08-17 17:44:58.000000000 +0000 +++ addhazard-1.1.0/man/predict.ah.2ph.Rd 2016-11-08 07:32:20.000000000 +0000 @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/pred.2ph.R \name{predict.ah.2ph} \alias{predict.ah.2ph} @@ -7,7 +7,7 @@ \method{predict}{ah.2ph}(object, newdata, newtime, ...) 
} \arguments{ -\item{object}{an object of class inhering from "ah.2ph".} +\item{object}{an object of class inheriting from 'ah.2ph'.} \item{newdata}{a dataframe of an individual's predictors.} @@ -16,13 +16,14 @@ \item{...}{further arguments passed to or from other methods.} } \value{ -A dataframe including the given time points, predicted hazards, their standard errors, - their variances, the phase I component of the variance for predicted hazards - and the phase II component of the variance. +A dataframe including the given time points, predicted hazards, their + standard errors, their variances, the phase I component of the variance for + predicted hazards and the phase II component of the variance. } \description{ -This function predicts a subject's overall hazard rates at given time points based on this subject's covariate -values. The prediction function is an object from \code{\link{ah.2ph}}. The estimating procedures follow Hu (2014). +This function predicts a subject's overall hazard rates at given time points +based on this subject's covariate values. The prediction function is an object +from \code{\link{ah.2ph}}. The estimating procedures follow Hu (2014). } \examples{ library(survival) @@ -44,9 +45,11 @@ nwts$in.ph2 <- rbinom(N, 1, nwts$Pi) ### fit an additive hazards model to two-phase sampling data without calibration -fit1 <- ah.2ph(Surv(trel,relaps) ~ age + histol, data = nwts, R = in.ph2, Pi = Pi, robust = FALSE) - - +fit1 <- ah.2ph(Surv(trel,relaps) ~ age + histol, + data = nwts, + ties = FALSE, + R = in.ph2, Pi = Pi, + robust = FALSE) ### input the new data for prediction newdata <- nwtsco[101,] @@ -56,17 +59,26 @@ ### fit an additive hazards model to two-phase sampling data with calibration ### The calibration variable is stage fit2 <- ah.2ph(Surv(trel,relaps) ~ age + histol, data = nwts, R = in.ph2, Pi = Pi, - robust = FALSE, calibration.variables = stage) + ties = FALSE, robust = FALSE, calibration.variables = "stage") ### based on the fitted model fit2, perform prediction at time points t = 3 and t = 5 predict(fit2, newdata, newtime = c(3,5)) + +\dontrun{ +### The calibration variable is stage, when set robust = TRUE +fit3 <- ah.2ph(Surv(trel,relaps) ~ age + histol, data = nwts, R = in.ph2, Pi = Pi, + ties = FALSE, robust = TRUE, calibration.variables = "stage") + +### based on the fitted model fit3, perform prediction at time points t = 3 and t = 5 +predict(fit3, newdata, newtime = c(3,5)) +} } \references{ -Jie Hu (2014) A Z-estimation System for Two-phase Sampling with Applications to Additive Hazards Models and -Epidemiologic Studies. Dissertation, University of Washington. +Jie Hu (2014) A Z-estimation System for Two-phase Sampling with Applications + to Additive Hazards Models and Epidemiologic Studies. Dissertation, + University of Washington. 
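The phase I and phase II variance components mentioned in \value above are also exposed on the fitted model object itself; a short sketch (fields as assigned by ah.2ph() in R/ah2phase.R later in this diff):

## for a two-phase fit such as fit2 above
all.equal(fit2$var.tot, fit2$var.pha1 + fit2$var.pha2)  # TRUE by construction
sqrt(diag(fit2$var.tot))                                # equals fit2$se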
} \seealso{ -\code{\link{ah.2ph}} for fitting the additive hazards model with two-phase sampling and -\code{\link{nwtsco}} for the description of nwtsco dataset +\code{\link{ah.2ph}} for fitting the additive hazards model with two-phase sampling } diff -Nru addhazard-1.0.0/man/predict.ah.Rd addhazard-1.1.0/man/predict.ah.Rd --- addhazard-1.0.0/man/predict.ah.Rd 2015-08-17 17:44:58.000000000 +0000 +++ addhazard-1.1.0/man/predict.ah.Rd 2016-11-08 07:22:54.000000000 +0000 @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/pred.ah.R \name{predict.ah} \alias{predict.ah} @@ -20,14 +20,15 @@ A dataframe including the time points for prediction, predicted values and their standard errors. } \description{ -This function predicts a subject's overall hazard rates at given time points based on this subject's covariate -values. The prediction function is an additive hazards model fitted using \code{\link{ah}}. +This function predicts a subject's overall hazard rates at given time points +based on this subject's covariate values. The prediction function is an additive +hazards model fitted using \code{\link{ah}}. } \examples{ library(survival) ### fit the additive hazards model to the data nwts<- nwtsco[1:100,] -fit <- ah(Surv(trel,relaps) ~ age + instit, data = nwts, robust = FALSE) +fit <- ah(Surv(trel,relaps) ~ age + instit, data = nwts, ties = FALSE, robust = FALSE) ### see the covariate names in the prediction function fit$call diff -Nru addhazard-1.0.0/MD5 addhazard-1.1.0/MD5 --- addhazard-1.0.0/MD5 2015-08-17 21:07:27.000000000 +0000 +++ addhazard-1.1.0/MD5 2017-03-21 06:50:30.000000000 +0000 @@ -1,15 +1,22 @@ -ccedb0763d1be4f0915a1ad772d02d69 *DESCRIPTION -2a7a18e2370876ce632ab9fb69766959 *NAMESPACE -66bc2b6f62aff05df6da89c04d919ed2 *R/ah.R -8d8c2476b91955f12d1c293e71869697 *R/ah2phase.R -92cd0e2daa0c98a22b55239961189782 *R/nwtsco.R -64555bf266f5ab1f12518a28b5f0f44e *R/pred.2ph.R -66129df325283b9a75bf34656fc62ecb *R/pred.ah.R -df29af3ec3d76cd442df153796000d3e *R/summary.R -79e0a8b4efe1a348819ffb8cc433df05 *R/tools.R +c23bdb6d486d115b2b82d111a3dcab1e *DESCRIPTION +3db9c84c8f32ebe16e92b832ae51d367 *NAMESPACE +94267ac6d96e1bb874ca61f6deb4f0c6 *R/ah.R +792f8c4b5d2ebcd54db0fa8a6f980b69 *R/ah.fit.R +8058a01b37dc3393c6df74556f95c08a *R/ah2phase.R +13fb9e56b32d7d36dc07724a96ff00fe *R/nwts2ph.R +4b6c14cc95e7725b3a75952ace54a43b *R/nwts2ph.generator.R +7c90430c067f0e8e56d8a8bfe2c54fbd *R/nwtsco.R +554e8c381737101d736b0b9c9acc5c4e *R/pred.2ph.R +6010775681cb3acb6606c6b786cdce60 *R/pred.ah.R +3c9054a362513c13acf960a4ccb9f78e *R/summary.R +25aa68b6bc896f46df924fc3bf355a59 *R/tools.R +6ff764c8dbea16d17c4adfb980d34af4 *README.md +a616ffe9f37a3807a9417a0d2876d978 *data/nwts2ph.rda 01056fc494ef5470c9e2a734cb93609c *data/nwtsco.rdata -8c938884eb71fe8b6926614ec45ca9cf *man/ah.2ph.Rd -af55f8a9b9abe5bfecb5e8ad6cfddde6 *man/ah.Rd -fda35ff3b267ab9b2cb392a6959f2112 *man/nwtsco.Rd -5bbcc2c6cf665f03204844e01bb54fcb *man/predict.ah.2ph.Rd -4fbff5f302bdd9be76fe2994736fb175 *man/predict.ah.Rd +6325ab0b6c1476ba47f3ea699fc45bdf *man/ah.2ph.Rd +11ff84d06c231e731dcc70f00e53d6d7 *man/ah.Rd +3c45f7a3eb4a6ad8c1786ef26abf43c2 *man/nwts2ph.Rd +0f34cf8f97f80407589da1120b9b2f04 *man/nwts2ph.generate.Rd +41ee433bd9019ab1d15e45e04bf3665d *man/nwtsco.Rd +96daefdbf54e60ef13bdc5cbd6437886 *man/predict.ah.2ph.Rd +51f996791fcd435ed754750dd3506a38 *man/predict.ah.Rd diff -Nru addhazard-1.0.0/NAMESPACE addhazard-1.1.0/NAMESPACE --- 
addhazard-1.0.0/NAMESPACE 2015-08-15 00:29:52.000000000 +0000 +++ addhazard-1.1.0/NAMESPACE 2017-03-20 20:49:01.000000000 +0000 @@ -1,17 +1,22 @@ -# Generated by roxygen2 (4.1.1): do not edit by hand +# Generated by roxygen2: do not edit by hand S3method(predict,ah) S3method(predict,ah.2ph) S3method(print,summary.ah) S3method(print,summary.ah.2ph) +S3method(summary,ah) S3method(summary,ah.2ph) export(ah) export(ah.2ph) +export(nwts2ph.generate) +import(ahaz) importFrom(stats,delete.response) importFrom(stats,model.extract) importFrom(stats,model.matrix) +importFrom(stats,model.weights) importFrom(stats,pnorm) importFrom(stats,printCoefmat) +importFrom(stats,rbinom) importFrom(stats,residuals) importFrom(stats,terms) importFrom(survival,Surv) diff -Nru addhazard-1.0.0/R/ah2phase.R addhazard-1.1.0/R/ah2phase.R --- addhazard-1.0.0/R/ah2phase.R 2015-08-17 17:42:42.000000000 +0000 +++ addhazard-1.1.0/R/ah2phase.R 2017-03-21 00:04:56.000000000 +0000 @@ -1,196 +1,203 @@ +####################################################### +# Author: Kate HU +# Date: July 27th, 2016 +# Task: update finite sampling documentation +# Date: March 30th, 2017 +# clear problems in calling wts.pha2 and wts.cal +# To do: Interval censoring and add finite sampling argument +# in the function +####################################################### #' Fit Additive Hazards Regression Models to Two-phase Sampling #' -#' The function fits a semiparametric additive hazards model \deqn{ \lambda(t|Z=z) = \lambda_0(t) + \beta'z.} to -#' two-phase sampling data. The estimating procedures follow Hu (2014). +#' The function fits a semiparametric additive hazards model +#' \deqn{ \lambda(t|Z=z) = \lambda_0(t) + \beta'z.} to two-phase sampling data. +#' The estimating procedures follow Hu (2014). #' -#' @param formula a formula object for the regression model of the form response ~ predictors. -#' The outcome is a survival object created by \code{\link[survival]{Surv}}. +#' @param formula a formula object for the regression model of the form +#' response ~ predictors. The outcome is a survival object created by \code{\link[survival]{Surv}}. #' @param data a data frame. Input dataset. -#' @param R a phase II membership indicator. A vector of values of 0 and 1. The subject is selected to phase II if R = 1. -#' @param Pi the probability of a subject to be selected to the phase II subsample. +#' @param ties a logical variable. FALSE if there are no ties in the censored failure times. +#' @param R a phase II membership indicator. A vector of values of 0 and 1. +#' The subject is selected to phase II if R = 1. +#' @param Pi the probability of a subject to be selected to the phase II subsample. #' @param robust a logical variable. Robust standard errors are provided if robust = TRUE. -#' @param calibration.variables a vector of some column names of the data. These are the variables available for every observation. -#' They are used to calibrate the weight assigned to each subject in order to improve estimation efficiency. -#' @return An object of class "ah.2h" representing the fit. +#' @param calibration.variables a vector of some column names of the data. +#' These are the variables available for every observation. They are used to +#' calibrate the weight assigned to each subject in order to improve estimation efficiency. +#' @param ... additional arguments to be passed to the low level regression fitting +#' functions. +#' @return An object of class 'ah.2ph' representing the fit. 
#' @importFrom survival Surv #' @export #' -#' @note -#' This function estimates both model-based and robust standard errors. It can be used to analyze -#' case-cohort studies. It allows subsampling among cases. It can incoporate the calibration procedure -#' and analyze the combined dataset of phase I and phase II samples. -#' -#' -#' @seealso \code{\link{predict.ah.2ph}} for prediction based on fitted additive hazards model with two-phase sampling -#' and \code{\link{nwtsco}} for the description of nwtsco dataset. +#' @note +#' This function estimates both model-based and robust standard errors. It can be +#' used to analyze case-cohort studies. It allows subsampling among cases. It can +#' incorporate the calibration procedure and analyze the combined dataset of phase +#' I and phase II samples. +#' +#' @seealso \code{\link{predict.ah.2ph}} for prediction based on fitted additive +#' hazards model with two-phase sampling and \code{\link{nwtsco}} for the description +#' of nwtsco dataset. #' #' @references -#' Jie Hu (2014) A Z-estimation System for Two-phase Sampling with Applications to Additive Hazards Models and -#' Epidemiologic Studies. Dissertation, University of Washington. +#' Jie Hu (2014) A Z-estimation System for Two-phase Sampling with Applications to +#' Additive Hazards Models and Epidemiologic Studies. Dissertation, University of Washington. # #' @examples #' library(survival) -#' ### load data -#' nwts <- nwtsco[1:100,] -#' -#' ### create strata based on institutional histology and disease status -#' nwts$strt <- 1+nwts$instit -#' ### add a stratum containing all (relapsed) cases -#' nwts$strt[nwts$relaps==1] <- 3 -#' -#' ### assign phase II subsampling probabilities -#' ### oversample unfavorable histology (instit =1) and cases -#' ### Pi = 0.5 for instit =0, Pi =1 for instit =1 and relaps =1 -#' nwts$Pi<- 0.5 * (nwts$strt == 1) + 1 * (nwts$strt == 2) + 1 * (nwts$strt == 3) -#' -#' ### generate phase II sampling indicators -#' N <- dim(nwts)[1] -#' nwts$in.ph2 <- rbinom(N, 1, nwts$Pi) -#' #' ### fit an additive hazards model to two-phase sampling data without calibration -#' fit1 <- ah.2ph(Surv(trel,relaps) ~ age + histol, data = nwts, R = in.ph2, Pi = Pi, -#' robust = FALSE, calibration.variables = NULL) +#' nwts2ph$trel <- nwts2ph$trel + runif(dim(nwts2ph)[1],0,1)*1e-8 +#' fit1 <- ah.2ph(Surv(trel,relaps) ~ age + histol, ties = FALSE, data = nwts2ph, R = in.ph2, Pi = Pi, +#' robust = FALSE, calibration.variables = NULL) #' summary(fit1) #' -#' -#' ### fit an additve hazards model with calibration on age -#' fit2 <- ah.2ph(Surv(trel,relaps) ~ age + histol, data = nwts, R = in.ph2, Pi = Pi, -#' robust = FALSE, calibration.variables = age) +#' ### fit an additive hazards model with calibration on age +#' fit2 <- ah.2ph(Surv(trel,relaps) ~ age + histol, ties = FALSE, data = nwts2ph, R = in.ph2, Pi = Pi, +#' robust = FALSE, calibration.variables = "age") #' summary(fit2) #' #' ### calibrate on age squared -#' ### note if users create a calibration variable, then +#' ### note if users create a calibration variable, then #' ### the new variable should be added to the original data frame -#' nwts$age2 <- nwts$age^2 -#' fit3 <- ah.2ph(Surv(trel,relaps) ~ age + histol, data = nwts, R = in.ph2, Pi = Pi, -#' robust = FALSE, calibration.variables = age2) +#' nwts2ph$age2 <- nwts2ph$age^2 +#' fit3 <- ah.2ph(Surv(trel,relaps) ~ age + histol, ties = FALSE, data = nwts2ph, R = in.ph2, Pi = Pi, +#' robust = FALSE, calibration.variables = "age2") +#' summary(fit3) +#' +#' 
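### compare the standard errors with and without calibration on age (a quick
+#' ### sketch, not part of the shipped example; fit$se is sqrt(diag(var.tot)) as
+#' ### assigned in ah.2ph below, so the efficiency gain shows up directly)
+#' round(cbind(no.calib = fit1$se, calib.age = fit2$se), 4)
+#'
+#' 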
############################################################################# +#' ### When phase II samples are obtained by finite sampling ############# +#' ############################################################################# +#' +#' ### calculate the sample size for each stratum +#' ### calculate the strata size +#' strt.size <- table(nwts2ph$strt) +#' ph2.strt.size <- table(subset(nwts2ph, in.ph2 == 1)$strt) +#' ### fit an additive hazards model with finite stratified sampling +#' ### calculate the sampling fractions +#' frac <- ph2.strt.size/strt.size +#' ### treating the problem as Bernoulli sampling coupled with calibration on strata sizes +#' ### using frac as the sampling probabilities +#' nwts2ph_by_FPSS <- nwts2ph +#' nwts2ph_by_FPSS$Pi <- NULL +#' for (i in 1:length(strt.size)){ +#' nwts2ph_by_FPSS$Pi[nwts2ph_by_FPSS$strt ==i] <- frac[i] +#' } +#' +#' ### create strt indicators, which become our calibration variables +#' for (i in 1:length(strt.size)){ +#' nwts2ph_by_FPSS$strt_ind <- as.numeric(nwts2ph_by_FPSS$strt ==i) +#' names(nwts2ph_by_FPSS)[ncol(nwts2ph_by_FPSS)]= paste0("strt", i) +#' } +#' ### fit an additive hazards model with finite sampling +#' fit4 <- ah.2ph(Surv(trel,relaps) ~ age + histol, +#' data = nwts2ph_by_FPSS, ties = FALSE, +#' R = in.ph2, Pi = Pi, +#' robust = FALSE, +#' calibration.variables = c("strt1","strt2","strt3")) +#' summary(fit4) + +ah.2ph <- function(formula, data, R, Pi, ties, robust = FALSE, + calibration.variables = NULL, ...) { + + # Z.pha2 <- Z[R==1,]; Y.pha2 <- Y[R==1,] + Call <- match.call() + R = data[, as.character(Call[["R"]])] + Pi = data[, as.character(Call[["Pi"]])] + calibration.variables = data[, calibration.variables] + Pi.pha2 <- Pi[R == 1] + data.pha2 <- data[R == 1, ] + wts.pha2 <- as.numeric(1/Pi.pha2) + data.pha2$wts.pha2 <- wts.pha2 + + if (!length(calibration.variables)) { + + # Use the new weights and fit the model to the data + # In ahaz, the weights are extracted from data by calling the column name + # Thus the variable name assigned to weights has to be included in + # the column names of data + fit.A <- ah(formula, data = data.pha2, robust = robust, + weights = wts.pha2, ties = ties) + resid <- fit.A$resid + temp <- resid * sqrt(1 - Pi.pha2) + temp1 <- resid * sqrt(Pi.pha2) + } else { + aux <- as.matrix(calibration.variables) + P <- t(aux) %*% (aux) + aux.pha2 <- aux[R == 1, ] + wts.cal <- cook.wts.cal(aux = aux, aux.pha2 = aux.pha2, + P = P, wts.pha2 = wts.pha2) + data.pha2$wts.cal <- wts.cal + fit.A <- ah(formula, data = data.pha2, robust = robust, + weights = wts.cal, ties = ties) + resid <- fit.A$resid + temp1 <- resid * sqrt(1/wts.cal) + # multiplied by sqrt(wts.cal) because Qf= sum wts.cal*f + Q <- t(aux.pha2 * sqrt(wts.cal)) %*% (resid/sqrt(wts.cal)) + # and resid already weighted by wts.cal + resid.adj <- resid - (aux.pha2 %*% solve(P) %*% Q) * wts.cal + temp <- resid.adj * sqrt((1 - Pi.pha2)/(Pi.pha2 * wts.cal)) + + } + + var.pha1 <- fit.A$var + iA <- fit.A$iA + if (robust == TRUE) { + var.pha1 <- iA %*% t(temp1) %*% temp1 %*% iA + } + + var.pha2 <- iA %*% t(temp) %*% temp %*% iA + var.tot <- var.pha1 + var.pha2 + + fit <- NULL + fit$coef <- fit.A$coef + fit$var.pha1 <- var.pha1 + fit$var.pha2 <- var.pha2 + fit$var.tot <- var.pha1 + var.pha2 + fit$se <- sqrt(diag(var.tot)) + fit$Pi.pha2 <- Pi.pha2 + fit$wts.pha2 <- wts.pha2 + fit$calibration.variables <- calibration.variables + fit$R <- R + fit$call <- Call + fit$fit.pha1 <- fit.A + class(fit) <- "ah.2ph" + fit +} +############# calculate the new 
weight ##################################### -ah.2ph <- function(formula, data, R, Pi, robust=FALSE, calibration.variables=NULL){ - -#Z.pha2<-Z[R==1,] -#Y.pha2<-Y[R==1,] - ## Creating - # ask - Call <- match.call() - R = data[, as.character(Call[["R"]])] - Pi = data[, as.character(Call[["Pi"]])] - calibration.variables = data[, as.character(Call[["calibration.variables"]])] - Pi.pha2<-Pi[R==1] - wts.pha2<-as.numeric(1/Pi.pha2) - data.pha2<-data[R==1,] - - - - - - - if(!length(calibration.variables)){ - - #Use the new weights and fit the model to the data - fit.A<-ah(formula,data=data.pha2,robust=robust,weights=wts.pha2) - resid<-fit.A$resid - - temp<-resid*sqrt(1-Pi.pha2) - temp1<- resid*sqrt(Pi.pha2) - - - }else{ - - aux<-as.matrix(calibration.variables) - P<-t(aux)%*%(aux) - aux.pha2<-aux[R==1,] - - wts.cal<-cook.wts.cal(aux=aux,aux.pha2=aux.pha2,P=P,wts.pha2=wts.pha2) - wts.pha2<-wts.cal - - fit.A<-ah(formula,data=data.pha2,robust=robust,weights=wts.pha2) - resid<-fit.A$resid - temp1<- resid*sqrt(1/wts.pha2) - - - Q<- t(aux.pha2*sqrt(wts.cal))%*% (resid/sqrt(wts.cal)) # multiplied by sqrt(wts.cal) because Qf= sum wts.cal*f - # and resid already weighted by wts.cal - resid.adj<-resid-(aux.pha2%*%solve(P)%*% Q )*wts.cal - temp<-resid.adj*sqrt((1-Pi.pha2)/(Pi.pha2*wts.pha2)) - - } - - var.pha1 <- fit.A$var - iA<-fit.A$iA - if (robust ==TRUE){ - - var.pha1 <-iA%*%t(temp1)%*%temp1%*%iA - } - - -var.pha2 <- iA%*%t(temp)%*%temp%*%iA -var.tot <- var.pha1+var.pha2 - -fit <- NULL -fit$coef <- fit.A$coef -fit$var.pha1 <- var.pha1 -fit$var.pha2 <- var.pha2 -fit$var.tot <- var.pha1+var.pha2 -fit$se <- sqrt(diag(var.tot)) -fit$Pi.pha2 <- Pi.pha2 -fit$wts.pha2 <- wts.pha2 -fit$calibration.variables <- calibration.variables -fit$R <- R -fit$call <- Call -fit$fit.pha1 <- fit.A - +cook.wts.cal <- function(aux, aux.pha2, P, wts.pha2) { -class(fit) <- "ah.2ph" -fit -} + if (!is.matrix(aux.pha2)) + aux.pha2 <- as.matrix(aux.pha2) + aux.tot <- apply(aux, 2, sum) + aux.tot.pha2 <- apply(aux.pha2 * wts.pha2, 2, sum) + ### phase I total, 1 x q + L0 <- solve(P) %*% (aux.tot.pha2 - aux.tot) - ################################################################################################# - ################################# calculate the new weight ##################################### - ################################################################################################## - ################################################################################################## - -cook.wts.cal<-function(aux,aux.pha2,P,wts.pha2){ - - - - if(!is.matrix(aux.pha2)) aux.pha2<-as.matrix(aux.pha2) - aux.tot<-apply(aux,2,sum) - aux.tot.pha2<-apply(aux.pha2*wts.pha2,2,sum) - ### phase I total, 1 x q - - L0<-solve(P)%*%(aux.tot.pha2-aux.tot) - - - - - - model.calibration<-function(L){ - F<-NULL - wts.fish<-as.vector(exp(-aux.pha2%*%L)*wts.pha2) - for (i in 1:dim(aux)[2] ){ - F[i]<-sum(wts.fish*aux.pha2[,i])-aux.tot[i] - } - F - } - - eval<-multiroot(model.calibration,start=L0) - L<-eval$root - # est.acc<-eval$est.acc - wts.cal<-as.vector(exp(-aux.pha2%*%L)*wts.pha2) - return(wts.cal) - } - - + model.calibration <- function(L) { + F <- NULL + wts.fish <- as.vector(exp(-aux.pha2 %*% L) * wts.pha2) + for (i in 1:dim(aux)[2]) { + F[i] <- sum(wts.fish * aux.pha2[, i]) - aux.tot[i] + } + F + } + eval <- rootSolve::multiroot(model.calibration, start = L0) + L <- eval$root + # est.acc<-eval$est.acc + wts.cal <- as.vector(exp(-aux.pha2 %*% L) * wts.pha2) + return(wts.cal) +} diff -Nru addhazard-1.0.0/R/ah.fit.R 
addhazard-1.1.0/R/ah.fit.R --- addhazard-1.0.0/R/ah.fit.R 1970-01-01 00:00:00.000000000 +0000 +++ addhazard-1.1.0/R/ah.fit.R 2017-03-20 20:49:56.000000000 +0000 @@ -0,0 +1,343 @@ +####################################################### +# Author: Kate HU +# Date: July 27th, 2016 +# Task: write function when assuming there may be ties +# Date: March 20, 2017 +# Task: change censor variable name to "event" +# To do: interval censoring +####################################################### +#' @import ahaz + +ah.fit<-function(Y,Z,wts,robust){ + + n.obs <- dim(Y)[1] + if(!is.vector(Z)) { + if(!is.matrix(Z)) warning ("Covariates Z has to be either a vector or a matrix") + n.par <- dim(Z)[2] + } else { + n.par <- 1 + } + + t.fail <- Y[,1] + event <- Y[,2] # 1 means death and 0 means censored + if (missing(wts) || is.null(wts)) wts <- rep(1, n.obs) + + scale(Z, center = rep(1,n.par),scale=F) + # some statistics about time points---------------------------------------- + # ordering time based on 1st: failure time and 2nd: events + new.order <- order(Y[,1], Y[,2]) + Y.ord <- Y[new.order, ] + # Y.ord[,2] is the ordered events + # Y.ord[,1] is the ordered time + if (is.vector(Z)){ + Z.ord <- Z[new.order] + wts.ord <- wts[new.order] + } + #add matrix later + + # count how many events at each sorted time + event.table <- tapply(Y.ord[,2], Y.ord[,1], sum) + # sorted unique time, either censoring or failure time + t.marks <- as.numeric(names(event.table)) + # number of time points + n.t.marks <- length(t.marks) + + # number of events at each time point + n.events.at.t.marks <- as.numeric(event.table) + + # count total Z for people who are leaving the risk set at each time point + Z.sum.lost.at.t.marks <- tapply(Z.ord * wts.ord, Y.ord[,1], sum) + # count total Z for people who have events at each time point + Z.ord.times.event <- Z.ord * wts.ord * Y.ord[,2] + Z.sum.at.event.at.t.marks <- tapply(Z.ord.times.event, Y.ord[,1], sum) + + # some statistics about time segements -------------------------------------- + # number of people at risk for each segment + # the first time segment includes all the individuals + n.in.dur <- c(n.obs, n.obs - tapply(Y.ord[,1] * wts.ord, Y.ord[,1], length)) + # count cumulative total Z lost for each duration + Z.cumsum.lost.in.dur <- cumsum(c(0, Z.sum.lost.at.t.marks)) + # count cumulative total Z for each duration + Z.cumsum.in.dur <- sum(Z) - Z.cumsum.lost.in.dur + + # divide the timeline to small intervals + t.diff <- t.marks- c(0,t.marks[-n.t.marks]) + + # Section 1: calculate eta(s) ------------------------------------------------ + # when Z is a vector + # n.in.dur for the last duration is 0 + eta <- Z.cumsum.in.dur[-(n.t.marks+1)]/n.in.dur[-(n.t.marks+1)] + eta.cum <- cumsum(eta * t.diff) + + # Section 2: calculate h(s) ------------------------------------------------ + + h <- n.events.at.t.marks/n.in.dur[-(n.t.marks+1)] + h.cum <- cumsum(h) + # Lambda starts from t1 and ends right before T + Lambda <- h.cum[-n.t.marks] - eta.cum[-1] * theta + + + +# time when fail events occur +tmp <- t.fail * event +t1.marks <- sort(tmp[tmp!=0]) + +# mark each person's event time on the timeline +sub.pos.on.t.marks <- match(t.fail, t.marks) + +# mark all the event times on the timeline +event.pos.on.t.marks <- match(t1.marks, t.marks) + + +eta.num <- eta.den <- matrix(NA, nrow=n.par, ncol = n.t.marks) +z.death <- matrix(NA, nrow=n.par, ncol = n.t.marks) + +n.death<-NULL +eta.den.vec<-NULL + +# Centralized the covariates +# Z <- scale( Z,center= rep(1,n.par),scale=F) + +# Divide the 
timeline to small intervals according to events or censoring + +t.sorted <- sort(t.fail) +t.unique <- unique(t.sorted) +# The length of eta +eta.ncol <- length(t.unique) +t.diff <- t.unique - c(0,t.unique[-eta.ncol]) + +# Section 1: calculate eta(t) ------------------------------------------------ +# eta(t): average Z for patients still at risk at time point t +# eta(t) only changes when a subject fails or censored, i.e., t.unique + + +### We calculate the numerator and the denomiators of eta separtedly +### We create the spaces for the numerator and the denomiators of eta +### Z_i is a n.par x 1 vector , therefore + + +### We calcuate the numerator and the denomiators of eta +### At each unique failure time, we calcuate for each covariate(age,sex,...) +### Then we calculate for all unique failure times +z.death <- eta.num <- eta.den <- matrix(data=NA, nrow=n.par, ncol = eta.ncol) +n.death<-NULL +eta.den.vec<-NULL +for ( i in seq_along(eta.ncol)){ + idx1 <- t.fail >= t.unique[i] + idx2 <- which(t.fail == t.unique[i]) + + n.death[i] <- sum((event*wts)[idx2]) + eta.den.vec[i]<-sum(wts[idx1]) + + for (j in 1:n.par){ + eta.num[j,i] <-sum((Z*wts)[idx1,j]) + + ########################## IF we calculate Lambda ############### + ### calculate the cumulative eta + ### The sum of Z of the subjests who have died at the jth dicrete time + + z.death[j,i]<-sum((Z[,j]*event*wts)[idx2]) + } + eta.den[,i]<-eta.den.vec[i] +} + + + +### Calculate eta, which is a n.par X eta.col matrix +dLambda1<-n.death/eta.den.vec +#print(dLambda1) +eta<-eta.num/eta.den +############################################################################### + +############################################################################### +###Calculate theta. It doens't dependent on t + +### Calculate the numerator of theta + + + +###Generate a new eta matrix(vector) corresponding to each observation such that +### eta.each.person_i is the average of Z for patients still at risk at +###the failure time of individual i + +### create the space fo this n.obs x n.par matrix (vector) + +#### Individuals + + +### find which +match.eta<-match(t, t.unique) +eta.i<-matrix(as.vector(eta[,match.eta]),ncol=n.par,byrow=TRUE) + + +###Calculate theta.numerator + + +# z.cen.1<-(Z-eta.i)*wts*event +A=fit$D + + + + +#theta.denominator<-Reduce("+",theta.denominator.timepoint) + +###Calculate theta.denominator + + +# A<-matrix(0,n.par,n.par) + +#for ( i in 1:eta.ncol){ + +# center<-Z[failure.time>= time.interval[i]] +# eta.each.person[failure.time>= time.interval[i]] + +# idx1<-t>= t.unique[i] +# rep.times<-sum(idx1) +# eta.stack<-matrix(rep(as.vector(eta[,i]),rep.times), byrow=TRUE,ncol=n.par) +# temp<-(Z[idx1,]-eta.stack)*sqrt(wts[idx1]) + +#A<- +#A+t(temp)%*%temp *t.diff[i] + +#} + + + + + +# theta.denominator<-Reduce("+",theta.denominator.timepoint) + +### Calculate theta +iA<-solve(A) +theta.num<-fit$d +theta<-iA%*%theta.num + + + +###Calculate the variance +###B part + + +B<-fit$B +var<-iA%*% B %*% iA + + +eta.cum<-as.matrix(apply(t(eta)*t.diff,2,cumsum)) + + + +#for (k in 1: n.obs){ +# int.upper<-match(t[k],t.unique) +# for (l in 1: int.upper){ +# temp<-(Z[k,]-eta[,l]) +# A.i[,,k] <-A.i[,,k]+temp%*%t(temp)*t.diff[l]*wts[k] +# z.cen.2[k,]<-z.cen.2[k,]+temp*dLambda1[l]*wts[k] + +# } + +# cumhaz<-function(fit){ +# eta.cum<-matrix(data=NA,nrow=fit$n.par,ncol=fit$eta.ncol) +# Lambda1<-rep(0,fit$eta.ncol) +# eta.cum[,1]<-fit$eta[,1]*fit$t.diff[1] +# Lambda1[1]<-fit$dLambda1[1] +# for ( i in 2:fit$eta.ncol){ +# 
eta.cum[,i]<-eta.cum[,i-1]+fit$eta[,i]*fit$t.diff[i] +# Lambda1[i]<-Lambda1[i-1]+ fit$dLambda1[i] +# } +# print(Lambda1) +# return(Lambda1-t(eta.cum)%*%fit$coef) +# +# } + +effects<-as.vector(Z%*%theta) + +fit<-list(coef=theta, + var=var, + A=A, + iA=iA, + eta=eta, + eta.cum=eta.cum, + match.eta=match.eta, + eta.i=eta.i, + eta.den.vec=eta.den.vec, + dLambda1=dLambda1, + n.par=n.par, + n.obs=n.obs, + z.death=z.death, + n.death=n.death, + eta.ncol=eta.ncol, + event=event, + time=t, + t.diff=t.diff, + t.unique=t.unique, + effects=effects, + robust=robust) + + +lambda0<-dLambda1-as.vector(t(eta)%*%theta*t.diff) +Lambda0<-cumsum(lambda0) + + + +M<-(event-Lambda0[match.eta]-effects*t) + + + +z.resid<-Z*M +eta.resid2<-apply(t(eta)*lambda0,2,cumsum) + + +eta.resid3<-eta.cum[match.eta,]*effects + + +eta.resid<-eta.i*event-eta.resid2[match.eta,]-eta.resid3 + +###nobs x n.par +z.cen.resid<-(z.resid-eta.resid) + +#A.i<-array(rep(0,n.par*n.par*n.obs),dim=c(n.par,n.par,n.obs)) +#for (k in 1: n.obs){ +# int.upper<-match(t[k],t.unique) +# for (l in 1: int.upper){ +# temp<-(Z[k,]-eta[,l]) +# A.i[,,k] <-A.i[,,k]+temp%*%t(temp)*t.diff[l]*wts[k] +# z.cen.2[k,]<-z.cen.2[k,]+temp*dLambda1[l]*wts[k] + +#} +#} + + +fit$Lambda0<-Lambda0 +fit$lambda0<-lambda0 + +fit$M<-M +fit$resid<-z.cen.resid + +#z.cen.3<-as.matrix(apply(A.i,3,function(s){s%*%fit$coef})) +#print(A.i[,,1]%*%theta) +#print(A.i[,,2]%*%theta) +#print(dim(z.cen.3)) +#print(dim(z.cen.2)) +#fit$z.cen<-(z.cen.1-z.cen.2-z.cen.3) + +#print(z.cen.2) +return(fit) +} + + +residuals.ah<-function (object, type = c("martingale", "pseudoscore", + "pseudodfbeta","martingale.increment")){ + + type <- match.arg(type) + if (type=="martingale") return(object$M) + if (type=="pseudoscore") return(object$resid) + if (type=="pseudodfbeta") return(object$iA%*%object$resid) + #if (type=="martingale.increment"){ + # dM<-matrix(0,nrow=object$n.obs,ncol=object$eta.ncol) + #for ( i in 1: object$eta.ncol){ + # dM[object$match.eta<=i,] <- (object$event-object$lambda0[i] + # -object$effects*object$t.diff[i])[object$match.eta<=i] + # } + #return(dM) + #} +} diff -Nru addhazard-1.0.0/R/ah.R addhazard-1.1.0/R/ah.R --- addhazard-1.0.0/R/ah.R 2015-08-17 17:05:33.000000000 +0000 +++ addhazard-1.1.0/R/ah.R 2017-03-20 20:40:58.000000000 +0000 @@ -1,206 +1,166 @@ -#' Fit Additive Hazards Regression Models -#' -#' Fit a semiparametric additive hazard model \deqn{ \lambda(t|Z=z) = \lambda_0(t) + \beta'z.} The -#' estimating procedures follow Lin & Ying (1994). -#' -#' @param formula a formula object for the regression model of the form response ~ predictors. -#' The outcome is a survival object created by \code{\link[survival]{Surv}}. +####################################################### +# Author: Kate HU +# Date: July 27th, 2016 +# Task: update documentation for breaking ties +# Date: March 20, 2017 +# Task: importFrom stats model.weight +# To do: Tie handling +####################################################### + + +#' Fit Additive Hazards Regression Models +#' +#' Fit a semiparametric additive hazard model +#' \deqn{ \lambda(t|Z=z) = \lambda_0(t) + \beta'z.} +#' The estimating procedures follow Lin & Ying (1994). +#' +#' @param formula a formula object for the regression model of the form +#' response ~ predictors. The outcome is a survival object created by +#' \code{\link[survival]{Surv}}. #' @param data a data frame. Input dataset. -#' @param robust a logical variable. Robust standard errors are provided if robust == TRUE. +#' @param ties a logical variable. 
FALSE if there are no ties in the censored failure times. +#' @param robust a logical variable. Robust standard errors are provided if +#' robust == TRUE. #' @param weights a numeric vector. The weight of each observation. -#' @param ... additional arguments to be passed to the low level regression fitting functions (see below). +#' @param ... additional arguments to be passed to the low level regression +#' fitting functions. #' -#' @return An object of class "ah" representing the fit. +#' @return An object of class 'ah' representing the fit. #' #' @note #' The response variable is a survival object. If there are ties in the -#' survival time, users need to break the ties first. The regression model can be univariate or multivariate. This -#' package is built upon the function \code{\link[ahaz]{ahaz}} by Anders Gorst-Rasmussen. -#' +#' survival time, in the current version we recommend that users break ties by adding +#' a small random number to the survival time. An example is provided. The regression +#' model can be univariate or multivariate. This package is built upon the function +#' \code{\link[ahaz]{ahaz}} by Anders Gorst-Rasmussen. +#' #' -#' @seealso \code{\link{predict.ah}} for prediction based on fitted \code{\link{ah}} model, \code{\link{nwtsco}} for -#' the description of nwtsco dataset +#' @seealso \code{\link{predict.ah}} for prediction based on fitted +#' \code{\link{ah}} model, \code{\link{nwtsco}} for the description of nwtsco dataset #' #' @references -#' Lin, D.Y. & Ying, Z. (1994). Semiparametric analysis of the additive risk model. Biometrika; 81:61-71. +#' Lin, D.Y. & Ying, Z. (1994). Semiparametric analysis of the additive risk model. +#' Biometrika; 81:61-71. #' #' @importFrom survival Surv -#' @importFrom stats residuals model.matrix model.extract terms +#' @importFrom stats residuals model.weights model.matrix model.extract terms #' @export -#' #' #' @examples #' library(survival) -#' ### using the first 100 rows in nwtsco to build an additive hazards model +#' ### using the first 100 rows in nwtsco to build an additive hazards model #' nwts<- nwtsco[1:100,] -#' #' #' ### fit the additive hazards model to the data #' ### the model-based standard errors are reported when setting robust = FALSE -#' fit1 <- ah(Surv(trel,relaps) ~ age + instit, data = nwts, robust = FALSE) +#' fit1 <- ah(Surv(trel,relaps) ~ age + instit, ties = FALSE, data = nwts, robust = FALSE) #' summary(fit1) #' -#' -#' ### fit the additive hazards model to the data with robust standard errors -#' fit2 <- ah(Surv(trel,relaps) ~ age + instit, data = nwts, robust = TRUE) +#' ### fit the additive hazards model to the data with robust standard errors +#' fit2 <- ah(Surv(trel,relaps) ~ age + instit, ties = FALSE, data = nwts, robust = TRUE) #' summary(fit2) #' -#' -#' ### when there are ties, break the ties first +#' ### when there are ties, break the ties first #' nwts_all <- nwtsco #' nwts_all$trel <- nwtsco$trel + runif(dim(nwts_all)[1],0,1)*1e-8 -#' fit3 <- ah(Surv(trel,relaps) ~ age + instit, data = nwts_all, robust = TRUE) +#' fit3 <- ah(Surv(trel,relaps) ~ age + instit, ties = FALSE, data = nwts_all, robust = TRUE) +#' summary(fit3) -ah <- function(formula,data,robust,weights,...){ - - Call <- match.call() - indx <- match(c("formula", "data" ), - names(Call), nomatch = 0) - +ah <- function(formula, data, robust, weights, ties, ...) 
{ - if (indx[1] == 0) - stop("A formula argument is required") + Call <- match.call() + indx <- match(c("formula", "data", "weights"), names(Call), nomatch = 0) + if (indx[1] == 0) + stop("A formula argument is required") temp <- Call[c(1, indx)] temp[[1]] <- as.name("model.frame") - temp$formula <- if (missing(data)) - terms(formula) - else terms(formula, data = data) - + temp$formula <- if (missing(data)) + terms(formula) else terms(formula, data = data) + mf <- eval(temp, parent.frame()) - if (nrow(mf) == 0) + if (nrow(mf) == 0) stop("No (non-missing) observations") Terms <- terms(mf) - - # weights <- model.weights(mf) - #print(weights[1:5]) -#print(length(weights)) -#print(extraArgs) - - -#if (length(extraArgs)) { - # controlargs <- names(formals(coxph.control)) - #indx <- pmatch(names(extraArgs), controlargs, nomatch = 0L) - # if (any(indx == 0L)) - # stop(gettextf("Argument %s not matched", names(extraArgs)[indx == - # 0L]), domain = NA) -#} - - Y <- model.extract(mf, "response") - if (!inherits(Y, "Surv")) - stop("Response must be a survival object") + if (!inherits(Y, "Surv")) + stop("Response must be a survival object") type <- attr(Y, "type") - if (type != "right" ) - stop(paste("additive hazard model doesn't support \"", type, "\" survival data", - sep = "")) + if (type != "right" && type != "counting") + stop(paste("additive hazard model doesn't support \"", + type, "\" survival data", sep = "")) + weights <- model.weights(mf) nobs <- nrow(Y) - X <- model.matrix(Terms, mf)[,-1] - - - - # if (storage.mode(Y) != "double") - # storage.mode(Y) <- "double" - # attr(X, "assign") <- Xatt$assign - # xlevels <- .getXlevels(Terms, mf) - - if (missing(robust)||is.null(robust)) - robust <- FALSE - if (missing(weights) || is.null(weights)) - weights <- rep(1, nobs) - #if(missing(ties)||is.null(ties)) - #stop(paste("declare whether there are ties in survival times")) - - - - #if (!ties){ - fit <- ahaz::ahaz(Y,X, weights= weights,robust=robust) - fit$coef<-summary(fit)$coef[,1] - fit$se<-summary(fit)$coef[,2] - - #fit$var<-fit$se^2 - fit$iA<-solve(fit$D) - fit$var<-fit$iA%*%fit$B%*%fit$iA - fit$resid<-residuals(fit) - fit$npar<-fit$nvar - - #} - #else{ - # fit<-ah.fit(X, Y, wts= weights,robust=robust) - #fit$resid<-residuals(fit,type="pseudoscore")*weights - #} - - - #if (!is.null(fit$coef) && any(is.na(fit$coef))) { - # vars <- (1:length(fit$coef))[is.na(fit$coefs)] - # msg <- paste("X matrix deemed to be singular; variable", - # paste(vars, collapse = " ")) - #if (singular.ok) - # warning(msg) - #else stop(msg) - #} - - - fit$weights <- weights - #fit$ties=ties - fit$robust=robust - - fit$terms <- Terms - fit$formula <- formula(Terms) - fit$model <- mf - fit$call <- Call - - fit$nevent <- sum(Y[, ncol(Y)]) - fit$nobs<-nobs - fit$data <- data - - - fit$x <- X - fit$y <- Y - - - - # fit$xlevels <- xlevels - #fit$assign <- assign - - #if (robust) { - - - - - # fit$naive.var <- fit$var - - - #temp<-residuals.ah(fit,type="pseudoscore") - # B.rob <- t(temp*weights) %*% (temp*weights) - # fit$var<-fit$iA%*%B.rob %*%fit$iA - # fit$B.rob<-B.rob - - #} - # fit2 <- c(fit, list(x = X, y = Y, weights = weights)) - - - # temp <- residuals.coxph(fit2, type = "dfbeta", - #weighted = TRUE) - # fit2$linear.predictors <- 0 * fit$linear.predictors - # temp0 <- residuals.coxph(fit2, type = "score", - #weighted = TRUE) - #} - #fit$var <- t(temp) %*% temp - #u <- apply(as.matrix(temp0), 2, sum) - #fit$rscore <- coxph.wtest(t(temp0) %*% temp0, u, - # control$toler.chol)$test - #} - # if (length(fit$coefficients) 
&& is.null(fit$wald.test)) {
-    # nabeta <- !is.na(fit$coefficients)
-    # if (is.null(init))
-    # temp <- fit$coefficients[nabeta]
-    #else temp <- (fit$coefficients - init[1:length(fit$coefficients)])[nabeta]
-    #fit$wald.test <- coxph.wtest(fit$var[nabeta, nabeta],
-    # temp, control$toler.chol)$test
-    #}
-    # na.action <- attr(mf, "na.action")
-    # if (length(na.action))
-    # fit$na.action <- na.action
-    class(fit) <- "ah"
-    fit
-}
\ No newline at end of file
+    X <- model.matrix(Terms, mf)[, -1]
+
+
+    if (missing(robust) || is.null(robust))
+        robust <- FALSE
+    if (missing(weights) || is.null(weights))
+        weights <- rep(1, nobs)
+
+
+    if(missing(ties)||is.null(ties)) stop(paste(
+      'declare whether there are ties in the survival time'))
+    if (ties == TRUE) stop(paste('break ties by adding a small random
+      number to the survival time; a recommended command is runif(dim(data)[1],0,1)*1e-8'))
+
+    fit <- ahaz::ahaz(Y, X, weights = weights, robust = robust)
+    fit$coef <- summary(fit)$coef[, 1]
+    fit$se <- summary(fit)$coef[, 2]
+
+    fit$iA <- solve(fit$D)
+    fit$var <- fit$iA %*% fit$B %*% fit$iA
+    fit$resid <- residuals(fit)
+    fit$npar <- fit$nvar
+
+    # } else{ fit<-ah.fit(X, Y, wts= weights,robust=robust)
+    # fit$resid<-residuals(fit,type='pseudoscore')*weights }
+
+    # if (!is.null(fit$coef) && any(is.na(fit$coef))) { vars <-
+    # (1:length(fit$coef))[is.na(fit$coefs)] msg <- paste('X
+    # matrix deemed to be singular; variable', paste(vars,
+    # collapse = ' ')) if (singular.ok) warning(msg) else
+    # stop(msg) }
+
+
+    fit$weights <- weights
+    # fit$ties=ties
+    fit$robust = robust
+
+    fit$terms <- Terms
+    fit$formula <- formula(Terms)
+    fit$model <- mf
+    fit$call <- Call
+
+    fit$nevent <- sum(Y[, ncol(Y)])
+    fit$nobs <- nobs
+    fit$data <- data
+
+    fit$x <- X
+    fit$y <- Y
+
+    # fit$xlevels <- xlevels fit$assign <- assign
+    # if (robust) {
+    # fit$naive.var <- fit$var
+    # temp<-residuals.ah(fit,type='pseudoscore') B.rob <-
+    # t(temp*weights) %*% (temp*weights) fit$var<-fit$iA%*%B.rob
+    # %*%fit$iA fit$B.rob<-B.rob
+    # } fit2 <- c(fit, list(x = X, y = Y, weights = weights))
+    # temp <- residuals.coxph(fit2, type = 'dfbeta', weighted =
+    # TRUE) fit2$linear.predictors <- 0 * fit$linear.predictors
+    # temp0 <- residuals.coxph(fit2, type = 'score', weighted =
+    # TRUE) } fit$var <- t(temp) %*% temp u <-
+    # apply(as.matrix(temp0), 2, sum) fit$rscore <-
+    # coxph.wtest(t(temp0) %*% temp0, u,
+    # control$toler.chol)$test } if (length(fit$coefficients) &&
+    # is.null(fit$wald.test)) { nabeta <-
+    # !is.na(fit$coefficients) if (is.null(init)) temp <-
+    # fit$coefficients[nabeta] else temp <- (fit$coefficients -
+    # init[1:length(fit$coefficients)])[nabeta] fit$wald.test <-
+    # coxph.wtest(fit$var[nabeta, nabeta], temp,
+    # control$toler.chol)$test } na.action <- attr(mf,
+    # 'na.action') if (length(na.action)) fit$na.action <-
+    # na.action
+    class(fit) <- "ah"
+    fit
+  }
diff -Nru addhazard-1.0.0/R/nwts2ph.generator.R addhazard-1.1.0/R/nwts2ph.generator.R
--- addhazard-1.0.0/R/nwts2ph.generator.R	1970-01-01 00:00:00.000000000 +0000
+++ addhazard-1.1.0/R/nwts2ph.generator.R	2017-03-20 17:36:16.000000000 +0000
@@ -0,0 +1,53 @@
+#######################################################
+# Author: Kate HU
+# Date: March 13th, 2017
+# Task: Generate nwts2ph example dataset
+#######################################################
+#' This file generates the example dataset nwts2ph
+#'
+#' @param data the full cohort data
+#' @param seed the random seed we use for generating this dataset
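+#' @examples
+#' # a minimal sketch (illustrative): regenerate the example two-phase dataset
+#' # from the full cohort; with the default seed = 20 this should reproduce
+#' # the shipped nwts2ph data
+#' nwts2ph <- nwts2ph.generate(nwtsco, seed = 20)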
+#' @importFrom stats rbinom
+#' @export nwts2ph.generate
+nwts2ph.generate <- function(data, seed = 20){
+  # Create a hypothetical two-phase sampling (stratified sampling) dataset.
+  # In this dataset, only people who have relapsed and some of the controls have
+  # their samples sent to the central laboratory for histology determination
+  set.seed(seed)
+  nwts2ph <- data
+
+  # Institutional histology is examined in the local hospital, thus
+  # it is reasonable to assume it is measured for all the samples.
+  # Central histology is more expensive to obtain since the samples have to be
+  # sent to the central laboratory and the work requires experienced lab scientists,
+  # thus it is reasonable to assume only some samples were tested for central
+  # histology. Based on the outcomes and institutional histology, we determine who
+  # will be selected for central histology measurement, which follows the
+  # two-phase sampling framework.
+
+  # create strata based on institutional histology and disease status
+  nwts2ph$strt <- 1 + nwts2ph$instit  ### add a stratum containing all (relapsed) cases
+  nwts2ph$strt[nwts2ph$relaps == 1] <- 3
+
+  # phase II samples are obtained by Bernoulli sampling
+  # we oversample unfavorable institutional histology (instit = 1) and cases
+  # selection probability Pi = 0.5 for instit = 0, Pi = 0.9 for instit = 1 and relaps = 1
+  # assign phase II subsampling (Bernoulli) probabilities
+  nwts2ph$Pi <- 0.5 * (nwts2ph$strt == 1) +
+    0.9 * (nwts2ph$strt == 2) +
+    0.9 * (nwts2ph$strt == 3)
+
+  # generate phase II sampling indicators
+  N <- dim(nwts2ph)[1]
+  nwts2ph$in.ph2 <- rbinom(N, 1, nwts2ph$Pi)
+
+  # update the central histology variable. Those who were not selected
+  # into phase II subsamples are assumed not to have central histology measurement
+  nwts2ph$histol[nwts2ph$in.ph2 == 0] <- NA
+  return(nwts2ph)
+}
+
+# generate the data
+# nwts2ph <- nwts2ph.generate(nwtsco)
+# save the data
+# devtools::use_data(nwts2ph)
diff -Nru addhazard-1.0.0/R/nwts2ph.R addhazard-1.1.0/R/nwts2ph.R
--- addhazard-1.0.0/R/nwts2ph.R	1970-01-01 00:00:00.000000000 +0000
+++ addhazard-1.1.0/R/nwts2ph.R	2017-03-20 16:59:40.000000000 +0000
@@ -0,0 +1,60 @@
+#' A hypothetical two-phase sampling dataset based on the \link{nwtsco} dataset from
+#' the National Wilms Tumor Study (NWTS)
+#' @format A data frame with 3915 rows and 15 variables:
+#' \describe{
+#' We create a hypothetical two-phase sampling (stratified sampling) dataset.
+#' In this dataset, only the subjects who have relapsed and some of the controls have
+#' their samples sent to the central laboratory for more accurate histology examination.
+#'
+#' Institutional histology is examined in the local hospital. It is usually available for
+#' all the samples. Central histology is more expensive to obtain since the samples have to be
+#' sent to the central laboratory and the work requires experienced lab scientists.
+#' It is reasonable to assume only some samples were tested for central
+#' histology. Following the two-phase sampling design, we selected subjects for central histology
+#' measurements based on their outcomes and institutional histology results. 
+#'
+#' \item{trel}{Time to relapse or last date seen (yr), continuous}
+#' \item{tsur}{Time to death or last date seen (yr), continuous}
+#' \item{relaps}{Indicator of relapse,
+#'   0 = Alive no prior relapse when last seen,
+#'   1 = Relapsed after trel years}
+#' \item{dead}{Indicator of death,
+#'   0 = Alive when last seen,
+#'   1 = Died after tsur years}
+#' \item{study}{NWTS study,
+#'   3 = NWTS-3,
+#'   4 = NWTS-4}
+#'\item{stage}{Stage of disease,
+#'   1=I,
+#'   2=II,
+#'   3=III,
+#'   4=IV}
+#' \item{histol}{Central Path histology,
+#'   0 = Favorable (FH) and the subject is selected into the phase II subsample (in.ph2 = 1),
+#'   1 = Unfavorable (UH) and the subject is selected into the phase II subsample (in.ph2 = 1),
+#'   NA = subject is NOT selected into the phase II subsample (in.ph2 = 0) }
+#' \item{instit}{Institutional histology,
+#'   0 = Favorable (FH),
+#'   1 = Unfavorable (UH)}
+#' \item{age}{Age at diagnosis (yr), continuous}
+#' \item{yr}{Year of diagnosis, calendar year}
+#' \item{specwgt}{Weight of tumor bearing specimen, in grams (continuous)}
+#' \item{tumdiam}{Diameter of tumor, in centimeters (continuous)}
+#' \item{strt}{Strata,
+#'   1 = Favorable institutional histology and no relapse,
+#'   2 = Unfavorable institutional histology and no relapse,
+#'   3 = relapse}
+#' \item{Pi}{Sampling probability,
+#'   0.5 for strata = 1,
+#'   0.9 for strata = 2,
+#'   0.9 for strata = 3}
+#' \item{in.ph2}{Phase II membership,
+#'   1 = selected into the phase II subsample,
+#'   0 = not selected into the phase II subsample}
+#' }
+#' @source
+#' This dataset was created based on the \link{nwtsco} dataset from the National Wilms Tumor Study (NWTS)
+#' @docType data
+#' @keywords datasets
+#' @name nwts2ph
+"nwts2ph"
diff -Nru addhazard-1.0.0/R/nwtsco.R addhazard-1.1.0/R/nwtsco.R
--- addhazard-1.0.0/R/nwtsco.R	2015-08-01 07:10:36.000000000 +0000
+++ addhazard-1.1.0/R/nwtsco.R	2017-03-20 16:59:42.000000000 +0000
@@ -1,4 +1,4 @@
-#' Dataset from the National Wilms Tumor Study (NWTS) 
+#' Dataset from the National Wilms Tumor Study (NWTS)
#' @format A data frame with 3915 rows and 12 variables:
#' \describe{
#' \item{trel}{Time to relapse or last date seen (yr), continuous}
@@ -6,18 +6,18 @@
#' \item{relaps}{Indicator of relapse,
#'   0 = Alive no prior relapse when last seen,
#'   1 = Relapsed after trel years}
-#' \item{dead}{Indicator of death, 
+#' \item{dead}{Indicator of death,
#'   0 = Alive when last seen,
#'   1 = Died after tsur years}
-#' \item{study}{NWTS study, 
+#' \item{study}{NWTS study,
#'   3 = NWTS-3,
#'   4 = NWTS-4}
-#'\item{stage}{Stage of disease, 
+#'\item{stage}{Stage of disease,
#'   1=I,
#'   2=II,
#'   3=III,
#'   4=IV}
-#' \item{histol}{Central Path histology, 
+#' \item{histol}{Central Path histology,
#'   0 = Favorable (FH),
#'   1 = Unfavorable (UH)}
#' \item{instit}{Institutional histology,
@@ -28,12 +28,11 @@
#' \item{specwgt}{Weight of tumor bearing specimen, in grams (continuous)}
#' \item{tumdiam}{Diameter of tumor, in centimeters (continuous)}
#' }
-#' @source \url{http://faculty.washington.edu/norm/datasets/nwts-share.txt}
+#' @source
#' Originally used by M. Kulich and D.Y. Lin:
-#' Improving the efficiency of relative-risk estimation in case-cohort studies. J Amer Statis
-#' Assoc 99:832-844, 2004.
+#' Improving the efficiency of relative-risk estimation in case-cohort studies.
+#' J Amer Statis Assoc 99:832-844, 2004.
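+#' @examples
+#' # an illustrative sketch (not in the original file): a quick look at the
+#' # cohort data after loading the package
+#' head(nwtsco)
+#' with(nwtsco, table(instit, histol))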
#' @docType data
#' @keywords datasets
#' @name nwtsco
"nwtsco"
-
diff -Nru addhazard-1.0.0/R/pred.2ph.R addhazard-1.1.0/R/pred.2ph.R
--- addhazard-1.0.0/R/pred.2ph.R	2015-08-17 17:44:25.000000000 +0000
+++ addhazard-1.1.0/R/pred.2ph.R	2016-11-08 07:31:50.000000000 +0000
@@ -1,28 +1,30 @@
-#' Prediction Based on the Additive Hazards Model Fitted from Two-phase Sampling 
-#' 
-#' This function predicts a subject's overall hazard rates at given time points based on this subject's covariate
-#' values. The prediction function is an object from \code{\link{ah.2ph}}. The estimating procedures follow Hu (2014).
-#' 
-#' @param object an object of class inhering from "ah.2ph".
-#' @param newdata a dataframe of an individual's predictors.
-#' @param newtime a given sequence of time points at which the prediction is performed.
+#' Prediction Based on the Additive Hazards Model Fitted from Two-phase Sampling
+#'
+#' This function predicts a subject's overall hazard rates at given time points
+#' based on this subject's covariate values. The prediction function is an object
+#' from \code{\link{ah.2ph}}. The estimating procedures follow Hu (2014).
+#'
+#' @param object an object of class inheriting from 'ah.2ph'.
+#' @param newdata a dataframe of an individual's predictors.
+#' @param newtime a given sequence of time points at which the prediction is performed.
#' @param ... further arguments passed to or from other methods.
-#'
-#' @return A dataframe including the given time points, predicted hazards, their standard errors,
-#' their variances, the phase I component of the variance for predicted hazards
-#' and the phase II component of the variance.
#'
-#' @seealso \code{\link{ah.2ph}} for fitting the additive hazards model with two-phase sampling and
-#' \code{\link{nwtsco}} for the description of nwtsco dataset
+#' @return A dataframe including the given time points, predicted hazards, their
+#' standard errors, their variances, the phase I component of the variance for
+#' predicted hazards and the phase II component of the variance.
+#'
+#' @seealso \code{\link{ah.2ph}} for fitting the additive hazards model with two-phase
+#' sampling and \code{\link{nwtsco}} for the description of nwtsco dataset
#'
#' @importFrom survival Surv
#' @importFrom stats delete.response model.matrix terms
#' @export
#'
#' @references
-#' Jie Hu (2014) A Z-estimation System for Two-phase Sampling with Applications to Additive Hazards Models and
-#' Epidemiologic Studies. Dissertation, University of Washington.
-#
+#' Jie Hu (2014) A Z-estimation System for Two-phase Sampling with Applications
+#' to Additive Hazards Models and
+#' Epidemiologic Studies. Dissertation,
+#' University of Washington. 
+#'
#' @examples
#' library(survival)
#' ### load data
@@ -35,7 +37,7 @@
#'
#' ### assign phase II subsampling probabilities
#' ### oversample unfavorable histology (instit =1) and cases
-#' ### Pi = 0.5 for instit =0, Pi =1 for instit =1 and relaps =1 
+#' ### Pi = 0.5 for instit =0, Pi =1 for instit =1 and relaps =1
#' nwts$Pi<- 0.5 * (nwts$strt == 1) + 1 * (nwts$strt == 2) + 1 * (nwts$strt == 3)
#'
#' ### generate phase II sampling indicators
@@ -43,183 +45,196 @@
#' nwts$in.ph2 <- rbinom(N, 1, nwts$Pi)
#'
#' ### fit an additive hazards model to two-phase sampling data without calibration
-#' fit1 <- ah.2ph(Surv(trel,relaps) ~ age + histol, data = nwts, R = in.ph2, Pi = Pi, robust = FALSE)
-#'
-#'
-#'
+#' fit1 <- ah.2ph(Surv(trel,relaps) ~ age + histol,
+#'                data = nwts,
+#'                ties = FALSE,
+#'                R = in.ph2, Pi = Pi,
+#'                robust = FALSE)
+#'
#' ### input the new data for prediction
-#' newdata <- nwtsco[101,] 
+#' newdata <- nwtsco[101,]
#' ### based on the fitted model fit1, perform prediction at time points t =3 and t= 5
#' predict(fit1, newdata, newtime = c(3,5))
#'
#' ### fit an additive hazards model to two-phase sampling data with calibration
#' ### The calibration variable is stage
-#' fit2 <- ah.2ph(Surv(trel,relaps) ~ age + histol, data = nwts, R = in.ph2, Pi = Pi,
-#' robust = FALSE, calibration.variables = stage)
-#'
+#' fit2 <- ah.2ph(Surv(trel,relaps) ~ age + histol, data = nwts, R = in.ph2, Pi = Pi,
+#'                ties = FALSE, robust = FALSE, calibration.variables = "stage")
+#'
#' ### based on the fitted model fit2, perform prediction at time points t =3 and t= 5
#' predict(fit2, newdata, newtime = c(3,5))
+#'
+#' \dontrun{
+#' ### The calibration variable is stage, when setting robust = TRUE
+#' fit3 <- ah.2ph(Surv(trel,relaps) ~ age + histol, data = nwts, R = in.ph2, Pi = Pi,
+#'  ties = FALSE, robust = TRUE, calibration.variables = "stage")
+#'
+#' ### based on the fitted model fit3, perform prediction at time points t =3 and t= 5
+#' predict(fit3, newdata, newtime = c(3,5))
+#' }
+predict.ah.2ph <- function(object, newdata, newtime, ...) 
{
+    # phase I complete data object
+    object1 <- object$fit.pha1
+    ######################### create model matrix for the new data ##########
+    tt <- terms(object1)
+    if (!inherits(object, "ah.2ph")) {
+        warning("calling predict.2ph() ...")
+    }
-
-predict.ah.2ph<-function(object, newdata, newtime,...){
-
-  # phase I complete data object
-  object1 <-object$fit.pha1
-
-  ######################### creat model matrix for the new data ##########
-  tt <- terms(object1)
-  if (!inherits(object, "ah.2ph")){
-    warning("calling predict.2ph() ...")
-  }
-
-  Terms <- delete.response(tt)
+    Terms <- delete.response(tt)
# identify factor variable
-  factor.var.list<- names(object1$model)[sapply(object1$model,is.factor) == T]
-  #assign levels to newdata variable according to the original data
-  if (length(factor.var.list) != 0){
-    for (i in 1:length(factor.var.list)){
-      factor.var <- factor.var.list[i]
-      newdata[[factor.var]] <- factor(newdata[[factor.var]],
-                                      levels = levels(object1$model[[factor.var]]))
-    }
+    factor.var.list <- names(object1$model)[sapply(object1$model,
+        is.factor) == T]
+    # assign levels to newdata variable according to the
+    # original data
+    if (length(factor.var.list) != 0) {
+        for (i in 1:length(factor.var.list)) {
+            factor.var <- factor.var.list[i]
+            newdata[[factor.var]] <- factor(newdata[[factor.var]],
+                levels = levels(object1$model[[factor.var]]))
+        }
    }
# retrieve the new model matrix based on the new data
-  pred <- model.matrix(Terms, newdata)
+    pred <- model.matrix(Terms, newdata)
# delete the intercept
-  pred <- as.numeric(pred[,-1])
-  ###################################################################################
-
-
-
-
-
-
-
-
-
-
-
- Pi.pha2<-object$Pi.pha2
-  ## No calibration
-  if (object1$robust){
-    print("If the additive hazards model does not hold, then stop prediction based on this model. No robust errors with be provided ")
-  }else{
-    if(!length(object$calibration.variables)){
-
-
-      ingr<-ingredients(object1)
-
-      ############################ find the positions of a list of given newtime on the original timeline######################
-      t.pos <- NULL
-      t.unique<-ingr$t.unique
-
-      for (i in 1:length(newtime)){
-        # newtime s is less than t_1, then we report
-        if (newtime[i] < t.unique[1]){
-          print(paste("The data used for building the ah model does not have enough information for predicting such a small t, t=", newtime[i]) )
-        }else{
-
-          # find the position of newtime[i] on the original timeline
-          c <- rank(c(newtime[i],t.unique),ties.method="max")[1]
-          #when newtime s equals one of the t.unique t_k, the above rank function returns k+1 on the original timelines
-          #when newtime s in between t_k and t_{k+1}, it returns k+1 on the original timelines
-          #when newtime s = t_1, it returns 1
-          # thus
-          t.pos[i] <- ifelse(c==1,1,c-1)
-
-
-        }
-      }
+    pred <- as.numeric(pred[, -1])
+    ###########################################################################
-
-      ### predicted outcomes are calculated based on fitting an ah model to only phase II data using assigned weights
-      pred.result <- predict.ah(object1, newdata, newtime, level = 0.95)
-      L <- pred.result$L
-      L.var.pha1 <- pred.result$L.se^2
-
-
-      # Calculate the phase II variance
-      L.resid <- do.call("cook.L.resid", c(ingr, list(time.pos = ingr$match.event)))
+
+    Pi.pha2 <- object$Pi.pha2
+    if (object1$robust) {
+        stop("When you set robust = TRUE, you assume the additive hazards model
+         may not be the true model. Prediction based on this model is no longer valid.
+         We stop the prediction process. 
") + } else { + ## No calibration + if (!length(object$calibration.variables)) { + pred.result <- predict.ah(object1, newdata, newtime, + level = 0.95) + L <- pred.result$L + L.var.pha1 <- pred.result$L.se^2 + + ### calculate the variance due to phase II sampling + ingr <- ingredients(object1) + + ############################ find the positions of a list of given newtime on the + ############################ original timeline###################### + t.pos <- NULL + t.unique <- ingr$t.unique + + for (i in 1:length(newtime)) { + + + # newtime s is less than t_1, then we report + if (newtime[i] < t.unique[1]) { + print(paste("The data used for building the ah model does not have + enough information for predicting such a small t, t=", + newtime[i])) + } else { + + # find the position of newtime[i] on the original timeline + c <- rank(c(newtime[i], t.unique), ties.method = "max")[1] + # when newtime s equals one of the t.unique t_k, the above + # rank function returns k+1 on the original timelines when + # newtime s in between t_k and t_{k+1}, it returns k+1 on + # the original timelines when newtime s = t_1, it returns 1 + # thus + t.pos[i] <- ifelse(c == 1, 1, c - 1) + + + } + } + + ### predicted outcomes are calculated based on fitting an ah + ### model to only phase II data using assigned weights - - ingr$wts <- sqrt(1-Pi.pha2) - L.var.pha2<-do.call("L.rvar", c(ingr, object1, list(t.pos =t.pos, pred =pred, newtime=newtime, L.resid = L.resid))) - - }else{ - #Calibration - calibration.variables<-object$calibration.variables - - wts.pha2<-as.numeric(1/Pi.pha2) - aux<-as.matrix(calibration.variables) - P<-t(aux)%*%(aux) - aux.pha2<-aux[object$R==1,] - wts.cal<-cook.wts.cal(aux=aux,aux.pha2=aux.pha2,P=P,wts.pha2=wts.pha2) - wts.pha2<-wts.cal - - - #using the calibrated refit the additive hazards model - object1<-ah(object1$formula, object1$data, weights=wts.pha2, robust=object1$robust) - - - - ingr<-ingredients(object1) - - t.pos <- NULL - t.unique<-ingr$t.unique - - for (i in 1:length(newtime)){ - # newtime s is less than t_1, then we report - if (newtime[i] < t.unique[1]){ - print(paste("The data used for building the ah model does not have enough information for predicting such a small t, t=", newtime[i]) ) - }else{ - - # find the position of newtime[i] on the original timeline - c <- rank(c(newtime[i],t.unique),ties.method="max")[1] - #when newtime s equals one of the t.unique t_k, the above rank function returns k+1 on the original timelines - #when newtime s in between t_k and t_{k+1}, it returns k+1 on the original timelines - #when newtime s = t_1, it returns 1 - # thus - t.pos[i] <- ifelse(c==1,1,c-1) - - - } - } - ### predicted outcomes are calculated based on fitting an ah model to only phase II data using calibrated weights - pred.result <- predict.ah(object1, newdata, newtime, level = 0.95) - L <- pred.result$L - - #####################Calculate the variance of the predicted value - #See page 109 of Jie Hu' thesis for the detailed formula - - ### Calculate the 1st component of the variance - L.var.pha1 <- pred.result$L.se^2 - - ## retrieve the martingale integral, i.e. 
part of \psi in the formula on page 109 of Jie Hu' thesis - - L.resid <- do.call("cook.L.resid", c(ingr, list(time.pos = ingr$match.event))) - - L.var.pha2<-do.call("L.rvar.calibration", c(ingr, object1, list(t.pos =t.pos, pred =pred, newtime=newtime, L.resid = L.resid, - new.wts=sqrt((1-Pi.pha2)/(wts.cal*Pi.pha2)), aux.pha2=aux.pha2, P=P, wts.cal=wts.cal))) - } - } + # Calculate the phase II variance + L.resid <- do.call("cook.L.resid", c(ingr, list(time.pos = ingr$match.event))) - #L.var.pha2<-L.rvar(match.event=match.event,new.wts=sqrt(1-Pi.pha2),L.ncol=L.ncol, eta.cum=eta.cum, resid=resid, - #L.resid=L.resid,iA=iA, B=B) - - L.var=L.var.pha1+L.var.pha2 - - L.se = sqrt(L.var) - + ingr$wts <- sqrt(1 - Pi.pha2) + L.var.pha2 <- do.call("L.rvar", c(ingr, object1, + list(t.pos = t.pos, pred = pred, newtime = newtime, + L.resid = L.resid))) + + } else { + # Calibration + + pred.result <- predict.ah(object1, newdata, newtime, + level = 0.95) + L <- pred.result$L + ### Calculate the 1st component of the variance + L.var.pha1 <- pred.result$L.se^2 + + + + wts.cal <- object1$weights + ingr <- ingredients(object1) + + aux<-as.matrix(object$calibration.variables) + aux.pha2<-aux[object$R==1,] + P<-t(aux)%*%(aux) + + + t.pos <- NULL + t.unique <- ingr$t.unique + + for (i in 1:length(newtime)) { + # newtime s is less than t_1, then we report + if (newtime[i] < t.unique[1]) { + print(paste("The data used for building the ah model does not + have enough information for predicting such a small t, t=", + newtime[i])) + } else { + + # find the position of newtime[i] on the original timeline + c <- rank(c(newtime[i], t.unique), ties.method = "max")[1] + # when newtime s equals one of the t.unique t_k, the above + # rank function returns k+1 on the original timelines when + # newtime s in between t_k and t_{k+1}, it returns k+1 on + # the original timelines when newtime s = t_1, it returns 1 + # thus + t.pos[i] <- ifelse(c == 1, 1, c - 1) + + + } + } + + + + ###Calculate the variance of the predicted value See page 109 #### + #### of Jie Hu' thesis for the detailed formula ####### + + + + ## retrieve the martingale integral, i.e. part of \psi in + ## the formula on page 109 of Jie Hu' thesis + + L.resid <- do.call("cook.L.resid", c(ingr, list(time.pos = ingr$match.event))) + + L.var.pha2 <- do.call("L.rvar.calibration", c(ingr, + object1, list(t.pos = t.pos, pred = pred, newtime = newtime, + L.resid = L.resid, new.wts = sqrt((1 - Pi.pha2)/(wts.cal * + Pi.pha2)), aux.pha2 = aux.pha2, P = P, wts.cal = wts.cal))) + } + } + + + + # L.var.pha2<-L.rvar(match.event=match.event,new.wts=sqrt(1-Pi.pha2),L.ncol=L.ncol, + # eta.cum=eta.cum, resid=resid, L.resid=L.resid,iA=iA, B=B) - return(data.frame(L=L, L.se=L.se, L.var=L.var,L.var.pha1=L.var.pha1, L.var.pha2=L.var.pha2)) + L.var = L.var.pha1 + L.var.pha2 + L.se = sqrt(L.var) + return(data.frame(L = L, L.se = L.se, L.var = L.var, L.var.pha1 = L.var.pha1, + L.var.pha2 = L.var.pha2)) } diff -Nru addhazard-1.0.0/R/pred.ah.R addhazard-1.1.0/R/pred.ah.R --- addhazard-1.0.0/R/pred.ah.R 2015-08-17 17:37:57.000000000 +0000 +++ addhazard-1.1.0/R/pred.ah.R 2016-11-08 07:22:04.000000000 +0000 @@ -1,156 +1,134 @@ -#' Prediction Based on the Fitted Additive Hazards Model +#' Prediction Based on the Fitted Additive Hazards Model #' -#' This function predicts a subject's overall hazard rates at given time points based on this subject's covariate -#' values. The prediction function is an additive hazards model fitted using \code{\link{ah}}. 
-#' +#' This function predicts a subject's overall hazard rates at given time points +#' based on this subject's covariate values. The prediction function is an additive +#' hazards model fitted using \code{\link{ah}}. #' #' @param object an object of class inhering from \code{\link{ah}}. -#' @param newdata a dataframe of an individual's predictors. -#' @param newtime a given sequence of time points at which the prediction is performed. +#' @param newdata a dataframe of an individual's predictors. +#' @param newtime a given sequence of time points at which the prediction is performed. #' The time should be on the same scale as the survival time in \code{\link[survival]{Surv}}. #' @param ... further arguments passed to or from other methods. #' #' @return A dataframe including the time points for prediction, predicted values and their standard errors. #' -#' @seealso \code{\link{ah}} for fitting the additive hazards model, \code{\link{nwtsco}} for +#' @seealso \code{\link{ah}} for fitting the additive hazards model, \code{\link{nwtsco}} for #' the description of nwtsco dataset #' #' @importFrom survival Surv #' @importFrom stats delete.response model.matrix terms #' @export #' -#' -#' -#' #' @examples #' library(survival) -#' ### fit the additive hazards model to the data +#' ### fit the additive hazards model to the data #' nwts<- nwtsco[1:100,] -#' fit <- ah(Surv(trel,relaps) ~ age + instit, data = nwts, robust = FALSE) -#' -#' ### see the covariate names in the prediction function +#' fit <- ah(Surv(trel,relaps) ~ age + instit, data = nwts, ties = FALSE, robust = FALSE) +#' +#' ### see the covariate names in the prediction function #' fit$call #' ### the newdata should be a dataframe -#' ### the variable names are the same as the covariate names in +#' ### the variable names are the same as the covariate names in #' ### the prediction function -#' newdata <- data.frame(age=60, instit =1) +#' newdata <- data.frame(age=60, instit =1) +#' +#' ### an alternative way to give the newdata +#' newdata <- nwtsco[101,] #' -#' ### an alternative way to give the newdata -#' newdata <- nwtsco[101,] -#' #' ### based on this subject's covariate values, the function predicts individual specific -#' ### hazard rates at time points 3 and 5 +#' ### hazard rates at time points 3 and 5 #' predict(fit, newdata, newtime = c(3,5)) -#' - - - - +predict.ah <- function(object, newdata, newtime, ...) 
{ + # get all the middle step results obtained in the model + # fitting step + ingr <- ingredients(object) + + # names(ingredients.list) [1] 'Z' 't.fail' 'censor' 'n.obs' + # 'wts' [6] 't.unique' 't.diff' 't1.unique' 'n.par' 'theta' + # [11] 'effects' 'eta.ncol' 'match.eta' 'eta' 'eta.den.vec' + # [16] 'eta.cum' 'idx.c' 'match.event' 'n.death' 'z.death' + # [21] 'dLambda1' 'lambda0' 'Lambda0' + + tt <- terms(object) + if (!inherits(object, "ah")) + warning("calling predict.ah() ...") + # if (missing(newdata) || is.null(newdata)){ pred <- + # object$data$X #predictor matrix pred <- as.matrix(pred) + # }else{ retrieve the columns in newdata that match the + # names of the predictors in the model - - -predict.ah <- function(object, newdata, newtime,...){ - - # get all the middle step results obtained in the model fitting step - ingr <-ingredients(object) - - - #names(ingredients.list) - #[1] "Z" "t.fail" "censor" "n.obs" "wts" - #[6] "t.unique" "t.diff" "t1.unique" "n.par" "theta" - #[11] "effects" "eta.ncol" "match.eta" "eta" "eta.den.vec" - #[16] "eta.cum" "idx.c" "match.event" "n.death" "z.death" - #[21] "dLambda1" "lambda0" "Lambda0" - - tt <- terms(object) - if (!inherits(object, "ah")) - warning("calling predict.ah() ...") - #if (missing(newdata) || is.null(newdata)){ - # pred <- object$data$X #predictor matrix - # pred <- as.matrix(pred) - # }else{ - ## retrieve the columns in newdata that match the names of the predictors in the model - - - - Terms <- delete.response(tt) + Terms <- delete.response(tt) # identify factor variable - factor.var.list<- names(object$model)[sapply(object$model,is.factor) == T] - #assign levels to newdata variable according to the original data - if (length(factor.var.list) != 0){ - for (i in 1:length(factor.var.list)){ - factor.var <- factor.var.list[i] - newdata[[factor.var]] <- factor(newdata[[factor.var]], - levels = levels(object$model[[factor.var]])) - } + factor.var.list <- names(object$model)[sapply(object$model, + is.factor) == T] + # assign levels to newdata variable according to the + # original data + if (length(factor.var.list) != 0) { + for (i in 1:length(factor.var.list)) { + factor.var <- factor.var.list[i] + newdata[[factor.var]] <- factor(newdata[[factor.var]], + levels = levels(object$model[[factor.var]])) + } } - - pred <- model.matrix(Terms, newdata) - #contrasts.arg = lapply(newdata[sapply(newdata, is.factor) ], constrats, , contrasts=FALSE - pred <- as.numeric(pred[,-1]) - - # predicted additive effect + + pred <- model.matrix(Terms, newdata) + # contrasts.arg = lapply(newdata[sapply(newdata, is.factor) + # ], constrats, , contrasts=FALSE + pred <- as.numeric(pred[, -1]) + + # predicted additive effect effect <- sum(pred * object$coef) - - # middle step results for obtaining predicted outcomes and variances - #b <- do.call("cook.basics",ingredients.list) - - - #cumulated baseline hazards - #Lambda0 <- b$Lambda0 - - # search the location of the newtime in the original timelines in the fitted model + # middle step results for obtaining predicted outcomes and + # variances b <- do.call('cook.basics',ingredients.list) + + # cumulated baseline hazards Lambda0 <- b$Lambda0 + # search the location of the newtime in the original + # timelines in the fitted model t.pos <- NULL - t.unique<-ingr$t.unique - + t.unique <- ingr$t.unique + # Store the predicted value L <- NULL - - for (i in 1:length(newtime)){ - # newtime s is less than t_1, then we report - if (newtime[i] < t.unique[1]){ - print(paste("The data used for building the ah model does not 
have enough information for predicting such a small t, t=", newtime[i],".") ) - print(paste("remove, t=", newtime[i]," and run the prediction again") ) - }else{ - - # find the position of newtime[i] on the original timeline - c <- rank(c(newtime[i],t.unique),ties.method="max")[1] - #when newtime s equals one of the t.unique t_k, the above rank function returns k+1 on the original timelines - #when newtime s in between t_k and t_{k+1}, it returns k+1 on the original timelines - #when newtime s = t_1, it returns 1 - # thus - t.pos[i] <- ifelse(c==1,1,c-1) - # The predicted valued - # the additive part I - L[i] <- effect*newtime[i] + ingr$Lambda0[t.pos[i]] - - } - } - - L.var <-NULL - if (!object$robust){ - # variance - # L.var<-do.call("L.nvar", c(ingredients.list, list(n.death=n.death, z.death=z.death, eta.den.vec=eta.den.vec, eta=eta, - # dLambda1=dLambda1, eta.cum=eta.cum, iA=object$iA, B=object$var, time.pos=t.pos, bhaz=FALSE, newtime=newtime))) - L.var<-do.call("L.nvar", c(ingr, object, list(t.pos= t.pos, pred = pred, newtime = newtime))) - - - }else{ - L.resid <-do.call("cook.L.resid", c(ingr, list(time.pos = ingr$match.event))) - # robust variance - L.var<-do.call("L.rvar", c(ingr, object, list(t.pos =t.pos, pred =pred, newtime=newtime, L.resid = L.resid))) + for (i in 1:length(newtime)) { + # newtime s is less than t_1, then we report + if (newtime[i] < t.unique[1]) { + print(paste("The data used for building the ah model does not have enough + information for predicting such a small t, t=", + newtime[i], ".")) + print(paste("remove, t=", newtime[i], " and run the prediction again")) + } else { + + # find the position of newtime[i] on the original timeline + c <- rank(c(newtime[i], t.unique), ties.method = "max")[1] + # when newtime s equals one of the t.unique t_k, the above + # rank function returns k+1 on the original timelines when + # newtime s in between t_k and t_{k+1}, it returns k+1 on + # the original timelines when newtime s = t_1, it returns 1 + # thus + t.pos[i] <- ifelse(c == 1, 1, c - 1) + # The predicted valued the additive part I + L[i] <- effect * newtime[i] + ingr$Lambda0[t.pos[i]] + + } + } + L.var <- NULL + if (!object$robust) { + # variance L.var<-do.call('L.nvar', c(ingredients.list, + # list(n.death=n.death, z.death=z.death, + # eta.den.vec=eta.den.vec, eta=eta, dLambda1=dLambda1, + # eta.cum=eta.cum, iA=object$iA, B=object$var, + # time.pos=t.pos, bhaz=FALSE, newtime=newtime))) + L.var <- do.call("L.nvar", c(ingr, object, list(t.pos = t.pos, + pred = pred, newtime = newtime))) + } else { + L.resid <- do.call("cook.L.resid", c(ingr, list(time.pos = ingr$match.event))) + # robust variance + L.var <- do.call("L.rvar", c(ingr, object, list(t.pos = t.pos, + pred = pred, newtime = newtime, L.resid = L.resid))) } L.se <- sqrt(L.var) - return(data.frame(time = newtime, L= L, L.se = L.se)) - + return(data.frame(time = newtime, L = L, L.se = L.se)) } - - - - - - - diff -Nru addhazard-1.0.0/R/summary.R addhazard-1.1.0/R/summary.R --- addhazard-1.0.0/R/summary.R 2015-08-15 00:29:45.000000000 +0000 +++ addhazard-1.1.0/R/summary.R 2016-11-07 03:33:52.000000000 +0000 @@ -1,62 +1,53 @@ #' @importFrom stats printCoefmat pnorm - -summary.ah <- function(object, ...){ - options(digits =5) - ci.lower <- object$coef - 1.96*object$se - ci.upper <- object$coef + 1.96*object$se +#' @export +summary.ah <- function(object, ...) 
{ + options(digits = 5) + ci.lower <- object$coef - 1.96 * object$se + ci.upper <- object$coef + 1.96 * object$se z <- object$coef/object$se - p.value <- 2*pnorm(-abs(z)) - TAB <- cbind( coef = object$coef, se = object$se, lower.95 = ci.lower, upper.95= ci.upper, z = z, p.value = p.value ) - - res <- list(call=object$call, coefficients=TAB) - class(res) <- "summary.ah" - - res - - + p.value <- 2 * pnorm(-abs(z)) + TAB <- cbind(coef = object$coef, se = object$se, lower.95 = ci.lower, + upper.95 = ci.upper, z = z, p.value = p.value) + rownames(TAB) = colnames(object$model)[-1] + res <- list(call = object$call, coefficients = TAB) + class(res) <- "summary.ah" + res } #' @export -print.summary.ah<- function(x, digits = max(getOption("digits") - 4, 4), signif.stars = getOption("show.signif.stars"),...){ - cat("Call:\n") - print(x$call) - cat("\n") - printCoefmat(x$coefficients, digits = digits, signif.stars = signif.stars, - ...) - +print.summary.ah <- function(x, digits = max(getOption("digits") - + 4, 4), signif.stars = getOption("show.signif.stars"), ...) { + cat("Call:\n") + print(x$call) + cat("\n") + printCoefmat(x$coefficients, digits = digits, signif.stars = signif.stars, + P.values = TRUE, has.Pvalue=TRUE, ...) } #' @export -summary.ah.2ph <- function(object,...){ - options(digits =5) - ci.lower <- object$coef - 1.96*object$se - ci.upper <- object$coef + 1.96*object$se +summary.ah.2ph <- function(object, ...) { + options(digits = 5) + ci.lower <- object$coef - 1.96 * object$se + ci.upper <- object$coef + 1.96 * object$se z <- object$coef/object$se - p.value <- 2*pnorm(-abs(z)) - TAB <- cbind( coef = object$coef, se = object$se, lower.95 = ci.lower, upper.95= ci.upper, z = z, p.value = p.value) - - res <- list(call=object$call, coefficients=TAB) - class(res) <- "summary.ah" - - res - - + p.value <- 2 * pnorm(-abs(z)) + TAB <- cbind(coef = object$coef, se = object$se, lower.95 = ci.lower, + upper.95 = ci.upper, z = z, p.value = p.value) + rownames(TAB) = colnames(object$model)[-1] + res <- list(call = object$call, coefficients = TAB) + class(res) <- "summary.ah" + res } -# print.summary.ah.2ph<- function(object, ...){ -# cat("Call:\n") -# print(object$call) -# cat("\n") -# printCoefmat(object$coefficients, P.value=TRUE, has.Pvalue=TRUE, digits = 5) -# } + #' @export -print.summary.ah.2ph<- function(x, digits = max(getOption("digits") - 4, 4), signif.stars = getOption("show.signif.stars"),...){ - cat("Call:\n") - print(x$call) - cat("\n") - printCoefmat(x$coefficients, digits = digits, signif.stars = signif.stars, - ...) - -} \ No newline at end of file +print.summary.ah.2ph <- function(x, digits = max(getOption("digits") - + 4, 4), signif.stars = getOption("show.signif.stars"), ...) { + cat("Call:\n") + print(x$call) + cat("\n") + printCoefmat(x$coefficients, digits = digits, signif.stars = signif.stars, P.values = TRUE, + has.Pvalue=TRUE, ...) 
+} diff -Nru addhazard-1.0.0/R/tools.R addhazard-1.1.0/R/tools.R --- addhazard-1.0.0/R/tools.R 2015-08-09 22:05:35.000000000 +0000 +++ addhazard-1.1.0/R/tools.R 2016-08-30 04:52:31.000000000 +0000 @@ -2,36 +2,32 @@ ### Cook all the intertermediate results that will be used in various functions ingredients<-function(object){ - - - - Z<-object$x + Z<-object$x Y<-object$y t.fail<-Y[,1] censor<-Y[,2] if(!is.matrix(Z))Z=as.matrix(Z) - - -### for code testing purposes - # failure.time<-test.data[,1] - #censor<-test.data[,2] - #Z<-test.data[,c(3,4)] - # Force the covariate input to be a matrix - # Z<-as.matrix(Z) - - ### number of parameters , number of observtaions - n.par<-object$npar - n.obs<-object$nobs - wts<-object$weights - - theta<-object$coef - effects<-as.vector(Z%*%theta) - ### Centralized the covariates - # Z <- scale( Z,center= rep(1,n.par),scale=F) - - ### Devide the timeline to small intervals according to - ### the occurence of events or censoring - + + ### for code testing purposes + # failure.time<-test.data[,1] + #censor<-test.data[,2] + #Z<-test.data[,c(3,4)] + # Force the covariate input to be a matrix + # Z<-as.matrix(Z) + + ### number of parameters , number of observtaions + n.par<-object$npar + n.obs<-object$nobs + wts<-object$weights + + theta<-object$coef + effects<-as.vector(Z%*%theta) + ### Centralized the covariates + # Z <- scale( Z,center= rep(1,n.par),scale=F) + + ### Devide the timeline to small intervals according to + ### the occurence of events or censoring + ordered<-sort(t.fail) t.unique<-unique(ordered) eta.ncol<-length(t.unique) @@ -44,11 +40,10 @@ # mark all the failure time on the timeline match.eta<-match(t.fail, t.unique) - + # mark all the event time on the timeline match.event<- match(t1.unique, t.unique) - z.death <- eta.num <- eta.den <-matrix(NA, nrow=n.par, ncol=eta.ncol) n.death<-NULL eta.den.vec<-NULL @@ -64,63 +59,72 @@ for (j in 1:n.par){ eta.num[j,i] <-sum((Z*wts)[idx1,j]) z.death[j,i]<-sum((Z[,j]*censor*wts)[idx2]) - } + } } eta<-eta.num/eta.den - # because the eta for the first interval is 0, thus the eta.cum starts from 0 at t_1 and then eta*(t_2-t_1)... - eta.new <- cbind(rep(0, n.par), eta[, -eta.ncol]) + # because the eta for the first interval is 0, thus the eta.cum starts from 0 at t_1 and then eta*(t_2-t_1)... + eta.new <- cbind(rep(0, n.par), eta[, -eta.ncol]) eta.cum <- as.matrix(apply(t(eta.new)*t.diff,2,cumsum)) # Note eta.cum is eta.ncol x n.par while eta is n.par x eta.ncol dLambda1 <- n.death/eta.den.vec lambda0 <- dLambda1-as.vector(t(eta.new)%*%theta*t.diff) # See equation (3.8) in Jie Hu's thesis. 
This is the results for the points at failure times in the original data - # we do not need new data to caculated this quantity and its variance + # we do not need new data to caculated this quantity and its variance Lambda0 <- cumsum(lambda0) + return(list(Z=Z, + t.fail=t.fail, + censor=censor, + n.obs=n.obs, + wts =wts, + t.unique=t.unique, + t.diff=t.diff, + t1.unique=t1.unique, #time related + n.par=n.par,theta=theta, + effects=effects, #coef related + eta.ncol=eta.ncol, + match.eta=match.eta, + eta=eta, + eta.den.vec=eta.den.vec, + eta.cum=eta.cum, #weighted predictors + idx.c=idx.c, + match.event=match.event, + n.death=n.death, + z.death=z.death, #events related results + dLambda1=dLambda1, + lambda0=lambda0, + Lambda0=Lambda0 #baseline hazards related results + ) + ) + invisible() +} - - return(list(Z=Z, t.fail=t.fail, censor=censor, n.obs=n.obs, wts =wts, #observations - t.unique=t.unique, t.diff=t.diff, t1.unique=t1.unique, #time related - n.par=n.par,theta=theta, effects=effects, #coef related - eta.ncol=eta.ncol, match.eta=match.eta, eta=eta, eta.den.vec=eta.den.vec, eta.cum=eta.cum, #weighted predictors - idx.c=idx.c, match.event=match.event, n.death=n.death, z.death=z.death, #events related results - dLambda1=dLambda1, lambda0=lambda0, Lambda0=Lambda0 #baseline hazards related results - )) - invisible() - } - - - - -### estimate the model-based variance for predicted values +### estimate the model-based variance for predicted values L.nvar<-function(t.pos= t.pos, pred = pred, newtime=newtime, n.par=n.par,n.death=n.death,z.death=z.death, t.unique =t.unique, - eta.den.vec=eta.den.vec, eta=eta, dLambda1=dLambda1, eta.cum=eta.cum, iA=iA, var = var, ...){ - - - # See equation (3.42) on p.97 of Jie Hu's thesis for the variance formula - # step 0: find the location of s in the original timeline - - t.mark <- t.unique[t.pos] # The closed time point in the original data timeline + eta.den.vec=eta.den.vec, eta=eta, dLambda1=dLambda1, eta.cum=eta.cum, iA=iA, var = var, ...){ + + # See equation (3.42) on p.97 of Jie Hu's thesis for the variance formula + # step 0: find the location of s in the original timeline + t.mark <- t.unique[t.pos] # The closed time point in the original data timeline - # calculate s - t.mark, the last intervla - last.interval <- newtime - t.mark + # calculate s - t.mark, the last intervla + last.interval <- newtime - t.mark - #step 1 + #step 1 #calculate \int_0^t.mark PdN(t)/(PY(t)^2) - + ## L.var1 = P\int_0^{s}\frac{1}{P^2Y(t)}Y(t)\left\{d\Lambda_0(t)+Z^T\theta_0dt\right\}. ## \{d\Lambda_0(t)+Z^T\theta_0dt is replaced by dN(t) L.var1<-cumsum((1/(eta.den.vec^2)*n.death)[t.pos]) - L.cov1 <- L.cov2 <- rep(0,n.par) L.var <- L.var2 <- L.cov <- NULL - - for ( i in 1:length(t.pos)){ + for ( i in 1:length(t.pos)){ + idx <- t.pos[i] #step 2 @@ -128,140 +132,132 @@ #calculate \int_0^t.mark PdN(t)/PY(t) eta(t) #\{d\Lambda_0(t)+Z^T\theta_0dt is replaced by dN(t) - # note a different approach is to remove the component involving $dLambda_0(t)$, since the two terms cancel out. - # z.death[j,i]<-sum((Z[,j]*censor*wts)[idx2]) + # note a different approach is to remove the component involving $dLambda_0(t)$, since the two terms cancel out. 
+ # z.death[j,i]<-sum((Z[,j]*censor*wts)[idx2]) # eta.den.vec[i]<-sum(wts[idx1]) # the increment is = PZdN(t)/PY(t) - + L.cov1 <- L.cov1+z.death[,idx]/eta.den.vec[idx] # L.cov2 = P\int_0^{\tau}\frac{PY(t)Z}{PY(t)}\frac{Y(t)}{PY(t)}\{d\Lambda_0(t)+Z^T\theta_0dt\} # dLambda1<-n.death/eta.den.vec # eta = sum((Z*wts)[idx1,j])/sum(wts[idx1]) # the increment is PdN(t)}/PY(t)}*\frac{PZY(t)}{PY(t)} + L.cov2 <- L.cov2+ dLambda1[idx]*eta[,idx] - + #step 3 calculate the variance and covariance - # calcuate D(s) where D(s) = D(t.mark[i]) + last.interval[s]*eta[,idx] + # calcuate D(s) where D(s) = D(t.mark[i]) + last.interval[s]*eta[,idx] # Note eta.cum is eta.ncol x n.par while eta is n.par x eta.ncol + Ds <- eta.cum[idx, ] + last.interval[i] * eta[, idx] - #calcuate zs + #calcuate zs # pred: n.obs x n.par + zs <-pred * newtime[i] - zs.scaled <- as.numeric(zs-Ds) + zs.scaled <- as.numeric(zs-Ds) + # L.var2(s) = \left\{zs-D(s)\right\}^TA^{-1}BA^{-1}\left\{zs-D(s)\right\} - L.var2[i]<-sum((var %*% zs.scaled)* zs.scaled) + L.var2[i]<-sum((var %*% zs.scaled)* zs.scaled) # L.cov[i](s) = \left\{zs-D(s)\right\}^TA^{-1}(L.cov1-L.cov2) L.cov[i]<-sum(iA%*%(L.cov1-L.cov2) *zs.scaled) - L.var[i] <- L.var1[i] + L.var2[i] + 2*L.cov[i] - - } - + } return(L.var) invisible() - } ### This is not useful because it does't make sense to perform prediction if we don't assume the model -### However it will be used when estimating the two-phase estimators -L.rvar<-function(newtime = newtime, pred = pred, t.pos=t.pos, wts=wts, t.unique=t.unique, eta.cum=eta.cum, eta=eta, resid=resid, L.resid=L.resid, ### resid is an output from fitted ah model +### However it will be used when estimating the two-phase estimators +L.rvar<-function(newtime = newtime, pred = pred, t.pos=t.pos, wts=wts, t.unique=t.unique, eta.cum=eta.cum, eta=eta, resid=resid, L.resid=L.resid, ### resid is an output from fitted ah model iA = iA, n.obs = n.obs, ...){ + + t.mark <- t.unique[t.pos] # The closed time point in the original data timeline - t.mark <- t.unique[t.pos] # The closed time point in the original data timeline + # calculate s - t.mark, the last intervla + last.interval <- newtime - t.mark - # calculate s - t.mark, the last intervla - last.interval <- newtime - t.mark + L.var<-NULL + L.var1<-L.var2<-matrix(0, nrow=n.obs, ncol=length(t.pos)) - L.var<-NULL - L.var1<-L.var2<-matrix(0, nrow=n.obs, ncol=length(t.pos)) + # See equantion before (3.42) for the formula for calculating the robust variance of predicted outcome + # Let L.var1 = \int_0^{s}dM(t)/PY(t) and L.var2 = (zs-D(s)_A^{-1}\int_{0}^{\tau}[Z-\bar{Z}]dM(t) + # I calcuate the robust variance by (L.var1- L.var2)^2 + for ( i in 1:length(t.pos)){ - # See equantion before (3.42) for the formula for calculating the robust variance of predicted outcome - # Let L.var1 = \int_0^{s}dM(t)/PY(t) and L.var2 = (zs-D(s)_A^{-1}\int_{0}^{\tau}[Z-\bar{Z}]dM(t) - # I calcuate the robust variance by (L.var1- L.var2)^2 - for ( i in 1:length(t.pos)){ + idx <- t.pos[i] - idx <- t.pos[i] - - L.var1[,i]<-L.resid[,i] #L.var1 is the L.resid at the closest t.fail on the left - - #calculate L.var2 - # calcuate D(s) where D(s) = D(t.mark[i]) + last.interval[s]*eta[,idx] - # Note eta.cum is eta.ncol x n.par while eta is n.par x eta.ncol - Ds <- eta.cum[idx, ] + last.interval[i] * eta[, idx] - - #calcuate zs - # pred: n.obs x n.par - zs <-pred * newtime[i] - zs.scaled <- as.numeric(zs-Ds) + L.var1[,i]<-L.resid[,i] #L.var1 is the L.resid at the closest t.fail on the left - L.var2[,i]<- resid %*% iA%*%zs.scaled + 
#calculate L.var2 + # calcuate D(s) where D(s) = D(t.mark[i]) + last.interval[s]*eta[,idx] + # Note eta.cum is eta.ncol x n.par while eta is n.par x eta.ncol + Ds <- eta.cum[idx, ] + last.interval[i] * eta[, idx] - L.var[i]<-sum(((L.var1[,i]+L.var2[,i])*wts)^2) - } - return(L.var) - } + #calcuate zs + # pred: n.obs x n.par + zs <-pred * newtime[i] + zs.scaled <- as.numeric(zs-Ds) + L.var2[,i]<- resid %*% iA%*%zs.scaled + L.var[i]<-sum(((L.var1[,i]+L.var2[,i])*wts)^2) + } + return(L.var) +} - - # Calculating \int_0^{s}dM(t)/PY(t) for each individual - cook.L.resid<- function(time.pos=time.pos, match.eta=match.eta, n.obs=n.obs, - eta.den.vec=eta.den.vec, lambda0=lambda0,t1.unique =t1.unique, t.diff=t.diff,t.fail=t.fail, censor=censor, - effects=effects,wts=wts,...){ - - # I treat M(t) as a step function, - - L.resid<-matrix(0, nrow=n.obs,ncol=length(time.pos)) - eta.den.lambda0<-cumsum(lambda0/eta.den.vec) - eta.den.cum<-cumsum(t.diff/eta.den.vec) - - # match.eta<-match(t.fail, t.unique) - eta.den.i<-eta.den.vec[match.eta] - #eta.den.lambda0.i<-eta.den.lambda0[match.eta] - - #eta.den.cum.i<-eta.den.cum[match.eta] - l<-length(time.pos) - L1<-rep(0,l) - L2<-eta.den.lambda0[time.pos] - L3<-eta.den.cum[time.pos] - - # calculate L.resid for each individual at each observed time point - for ( k in 1: n.obs){ - L.resid1<-L1 - L.resid2<-L2 - L.resid3<-effects[k]*L3 - - a<-which(t1.unique >=t.fail[k])[1] - if(!is.na(a)){ - idx5<-a - - # L.resid1 <- int_0^{s}dN_i(t)/PY(t) - L.resid1[idx5:l]<- censor[k]/eta.den.i[k] # when t< T_i, L.resid = 0; whent >=T_i L.resid is always - #the same since N_i(t) is an indicator function - - #L. resid2 <- int_0^{s} Y(t)d\Lambda(t)/PY(t) = int_0^{t.mark} Y(t)d\Lambda(t)/PY(t) - L.resid2[idx5:l]<-L.resid2[idx5] # when s < T_i, L.resid2= int_0^{s} dLambda(t)/PY(t), - # based on our estimators for Lambda(t), dLambda(t) =0 for any t between two marks - # when s > T_i L.resid2= int_0^{T_i} dLambda(t)/PY(t), - #L. resid3<- int_0^{s} Y(t)Z^T\theta/PY(t)dt = nt_0^{s} Y(t)Z^T\theta/PY(t)dt - - L.resid3[idx5:l]<-L.resid3[idx5] # think dt as Delta(t) which only have values at t.failures - - } - - - L.resid[k,]<-(L.resid1-L.resid2-L.resid3)*wts[k] - } - return(L.resid) - invisible() +# Calculating \int_0^{s}dM(t)/PY(t) for each individual +cook.L.resid<- function(time.pos=time.pos, match.eta=match.eta, n.obs=n.obs, + eta.den.vec=eta.den.vec, lambda0=lambda0,t1.unique =t1.unique, t.diff=t.diff,t.fail=t.fail, censor=censor, + effects=effects,wts=wts,...){ + + # I treat M(t) as a step function, + L.resid<-matrix(0, nrow=n.obs,ncol=length(time.pos)) + + eta.den.lambda0<-cumsum(lambda0/eta.den.vec) + eta.den.cum<-cumsum(t.diff/eta.den.vec) + + # match.eta<-match(t.fail, t.unique) + eta.den.i<-eta.den.vec[match.eta] + #eta.den.lambda0.i<-eta.den.lambda0[match.eta] + + #eta.den.cum.i<-eta.den.cum[match.eta] + l<-length(time.pos) + L1<-rep(0,l) + L2<-eta.den.lambda0[time.pos] + L3<-eta.den.cum[time.pos] + + # calculate L.resid for each individual at each observed time point + for ( k in 1: n.obs){ + L.resid1<-L1 + L.resid2<-L2 + L.resid3<-effects[k]*L3 + + a<-which(t1.unique >=t.fail[k])[1] + if(!is.na(a)){ + idx5<-a + + # L.resid1 <- int_0^{s}dN_i(t)/PY(t) + L.resid1[idx5:l]<- censor[k]/eta.den.i[k] # when t< T_i, L.resid = 0; whent >=T_i L.resid is always + #the same since N_i(t) is an indicator function + #L. 
resid2 <- int_0^{s} Y(t)d\Lambda(t)/PY(t) = int_0^{t.mark} Y(t)d\Lambda(t)/PY(t) + L.resid2[idx5:l]<-L.resid2[idx5] # when s < T_i, L.resid2= int_0^{s} dLambda(t)/PY(t), + # based on our estimators for Lambda(t), dLambda(t) =0 for any t between two marks + # when s > T_i L.resid2= int_0^{T_i} dLambda(t)/PY(t), + #L. resid3<- int_0^{s} Y(t)Z^T\theta/PY(t)dt = nt_0^{s} Y(t)Z^T\theta/PY(t)dt + L.resid3[idx5:l]<-L.resid3[idx5] # think dt as Delta(t) which only have values at t.failures + } + L.resid[k,]<-(L.resid1-L.resid2-L.resid3)*wts[k] } + return(L.resid) + invisible() +} @@ -272,112 +268,86 @@ ################################################################################################## cook.wts.cal<-function(aux,aux.pha2,P,wts.pha2){ - - - - - if(!is.matrix(aux.pha2)) aux.pha2<-as.matrix(aux.pha2) - aux.tot<-apply(aux,2,sum) - aux.tot.pha2<-apply(aux.pha2*wts.pha2,2,sum) - ### phase I total, 1 x q - - L0<-solve(P)%*%(aux.tot.pha2-aux.tot) - - - - - - model.calibration<-function(L){ - F<-NULL - wts.fish<-as.vector(exp(-aux.pha2%*%L)*wts.pha2) - for (i in 1:dim(aux)[2] ){ - F[i]<-sum(wts.fish*aux.pha2[,i])-aux.tot[i] - } - F - } - - eval<-rootSolve::multiroot(model.calibration,start=L0) - L<-eval$root - # est.acc<-eval$est.acc - wts.cal<-as.vector(exp(-aux.pha2%*%L)*wts.pha2) - return(wts.cal) - invisible() + if(!is.matrix(aux.pha2)) aux.pha2<-as.matrix(aux.pha2) + aux.tot<-apply(aux,2,sum) + aux.tot.pha2<-apply(aux.pha2*wts.pha2,2,sum) + ### phase I total, 1 x q + + L0<-solve(P)%*%(aux.tot.pha2-aux.tot) + + model.calibration<-function(L){ + F<-NULL + wts.fish<-as.vector(exp(-aux.pha2%*%L)*wts.pha2) + for (i in 1:dim(aux)[2] ){ + F[i]<-sum(wts.fish*aux.pha2[,i])-aux.tot[i] + } + F } - - + eval <- rootSolve::multiroot(model.calibration,start=L0) + L <- eval$root + # est.acc<-eval$est.acc + wts.cal<-as.vector(exp(-aux.pha2%*%L)*wts.pha2) + return(wts.cal) + invisible() +} -#### Calculate the phase II calibrated variance for predicted values +#### Calculate the phase II calibrated variance for predicted values L.rvar.calibration<-function(pred = pred, newtime = newtime, t.pos=t.pos, t.unique, eta.cum=eta.cum, eta = eta, resid=resid, L.resid=L.resid, - iA=iA, n.obs =n.obs, new.wts=new.wts, aux.pha2=aux.pha2, P=P, wts.cal=wts.cal, ...){ - - - ## See page 109 of Jie Hu' thesis for the detailed formula - ## L.var2 = Q[\sqrt{\frac{1-\pi(V)}{\pi(V)} } * (\psi - Q {\psi \tilda{V}) * Q[\tilda{V}\tilda{V}^{-1}] * \tilda{V}})]^2 - ## I devide \psi to two parts \psi = L. 
resid + resid%*%iA%*% zs.scaled - ## Then sqrt(L.var2 )= new.weights * (L.var1 +L.var2) = new.weights [Q1 +Q2 *%*%iA%*% zs.scaled ] - ## Q1 = L.resid - QL.resid \tilde{V} * Q[\tilda{V}\tilda{V}^{-1}] * \tilda{V}}) - ## Q2 = resid - Qresid \tilde{V} * Q[\tilda{V}\tilda{V}^{-1}] * \tilda{V}}) - - ## Note expectation Qf= sum wts.cal*f - - # Q<- t(aux.pha2*sqrt(wts.cal))%*% (resid/sqrt(wts.cal)) - # resid.adj<-resid-(aux.pha2%*%solve(P)%*% Q )*wts.cal - #temp<-resid.adj*sqrt((1-Pi.pha2)/(Pi.pha2*wts.pha2)) - - t.mark <- t.unique[t.pos] # The closed time point in the original data timeline - - # calculate s - t.mark, the last intervla - last.interval <- newtime - t.mark - - L.var<-NULL - L.var1<-L.var2<-matrix(0, nrow=n.obs,ncol=length(t.pos)) - - ## Q2: q x n X n x1 = q x1 - Q2<- t(aux.pha2*sqrt(wts.cal))%*% (resid/sqrt(wts.cal)) # resid are already mutiplied by wts.cal - # multiplied by sqrt(wts.cal) because Qf= sum wts.cal*f - - resid.adjust <- resid-(aux.pha2%*%solve(P)%*% Q2 )*wts.cal ### the 2nd term * wts.cal because resid are already weighted by wts.cal - - for (i in 1:length(t.pos)){ - - idx<-t.pos[i] - # Calculate Q1 - ## q x n times nx1 - Q1<- t(aux.pha2*sqrt(wts.cal))%*% (L.resid[,i]/sqrt(wts.cal)) # multiplied by sqrt(wts.cal) because Qf= sum wts.cal*f - # L.resid are already weighted by wts.cal - - ## L.resid[,i]: nx1, aux.pha2: n x q , P: q x q, Q1: qx1 - L.var1[,i] <- L.resid[,i]-(aux.pha2%*%solve(P)%*% Q1 ) * wts.cal ### the 2nd term * wts.cal because L.resid[,i] are already weighted by wts.cal + iA=iA, n.obs =n.obs, new.wts=new.wts, aux.pha2=aux.pha2, P=P, wts.cal=wts.cal, ...){ + ## See page 109 of Jie Hu' thesis for the detailed formula + ## L.var2 = Q[\sqrt{\frac{1-\pi(V)}{\pi(V)} } * (\psi - Q {\psi \tilda{V}) * Q[\tilda{V}\tilda{V}^{-1}] * \tilda{V}})]^2 + ## I devide \psi to two parts \psi = L. 
resid + resid%*%iA%*% zs.scaled
+  ## Then sqrt(L.var2 )= new.weights * (L.var1 +L.var2) =  new.weights [Q1 +Q2 %*%iA%*% zs.scaled ]
+  ## Q1 = L.resid - QL.resid \tilde{V} * Q[\tilde{V}\tilde{V}^{-1}] * \tilde{V}})
+  ## Q2 = resid - Qresid \tilde{V} * Q[\tilde{V}\tilde{V}^{-1}] * \tilde{V}})
+
+  ## Note expectation Qf= sum wts.cal*f
+
+  # Q<- t(aux.pha2*sqrt(wts.cal))%*% (resid/sqrt(wts.cal))
+  # resid.adj<-resid-(aux.pha2%*%solve(P)%*% Q )*wts.cal
+  #temp<-resid.adj*sqrt((1-Pi.pha2)/(Pi.pha2*wts.pha2))
+
+  t.mark <- t.unique[t.pos] # The closest time point in the original data timeline
+
+  # calculate s - t.mark, the last interval
+  last.interval <- newtime - t.mark
+
+  L.var<-NULL
+  L.var1<-L.var2<-matrix(0, nrow=n.obs,ncol=length(t.pos))
+
+  ## Q2: q x n X n x1 = q x1
+  Q2<- t(aux.pha2*sqrt(wts.cal))%*% (resid/sqrt(wts.cal)) # resid are already multiplied by wts.cal
+  # multiplied by sqrt(wts.cal) because Qf= sum wts.cal*f
+
+  resid.adjust <- resid-(aux.pha2%*%solve(P)%*% Q2 )*wts.cal ### the 2nd term * wts.cal because resid are already weighted by wts.cal
+
+  for (i in 1:length(t.pos)){
+    idx<-t.pos[i]
+    # Calculate Q1
+    ## q x n times nx1
+    Q1<- t(aux.pha2*sqrt(wts.cal))%*% (L.resid[,i]/sqrt(wts.cal)) # multiplied by sqrt(wts.cal) because Qf= sum wts.cal*f
+    # L.resid are already weighted by wts.cal
+
+    ## L.resid[,i]: nx1, aux.pha2: n x q , P: q x q, Q1: qx1
+    L.var1[,i] <- L.resid[,i]-(aux.pha2%*%solve(P)%*% Q1 ) * wts.cal ### the 2nd term * wts.cal because L.resid[,i] are already weighted by wts.cal
+
+    ### Calculate zs.scaled
+    # calculate D(s) where D(s) = D(t.mark[i]) + last.interval[s]*eta[,idx]
+    # Note eta.cum is eta.ncol x n.par while eta is n.par x eta.ncol
+    Ds <- eta.cum[idx, ] + last.interval[i] * eta[, idx]
+
+    #calculate zs
+    # pred: n.obs x n.par
+    zs <-pred * newtime[i]
+    zs.scaled <- as.numeric(zs-Ds)
+
+    L.var2[,i]<- resid.adjust %*%iA%*%zs.scaled
+
+    ## because Qf= sum wts.cal*f, and L.var1 and L.var2 are already products with wts.cal
+    ## Thus new.wts=sqrt((1-Pi.pha2)/(wts.cal*Pi.pha2))
+    L.var[i]<-sum(((L.var1[,i]+L.var2[,i])*new.wts)^2)
+  }
+  return(L.var)
+  invisible()
+}
diff -Nru addhazard-1.0.0/README.md addhazard-1.1.0/README.md
--- addhazard-1.0.0/README.md	1970-01-01 00:00:00.000000000 +0000
+++ addhazard-1.1.0/README.md	2017-03-20 20:54:28.000000000 +0000
@@ -0,0 +1,12 @@
+# addhazard
+
+addhazard 1.1.0 updates:
+
+1. add a two-phase sampling example dataset, nwts2ph.rda, to the data folder
+2. add R file nwts2ph.generator.R to generate the nwts2ph example dataset
+3. update documentation
+4. add rownames to the summary output when there is only one covariate
+5. fix the typo in ah.2ph() on updating wts.2ph
+6. add importFrom stats model.weights to ah.R file
+7. add ah.fit.R but currently suppress the use of this function.
+   It will be activated in a later version.
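+
+A minimal usage sketch, adapted from the package's documented examples (the
+two-phase call assumes the bundled `nwts2ph` dataset with its `in.ph2` and
+`Pi` columns can be used directly):
+
+```r
+library(addhazard)
+library(survival)
+
+## fit the additive hazards model to part of the NWTS cohort;
+## ties = FALSE declares that the failure times contain no ties
+nwts <- nwtsco[1:100, ]
+fit1 <- ah(Surv(trel, relaps) ~ age + instit, ties = FALSE,
+           data = nwts, robust = FALSE)
+summary(fit1)
+
+## predict an individual's cumulative hazard at t = 3 and t = 5
+newdata <- nwtsco[101, ]
+predict(fit1, newdata, newtime = c(3, 5))
+
+## two-phase sampling fit, calibrated on the phase I variable stage
+fit2 <- ah.2ph(Surv(trel, relaps) ~ age + histol, data = nwts2ph,
+               ties = FALSE, R = in.ph2, Pi = Pi,
+               robust = FALSE, calibration.variables = "stage")
+summary(fit2)
+```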