-
Notifications
You must be signed in to change notification settings - Fork 0
/
ShupingR-thesis.bib
963 lines (956 loc) · 84.3 KB
/
ShupingR-thesis.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@comment{model}
% Zhao, Kosorok and Zeng: reinforcement-learning trial design (Statistics in Medicine).
@article{odemodel,
  author          = {Zhao, Yufan and Kosorok, Michael R. and Zeng, Donglin},
  title           = {Reinforcement Learning Design for Cancer Clinical Trials},
  journal         = {Statistics in Medicine},
  volume          = {28},
  number          = {26},
  pages           = {3294--3315},
  year            = {2009},
  doi             = {10.1002/sim.3720},
  url             = {http://dx.doi.org/10.1002/sim.3720},
  issn            = {1097-0258},
  keywords        = {adaptive design,clinical trials,decision problems,dynamic treatment regime,extremely randomized trees,multi-stage,multistage decision problems,non-small cell lung cancer,optimal policy,reinforcement learning,support vector,support vector regression},
  mendeley-groups = {rldm},
  file            = {:Users/shuping.ruan/Dropbox/rldm/reference/main-ref/sim-model.pdf:pdf}
}
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@comment{policy evaluation, lpsi}
% Lagoudakis and Parr: journal version of least-squares policy iteration (LSPI).
@article{Lagoudakis2003,
  author   = {Lagoudakis, Michail G. and Parr, Ronald},
  title    = {Least-Squares Policy Iteration},
  journal  = {Journal of Machine Learning Research},
  volume   = {4},
  pages    = {1107--1149},
  year     = {2003},
  doi      = {10.1162/jmlr.2003.4.6.1107},
  url      = {http://dl.acm.org/citation.cfm?id=964290},
  issn     = {1532-4435},
  keywords = {approximate policy iteration,least-squares methods,Markov decision processes,reinforcement learning,value-function approximation},
  abstract = {We propose a new approach to reinforcement learning for control problems which combines value-function approximation with linear architectures and approximate policy iteration. This new approach is motivated by the least-squares temporal-difference learning algorithm (LSTD) for prediction problems, which is known for its efficient use of sample experiences compared to pure temporal-difference algorithms. Heretofore, LSTD has not had a straightforward application to control problems mainly because LSTD learns the state value function of a fixed policy which cannot be used for action selection and control without a model of the underlying process. Our new algorithm, least-squares policy iteration (LSPI), learns the state-action value function which allows for action selection without a model and for incremental policy improvement within a policy-iteration framework. LSPI is a model-free, off-policy method which can use efficiently (and reuse in each iteration) sample experiences collected in any manner. By separating the sample collection method, the choice of the linear approximation architecture, and the solution method, LSPI allows for focused attention on the distinct elements that contribute to practical reinforcement learning. LSPI is tested on the simple task of balancing an inverted pendulum and the harder task of balancing and riding a bicycle to a target location. In both cases, LSPI learns to control the pendulum or the bicycle by merely observing a relatively small number of trials where actions are selected randomly. LSPI is also compared against Q-learning (both with and without experience replay) using the same value function architecture. While LSPI achieves good performance fairly consistently on the difficult bicycle task, Q-learning variants were rarely able to balance for more than a small fraction of the time needed to reach the target location.}
}
% Lagoudakis and Parr: conference version of LSPI (NIPS 14).
@inproceedings{Lagoudakis2001,
  title     = {{Model-Free Least-Squares Policy Iteration}},
  author    = {Lagoudakis, Michail G. and Parr, Ronald},
  booktitle = {Advances in Neural Information Processing Systems 14 (NIPS 2001)},
  year      = {2001},
  pages     = {1547--1554},
  issn      = {1049-5258},
  keywords  = { LSQ,LSPI},
  abstract  = {We propose a new approach to reinforcement learning which combines least squares function approximation with policy iteration. Our method is model-free and completely off policy. We are motivated by the least squares temporal difference learning algorithm (LSTD), which is known for its efficient use of sample experiences compared to pure temporal difference algorithms. LSTD is ideal for prediction problems, however it heretofore has not had a straightforward application to control problems. Moreover, approximations learned by LSTD are strongly influenced by the visitation distribution over states. Our new algorithm, Least Squares Policy Iteration (LSPI) addresses these issues. The result is an off-policy method which can use (or reuse) data collected from any source. We have tested LSPI on several problems, including a bicycle simulator in which it learns to guide the bicycle to a goal efficiently by merely observing a relatively small number of completely random trials.}
}
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@comment{interior-poin}
% Byrd, Hribar and Nocedal: barrier/SQP trust-region interior point algorithm.
@article{Byrd1999,
  author   = {Byrd, Richard H. and Hribar, Mary E. and Nocedal, Jorge},
  title    = {An Interior Point Algorithm for Large-Scale Nonlinear Programming},
  journal  = {SIAM Journal on Optimization},
  volume   = {9},
  number   = {4},
  pages    = {877--900},
  year     = {1999},
  doi      = {10.1137/S1052623497325107},
  url      = {http://epubs.siam.org/doi/abs/10.1137/S1052623497325107},
  issn     = {1052-6234},
  keywords = {49M,49N,65K10,barrier method,constrained optimization,interior point method,large-scale optimization,nonlinear programming,primal method,primal-dual method,sequential quadratic programming,trust region method},
  abstract = {The design and implementation of a new algorithm for solving large nonlinear programming problems is described. It follows a barrier approach that employs sequential quadratic programming and trust regions to solve the subproblems occurring in the iteration. Both primal and primal-dual versions of the algorithm are developed, and their performance is illustrated in a set of numerical tests.}
}
% Waltz, Morales, Nocedal and Orban: line-search / trust-region interior method (Knitro).
@article{Waltz2006,
  author   = {Waltz, Richard A. and Morales, Jos{\'e} Luis and Nocedal, Jorge and Orban, Dominique},
  title    = {An Interior Algorithm for Nonlinear Optimization That Combines Line Search and Trust Region Steps},
  journal  = {Mathematical Programming},
  volume   = {107},
  number   = {3},
  pages    = {391--408},
  year     = {2006},
  doi      = {10.1007/s10107-004-0560-5},
  issn     = {0025-5610},
  abstract = {An interior-point method for nonlinear programming is presented. It enjoys the flexibility of switching between a line search method that computes steps by factoring the primal-dual equations and a trust region method that uses a conjugate gradient iteration. Steps computed by direct factorization are always tried first, but if they are deemed ineffective, a trust region iteration that guarantees progress toward stationarity is invoked. To demonstrate its effectiveness, the algorithm is implemented in the Knitro [6, 28] software package and is extensively tested on a wide selection of test problems.}
}
% Forsgren, Gill and Wright: survey article on interior methods, published in SIAM Review.
@article{Forsgren2002,
  author          = {Forsgren, Anders and Gill, Philip E. and Wright, Margaret H.},
  title           = {Interior Methods for Nonlinear Optimization},
  journal         = {SIAM Review},
  volume          = {44},
  number          = {4},
  pages           = {525--597},
  year            = {2002},
  doi             = {10.1137/S0036144502414942},
  url             = {http://epubs.siam.org/doi/abs/10.1137/S0036144502414942},
  issn            = {0036-1445},
  keywords        = {barrier methods,constrained minimization,interior methods,nonlinear constraints,nonlinear programming,penalty methods,primal-dual methods},
  mendeley-groups = {Current},
  abstract        = {Interior methods are an omnipresent, conspicuous feature of the constrained optimization landscape today, but it was not always so. Primarily in the form of barrier methods, interior-point techniques were popular during the 1960s for solving nonlinearly constrained problems. However, their use for linear programming was not even contemplated because of the total dominance of the simplex method. Vague but continuing anxiety about barrier methods eventually led to their abandonment in favor of newly emerging, apparently more efficient alternatives such as augmented Lagrangian and sequential quadratic programming methods. By the early 1980s, barrier methods were almost without exception regarded as a closed chapter in the history of optimization. This picture changed dramatically with Karmarkar's widely publicized announcement in 1984 of a fast polynomial-time interior method for linear programming; in 1985, a formal connection was established between his method and classical barrier methods. Since then, interior methods have advanced so far, so fast, that their influence has transformed both the theory and practice of constrained optimization. This article provides a condensed, selective look at classical material and recent research about interior methods for nonlinearly constrained optimization.}
}
% Fiacco and McCormick: the classical SUMT (barrier/penalty) monograph.
% Names must be joined with "and"; a comma makes BibTeX parse them as one person.
@book{fiacco,
  author    = {Fiacco, Anthony V. and McCormick, Garth P.},
  title     = {Nonlinear Programming: Sequential Unconstrained Minimization Techniques},
  publisher = {John Wiley \& Sons},
  address   = {New York},
  year      = {1968}
}
% Fletcher's optimization textbook, retyped as a book so publisher and year print.
% NOTE(review): the second edition (1987) is assumed; confirm the edition actually consulted.
@book{Fletcher,
  author    = {Fletcher, Roger},
  title     = {Practical Methods of Optimization},
  edition   = {Second},
  publisher = {John Wiley \& Sons},
  address   = {Chichester},
  year      = {1987}
}
% Nocedal and Wright: numerical optimization textbook, second edition.
@book{NoceWrig06,
  author    = {Nocedal, Jorge and Wright, Stephen J.},
  title     = {Numerical Optimization},
  edition   = {second},
  publisher = {Springer},
  address   = {New York, NY, USA},
  year      = {2006}
}
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@comment{KDE}
% Pagan and Ullah: nonparametric econometrics monograph (library-catalogue residue removed).
@book{Pagan1999,
  author    = {Pagan, Adrian R. and Ullah, Aman},
  title     = {Nonparametric Econometrics},
  series    = {Themes in Modern Econometrics},
  publisher = {Cambridge University Press},
  address   = {Cambridge},
  year      = {1999},
  pages     = {424},
  isbn      = {0521355648},
  url       = {http://catalog.lib.ncsu.edu/record/NCSU1253883}
}
% Li and Racine: nonparametric econometrics textbook.
% The url is stored raw (plain ampersand) so url-aware styles print a working link.
@book{Li2011,
  author    = {Li, Qi and Racine, Jeffrey Scott},
  title     = {Nonparametric Econometrics: Theory and Practice},
  publisher = {Princeton University Press},
  year      = {2011},
  pages     = {768},
  isbn      = {1400841062},
  url       = {https://books.google.com/books?id=Zsa7ofamTIUC&pgis=1},
  abstract  = {Until now, students and researchers in nonparametric and semiparametric statistics and econometrics have had to turn to the latest journal articles to keep pace with these emerging methods of economic analysis. Nonparametric Econometrics fills a major gap by gathering together the most up-to-date theory and techniques and presenting them in a remarkably straightforward and accessible format. The empirical tests, data, and exercises included in this textbook help make it the ideal introduction for graduate students and an indispensable resource for researchers. Nonparametric and semiparametric methods have attracted a great deal of attention from statisticians in recent decades. While the majority of existing books on the subject operate from the presumption that the underlying data is strictly continuous in nature, more often than not social scientists deal with categorical data--nominal and ordinal--in applied settings. The conventional nonparametric approach to dealing with the presence of discrete variables is acknowledged to be unsatisfactory. This book is tailored to the needs of applied econometricians and social scientists. Qi Li and Jeffrey Racine emphasize nonparametric techniques suited to the rich array of data types--continuous, nominal, and ordinal--within one coherent framework. They also emphasize the properties of nonparametric estimators in the presence of potentially irrelevant variables. Nonparametric Econometrics covers all the material necessary to understand and apply nonparametric methods for real-world problems.}
}
% NOTE(review): the exported title was JSTOR cover-page boilerplate and the author
% looked mis-extracted. Title, authors, issue and DOI below were reconstructed from
% the entry's own abstract, page range, volume and Project Euclid URL, which all
% point to Andersen and Gill (1982). Confirm this is the work meant to be cited.
% The citation key is kept unchanged so existing cite commands still resolve.
@article{Blackwell1982,
  author   = {Andersen, Per Kragh and Gill, Richard D.},
  title    = {{Cox's} Regression Model for Counting Processes: A Large Sample Study},
  journal  = {The Annals of Statistics},
  volume   = {10},
  number   = {4},
  pages    = {1100--1120},
  year     = {1982},
  doi      = {10.1214/aos/1176345976},
  url      = {http://projecteuclid.org/euclid.aos/1176345976},
  issn     = {0090-5364},
  abstract = {The Cox regression model for censored survival data specifies that covariates have a proportional effect on the hazard function of the life-time distribution of an individual. In this paper we discuss how this model can be extended to a model where covariate processes have a proportional effect on the intensity process of a multivariate counting process. This permits a statistical regression analysis of the intensity of a recurrent event allowing for complicated censoring patterns and time dependent covariates. Furthermore, this formulation gives rise to proofs with very simple structure using martingale techniques for the asymptotic properties of the estimators from such a model. Finally an example of a statistical analysis is included.},
  file     = {:Users/shuping.ruan/Dropbox/constrained-optimal-regime-1/Citation/Blackwell{\_}1982{\_}Institute of Mathematical Statistics is collaborating with JSTOR to digitize, preserve, and extend access to The Annals.pdf:pdf}
}
% Silverman: uniform consistency of kernel density estimates and their derivatives.
% The abstract was scrambled by column-wise PDF extraction and has been put back in
% reading order using the same words.
@article{Silverman1978a,
  author   = {Silverman, Bernard W.},
  title    = {Weak and Strong Uniform Consistency of the Kernel Estimate of a Density and Its Derivatives},
  journal  = {The Annals of Statistics},
  volume   = {6},
  number   = {1},
  pages    = {177--184},
  year     = {1978},
  abstract = {The estimation of a density and its derivatives by the kernel method is considered. Uniform consistency properties over the whole real line are studied. For suitable kernels and uniformly continuous densities it is shown that the conditions $h \to 0$ and $(nh)^{-1} \log n \to 0$ are sufficient for strong uniform consistency of the density estimate, where $n$ is the sample size and $h$ is the ``window width.'' Under certain conditions on the kernel, conditions are found on the density and on the behavior of the window width which are necessary and sufficient for weak and strong uniform consistency of the estimate of the density derivatives. Theorems on the rate of strong and weak consistency are also proved.}
}
% Parzen (1962), kernel density estimation. The export had parsed the JSTOR cover
% page as the title; author, journal and year are taken from that cover-page text.
% The citation key is kept unchanged so existing cite commands still resolve.
@article{Statistics2016,
  author  = {Parzen, Emanuel},
  title   = {On Estimation of a Probability Density Function and Mode},
  journal = {The Annals of Mathematical Statistics},
  volume  = {33},
  number  = {3},
  pages   = {1065--1076},
  year    = {1962},
  doi     = {10.1214/aoms/1177704472},
  url     = {http://www.jstor.org/stable/2237880},
  file    = {:Users/shuping.ruan/Dropbox/constrained-optimal-regime-1/Citation/Statistics{\_}2016{\_}On Estimation of a Probability Density Function and Mode Author ( s ) Emanuel Parzen Source The Annals of Mathematical.pdf:pdf}
}
% Devroye and Wagner (1979), L1 convergence of kernel density estimates. The export
% had parsed the JSTOR cover page as the title; authors, journal and year are taken
% from that cover-page text. The citation key is kept unchanged.
@article{Statistics2009,
  author  = {Devroye, Luc P. and Wagner, Terry J.},
  title   = {The {$L_1$} Convergence of Kernel Density Estimates},
  journal = {The Annals of Statistics},
  volume  = {7},
  number  = {5},
  pages   = {1136--1139},
  year    = {1979},
  file    = {:Users/shuping.ruan/Dropbox/constrained-optimal-regime-1/Citation/Statistics{\_}2009{\_}The L1 Convergence of Kernel Density Estimates Author ( s ) L . P . Devroye and T . J . Wagner Source The Annals of Sta.pdf:pdf}
}
% Gine and Guillou: uniform almost-sure rates for multivariate kernel density estimators.
% Raw Unicode symbols in the abstract are written as LaTeX so the file stays ASCII-safe.
% NOTE(review): the abstract lost a displayed formula after "then" during export.
@article{Gine2002,
  author   = {Gin{\'{e}}, Evarist and Guillou, Armelle},
  title    = {Rates of Strong Uniform Consistency for Multivariate Kernel Density Estimators},
  journal  = {Annales de l'Institut Henri Poincar{\'e} (B) Probability and Statistics},
  volume   = {38},
  number   = {6},
  pages    = {907--921},
  year     = {2002},
  doi      = {10.1016/S0246-0203(02)01128-7},
  issn     = {0246-0203},
  keywords = {Kernel density estimators,Non-parametric density estimation,Uniform almost sure rates},
  abstract = {Let fn denote the usual kernel density estimator in several dimensions. It is shown that if {\{}an{\}} is a regular band sequence, K is a bounded square integrable kernel of several variables, satisfying some additional mild conditions ((K1) below), and if the data consist of an i.i.d. sample from a distribution possessing a bounded density f with respect to Lebesgue measure on Rd, then. for some absolute constant C that depends only on d. With some additional but still weak conditions, it is proved that the above sequence of normalized suprema converges a.s. to $2d\,\|f\|_\infty \int K^2(x)\,dx$. Convergence of the moment generating functions is also proved. Neither of these results require f to be strictly positive. These results improve upon, and extend to several dimensions, results by Silverman [13] for univariate densities. {\textcopyright} 2002 {\'{E}}ditions scientifiques et m{\'{e}}dicales Elsevier SAS.},
  file     = {:Users/shuping.ruan/Dropbox/constrained-optimal-regime-1/Citation/Gin{\'{e}}, Guillou{\_}2002{\_}Rates of strong uniform consistency for multivariate kernel density estimators.pdf:pdf}
}
% Devroye and Penrod: strong uniform convergence of variable (Breiman-type) kernel estimates.
% The DOI is lifted out of the publisher URL already present in the entry.
@article{Devroye1986,
  author   = {Devroye, Luc and Penrod, Clark S.},
  title    = {The Strong Uniform Convergence of Multivariate Variable Kernel Estimates},
  journal  = {Canadian Journal of Statistics},
  volume   = {14},
  number   = {3},
  pages    = {211--220},
  year     = {1986},
  doi      = {10.2307/3314798},
  url      = {http://onlinelibrary.wiley.com/doi/10.2307/3314798/abstract},
  keywords = {Breiman's estimate,consistency,density estimation,kernel estimate,strong convergence},
  file     = {:Users/shuping.ruan/Dropbox/constrained-optimal-regime-1/Citation/Devroye, Penrod{\_}1986{\_}The strong uniform convergence of multivariate variable kernel estimates.pdf:pdf}
}
% Cacoullos: multivariate kernel density estimation.
% NOTE(review): the export had no journal, a 1964 date, pages 251--255 and a
% non-ISBN value in isbn. The details below are for the published journal version
% (1966); confirm whether the 1964 technical report was the intended source.
@article{Theo,
  author  = {Cacoullos, Theophilos},
  title   = {Estimation of a Multivariate Density},
  journal = {Annals of the Institute of Statistical Mathematics},
  volume  = {18},
  number  = {1},
  pages   = {179--189},
  year    = {1966},
  doi     = {10.1007/BF02869528},
  file    = {:Users/shuping.ruan/Dropbox/constrained-optimal-regime-1/Citation/Cacoullos{\_}1964{\_}Estimation of a Multivariate Density.pdf:pdf}
}
% Scott: multivariate density estimation textbook, second edition (per the abstract).
% Retyped from article to book so that publisher and edition are printed.
@book{Scott2015,
  author    = {Scott, David W.},
  title     = {Multivariate Density Estimation: Theory, Practice, and Visualization},
  edition   = {Second},
  publisher = {John Wiley \& Sons},
  address   = {Hoboken, NJ},
  year      = {2015},
  pages     = {360},
  isbn      = {1118575539},
  abstract  = {Clarifies modern data analysis through nonparametric density estimation for a complete working knowledge of the theory and methods Featuring a thoroughly revised presentation, "Multivariate Density Estimation: Theory, Practice, and Visualization, Second Edition" maintains an intuitive approach to the underlying methodology and supporting theory of density estimation. Including new material and updated research in each chapter, the "Second Edition" presents additional clarification of theoretical opportunities, new algorithms, and up-to-date coverage of the unique challenges presented in the field of data analysis. The new edition focuses on the various density estimation techniques and methods that can be used in the field of big data. Defining optimal nonparametric estimators, the "Second Edition" demonstrates the density estimation tools to use when dealing with various multivariate structures in univariate, bivariate, trivariate, and quadrivariate data analysis. Continuing to illustrate the major concepts in the context of the classical histogram, "Multivariate Density Estimation: Theory, Practice, and Visualization, Second Edition" features: Over 150 updated figures to clarify theoretical results and to show analyses of real data sets An updated presentation of graphic visualization using computer software such as R A clear discussion of selections of important research during the past decade, including mixture estimation, robust parametric modeling algorithms, and clustering Over 130 problems to help readers reinforce the main concepts and ideas presented Boxed theorems and results allowing easy identification of crucial ideas " Multivariate Density Estimation: Theory, Practice, and Visualization, Second Edition" is an ideal reference for theoretical and applied statisticians, practicing engineers, as well as all readers interested in the theoretical aspects of nonparametric estimation and the application of these methods to multivariate data. 
The "Second Edition" is also a useful as a textbook for introductory courses in kernel statistics, smoothing, advanced computational statistics, and general forms of statistical distributions. "David W. Scott, PhD, " is Noah Harding Professor in the Department of Statistics at Rice University. The author of over 100 published articles, papers, and book chapters, Dr. Scott is also Fellow of the American Statistical Association (ASA) and the Institute of Mathematical Statistics. He is recipient of the ASA Founder's Award and the Army Wilks Award. His research interests include computational statistics, data visualization, and density estimation. Dr. Scott is also Coeditor of "Wiley Interdisciplinary Reviews: Computational Statistics" and previous Editor of the "Journal of Computational and Graphical Statistics."},
  file      = {:Users/shuping.ruan/Dropbox/constrained-optimal-regime-1/Citation/Scott{\_}2015{\_}Multivariate Density estimation Theory, Practice, and Visualization.pdf:pdf}
}
% Devroye and Wagner: chapter in an edited volume, so typed as incollection;
% the misc type ignores booktitle and pages.
% NOTE(review): editor, volume numeral and publisher were supplied from memory of the
% standard citation; confirm against the book's title page.
@incollection{Devroye1980,
  author    = {Devroye, Luc P. and Wagner, Terry J.},
  title     = {The Strong Uniform Consistency of Kernel Density Estimates},
  booktitle = {Multivariate Analysis {V}},
  editor    = {Krishnaiah, Paruchuri R.},
  publisher = {North-Holland},
  address   = {Amsterdam},
  pages     = {59--77},
  year      = {1980},
  file      = {:Users/shuping.ruan/Dropbox/constrained-optimal-regime-1/Citation/Devroye, Wagner{\_}1980{\_}The Strong Uniform Consistency of Kernel Density Estimates.pdf:pdf}
}
% Terence Tao's course notes on modes of convergence. The export had garbled the
% author as "If, Terence Tao" and typed the notes as a journal article.
% NOTE(review): presumably lecture notes for a course numbered 245A; add a url and
% access date once the source location is confirmed.
@misc{If2010,
  author       = {Tao, Terence},
  title        = {{245A}, Notes 4: Modes of Convergence},
  howpublished = {Lecture notes},
  year         = {2010},
  pages        = {1--20},
  file         = {:Users/shuping.ruan/Dropbox/constrained-optimal-regime-1/Citation/If{\_}2010{\_}245A , Notes 4 Modes of convergence.pdf:pdf}
}
% NOTE(review): the author list looks like words auto-extracted from a PDF heading
% rather than real names, and the source cannot be identified from this entry
% alone. Confirm the intended work and correct author/journal before citing.
@article{There2009,
  title   = {{Asymptotic notations 2.1}},
  author  = {There, Definition and Big, Terminology and Analysis, Asymptotic},
  journal = {Asymptotic Analysis},
  year    = {2009},
  pages   = {9--31},
  file    = {:Users/shuping.ruan/Dropbox/constrained-optimal-regime-1/Citation/There, Big, Analysis{\_}2009{\_}Asymptotic notations 2.1.pdf:pdf}
}
% Rosenblatt: early paper on nonparametric density estimates. Typed as a journal
% article so that journal and volume print; the misc type ignores booktitle.
@article{Rosenblatt1956,
  author   = {Rosenblatt, Murray},
  title    = {Remarks on Some Nonparametric Estimates of a Density Function},
  journal  = {The Annals of Mathematical Statistics},
  volume   = {27},
  number   = {3},
  pages    = {832--837},
  year     = {1956},
  doi      = {10.1214/aoms/1177728190},
  issn     = {0003-4851},
  abstract = {This note discusses some aspects of the estimation of the density function of a univariate probability distribution. All estimates of the density function satisfying relatively mild conditions are shown to be biased. The asymptotic mean square error of a particular class of estimates is evaluated.},
  file     = {:Users/shuping.ruan/Dropbox/constrained-optimal-regime-1/Citation/Rosenblatt{\_}1956{\_}Remarks on Some Nonparametric Estimates of a Density Function.pdf:pdf}
}
% Billingsley's probability textbook, third edition (per the abstract). The export
% had typed it as a journal article and attached a doi/issn that belong to a
% different journal item, so those two fields are dropped.
% The citation key is kept unchanged so existing cite commands still resolve.
@book{Lindsay1995,
  author          = {Billingsley, Patrick},
  title           = {Probability and Measure},
  edition         = {Third},
  publisher       = {John Wiley \& Sons},
  address         = {New York},
  year            = {1995},
  isbn            = {0471007102},
  mendeley-groups = {One Stage Proof},
  abstract        = {PROBABILITY AND MEASURE Third Edition Now in its new third edition, Probability and Measure offers advanced students, scientists, and engineers an integrated introduction to measure theory and probability. Retaining the unique approach of the previous editions, this text interweaves material on probability and measure, so that probability problems generate an interest in measure theory and measure theory is then developed and applied to probability. Probability and Measure provides thorough coverage of probability, measure, integration, random variables and expected values, convergence of distributions, derivatives and conditional probability, and stochastic processes. The Third Edition features an improved treatment of Brownian motion and the replacement of queuing theory with ergodic theory. Like the previous editions, this new edition will be well received by students of mathematics, statistics, economics, and a wide variety of disciplines that require a solid understanding of probability theory.},
  file            = {:Users/shuping.ruan/Dropbox/constrained-optimal-regime-1/Citation/Billingsley{\_}1995{\_}Probability {\&} Measure.pdf:pdf}
}
% Hunter's course notes on asymptotics. Typed as unpublished because an article
% entry requires a journal, which these notes do not have.
% NOTE(review): the institution in the note field is assumed; confirm.
@unpublished{Hunter2014,
  author          = {Hunter, David R.},
  title           = {Notes for a Graduate-Level Course in Asymptotics for Statisticians},
  note            = {Lecture notes, Pennsylvania State University},
  year            = {2014},
  pages           = {97},
  mendeley-groups = {One Stage Proof},
  file            = {:Users/shuping.ruan/Dropbox/constrained-optimal-regime-1/Citation/Hunter{\_}2014{\_}Notes for a graduate-level course in asymptotics for statisticians.pdf:pdf}
}
% Linn, Laber and Stefanski: constrained estimation for competing outcomes.
% NOTE(review): this entry type normally needs booktitle and publisher, and neither
% is present here -- TODO supply them from the published volume.
@incollection{Linn2014a,
  title  = {{Constrained estimation for competing outcomes}},
  author = {Linn, Kristin A. and Laber, Eric B. and Stefanski, Leonard A.},
  year   = {2014},
  file   = {:home/sruan/Dropbox/constrained-optimal-regime-1/ReferenceIntoMed/Linn, Laber, Stefanski - 2014 - Constrained estimation for competing outcomes.pdf:pdf}
}
% Henderson, Ansell and Alshibani: regret-regression for dynamic treatment regimes.
% The page range is written in full, and the title no longer ends in a period
% (styles add their own, which otherwise produces a doubled full stop).
@article{Henderson2010,
  author          = {Henderson, Robin and Ansell, Phil and Alshibani, Deyadeen},
  title           = {Regret-Regression for Optimal Dynamic Treatment Regimes},
  journal         = {Biometrics},
  volume          = {66},
  number          = {4},
  pages           = {1192--1201},
  month           = dec,
  year            = {2010},
  doi             = {10.1111/j.1541-0420.2009.01368.x},
  issn            = {1541-0420},
  pmid            = {20002404},
  keywords        = {Anticoagulants,Anticoagulants: administration \& dosage,Biometry,Biometry: methods,Computer Simulation,Drug Dosage Calculations,Humans},
  mendeley-groups = {ResearchOne,SlideForPrelim},
  abstract        = {We consider optimal dynamic treatment regime determination in practice. Model building, checking, and comparison have had little or no attention so far in this literature. Motivated by an application on optimal dosage of anticoagulants, we propose a modeling and estimation strategy that incorporates the regret functions of Murphy (2003, Journal of the Royal Statistical Society, Series B 65, 331-366) into a regression model for observed responses. Estimation is quick and diagnostics are available, meaning a variety of candidate models can be compared. The method is illustrated using simulation and the anticoagulation application.},
  file            = {:home/sruan/Dropbox/constrained-optimal-regime-1/ReferenceIntoMed/Henderson, Ansell, Alshibani - 2010 - Regret-regression for optimal dynamic treatment regimes.pdf:pdf}
}
% Zhao, Zeng, Rush and Kosorok: outcome weighted learning for individualized treatment rules.
% The third author's initial is capitalised and punctuated so it parses as a given name.
@article{Zhao2012,
  author          = {Zhao, Yingqi and Zeng, Donglin and Rush, A. John and Kosorok, Michael R.},
  title           = {Estimating Individualized Treatment Rules Using Outcome Weighted Learning},
  journal         = {Journal of the American Statistical Association},
  volume          = {107},
  number          = {499},
  pages           = {1106--1118},
  year            = {2012},
  doi             = {10.1080/01621459.2012.695674},
  issn            = {0162-1459},
  pmid            = {23630406},
  keywords        = {Bayes classifier,cross-validation,dynamic treatment regime,individualized treatment rule,risk bound,RKHS,support vector machine,weighted},
  mendeley-groups = {SlideForPrelim},
  abstract        = {There is increasing interest in discovering individualized treatment rules for patients who have heterogeneous responses to treatment. In particular, one aims to find an optimal individualized treatment rule which is a deterministic function of patient specific characteristics maximizing expected clinical outcome. In this paper, we first show that estimating such an optimal treatment rule is equivalent to a classification problem where each subject is weighted proportional to his or her clinical outcome. We then propose an outcome weighted learning approach based on the support vector machine framework. We show that the resulting estimator of the treatment rule is consistent. We further obtain a finite sample bound for the difference between the expected outcome using the estimated individualized treatment rule and that of the optimal treatment rule. The performance of the proposed approach is demonstrated via simulation studies and an analysis of chronic depression data.},
  file            = {:home/sruan/Dropbox/constrained-optimal-regime-1/ReferenceIntoMed/Zhao et al. - 2012 - Estimating Individualized Treatment Rules Using Outcome Weighted Learning.pdf:pdf}
}
% Orellana, Rotnitzky and Robins (2010), dynamic regime marginal structural mean models, Part I.
% Journal name restored to title case; styles print the journal field verbatim.
@article{Orellana2010a,
abstract = {Dynamic treatment regimes are set rules for sequential decision making based on patient covariate history. Observational studies are well suited for the investigation of the effects of dynamic treatment regimes because of the variability in treatment decisions found in them. This variability exists because different physicians make different decisions in the face of similar patient histories. In this article we describe an approach to estimate the optimal dynamic treatment regime among a set of enforceable regimes. This set is comprised by regimes defined by simple rules based on a subset of past information. The regimes in the set are indexed by a Euclidean vector. The optimal regime is the one that maximizes the expected counterfactual utility over all regimes in the set. We discuss assumptions under which it is possible to identify the optimal regime from observational longitudinal data. Murphy et al. (2001) developed efficient augmented inverse probability weighted estimators of the expected utility of one fixed regime. Our methods are based on an extension of the marginal structural mean model of Robins (1998, 1999) which incorporate the estimation ideas of Murphy et al. (2001). Our models, which we call dynamic regime marginal structural mean models, are specially suitable for estimating the optimal treatment regime in a moderately small class of enforceable regimes of interest. We consider both parametric and semiparametric dynamic regime marginal structural models. We discuss locally efficient, double-robust estimation of the model parameters and of the index of the optimal treatment regime in the set. In a companion paper in this issue of the journal we provide proofs of the main results.},
author = {Orellana, Liliana and Rotnitzky, Andrea and Robins, James M},
issn = {1557-4679},
journal = {The International Journal of Biostatistics},
keywords = {Algorithms,Clinical Trials as Topic,Clinical Trials as Topic: statistics \& numerical d,Decision Making,Longitudinal Studies,Models, Statistical,Probability,Research Design,Research Design: statistics \& numerical data},
month = jan,
number = {2},
pages = {Article 8},
pmid = {21969994},
title = {{Dynamic regime marginal structural mean models for estimation of optimal dynamic treatment regimes, Part I: main content.}},
volume = {6},
year = {2010}
}
% Song, Wang, Zeng and Kosorok (2011), penalized Q-learning, arXiv preprint 1108.5338.
% Typed as misc because an article entry requires a journal, which a preprint does not have.
@misc{Song2011,
abstract = {A dynamic treatment regime effectively incorporates both accrued information and long-term effects of treatment from specially designed clinical trials. As these become more and more popular in conjunction with longitudinal data from clinical studies, the development of statistical inference for optimal dynamic treatment regimes is a high priority. This is very challenging due to the difficulties arising form non-regularities in the treatment effect parameters. In this paper, we propose a new reinforcement learning framework called penalized Q-learning (PQ-learning), under which the non-regularities can be resolved and valid statistical inference established. We also propose a new statistical procedure---individual selection---and corresponding methods for incorporating individual selection within PQ-learning. Extensive numerical studies are presented which compare the proposed methods with existing methods, under a variety of non-regular scenarios, and demonstrate that the proposed approach is both inferentially and computationally superior. The proposed method is demonstrated with the data from a depression clinical trial study.},
archivePrefix = {arXiv},
arxivId = {1108.5338},
author = {Song, Rui and Wang, Weiwei and Zeng, Donglin and Kosorok, Michael R.},
eprint = {1108.5338},
month = aug,
title = {{Penalized Q-Learning for Dynamic Treatment Regimes}},
url = {https://arxiv.org/abs/1108.5338},
year = {2011}
}
% Luedtke and van der Laan (2015), working paper on optimal dynamic treatments in resource-limited settings.
% Title de-duplicated; typed as techreport because no journal exists for this item.
% NOTE(review): institution wording is assumed to be the U.C. Berkeley biostatistics working paper series -- confirm, and add the paper number.
@techreport{Division2015,
author = {Luedtke, Alexander R and van der Laan, Mark J},
file = {:home/sruan/Dropbox/constrained-optimal-regime-1/ReferenceIntoMed/Luedtke, Van Der Laan - 2015 - Optimal Dynamic Treatments in Resource-Limited Settings Optimal Dynamic Treatments in Resource-Limited Se.pdf:pdf},
institution = {University of California, Berkeley, Division of Biostatistics},
mendeley-groups = {ResearchOne},
title = {{Optimal Dynamic Treatments in Resource-Limited Settings}},
type = {Working Paper},
year = {2015}
}
% Laber, Lizotte and Ferguson (2014), set-valued dynamic treatment regimes, Biometrics 70(1).
% The eprint id is now bare (the archive prefix is a separate field) and the ISSN is hyphenated.
@article{LaberTwo2014,
abstract = {Dynamic treatment regimes (DTRs) operationalize the clinical decision process as a sequence of functions, one for each clinical decision, where each function maps up-to-date patient information to a single recommended treatment. Current methods for estimating optimal DTRs, for example Q-learning, require the specification of a single outcome by which the "goodness" of competing dynamic treatment regimes is measured. However, this is an over-simplification of the goal of clinical decision making, which aims to balance several potentially competing outcomes, for example, symptom relief and side-effect burden. When there are competing outcomes and patients do not know or cannot communicate their preferences, formation of a single composite outcome that correctly balances the competing outcomes is not possible. This problem also occurs when patient preferences evolve over time. We propose a method for constructing DTRs that accommodates competing outcomes by recommending sets of treatments at each decision point. Formally, we construct a sequence of set-valued functions that take as input up-to-date patient information and give as output a recommended subset of the possible treatments. For a given patient history, the recommended set of treatments contains all treatments that produce non-inferior outcome vectors. Constructing these set-valued functions requires solving a non-trivial enumeration problem. We offer an exact enumeration algorithm by recasting the problem as a linear mixed integer program. The proposed methods are illustrated using data from the CATIE schizophrenia study.},
archivePrefix = {arXiv},
arxivId = {1207.3100v2},
author = {Laber, Eric B. and Lizotte, Daniel J. and Ferguson, Bradley},
doi = {10.1111/biom.12132},
eprint = {1207.3100v2},
file = {:home/sruan/Dropbox/constrained-optimal-regime-1/ReferenceIntoMed/Laber, Lizotte, Ferguson - 2014 - Set-valued dynamic treatment regimes for competing outcomes.pdf:pdf},
issn = {1541-0420},
journal = {Biometrics},
keywords = {Competing outcomes,Composite outcomes,Dynamic treatment regimes,Personalized medicine,Preference elicitation},
mendeley-groups = {ResearchOne},
number = {1},
pages = {53--61},
pmid = {24400912},
title = {{Set-valued dynamic treatment regimes for competing outcomes}},
volume = {70},
year = {2014}
}
% Wang, Rotnitzky, Lin, Millikan and Thall (2012): viable dynamic treatment regimes in a sequentially randomized prostate cancer trial, JASA 107(498).
@article{Wang2012,
  author          = {Wang, Lu and Rotnitzky, Andrea and Lin, Xihong and Millikan, Randall E and Thall, Peter F},
  title           = {{Evaluation of Viable Dynamic Treatment Regimes in a Sequentially Randomized Trial of Advanced Prostate Cancer.}},
  journal         = {Journal of the American Statistical Association},
  volume          = {107},
  number          = {498},
  pages           = {493--508},
  month           = jun,
  year            = {2012},
  doi             = {10.1080/01621459.2011.641416},
  issn            = {0162-1459},
  pmid            = {22956855},
  abstract        = {We present new statistical analyses of data arising from a clinical trial designed to compare two-stage dynamic treatment regimes (DTRs) for advanced prostate cancer. The trial protocol mandated that patients were to be initially randomized among four chemotherapies, and that those who responded poorly were to be rerandomized to one of the remaining candidate therapies. The primary aim was to compare the DTRs' overall success rates, with success defined by the occurrence of successful responses in each of two consecutive courses of the patient's therapy. Of the one hundred and fifty study participants, forty seven did not complete their therapy per the algorithm. However, thirty five of them did so for reasons that precluded further chemotherapy; i.e. toxicity and/or progressive disease. Consequently, rather than comparing the overall success rates of the DTRs in the unrealistic event that these patients had remained on their assigned chemotherapies, we conducted an analysis that compared viable switch rules defined by the per-protocol rules but with the additional provision that patients who developed toxicity or progressive disease switch to a non-prespecified therapeutic or palliative strategy. This modification involved consideration of bivariate per-course outcomes encoding both efficacy and toxicity. We used numerical scores elicited from the trial's Principal Investigator to quantify the clinical desirability of each bivariate per-course outcome, and defined one endpoint as their average over all courses of treatment. Two other simpler sets of scores as well as log survival time also were used as endpoints. Estimation of each DTR-specific mean score was conducted using inverse probability weighted methods that assumed that missingness in the twelve remaining drop-outs was informative but explainable in that it only depended on past recorded data. 
We conducted additional worst-best case analyses to evaluate sensitivity of our findings to extreme departures from the explainable drop-out assumption.},
  file            = {:home/sruan/Dropbox/constrained-optimal-regime-1/ReferenceIntoMed/Wang et al. - 2012 - Evaluation of Viable Dynamic Treatment Regimes in a Sequentially Randomized Trial of Advanced Prostate Cancer.pdf:pdf},
  mendeley-groups = {ResearchOne}
}
% Murphy (2003): optimal dynamic treatment regimes, JRSS-B 65(2).
@article{Murphy2003,
  author          = {Murphy, S. A.},
  title           = {{Optimal dynamic treatment regimes}},
  journal         = {Journal of the Royal Statistical Society: Series B (Statistical Methodology)},
  volume          = {65},
  number          = {2},
  pages           = {331--355},
  month           = may,
  year            = {2003},
  doi             = {10.1111/1467-9868.00389},
  issn            = {1369-7412},
  mendeley-groups = {ResearchOne}
}
% Robins (2004), optimal structural nested models, Second Seattle Symposium on Biostatistics.
% The literal "In" was removed from booktitle (styles add it themselves) and the author is in Last, First form.
@INPROCEEDINGS{Robins04optimalstructural,
author = {Robins, James M.},
title = {Optimal Structural Nested Models for Optimal Sequential Decisions},
booktitle = {Proceedings of the Second Seattle Symposium on Biostatistics},
year = {2004},
publisher = {Springer}
}
% Murphy (2005), generalization error for Q-learning, JMLR 6.
% Journal name normalised from the PubMed form to the journal's full title.
@article{Murphy2005,
abstract = {Planning problems that involve learning a policy from a single training set of finite horizon trajectories arise in both social science and medical fields. We consider Q-learning with function approximation for this setting and derive an upper bound on the generalization error. This upper bound is in terms of quantities minimized by a Q-learning algorithm, the complexity of the approximation space and an approximation term due to the mismatch between Q-learning and the goal of learning a policy that maximizes the value function.},
author = {Murphy, Susan A},
file = {:home/sruan/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Murphy - 2005 - A Generalization Error for Q-Learning.pdf:pdf},
issn = {1532-4435},
journal = {Journal of Machine Learning Research},
mendeley-groups = {ResearchOne},
month = jul,
pages = {1073--1097},
pmid = {16763665},
title = {{A Generalization Error for Q-Learning.}},
volume = {6},
year = {2005}
}
% Robins, Hernan and Brumback (2000), marginal structural models, Epidemiology 11(5).
% The isbn field held the journal ISSN (a journal article has no ISBN) and was removed; issn keeps the value.
@article{Robins2000,
abstract = {In observational studies with exposures or treatments that vary over time, standard approaches for adjustment of confounding are biased when there exist time-dependent confounders that are also affected by previous treatment. This paper introduces marginal structural models, a new class of causal models that allow for improved adjustment of confounding in those situations. The parameters of a marginal structural model can be consistently estimated using a new class of estimators, the inverse-probability-of-treatment weighted estimators.},
author = {Robins, J M and Hern{\'{a}}n, M A and Brumback, B},
doi = {10.1097/00001648-200009000-00011},
issn = {1044-3983},
journal = {Epidemiology},
number = {5},
pages = {550--560},
pmid = {10955408},
title = {{Marginal structural models and causal inference in epidemiology.}},
volume = {11},
year = {2000}
}
% Zhao, Zeng, Laber and Kosorok (2015), BOWL and SOWL, JASA 110(510).
% Removed exporter junk: a non-arXiv number in the eprint fields, a placeholder all-zero ISBN,
% and a dangling "Keywords" heading at the end of the abstract.
@article{Zhao2015,
abstract = {Dynamic treatment regimes (DTRs) are sequential decision rules for individual patients that can adapt over time to an evolving illness. The goal is to accommodate heterogeneity among patients and find the DTR which will produce the best long term outcome if implemented. We introduce two new statistical learning methods for estimating the optimal DTR, termed backward outcome weighted learning (BOWL), and simultaneous outcome weighted learning (SOWL). These approaches convert individualized treatment selection into an either sequential or simultaneous classification problem, and can thus be applied by modifying existing machine learning techniques. The proposed methods are based on directly maximizing over all DTRs a nonparametric estimator of the expected long-term outcome; this is fundamentally different than regression-based methods, for example Q-learning, which indirectly attempt such maximization and rely heavily on the correctness of postulated regression models. We prove that the resulting rules are consistent, and provide finite sample bounds for the errors using the estimated rules. Simulation results suggest the proposed methods produce superior DTRs compared with Q-learning especially in small samples. We illustrate the methods using data from a clinical trial for smoking cessation.},
author = {Zhao, Ying-Qi and Zeng, Donglin and Laber, Eric B. and Kosorok, Michael R.},
doi = {10.1080/01621459.2014.937488},
issn = {0162-1459},
journal = {Journal of the American Statistical Association},
number = {510},
pages = {583--598},
pmid = {26236062},
title = {{New Statistical Learning Methods for Estimating Optimal Dynamic Treatment Regimes}},
url = {http://www.tandfonline.com/doi/full/10.1080/01621459.2014.937488},
volume = {110},
year = {2015}
}
% Zhang, Tsiatis, Davidian, Zhang and Laber (2012), classification perspective on treatment regimes, Stat 1(1).
% ISSN written in the hyphenated form used by other entries in this file.
@article{Zhang2012,
abstract = {A treatment regime maps observed patient characteristics to a recommended treatment. Recent technological advances have increased the quality, accessibility, and volume of patient-level data; consequently, there is a growing need for powerful and flexible estimators of an optimal treatment regime that can be used with either observational or randomized clinical trial data. We propose a novel and general framework that transforms the problem of estimating an optimal treatment regime into a classification problem wherein the optimal classifier corresponds to the optimal treatment regime. We show that commonly employed parametric and semi-parametric regression estimators, as well as recently proposed robust estimators of an optimal treatment regime can be represented as special cases within our framework. Furthermore, our approach allows any classification procedure that can accommodate case weights to be used without modification to estimate an optimal treatment regime. This introduces a wealth of new and powerful learning algorithms for use in estimating treatment regimes. We illustrate our approach using data from a breast cancer clinical trial.},
author = {Zhang, Baqun and Tsiatis, Anastasios A. and Davidian, Marie and Zhang, Min and Laber, Eric},
doi = {10.1002/sta.411},
issn = {2049-1573},
journal = {Stat},
keywords = {Classification,Doubly robust estimator,Inverse probability weighting,Personalized medicine,Potential outcomes,Propensity score},
number = {1},
pages = {103--114},
pmid = {23645940},
title = {{Estimating optimal treatment regimes from a classification perspective}},
volume = {1},
year = {2012}
}
% Zhang, Tsiatis, Laber and Davidian (2012), robust method for optimal treatment regimes, Biometrics 68(4).
% Middle initial of Tsiatis capitalised (it was exported as a lower-case "a."); ISSN hyphenated.
@article{Zhang2012b,
abstract = {A treatment regime is a rule that assigns a treatment, among a set of possible treatments, to a patient as a function of his/her observed characteristics, hence "personalizing" treatment to the patient. The goal is to identify the optimal treatment regime that, if followed by the entire population of patients, would lead to the best outcome on average. Given data from a clinical trial or observational study, for a single treatment decision, the optimal regime can be found by assuming a regression model for the expected outcome conditional on treatment and covariates, where, for a given set of covariates, the optimal treatment is the one that yields the most favorable expected outcome. However, treatment assignment via such a regime is suspect if the regression model is incorrectly specified. Recognizing that, even if misspecified, such a regression model defines a class of regimes, we instead consider finding the optimal regime within such a class by finding the regime that optimizes an estimator of overall population mean outcome. To take into account possible confounding in an observational study and to increase precision, we use a doubly robust augmented inverse probability weighted estimator for this purpose. Simulations and application to data from a breast cancer clinical trial demonstrate the performance of the method.},
author = {Zhang, Baqun and Tsiatis, Anastasios A. and Laber, Eric B. and Davidian, Marie},
doi = {10.1111/j.1541-0420.2012.01763.x},
file = {:home/sruan/Dropbox/constrained-optimal-regime-1/ReferenceIntoMed/Treatment regimes subgroup identification/Zhang.pdf:pdf},
issn = {0006-341X},
journal = {Biometrics},
keywords = {Doubly robust estimator,Inverse probability weighting,Outcome regression,Personalized medicine,Potential outcomes,Propensity score},
mendeley-groups = {ResearchOne},
number = {4},
pages = {1010--1018},
pmid = {22550953},
title = {{A Robust Method for Estimating Optimal Treatment Regimes}},
volume = {68},
year = {2012}
}
% Laber, Lizotte, Qian, Pelham and Murphy (2014), technical challenges and applications of dynamic treatment regimes.
% Added the journal data an article entry requires and removed the stray space before the colon in the title.
% NOTE(review): journal, volume, pages and DOI were supplied from the published version -- confirm against the PDF.
@article{Laber2014,
author = {Laber, Eric B and Lizotte, Daniel J and Qian, Min and Pelham, William E and Murphy, Susan A},
doi = {10.1214/14-EJS920},
file = {:home/sruan/Dropbox/constrained-optimal-regime-1/ReferenceIntoMed/Laber et al. - 2014 - Dynamic treatment regimes technical challenges and applications.pdf:pdf},
journal = {Electronic Journal of Statistics},
mendeley-groups = {ResearchOne},
number = {1},
pages = {1225--1272},
title = {{Dynamic treatment regimes: technical challenges and applications}},
volume = {8},
year = {2014}
}
% Moodie (2004), "Dynamic treatment regimes", Clinical Trials 1(5).
% Journal name normalised; Unicode quotes in the abstract replaced by LaTeX quotes to match the escaping used elsewhere in this file.
% NOTE(review): the DOI and attached file name point at an Annual Review of Statistics article, and the abstract describes a
% book chapter on SMART designs, neither of which matches a 2004 Clinical Trials item -- verify which work is meant.
@article{Moodie2004,
abstract = {In recent years, treatment and intervention scientists increasingly realize that individual heterogeneity in disorder severity, background characteristics and co-occurring problems translates into heterogeneity in response to various aspects of any treatment program. Accordingly, research in this area is shifting from the traditional ``one-size-fits-all'' treatment to dynamic treatment regimes, which allow greater individualization in programming over time. A dynamic treatment regime is a sequence of decision rules that specify how the dosage and/or type of treatment should be adjusted through time in response to an individual's changing needs, aiming to optimize the effectiveness of the program. In the chapter we review the Sequential Multiple Assignment Randomized Trials (SMART), which is an experimental design useful for the development of dynamic treatment regimes. We compare the SMART approach with other experimental approaches and discuss data analyses methods for constructing a high quality dynamic treatment regime as well as other secondary analyses.},
author = {Moodie, Erica},
doi = {10.1146/annurev-statistics-022513-115553},
file = {:home/sruan/Dropbox/constrained-optimal-regime-1/ReferenceIntoMed/Others/annurev-statistics-022513-115553.pdf:pdf},
issn = {2326-8298},
journal = {Clinical Trials},
keywords = {dynamic treatment regime,nonregularity,q-learning,randomization,reinforcement learning,sequential},
mendeley-groups = {ResearchOne},
number = {5},
pages = {471},
pmid = {16279286},
title = {{Dynamic treatment regimes.}},
volume = {1},
year = {2004}
}
% Linn, Laber and Stefanski (2014), interactive Q-learning for probabilities and quantiles, arXiv preprint 1407.3414.
% Typed as misc because an article entry requires a journal; the eprint id is now bare (archive prefix is a separate field).
@misc{Linn2014,
archivePrefix = {arXiv},
arxivId = {1407.3414v1},
author = {Linn, Kristin A and Laber, Eric B and Stefanski, Leonard A},
eprint = {1407.3414v1},
file = {:home/sruan/Dropbox/constrained-optimal-regime-1/ReferenceIntoMed/Others/iQlearning.pdf:pdf},
mendeley-groups = {ResearchOne},
title = {{Interactive Q-learning for Probabilities and Quantiles}},
url = {https://arxiv.org/abs/1407.3414},
year = {2014}
}
% Geyer (1994), asymptotics of constrained M-estimation, Annals of Statistics 22(4).
% Math restored: the exporter had escaped the dollar signs and backslashes, so the title printed literal dollars
% and the abstract printed "backslash sqrt n" instead of a square root.
@article{Geyer1994,
abstract = {Limit theorems for an M-estimate constrained to lie in a closed subset of $R^d$ are given under two different sets of regularity conditions. A consistent sequence of global optimizers converges under Chernoff regularity of the parameter set. A $\sqrt{n}$-consistent sequence of local optimizers converges under Clarke regularity of the parameter set. In either case the asymptotic distribution is a projection of a normal random vector on the tangent cone of the parameter set at the true parameter value. Limit theorems for the optimal value are also obtained, agreeing with Chernoff's result in the case of maximum likelihood with global optimizers.},
author = {Geyer, Charles J.},
doi = {10.1214/aos/1176325768},
file = {:home/sruan/Dropbox/constrained-optimal-regime-1/ReferenceIntoMed/Constrained Estimation/Constrained M-Estimation.pdf:pdf},
issn = {0090-5364},
journal = {The Annals of Statistics},
mendeley-groups = {ResearchOne},
number = {4},
pages = {1993--2010},
title = {{On the Asymptotics of Constrained $M$-Estimation}},
volume = {22},
year = {1994}
}
% Qian and Murphy (2011), performance guarantees for individualized treatment rules, Annals of Statistics 39(2).
% Added the missing required journal field (the ISSN 0090-5364 and the AOS DOI identify it),
% capitalised Murphy's middle initial, and un-escaped the math in the abstract.
@article{Qian2011,
abstract = {Because many illnesses show heterogeneous response to treatment, there is increasing interest in individualizing treatment to patients [Arch. Gen. Psychiatry 66 (2009) 128--133]. An individualized treatment rule is a decision rule that recommends treatment according to patient characteristics. We consider the use of clinical trial data in the construction of an individualized treatment rule leading to highest mean response. This is a difficult computational problem because the objective function is the expectation of a weighted indicator function that is nonconcave in the parameters. Furthermore, there are frequently many pretreatment variables that may or may not be useful in constructing an optimal individualized treatment rule, yet cost and interpretability considerations imply that only a few variables should be used by the individualized treatment rule. To address these challenges, we consider estimation based on $l_1$-penalized least squares. This approach is justified via a finite sample upper bound on the difference between the mean response due to the estimated individualized treatment rule and the mean response due to the optimal individualized treatment rule.},
archivePrefix = {arXiv},
arxivId = {1105.3369},
author = {Qian, Min and Murphy, Susan A.},
doi = {10.1214/10-AOS864},
eprint = {1105.3369},
file = {:home/sruan/Dropbox/constrained-optimal-regime-1/ReferenceIntoMed/Treatment regimes subgroup identification/Qian\&Murphy.pdf:pdf},
issn = {0090-5364},
journal = {The Annals of Statistics},
mendeley-groups = {ResearchOne},
number = {2},
pages = {1180--1210},
pmid = {21666835},
title = {{Performance guarantees for individualized treatment rules}},
volume = {39},
year = {2011}
}
% Taylor, Cheng and Foster (2015), reader reaction to Zhang et al. (2012), Biometrics 71(1).
% Unicode curly quotes in the title replaced with LaTeX quotes (classic BibTeX is not Unicode-safe); ISSN hyphenated.
@article{Taylor2015,
author = {Taylor, Jeremy M. G. and Cheng, Wenting and Foster, Jared C.},
doi = {10.1111/biom.12228},
file = {:home/sruan/Dropbox/constrained-optimal-regime-1/ReferenceIntoMed/Treatment regimes subgroup identification/Reader ReactionZhang.pdf:pdf},
issn = {0006-341X},
journal = {Biometrics},
keywords = {optimal treatment regime,random forests},
mendeley-groups = {ResearchOne},
number = {1},
pages = {267--273},
title = {{Reader reaction to ``A robust method for estimating optimal treatment regimes'' by Zhang et al. (2012)}},
volume = {71},
year = {2015}
}
% Laber, Lizotte and Ferguson (2014), set-valued dynamic treatment regimes, Biometrics 70(1).
% This entry reused the key Laber2014, which an earlier entry for a different paper already holds; BibTeX rejects
% repeated keys, so this copy was never reachable. It now has its own key. It describes the same work as the entry
% keyed LaberTwo2014 -- NOTE(review): merge the two and cite a single key.
@article{Laber2014SetValued,
abstract = {Dynamic treatment regimes (DTRs) operationalize the clinical decision process as a sequence of functions, one for each clinical decision, where each function maps up-to-date patient information to a single recommended treatment. Current methods for estimating optimal DTRs, for example Q-learning, require the specification of a single outcome by which the "goodness" of competing dynamic treatment regimes is measured. However, this is an over-simplification of the goal of clinical decision making, which aims to balance several potentially competing outcomes, for example, symptom relief and side-effect burden. When there are competing outcomes and patients do not know or cannot communicate their preferences, formation of a single composite outcome that correctly balances the competing outcomes is not possible. This problem also occurs when patient preferences evolve over time. We propose a method for constructing DTRs that accommodates competing outcomes by recommending sets of treatments at each decision point. Formally, we construct a sequence of set-valued functions that take as input up-to-date patient information and give as output a recommended subset of the possible treatments. For a given patient history, the recommended set of treatments contains all treatments that produce non-inferior outcome vectors. Constructing these set-valued functions requires solving a non-trivial enumeration problem. We offer an exact enumeration algorithm by recasting the problem as a linear mixed integer program. The proposed methods are illustrated using data from the CATIE schizophrenia study.},
archivePrefix = {arXiv},
arxivId = {1207.3100v2},
author = {Laber, Eric B. and Lizotte, Daniel J. and Ferguson, Bradley},
doi = {10.1111/biom.12132},
eprint = {1207.3100v2},
file = {:home/sruan/Dropbox/constrained-optimal-regime-1/ReferenceIntoMed/Treatment regimes subgroup identification/set-value regimes.pdf:pdf},
issn = {1541-0420},
journal = {Biometrics},
keywords = {Competing outcomes,Composite outcomes,Dynamic treatment regimes,Personalized medicine,Preference elicitation},
mendeley-groups = {ResearchOne},
number = {1},
pages = {53--61},
pmid = {24400912},
title = {{Set-valued dynamic treatment regimes for competing outcomes}},
volume = {70},
year = {2014}
}
% Foster, Taylor and Ruberg (2011), Virtual Twins subgroup identification, Statistics in Medicine 30(24).
% Dropped the two-line isbn field, which held PubMed's electronic/linking ISSNs rather than an ISBN; ISSN hyphenated.
@article{Foster2011,
abstract = {We consider the problem of identifying a subgroup of patients who may have an enhanced treatment effect in a randomized clinical trial, and it is desirable that the subgroup be defined by a limited number of covariates. For this problem, the development of a standard, pre-determined strategy may help to avoid the well-known dangers of subgroup analysis. We present a method developed to find subgroups of enhanced treatment effect. This method, referred to as 'Virtual Twins', involves predicting response probabilities for treatment and control 'twins' for each subject. The difference in these probabilities is then used as the outcome in a classification or regression tree, which can potentially include any set of the covariates. We define a measure Q(\^{A}) to be the difference between the treatment effect in estimated subgroup \^{A} and the marginal treatment effect. We present several methods developed to obtain an estimate of Q(\^{A}), including estimation of Q(\^{A}) using estimated probabilities in the original data, using estimated probabilities in newly simulated data, two cross-validation-based approaches, and a bootstrap-based bias-corrected approach. Results of a simulation study indicate that the Virtual Twins method noticeably outperforms logistic regression with forward selection when a true subgroup of enhanced treatment effect exists. Generally, large sample sizes or strong enhanced treatment effects are needed for subgroup estimation. As an illustration, we apply the proposed methods to data from a randomized clinical trial.},
author = {Foster, Jared C. and Taylor, Jeremy M G and Ruberg, Stephen J.},
doi = {10.1002/sim.4322},
file = {:home/sruan/Dropbox/constrained-optimal-regime-1/ReferenceIntoMed/Treatment regimes subgroup identification/Subgroup-Jeremy.pdf:pdf},
issn = {0277-6715},
journal = {Statistics in Medicine},
keywords = {Random forests,Randomized clinical trials,Regression trees,Subgroups,Tailored therapeutics},
mendeley-groups = {ResearchOne},
number = {24},
pages = {2867--2880},
pmid = {21815180},
title = {{Subgroup identification from randomized clinical trial data}},
volume = {30},
year = {2011}
}
% Luedtke and van der Laan (2015), working paper on optimal dynamic treatments in resource-limited settings.
% This entry reused the key Division2015 held by an earlier entry; BibTeX rejects repeated keys, so this copy was
% never reachable. It now has its own key. The exporter had parsed the institution and series words as authors
% and doubled the title; both are corrected.
% NOTE(review): same work as the entry keyed Division2015 -- merge the two and cite a single key.
% Institution wording is assumed to be the U.C. Berkeley biostatistics working paper series -- confirm.
@techreport{Luedtke2015ResourceLimited,
author = {Luedtke, Alexander R and van der Laan, Mark J},
file = {:home/sruan/Dropbox/constrained-optimal-regime-1/ReferenceIntoMed/Constrained Estimation/Resource-Limited.pdf:pdf},
institution = {University of California, Berkeley, Division of Biostatistics},
mendeley-groups = {ResearchOne},
title = {{Optimal Dynamic Treatments in Resource-Limited Settings}},
type = {Working Paper},
year = {2015}
}
% Chakraborty and Moodie (2013), Statistical Methods for Dynamic Treatment Regimes.
% Added the publisher a book entry requires (the 10.1007 DOI prefix is Springer) and spaced the glued initials.
% NOTE(review): address and series supplied from the published book -- confirm.
@book{Chakraborty2013,
address = {New York, NY},
author = {Chakraborty, Bibhas and Moodie, Erica E. M.},
doi = {10.1007/978-1-4614-7428-9},
file = {:home/sruan/Dropbox/constrained-optimal-regime-1/ReferenceIntoMed/Chakraborty, Moodie - 2013 - Statistical Methods for Dynamic Treatment Regimes.pdf:pdf},
isbn = {978-1-4614-7427-2},
mendeley-groups = {SlideForPrelim},
publisher = {Springer},
series = {Statistics for Biology and Health},
title = {{Statistical Methods for Dynamic Treatment Regimes}},
year = {2013}
}
% Rubin (1980), discussion of Basu's paper on the Fisher randomization test, JASA 75(371).
% 75 is the volume, not the issue: moved to the volume field and the issue number added.
% The quoted title of Basu's paper is corrected to "Randomization Analysis ..." and set in quotes.
@article{Rubin1980,
author = {Rubin, D. B.},
journal = {Journal of the American Statistical Association},
number = {371},
pages = {591--593},
title = {{Discussion of ``Randomization Analysis of Experimental Data: The Fisher Randomization Test'' by D. Basu}},
volume = {75},
year = {1980}
}
% Robins (1997), causal inference from complex longitudinal data; a chapter in an edited Springer volume.
% Typed as incollection so that booktitle is actually printed (misc ignores it); booktitle no longer all caps.
% NOTE(review): editor, series, volume, pages and address supplied from the published volume -- confirm.
% The DOI is the one for the whole book, not the chapter.
@incollection{Robins1997,
address = {New York, NY},
author = {Robins, James M.},
booktitle = {Latent Variable Modeling and Applications to Causality},
doi = {10.1007/978-1-4612-1842-5},
editor = {Berkane, Maia},
pages = {69--117},
publisher = {Springer},
series = {Lecture Notes in Statistics},
title = {{Causal Inference from Complex Longitudinal Data}},
urldate = {2015-08-28},
volume = {120},
year = {1997}
}
% Hernan and Robins (2006), estimating causal effects from epidemiological data, JECH 60(7).
% Removed a dangling empty url assignment that had fused with the volume field and broke parsing of the entry.
% Journal name title-cased; accent on the first author's surname written as in the other entry for the same author.
@article{Hernan2006,
author = {Hern{\'{a}}n, Miguel A and Robins, James M},
doi = {10.1136/jech.2004.029496},
issn = {0143-005X},
journal = {Journal of Epidemiology and Community Health},
keywords = {Causality,Confounding Factors (Epidemiology),Data Interpretation, Statistical,Effect Modifier, Epidemiologic,Epidemiologic Research Design,Epidemiologic Studies,Humans,Probability,Research Design},
month = jul,
number = {7},
pages = {578--86},
pmid = {16790829},
title = {{Estimating causal effects from epidemiological data.}},
volume = {60},
year = {2006}
}
% Wasserman (2006), All of Nonparametric Statistics, Springer Texts in Statistics.
% Added the author, which a book entry requires and which the export had omitted.
@book{nonparm,
address = {New York, NY},
author = {Wasserman, Larry},
doi = {10.1007/0-387-30623-4},
isbn = {978-0-387-25145-5},
mendeley-groups = {PaperDraft},
publisher = {Springer New York},
series = {Springer Texts in Statistics},
title = {{All of Nonparametric Statistics}},
year = {2006}
}
% Lagarias, Reeds, Wright and Wright, convergence of the Nelder-Mead simplex method.
% Authors were comma-separated (parsed as a single garbled name); now in Last, First form joined by "and".
% NOTE(review): journal, volume, number, pages, year and DOI supplied from the published article -- confirm.
@article{JeffreyC.Lagarias,
author = {Lagarias, Jeffrey C. and Reeds, James A. and Wright, Margaret H. and Wright, Paul E.},
doi = {10.1137/S1052623496303470},
journal = {SIAM Journal on Optimization},
mendeley-groups = {PaperDraft},
number = {1},
pages = {112--147},
title = {{Convergence Properties of the Nelder-Mead Simplex Method in Low Dimensions}},
volume = {9},
year = {1998}
}
% MATLAB software citation.
% The version number did not match the release: 7.10.0 belongs to R2010a, while the year and release say R2014a.
% NOTE(review): version set to 8.3.0 to agree with R2014a -- confirm against the installation actually used.
% The release tag and the corporate author are brace-protected so styles cannot recase or split them.
@book{MATLAB,
year = {2014},
author = {{MATLAB}},
title = {Version 8.3.0 ({R2014a})},
publisher = {The MathWorks Inc.},
address = {Natick, Massachusetts}
}
% Kernel Density Estimation Toolbox for MATLAB, distributed from the web page in the url field.
% The url value had no delimiters, which is a syntax error; it is now braced.
% Typed as misc: this is software on a web page, and a book entry would require publisher and year.
% NOTE(review): authors and year are taken from the toolbox page as remembered -- confirm, and add an access date.
@misc{matlabKDE,
author = {Ihler, Alexander and Mandel, Michael},
title = {Kernel Density Estimation Toolbox for {MATLAB} ({R13})},
url = {http://www.ics.uci.edu/~ihler/code/kde.html},
year = {2003}
}
% Markowitz (1952), Portfolio Selection, Journal of Finance 7(1).
% Title no longer forced to all capitals (an artefact of the publisher metadata); ISSN hyphenated.
@article{Markowitz1952,
author = {Markowitz, Harry},
doi = {10.1111/j.1540-6261.1952.tb01525.x},
issn = {0022-1082},
journal = {The Journal of Finance},
mendeley-groups = {PaperDraft},
month = mar,
number = {1},
pages = {77--91},
title = {{Portfolio Selection}},
volume = {7},
year = {1952}
}
% Altman (1999): Constrained Markov Decision Processes, Stochastic Modeling Series.
@book{altman99cmdp,
  author    = {Altman, E.},
  title     = {Constrained Markov Decision Processes},
  series    = {Stochastic Modeling Series},
  publisher = {Taylor \& Francis},
  year      = {1999},
  isbn      = {9780849303821},
  lccn      = {99210415},
  url       = {https://books.google.com/books?id=3X9S1NM2iOgC}
}
@article{Geramifard2013,
abstract = {A Markov Decision Process (MDP) is a natural framework for formulating sequential decision-making problems under uncertainty. In recent years, researchers have greatly advanced algorithms for learning and acting in MDPs. This article reviews such algorithms, beginning with well-known dynamic programming methods for solving MDPs such as policy iteration and value iteration, then describes approximate dynamic programming methods such as trajectory based value iteration, and finally moves to reinforcement learning methods such as Q-Learning, SARSA, and least-squares policy iteration. We describe algorithms in a unified framework, giving pseudocode together with memory and iteration complexity analysis for each. Empirical evaluations of these techniques with four representations across four domains, provide insight into how these algorithms perform with various feature sets in terms of running time and performance. {\textcopyright} 2013 A. Geramifard, T. J. Walsh, S. Tellex, G. Chowdhary.},
author = {Geramifard, Alborz and Walsh, Thomas J. and Tellex, Stefanie and Chowdhary, Girish and Roy, Nicholas and How, Jonathan P.},
doi = {10.1561/2200000042},
file = {:Users/shuping.ruan/Dropbox/rldm/reference/main-ref/linear fun approximators.pdf:pdf},
isbn = {9781601987617},
issn = {1935-8237},
journal = {Foundations and Trends{\textregistered} in Machine Learning},
mendeley-groups = {rldm},
number = {4},
pages = {375--451},
title = {{A Tutorial on Linear Function Approximators for Dynamic Programming and Reinforcement Learning}},
url = {http://www.nowpublishers.com/articles/foundations-and-trends-in-machine-learning/MAL-042},
volume = {6},
year = {2013}
}
@article{Luckett2016,
abstract = {The vision for precision medicine is to use individual patient characteristics to inform a personalized treatment plan that leads to the best healthcare possible for each patient. Mobile technologies have an important role to play in this vision as they offer a means to monitor a patient's health status in real-time and subsequently to deliver interventions if, when, and in the dose that they are needed. Dynamic treatment regimes formalize individualized treatment plans as sequences of decision rules, one per stage of clinical intervention, that map current patient information to a recommended treatment. However, existing methods for estimating optimal dynamic treatment regimes are designed for a small number of fixed decision points occurring on a coarse time-scale. We propose a new reinforcement learning method for estimating an optimal treatment regime that is applicable to data collected using mobile technologies in an outpatient setting. The proposed method accommodates an indefinite time horizon and minute-by-minute decision making that are common in mobile health applications. We show the proposed estimators are consistent and asymptotically normal under mild conditions. The proposed methods are applied to estimate an optimal dynamic treatment regime for controlling blood glucose levels in patients with type 1 diabetes.},
archivePrefix = {arXiv},
arxivId = {1611.03531},
author = {Luckett, Daniel J. and Laber, Eric B. and Kahkoska, Anna R. and Maahs, David M. and Mayer-Davis, Elizabeth and Kosorok, Michael R.},
eprint = {1611.03531},
file = {:Users/shuping.ruan/Dropbox/rldm/reference/main-ref/vlearn.pdf:pdf},
journal = {arXiv},
mendeley-groups = {rldm},
title = {{Estimating Dynamic Treatment Regimes in Mobile Health Using V-learning}},
url = {http://arxiv.org/abs/1611.03531},
year = {2016}
}
@article{Ertefaie2014,
abstract = {The application of existing methods for constructing optimal dynamic treatment regimes is limited to cases where investigators are interested in optimizing a utility function over a fixed period of time (finite horizon). In this manuscript, we develop an inferential procedure based on temporal difference residuals for optimal dynamic treatment regimes in infinite-horizon settings, where there is no a priori fixed end of follow-up point. The proposed method can be used to determine the optimal regime in chronic diseases where patients are monitored and treated throughout their life. We derive large sample results necessary for conducting inference. We also simulate a cohort of patients with diabetes to mimic the third wave of the National Health and Nutrition Examination Survey, and we examine the performance of the proposed method in controlling the level of hemoglobin A1c. Supplementary materials for this article are available online.},
archivePrefix = {arXiv},
arxivId = {1406.0764},
author = {Ertefaie, Ashkan},
eprint = {1406.0764},
file = {:Users/shuping.ruan/Dropbox/rldm/reference/main-ref/infinite horizon.pdf:pdf},
journal = {arXiv},
keywords = {action-value function,backward induction,causal inference,temporal difference},
mendeley-groups = {rldm},
pages = {1--39},
title = {{Constructing Dynamic Treatment Regimes in Infinite-Horizon Settings}},
url = {http://arxiv.org/abs/1406.0764},
year = {2014}
}
@techreport{blatt2004learning,
author = {Blatt, Doron and Murphy, Susan A. and Zhu, Ji},
institution = {The Methodology Center, Pennsylvania State University},
number = {04-63},
title = {{A-learning} for Approximate Planning},
year = {2004}
}
@article{Lizotte2012,
abstract = {We present a general and detailed development of an algorithm for finite-horizon fitted-Q iteration with an arbitrary number of reward signals and linear value function approximation using an arbitrary number of state features. This includes a detailed treatment of the 3-reward function case using triangulation primitives from computational geometry and a method for identifying globally dominated actions. We also present an example of how our methods can be used to construct a real-world decision aid by considering symptom reduction, weight gain, and quality of life in sequential treatments for schizophrenia. Finally, we discuss future directions in which to take this work that will further enable our methods to make a positive impact on the field of evidence-based clinical decision support.},
author = {Lizotte, Daniel J. and Bowling, Michael H. and Murphy, Susan A.},
issn = {1532-4435},
journal = {Journal of Machine Learning Research},
keywords = {decision making,dynamic programming,linear regression,preference elicitation,reinforcement learning},
pages = {3253--3295},
pmid = {23741197},
title = {{Linear Fitted-Q Iteration with Multiple Reward Functions}},
volume = {13},
year = {2012}
}
@inproceedings{Lizotte2010,
abstract = {We introduce new, efficient algorithms for value iteration with multiple reward functions and continuous state. We also give an algorithm for finding the set of all non-dominated actions in the continuous state setting. This novel extension is appropriate for environments with continuous or finely discretized states where generalization is required, as is the case for data analysis of randomized controlled trials. Copyright 2010 by the author(s)/owner(s).},
author = {Lizotte, Daniel J. and Bowling, Michael H. and Murphy, Susan A.},
booktitle = {Proceedings of the 27th International Conference on Machine Learning},
isbn = {9781605589077},
pages = {695--702},
title = {{Efficient reinforcement learning with multiple reward functions for randomized controlled trial analysis}},
year = {2010}
}
@article{Lizotte2016,
author = {Lizotte, Daniel J. and Laber, Eric B.},
title = {Multi-objective {Markov} Decision Processes for Data-driven Decision Support},
journal = {Journal of Machine Learning Research},
issue_date = {January 2016},
volume = {17},
number = {1},
month = jan,
year = {2016},
issn = {1532-4435},
pages = {7378--7405},
numpages = {28},
url = {http://dl.acm.org/citation.cfm?id=2946645.3053492},
acmid = {3053492},
publisher = {JMLR.org},
keywords = {Markov decision processes, clinical decision support, evidence-based medicine, multi-objective optimization, reinforcement learning}
}
@article{Murphy2016,
abstract = {We develop an off-policy actor-critic algorithm for learning an optimal policy from a training set composed of data from multiple individuals. This algorithm is developed with a view towards its use in mobile health.},
archivePrefix = {arXiv},
arxivId = {1607.05047},
author = {Murphy, S. A. and Deng, Y. and Laber, E. B. and Maei, H. R. and Sutton, R. S. and Witkiewitz, K.},
eprint = {1607.05047},
journal = {arXiv},
pages = {1--18},
title = {{A Batch, Off-Policy, Actor-Critic Algorithm for Optimizing the Average Reward}},
url = {http://arxiv.org/abs/1607.05047},
year = {2016}
}
@comment{Removed a byte-identical second copy of the Luckett2016 entry (arXiv 1611.03531) that is already defined earlier in this file; a repeated citation key makes BibTeX report a "Repeated entry" error.}
@inproceedings{Achiam2017,
abstract = {For many applications of reinforcement learning it can be more convenient to specify both a reward function and constraints, rather than trying to design behavior through the reward function. For example, systems that physically interact with or around humans should satisfy safety constraints. Recent advances in policy search algorithms (Mnih et al., 2016; Schulman et al., 2015; Lillicrap et al., 2016; Levine et al., 2016) have enabled new capabilities in high-dimensional control, but do not consider the constrained setting. We propose Constrained Policy Optimization (CPO), the first general-purpose policy search algorithm for constrained reinforcement learning with guarantees for near-constraint satisfaction at each iteration. Our method allows us to train neural network policies for high-dimensional control while making guarantees about policy behavior all throughout training. Our guarantees are based on a new theoretical result, which is of independent interest: we prove a bound relating the expected returns of two policies to an average divergence between them. We demonstrate the effectiveness of our approach on simulated robot locomotion tasks where the agent must satisfy constraints motivated by safety.},
archivePrefix = {arXiv},
arxivId = {1705.10528},
author = {Achiam, Joshua and Held, David and Tamar, Aviv and Abbeel, Pieter},
booktitle = {Proceedings of the 34th International Conference on Machine Learning},
eprint = {1705.10528},
issn = {1938-7228},
series = {Proceedings of Machine Learning Research},
title = {{Constrained Policy Optimization}},
url = {https://arxiv.org/pdf/1705.10528.pdf},
volume = {70},
year = {2017}
}
@article{Lei2012,
abstract = {Interventions often involve a sequence of decisions. For example, clinicians frequently adapt the intervention to an individual's outcomes. Altering the intensity and type of intervention over time is crucial for many reasons, such as to obtain improvement if the individual is not responding or to reduce costs and burden when intensive treatment is no longer necessary. Adaptive interventions utilize individual variables (severity, preferences) to adapt the intervention and then dynamically utilize individual outcomes (response to treatment, adherence) to readapt the intervention. The Sequential Multiple Assignment Randomized Trial (SMART) provides high-quality data that can be used to construct adaptive interventions. We review the SMART and highlight its advantages in constructing and revising adaptive interventions as compared to alternative experimental designs. Selected examples of SMART studies are described and compared. A data analysis method is provided and illustrated using data from the Extending Treatment Effectiveness of Naltrexone SMART study.},
author = {Lei, H. and Nahum-Shani, I. and Lynch, K. and Oslin, D. and Murphy, S. A.},
doi = {10.1146/annurev-clinpsy-032511-143152},
issn = {1548-5943},
journal = {Annual Review of Clinical Psychology},
number = {1},
pages = {21--48},
pmid = {22224838},
title = {{A ``SMART'' Design for Building Individualized Treatment Sequences}},
url = {http://www.annualreviews.org/doi/10.1146/annurev-clinpsy-032511-143152},
volume = {8},
year = {2012}
}
@article{Gill2001,
abstract = {We extend Robins' theory of causal inference for complex longitudinal data to the case of continuously varying as opposed to discrete covariates and treatments. In particular we establish versions of the key results of the discrete theory: the g-computation formula and a collection of powerful characterizations of the g-null hypothesis of no treatment effect. This is accomplished under natural continuity hypotheses concerning the conditional distributions of the outcome variable and of the covariates given the past. We also show that our assumptions concerning counterfactual variables place no restriction on the joint distribution of the observed variables: thus in a precise sense, these assumptions are ``for free,'' or if you prefer, harmless. {\textcopyright} 2001 Institute of Mathematical Statistics},
author = {Gill, Richard D. and Robins, James M.},
doi = {10.1214/aos/1015345962},
issn = {0090-5364},
journal = {The Annals of Statistics},
keywords = {Causality,Counterfactuals,Longitudinal data,Observational studies},
number = {6},
pages = {1785--1811},
title = {{Causal inference for complex longitudinal data: The continuous case}},
volume = {29},
year = {2001}
}
@incollection{constrained,
address = {Philadelphia, PA},
author = {Linn, Kristin A. and Laber, Eric B. and Stefanski, Leonard A.},
booktitle = {Adaptive Treatment Strategies in Practice: Planning Trials and Analyzing Data for Personalized Medicine},
editor = {Kosorok, Michael R. and Moodie, Erica E. M.},
publisher = {Society for Industrial and Applied Mathematics},
series = {ASA-SIAM Series on Statistics and Applied Probability},
title = {Constrained Estimation for Competing Outcomes},
year = {2015}
}
@article{Chakraborty2014,
abstract = {A dynamic treatment regime consists of a sequence of decision rules, one per stage of intervention, that dictate how to individualize treatments to patients based on evolving treatment and covariate history. These regimes are particularly useful for managing chronic disorders, and fit well into the larger paradigm of personalized medicine. They provide one way to operationalize a clinical decision support system. Statistics plays a key role in the construction of evidence-based dynamic treatment regimes - informing best study design as well as efficient estimation and valid inference. Due to the many novel methodological challenges it offers, this area has been growing in popularity among statisticians in recent years. In this article, we review the key developments in this exciting field of research. In particular, we discuss the sequential multiple assignment randomized trial designs, estimation techniques like Q-learning and marginal structural models, and several inference techniques designed to address the associated non-standard asymptotics. We reference software, whenever available. We also outline some important future directions.},
author = {Chakraborty, Bibhas and Murphy, Susan A.},
doi = {10.1146/annurev-statistics-022513-115553},
issn = {2326-8298},
journal = {Annual Review of Statistics and Its Application},
number = {1},
pages = {447--464},
pmid = {21959306},
title = {{Dynamic Treatment Regimes}},
url = {http://www.annualreviews.org/doi/10.1146/annurev-statistics-022513-115553},
volume = {1},
year = {2014}
}
@article{Nahum2012,
abstract = {Increasing interest in individualizing and adapting intervention services over time has led to the development of adaptive interventions. Adaptive interventions operationalize the individualization of a sequence of intervention options over time via the use of decision rules that input participant information and output intervention recommendations. We introduce Q-learning, which is a generalization of regression analysis to settings in which a sequence of decisions regarding intervention options or services is made. The use of Q is to indicate that this method is used to assess the relative quality of the intervention options. In particular, we use Q-learning with linear regression to estimate the optimal (i.e., most effective) sequence of decision rules. We illustrate how Q-learning can be used with data from sequential multiple assignment randomized trials (SMARTs; Murphy, 2005) to inform the construction of a more deeply tailored sequence of decision rules than those embedded in the SMART design. We also discuss the advantages of Q-learning compared to other data analysis approaches. Finally, we use the Adaptive Interventions for Children With ADHD SMART study (Center for Children and Families, University at Buffalo, State University of New York, William E. Pelham as principal investigator) for illustration.},
author = {Nahum-Shani, Inbal and Qian, Min and Almirall, Daniel and Pelham, William E. and Gnagy, Beth and Fabiano, Gregory A. and Waxmonsky, James G. and Yu, Jihnhee and Murphy, Susan A.},
doi = {10.1037/a0029373},
issn = {1939-1463},
journal = {Psychological Methods},
number = {4},
pages = {478--494},
pmid = {23025434},
title = {{Q-learning: A data analysis method for constructing adaptive interventions}},
url = {http://doi.apa.org/getdoi.cfm?doi=10.1037/a0029373},
volume = {17},
year = {2012}
}
@article{Schulte2014,
abstract = {In clinical practice, physicians make a series of treatment decisions over the course of a patient's disease based on his/her baseline and evolving characteristics. A dynamic treatment regime is a set of sequential decision rules that operationalizes this process. Each rule corresponds to a decision point and dictates the next treatment action based on the accrued information. Using existing data, a key goal is estimating the optimal regime, that, if followed by the patient population, would yield the most favorable outcome on average. Q- and A-learning are two main approaches for this purpose. We provide a detailed account of these methods, study their performance, and illustrate them using data from a depression study.},
archivePrefix = {arXiv},
arxivId = {1202.4177},
author = {Schulte, Phillip J. and Tsiatis, Anastasios A. and Laber, Eric B. and Davidian, Marie},
doi = {10.1214/13-STS450},
eprint = {1202.4177},
issn = {0883-4237},
journal = {Statistical Science},
number = {4},
pages = {640--661},
pmid = {25620840},
title = {{Q- and A-Learning Methods for Estimating Optimal Dynamic Treatment Regimes}},
url = {http://projecteuclid.org/euclid.ss/1421330551},
volume = {29},
year = {2014}
}
@article{gestimation,
abstract = {AIDS Clinical Trial Group Randomized Trial 002 compared the effect of high-dose with low-dose 3-azido-3-deoxythymidine (AZT) on the survival of AIDS patients. Embedded within the trial was an essentially uncontrolled observational study of the effect of prophylaxis therapy for pneumocystis carinii pneumonia on survival. In this paper, we estimate the causal effect of prophylaxis therapy on survival by using the method of G-estimation to estimate the parameters of a structural nested failure time model (SNFTM). Our SNFTM relates a subject's observed time of death and observed prophylaxis history to the time the subject would have died if, possibly contrary to fact, prophylaxis therapy had been withheld. We find that, under our assumptions, the data are consistent with prophylaxis therapy increasing survival by 16\% or decreasing survival by 18\% at the $\alpha$ = 0.05 level. The analytic approach proposed in this paper will be necessary to control bias in any epidemiologic study in which there exists a time-dependent risk factor for death, such as pneumocystis carinii pneumonia history, that (A1) influences subsequent exposure to the agent under study, for example, prophylaxis therapy, and (A2) is itself influenced by past exposure to the study agent. Conditions A1 and A2 will be true whenever there exists a time-dependent risk factor that is simultaneously a confounder and an intermediate variable.},
author = {Robins, James M. and Blevins, Donald and Ritter, Grant and Wulfsohn, Michael},
issn = {1044-3983},
journal = {Epidemiology},
number = {4},
pages = {319--336},
publisher = {Lippincott Williams \& Wilkins},
title = {{G-Estimation} of the Effect of Prophylaxis Therapy for {Pneumocystis carinii} Pneumonia on the Survival of {AIDS} Patients},
url = {http://www.jstor.org/stable/3702734},
volume = {3},
year = {1992}
}
@article{Rubin2005,
abstract = {Causal effects are defined as comparisons of potential outcomes under different treatments on a common set of units. Observed values of the potential outcomes are revealed by the assignment mechanism---a probabilistic model for the treatment each unit receives as a function of covariates and potential outcomes. Fisher made tremendous contributions to causal inference through his work on the design of randomized experiments, but the potential outcomes perspective applies to other complex experiments and nonrandomized studies as well. As noted by Kempthorne in his 1976 discussion of Savage's Fisher lecture, Fisher never bridged his work on experimental design and his work on parametric modeling, a bridge that appears nearly automatic with an appropriate view of the potential outcomes framework, where the potential outcomes and covariates are given a Bayesian distribution to complete the model specification. Also, this framework crisply separates scientific inference for causal effects and decisions based on such inference, a distinction evident in Fisher's discussion of tests of significance versus tests in an accept/reject framework. But Fisher never used the potential outcomes framework, originally proposed by Neyman in the context of randomized experiments, and as a result he provided generally flawed advice concerning the use of the analysis of covariance to adjust for posttreatment concomitants in randomized trials.},
author = {Rubin, Donald B.},
doi = {10.1198/016214504000001880},
issn = {0162-1459},
journal = {Journal of the American Statistical Association},
keywords = {Analysis of covariance,Bayesian inference,Fieller-Creasy,Fisher,Neyman,Observational studies,Principal stratification,Randomized experiments,Rubin causal model,assignment mechanism,assignment-based causal inference,direct causal effects},
number = {469},
pages = {322--331},
title = {{Causal inference using potential outcomes: Design, modeling, decisions}},
volume = {100},
year = {2005}
}
@article{Ventola2014,
abstract = {Health care professionals' use of mobile devices is transforming clinical practice. Numerous medical software applications can now help with tasks ranging from information and time management to clinical decision-making at the point of care.},
author = {Ventola, C. Lee},
issn = {1052-1372},
journal = {P {\&} T: A Peer-Reviewed Journal for Formulary Management},
number = {5},
pages = {356--364},
pmid = {24883008},
title = {{Mobile devices and apps for health care professionals: uses and benefits}},
volume = {39},
year = {2014}
}
% -------
@article{Neyman,
abstract = {In the portion of the paper translated here, Neyman introduces a model for the analysis of field experiments conducted for the purpose of comparing a number of crop varieties, which makes use of a double-indexed array of unknown potential yields, one index corresponding to varieties and the other to plots. The yield corresponding to only one variety will be observed on any given plot, but through an urn model embodying sampling without replacement from this doubly indexed array, Neyman obtains a formula for the variance of the difference between the averages of the observed yields of two varieties. This variance involves the variance over all plots of the potential yields and the correlation coefficient r between the potential yields of the two varieties on the same plot. Since it is impossible to estimate r directly, Neyman advises taking r=1, observing that in practice this may lead to using too large an estimated standard deviation, when comparing two variety means.},
author = {Splawa-Neyman, Jerzy and Dabrowska, D. M. and Speed, T. P.},
doi = {10.1214/ss/1177012032},
issn = {0883-4237},
journal = {Statistical Science},
number = {4},
pages = {465--472},
pmid = {20948974},
title = {{On the Application of Probability Theory to Agricultural Experiments. Essay on Principles. Section 9}},
volume = {5},
year = {1990}
}