# Source: https://github.com/trinker/wakefield
# Install pacman on first use, then use it to install/load the GitHub version
# of wakefield and the tidyverse packages used below.
# require() is kept (rather than requireNamespace()) so pacman stays attached,
# as in the original one-liner.
if (!require("pacman")) {
  install.packages("pacman")
}
pacman::p_load_gh("trinker/wakefield")
pacman::p_load(dplyr, tidyr, ggplot2)

library(wakefield)

# Generate 10 values with the `race` variable function and inspect the
# attributes attached to the result.
race(n = 10)
attributes(race(n = 10))
# Create random data frames (49 items) with r_data_frame()

# A single variable.
r_data_frame(n = 500, race)

# Several different variables.
r_data_frame(
  n = 500,
  id, race, age, sex, hour, iq, height, died
)

# The same variable function repeated several times.
r_data_frame(
  n = 500,
  id,
  age, age, age,
  grade, grade, grade
)

# Named columns mixed with wakefield variable functions.
r_data_frame(
  n = 500,
  id,
  Scoring = rnorm,
  Smoker = valid,
  race, age, sex, hour, iq, height, died
)
# Data frame with customised variable functions.
# A non-syntactic column name such as `Reading(mins)` must be wrapped in
# backticks; without them the argument does not parse.
r_data_frame(
  n = 500,
  id,
  Scoring = rnorm,
  Smoker = valid,
  `Reading(mins)` = rpois(lambda = 20), # Poisson distribution, mean 20
  race,
  age(x = 8:14),
  sex,
  hour,
  iq,
  height(mean = 50, sd = 10),
  died
)
# Build a 30-row data frame, then pass it through r_na() with prob = 0.4.
r_data_frame(
  n = 30,
  id, race, age, sex, hour, iq, height, died,
  Scoring = rnorm,
  Smoker = valid
) %>%
  r_na(prob = 0.4)
# Series of related likert columns. Each statement sits on its own line, and
# trailing comments are placed so they no longer swallow the closing ")".

set.seed(10)
r_series(likert, j = 3, n = 10)
help("likert") # survey scale with 5 levels, strongly agree ~ strongly disagree

set.seed(10)
as_integer(r_series(likert, j = 5, n = 10, name = "Item")) # as numbers

set.seed(10)
r_data_frame(
  n = 100,
  id,
  age,
  sex,
  r_series(likert, 3, name = "Question") # built with 3 survey questions
)

set.seed(10)
r_data_frame(
  n = 100,
  id,
  age,
  sex,
  r_series(likert, 5, name = "Item", integer = TRUE) # 5 survey answers as numbers
)
# Data can also be created with r_series() directly instead of r_data_frame().
r_series(grade, j = 5, n = 100, relate = "+1_6") %>%
  ggplot(aes(Grade_1)) +
  geom_histogram()

# relate format is "fM_sd":
#   f  = operator (+, -, *, /)
#   M  = mean
#   sd = standard deviation
# The calls below were previously swallowed by the comment line above them.
r_series(age, 5, 100, relate = "+5_0")
r_series(likert, 5, 100, name = "Item", relate = "-.5_.1")
r_series(grade, j = 5, n = 100, relate = "*1.05_.1")
# Correlation matrices (rounded to 2 decimals) of related grade series built
# with different `relate` settings. One statement per line so the script parses.
library(corrplot)

corrplot(round(cor(r_series(grade, 8, 10, relate = "-1_2")), 2))

round(cor(r_series(grade, 8, 10, relate = "+1_2")), 2)
round(cor(r_series(grade, 8, 10, relate = "+1_0")), 2)
round(cor(r_series(grade, 8, 10, relate = "+1_20")), 2)
round(cor(r_series(grade, 8, 10, relate = "+15_20")), 2)
# 12 rows: a name column plus a series of 100 related grade columns.
dat <- r_data_frame(
  12,
  name,
  r_series(grade, 100, relate = "-3_6")
)

# Visualization (uses gather() instead of pivot_longer()).
# Reshape to long form, strip every non-digit from the column name to get a
# numeric time index, then draw one line per name.
dat %>%
  gather(Time, Grade, -c(Name)) %>%
  # "\\D" is the regex class for non-digits; a single backslash ("\D") is an
  # invalid escape in an R string and fails to parse.
  mutate(Time = as.numeric(gsub("\\D", "", Time))) %>%
  ggplot(aes(x = Time, y = Grade, color = Name, group = Name)) +
  geom_line(size = .8) +
  theme_bw()
# Data frame with dummy-coded columns. The inline comment previously swallowed
# the last argument and the closing ")".
set.seed(10)
r_data_frame(
  n = 100,
  id,
  age,
  r_dummy(sex, prefix = TRUE), # turned into 0/1 dummy variables
  r_dummy(political)
)
# Data frame mixing many column types, with NAs inserted, then plotted.
# Each inline comment now ends at its own line break, so it no longer swallows
# the remaining arguments and the pipeline.
set.seed(10)
r_data_frame(
  n = 100,
  id,           # character
  dob,          # date of birth
  animal,       # category (animal)
  grade, grade, # numeric / grades
  death,        # whether died
  dummy,        # 0, 1
  grade_letter, # A+ ~ F
  gender,       # Male, Female
  paragraph,    # paragraph
  sentence      # sentence
) %>%
  r_na() %>%             # insert NAs here and there
  plot(palette = "Set1") # check the variable type of each column, NAs included