Data Info
# Load the credit-card dataset (path is relative to the working directory).
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, which would silently change how the file is read.
data <- read.csv("creditcard.csv", stringsAsFactors = TRUE)
# Convert the data.frame to a tibble.
data <- as_tibble(data)
# Show each column's type and its first few values.
str(data)
## tibble [30,000 x 25] (S3: tbl_df/tbl/data.frame)
## $ Customer.ID : int [1:30000] 1 2 3 4 5 6 7 8 9 10 ...
## $ Credit_Amount : num [1:30000] 20000 220000 90000 50000 50000 50000 500000 200000 240000 20000 ...
## $ Gender : int [1:30000] 2 2 2 2 1 1 1 2 2 1 ...
## $ Academic_Qualification: int [1:30000] 2 2 2 2 2 1 1 2 3 3 ...
## $ Marital : int [1:30000] 1 2 2 1 1 2 2 2 1 2 ...
## $ Age_Years : int [1:30000] 24 26 34 37 57 37 29 23 28 35 ...
## $ Repayment_Status_Jan : int [1:30000] 2 0 0 0 0 0 0 0 0 0 ...
## $ Repayment_Status_Feb : int [1:30000] 2 2 0 0 0 0 0 0 0 0 ...
## $ Repayment_Status_March: int [1:30000] 0 0 0 0 0 0 0 0 2 0 ...
## $ Repayment_Status_April: int [1:30000] 0 0 0 0 0 0 0 0 0 0 ...
## $ Repayment_Status_May : int [1:30000] 0 0 0 0 0 0 0 0 0 0 ...
## $ Repayment_Status_June : int [1:30000] 0 2 0 0 0 0 0 0 0 0 ...
## $ Jan_Bill_Amount : num [1:30000] 3933 3683 39339 46990 8637 ...
## $ Feb_Bill_Amount : num [1:30000] 3103 1735 14037 48333 5570 ...
## $ March_Bill_Amount : num [1:30000] 689 2682 23559 49292 35835 ...
## $ April_Bill_Amount : num [1:30000] 0 3272 24332 29324 20940 ...
## $ May_Bill_Amount : num [1:30000] 0 3455 14848 28858 18146 ...
## $ June_Bill_Amount : num [1:30000] 0 3261 15548 28547 18131 ...
## $ Previous_Payment_Jan : num [1:30000] 0 0 1619 3000 3000 ...
## $ Previous_Payment_Feb : num [1:30000] 679 2000 2500 2029 36672 ...
## $ Previous_Payment_March: num [1:30000] 0 1000 1000 1200 10000 657 59000 0 552 0 ...
## $ Previous_Payment_April: num [1:30000] 0 1000 1000 1100 9000 ...
## $ Previous_Payment_May : num [1:30000] 0 0 1000 1069 689 ...
## $ Previous_Payment_June : num [1:30000] 0 2000 5000 1000 679 ...
## $ Default_Payment : int [1:30000] 1 1 0 0 0 0 0 0 0 0 ...
The proportion of events (Default_Payment = 1) is smaller than the proportion of non-events (Default_Payment = 0).
# Share of each Default_Payment value: counts from table() scaled to sum to 1.
prop.table(table(data[["Default_Payment"]]))
##
## 0 1
## 0.7788 0.2212
Number of rows and columns
## [1] 30000
## [1] 25