1 Data Info

# Load the credit-card dataset from the working directory. Any character
# columns are converted to factors on read.
data <- read.csv("creditcard.csv", stringsAsFactors = TRUE)
# as_tibble() is not base R; presumably the providing package (e.g. tibble)
# is attached earlier in the document -- TODO confirm.
data <- as_tibble(data)

# Compact overview of every column: name, type, and the first few values.
str(data)
## tibble [30,000 x 25] (S3: tbl_df/tbl/data.frame)
##  $ Customer.ID           : int [1:30000] 1 2 3 4 5 6 7 8 9 10 ...
##  $ Credit_Amount         : num [1:30000] 20000 220000 90000 50000 50000 50000 500000 200000 240000 20000 ...
##  $ Gender                : int [1:30000] 2 2 2 2 1 1 1 2 2 1 ...
##  $ Academic_Qualification: int [1:30000] 2 2 2 2 2 1 1 2 3 3 ...
##  $ Marital               : int [1:30000] 1 2 2 1 1 2 2 2 1 2 ...
##  $ Age_Years             : int [1:30000] 24 26 34 37 57 37 29 23 28 35 ...
##  $ Repayment_Status_Jan  : int [1:30000] 2 0 0 0 0 0 0 0 0 0 ...
##  $ Repayment_Status_Feb  : int [1:30000] 2 2 0 0 0 0 0 0 0 0 ...
##  $ Repayment_Status_March: int [1:30000] 0 0 0 0 0 0 0 0 2 0 ...
##  $ Repayment_Status_April: int [1:30000] 0 0 0 0 0 0 0 0 0 0 ...
##  $ Repayment_Status_May  : int [1:30000] 0 0 0 0 0 0 0 0 0 0 ...
##  $ Repayment_Status_June : int [1:30000] 0 2 0 0 0 0 0 0 0 0 ...
##  $ Jan_Bill_Amount       : num [1:30000] 3933 3683 39339 46990 8637 ...
##  $ Feb_Bill_Amount       : num [1:30000] 3103 1735 14037 48333 5570 ...
##  $ March_Bill_Amount     : num [1:30000] 689 2682 23559 49292 35835 ...
##  $ April_Bill_Amount     : num [1:30000] 0 3272 24332 29324 20940 ...
##  $ May_Bill_Amount       : num [1:30000] 0 3455 14848 28858 18146 ...
##  $ June_Bill_Amount      : num [1:30000] 0 3261 15548 28547 18131 ...
##  $ Previous_Payment_Jan  : num [1:30000] 0 0 1619 3000 3000 ...
##  $ Previous_Payment_Feb  : num [1:30000] 679 2000 2500 2029 36672 ...
##  $ Previous_Payment_March: num [1:30000] 0 1000 1000 1200 10000 657 59000 0 552 0 ...
##  $ Previous_Payment_April: num [1:30000] 0 1000 1000 1100 9000 ...
##  $ Previous_Payment_May  : num [1:30000] 0 0 1000 1069 689 ...
##  $ Previous_Payment_June : num [1:30000] 0 2000 5000 1000 679 ...
##  $ Default_Payment       : int [1:30000] 1 1 0 0 0 0 0 0 0 0 ...

1.1 The proportion of events is smaller than the proportion of non-events

# Relative frequency of each Default_Payment value in the dataset.
prop.table(table(data[["Default_Payment"]]))
## 
##      0      1 
## 0.7788 0.2212

1.2 Number of rows and columns

# Number of rows (first element of the dimensions).
dim(data)[1L]
## [1] 30000
# Number of columns (second element of the dimensions).
dim(data)[2L]
## [1] 25