import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
df = pd.read_csv('~/Desktop/MGTA463/NY property data.csv')
df.shape
(1070994, 32)
df.describe()
RECORD | B | BLOCK | LOT | LTFRONT | LTDEPTH | STORIES | FULLVAL | AVLAND | AVTOT | ... | EXTOT | EXCD1 | ZIP | BLDFRONT | BLDDEPTH | AVLAND2 | AVTOT2 | EXLAND2 | EXTOT2 | EXCD2 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | 1.070994e+06 | 1.070994e+06 | 1.070994e+06 | 1.070994e+06 | 1.070994e+06 | 1.070994e+06 | 1.014730e+06 | 1.070994e+06 | 1.070994e+06 | 1.070994e+06 | ... | 1.070994e+06 | 638488.000000 | 1.041104e+06 | 1.070994e+06 | 1.070994e+06 | 2.827260e+05 | 2.827320e+05 | 8.744900e+04 | 1.308280e+05 | 92948.000000 |
mean | 5.354975e+05 | 3.215427e+00 | 4.757239e+03 | 3.647221e+02 | 3.663530e+01 | 8.886159e+01 | 5.006918e+00 | 8.742645e+05 | 8.506792e+04 | 2.272382e+05 | ... | 9.118698e+04 | 1602.014232 | 1.092396e+04 | 2.304277e+01 | 3.992284e+01 | 2.462357e+05 | 7.139114e+05 | 3.512357e+05 | 6.567683e+05 | 1364.041679 |
std | 3.091695e+05 | 1.201197e+00 | 3.676579e+03 | 8.532152e+02 | 7.403284e+01 | 7.639628e+01 | 8.365707e+00 | 1.158243e+07 | 4.057260e+06 | 6.877529e+06 | ... | 6.508403e+06 | 1384.226741 | 5.284208e+02 | 3.557970e+01 | 4.270715e+01 | 6.178963e+06 | 1.165253e+07 | 1.080221e+07 | 1.607251e+07 | 1094.705653 |
min | 1.000000e+00 | 1.000000e+00 | 1.000000e+00 | 1.000000e+00 | 0.000000e+00 | 0.000000e+00 | 1.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | ... | 0.000000e+00 | 1010.000000 | 1.000100e+04 | 0.000000e+00 | 0.000000e+00 | 3.000000e+00 | 3.000000e+00 | 1.000000e+00 | 7.000000e+00 | 1011.000000 |
25% | 2.677492e+05 | 3.000000e+00 | 1.555000e+03 | 2.300000e+01 | 1.900000e+01 | 8.000000e+01 | 2.000000e+00 | 3.040000e+05 | 9.180000e+03 | 1.837400e+04 | ... | 0.000000e+00 | 1017.000000 | 1.031400e+04 | 1.500000e+01 | 2.600000e+01 | 5.705000e+03 | 3.391200e+04 | 2.090000e+03 | 2.870000e+03 | 1017.000000 |
50% | 5.354975e+05 | 3.000000e+00 | 4.047000e+03 | 4.900000e+01 | 2.500000e+01 | 1.000000e+02 | 2.000000e+00 | 4.470000e+05 | 1.367800e+04 | 2.534000e+04 | ... | 1.620000e+03 | 1017.000000 | 1.121400e+04 | 2.000000e+01 | 3.900000e+01 | 2.014500e+04 | 7.996250e+04 | 3.048000e+03 | 3.706200e+04 | 1017.000000 |
75% | 8.032458e+05 | 4.000000e+00 | 6.898000e+03 | 1.430000e+02 | 4.000000e+01 | 1.000000e+02 | 3.000000e+00 | 6.190000e+05 | 1.974000e+04 | 4.543800e+04 | ... | 2.090000e+03 | 1017.000000 | 1.136400e+04 | 2.400000e+01 | 5.000000e+01 | 6.264000e+04 | 2.405510e+05 | 3.177900e+04 | 1.068408e+05 | 1017.000000 |
max | 1.070994e+06 | 5.000000e+00 | 1.635000e+04 | 9.978000e+03 | 9.999000e+03 | 9.999000e+03 | 1.190000e+02 | 6.150000e+09 | 2.668500e+09 | 4.668309e+09 | ... | 4.668309e+09 | 7170.000000 | 3.380300e+04 | 7.575000e+03 | 9.393000e+03 | 2.371005e+09 | 4.501180e+09 | 2.371005e+09 | 4.501180e+09 | 7160.000000 |
8 rows × 21 columns
# build a list to remove those properties that are not interested, mainly property owned by city, state or government
remove_list = ['PARKCHESTER PRESERVAT',
'PARKS AND RECREATION',
'DCAS',
'HOUSING PRESERVATION',
'CITY OF NEW YORK',
'DEPT OF ENVIRONMENTAL',
'BOARD OF EDUCATION',
'NEW YORK CITY HOUSING',
'CNY/NYCTA',
'NYC HOUSING PARTNERSH',
'DEPARTMENT OF BUSINES',
'DEPT OF TRANSPORTATIO',
'MTA/LIRR',
'PARCKHESTER PRESERVAT',
'MH RESIDENTIAL 1, LLC',
'LINCOLN PLAZA ASSOCIA',
'UNITED STATES OF AMER',
'U S GOVERNMENT OWNRD',
'THE CITY OF NEW YORK',
'NYS URBAN DEVELOPMENT',
'NYS DEPT OF ENVIRONME',
'CULTURAL AFFAIRS',
'DEPT OF GENERAL SERVI',
'DEPT RE-CITY OF NY']
# create a new dataset that has already remove all the unwanted property
property_data = df[~df['OWNER'].isin(remove_list)].reset_index(drop=True)
# check whether the unwanted record is removed
missing_zips = np.where(pd.isnull(property_data["ZIP"]))[0]
len(missing_zips)
21772
for i in range(len(missing_zips)):
if(property_data.loc[missing_zips[i] + 1, "ZIP"] == property_data.loc[missing_zips[i] - 1, "ZIP"]):
property_data.loc[missing_zips[i] , "ZIP"] = property_data.loc[missing_zips[i] - 1, "ZIP"]
for i in range(len(missing_zips)):
property_data.loc[missing_zips[i] , "ZIP"] = property_data.loc[missing_zips[i] - 1, "ZIP"]
missing_zips = np.where(pd.isnull(property_data["ZIP"]))[0]
len(missing_zips)
0
# filling missing value for FULLVAL, AVLAND, AVTOT
property_data["FULLVAL"].replace("NaN", 0)
temp = property_data[property_data["FULLVAL"]!= 0]
mean_fullval = temp.groupby("TAXCLASS")["FULLVAL"].mean()
print(mean_fullval)
TAXCLASS 1 5.700059e+05 1A 3.352942e+05 1B 3.738399e+05 1C 7.615359e+05 1D 2.233614e+07 2 8.005832e+05 2A 8.639066e+05 2B 1.254525e+06 2C 7.723493e+05 3 8.510005e+04 4 2.772747e+06 Name: FULLVAL, dtype: float64
property_data["AVLAND"].replace("NaN", 0)
temp_avland = property_data[property_data["AVLAND"]!=0]
mean_avland = temp_avland.groupby("TAXCLASS")["AVLAND"].mean()
property_data["AVTOT"].replace("NaN", 0)
temp_avtot = property_data[property_data["AVTOT"]!=0]
mean_avtot = temp_avtot.groupby("TAXCLASS")["AVTOT"].mean()
%%time
for index in mean_fullval.index:
property_data.loc[(property_data["FULLVAL"] == 0)& (property_data["TAXCLASS"] == index) , "FULLVAL"] = mean_fullval[index]
property_data.loc[(property_data["AVLAND"] == 0)& (property_data["TAXCLASS"] == index) , "AVLAND"] = mean_avland[index]
property_data.loc[(property_data["AVTOT"] == 0)& (property_data["TAXCLASS"] == index) , "AVTOT"] = mean_avtot[index]
CPU times: user 1.61 s, sys: 10.1 ms, total: 1.62 s Wall time: 1.59 s
# Filling missing value for STORIES
temp = property_data[property_data["STORIES"].isnull()]
len(temp)
43968
temp['TAXCLASS'].value_counts()
1B 19055 4 17979 2 3434 3 2416 1 877 2C 138 2B 34 2A 30 1A 5 Name: TAXCLASS, dtype: int64
mean_stories = property_data.groupby("TAXCLASS")["STORIES"].mean()
print(mean_stories)
TAXCLASS 1 2.111633 1A 1.656867 1B 4.000000 1C 3.052748 1D 1.068966 2 16.310549 2A 2.844032 2B 4.001635 2C 4.744977 3 1.000000 4 5.517257 Name: STORIES, dtype: float64
len(property_data[property_data["STORIES"] == 0])
0
property_data["STORIES"] = property_data["STORIES"].fillna(value=0)
for index in mean_stories.index:
property_data.loc[ (property_data["STORIES"] == 0) & ( property_data["TAXCLASS"] == index), "STORIES"] = mean_stories[index]
property_data.head().transpose()
0 | 1 | 2 | 3 | 4 | |
---|---|---|---|---|---|
RECORD | 1 | 2 | 9 | 10 | 11 |
BBLE | 1000010101 | 1000010201 | 1000041001 | 1000041002 | 1000041003 |
B | 1 | 1 | 1 | 1 | 1 |
BLOCK | 1 | 1 | 4 | 4 | 4 |
LOT | 101 | 201 | 1001 | 1002 | 1003 |
EASEMENT | NaN | NaN | NaN | NaN | NaN |
OWNER | U S GOVT LAND & BLDGS | U S GOVT LAND & BLDGS | TRZ HOLDINGS, LLC | TRZ HOLDINGS, LLC | TRZ HOLDINGS, LLC |
BLDGCL | P7 | Z9 | R5 | R5 | R5 |
TAXCLASS | 4 | 4 | 4 | 4 | 4 |
LTFRONT | 500 | 27 | 0 | 0 | 0 |
LTDEPTH | 1046 | 0 | 0 | 0 | 0 |
EXT | NaN | NaN | NaN | NaN | NaN |
STORIES | 5.517257 | 5.517257 | 50.0 | 50.0 | 50.0 |
FULLVAL | 21400000.0 | 193800000.0 | 3944762.0 | 5700930.0 | 10600000.0 |
AVLAND | 4225500.0 | 14310000.0 | 636093.0 | 919276.0 | 967500.0 |
AVTOT | 9630000.0 | 87210000.0 | 1775143.0 | 2565419.0 | 4770000.0 |
EXLAND | 4225500.0 | 14310000.0 | 0.0 | 0.0 | 0.0 |
EXTOT | 9630000.0 | 87210000.0 | 0.0 | 0.0 | 0.0 |
EXCD1 | 4600.0 | 4600.0 | NaN | NaN | NaN |
STADDR | 1 LIBERTY ISLAND | 1 ELLIS ISLAND | 1 WATER STREET | 1 WATER STREET | 1 WATER STREET |
ZIP | 10004.0 | 10004.0 | 10004.0 | 10004.0 | 10004.0 |
EXMPTCL | X3 | X3 | NaN | NaN | NaN |
BLDFRONT | 0 | 0 | 0 | 0 | 0 |
BLDDEPTH | 0 | 0 | 0 | 0 | 0 |
AVLAND2 | 3775500.0 | 11111400.0 | 636093.0 | 919276.0 | 967500.0 |
AVTOT2 | 8613000.0 | 80690400.0 | 2049290.0 | 2961617.0 | 5483912.0 |
EXLAND2 | 3775500.0 | 11111400.0 | NaN | NaN | NaN |
EXTOT2 | 8613000.0 | 80690400.0 | NaN | NaN | NaN |
EXCD2 | NaN | NaN | NaN | NaN | NaN |
PERIOD | FINAL | FINAL | FINAL | FINAL | FINAL |
YEAR | 2010/11 | 2010/11 | 2010/11 | 2010/11 | 2010/11 |
VALTYPE | AC-TR | AC-TR | AC-TR | AC-TR | AC-TR |
# property_data.loc[property_data['LTFRONT']==0,'LTFRONT']=np.nan
# property_data.loc[property_data['LTDEPTH']==0,'LTDEPTH']=np.nan
# property_data.loc[property_data['BLDFRONT']==0,'BLDFRONT']=np.nan
# property_data.loc[property_data['BLDDEPTH']==0,'BLDDEPTH']=np.nan
# property_data.loc[property_data['LTFRONT']==1,'LTFRONT']=np.nan
# property_data.loc[property_data['LTDEPTH']==1,'LTDEPTH']=np.nan
# property_data.loc[property_data['BLDFRONT']==1,'BLDFRONT']=np.nan
# property_data.loc[property_data['BLDDEPTH']==1,'BLDDEPTH']=np.nan
# Filling missing value for Building Sizes
f = property_data[['LTFRONT','LTDEPTH','BLDFRONT','BLDDEPTH']]
# replace 0 and 1 values in LTFRONT,LTDEPTH,BLDFRONT,BLDDEPTH with np.nan(not taken into account in mean)
f = f.replace(0,np.nan)
f = f.replace(1,np.nan)
property_data[['LTFRONT','LTDEPTH','BLDFRONT','BLDDEPTH']] = f
property_data
RECORD | BBLE | B | BLOCK | LOT | EASEMENT | OWNER | BLDGCL | TAXCLASS | LTFRONT | ... | BLDFRONT | BLDDEPTH | AVLAND2 | AVTOT2 | EXLAND2 | EXTOT2 | EXCD2 | PERIOD | YEAR | VALTYPE | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 1000010101 | 1 | 1 | 101 | NaN | U S GOVT LAND & BLDGS | P7 | 4 | 500.0 | ... | NaN | NaN | 3775500.0 | 8613000.0 | 3775500.0 | 8613000.0 | NaN | FINAL | 2010/11 | AC-TR |
1 | 2 | 1000010201 | 1 | 1 | 201 | NaN | U S GOVT LAND & BLDGS | Z9 | 4 | 27.0 | ... | NaN | NaN | 11111400.0 | 80690400.0 | 11111400.0 | 80690400.0 | NaN | FINAL | 2010/11 | AC-TR |
2 | 9 | 1000041001 | 1 | 4 | 1001 | NaN | TRZ HOLDINGS, LLC | R5 | 4 | NaN | ... | NaN | NaN | 636093.0 | 2049290.0 | NaN | NaN | NaN | FINAL | 2010/11 | AC-TR |
3 | 10 | 1000041002 | 1 | 4 | 1002 | NaN | TRZ HOLDINGS, LLC | R5 | 4 | NaN | ... | NaN | NaN | 919276.0 | 2961617.0 | NaN | NaN | NaN | FINAL | 2010/11 | AC-TR |
4 | 11 | 1000041003 | 1 | 4 | 1003 | NaN | TRZ HOLDINGS, LLC | R5 | 4 | NaN | ... | NaN | NaN | 967500.0 | 5483912.0 | NaN | NaN | NaN | FINAL | 2010/11 | AC-TR |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1046821 | 1070990 | 5080500083 | 5 | 8050 | 83 | NaN | TOBIN, GALE | A1 | 1 | 60.0 | ... | 30.0 | 26.0 | NaN | NaN | NaN | NaN | NaN | FINAL | 2010/11 | AC-TR |
1046822 | 1070991 | 5080500086 | 5 | 8050 | 86 | NaN | SHERRI MILINAZZO | A1 | 1 | 62.0 | ... | 32.0 | 26.0 | NaN | NaN | NaN | NaN | NaN | FINAL | 2010/11 | AC-TR |
1046823 | 1070992 | 5080500089 | 5 | 8050 | 89 | NaN | JOHN GERVASI | A1 | 1 | 53.0 | ... | 24.0 | 32.0 | NaN | NaN | NaN | NaN | 1017.0 | FINAL | 2010/11 | AC-TR |
1046824 | 1070993 | 5080500092 | 5 | 8050 | 92 | NaN | RITA M MOOG | A1 | 1 | 52.0 | ... | 32.0 | 21.0 | NaN | NaN | NaN | NaN | NaN | FINAL | 2010/11 | AC-TR |
1046825 | 1070994 | 5080500094 | 5 | 8050 | 94 | NaN | EDWARD DONOHUE | A1 | 1 | 50.0 | ... | 30.0 | 24.0 | NaN | NaN | NaN | NaN | NaN | FINAL | 2010/11 | AC-TR |
1046826 rows × 32 columns
mean_LTFRONT = property_data.groupby(property_data['TAXCLASS'])['LTFRONT'].mean()
mean_LTDEPTH = property_data.groupby(property_data['TAXCLASS'])['LTDEPTH'].mean()
mean_BLDFRONT = property_data.groupby(property_data['TAXCLASS'])['BLDFRONT'].mean()
mean_BLDDEPTH = property_data.groupby(property_data['TAXCLASS'])['BLDDEPTH'].mean()
# fill in missing value with the mean value obtained above
for index in mean_LTFRONT.index:
property_data.loc[(property_data['LTFRONT'].isnull())&(property_data['TAXCLASS'] == index),'LTFRONT'] = mean_LTFRONT[index]
property_data.loc[(property_data['LTDEPTH'].isnull())&(property_data['TAXCLASS'] == index),'LTDEPTH'] = mean_LTDEPTH[index]
property_data.loc[(property_data['BLDFRONT'].isnull())&(property_data['TAXCLASS'] == index),'BLDFRONT'] = mean_BLDFRONT[index]
property_data.loc[(property_data['BLDDEPTH'].isnull())&(property_data['TAXCLASS'] == index),'BLDDEPTH'] = mean_BLDDEPTH[index]
# convert zip into int from float
property_data["ZIP"] = property_data["ZIP"].astype(str)
property_data["zip3"] = property_data["ZIP"].str[:3]
property_data["ltsize"] = property_data["LTFRONT"] * property_data["LTDEPTH"]
property_data["bldsize"] = property_data["BLDFRONT"] * property_data["BLDDEPTH"]
property_data["bldvol"] = property_data["bldsize"] * property_data["STORIES"]
property_data["r1"] = property_data["FULLVAL"]/ property_data["ltsize"]
property_data["r2"] = property_data["FULLVAL"]/ property_data["bldsize"]
property_data["r3"] = property_data["FULLVAL"]/ property_data["bldvol"]
property_data["r4"] = property_data["AVLAND"] / property_data["ltsize"]
property_data["r5"] = property_data["AVLAND"] / property_data["bldsize"]
property_data["r6"] = property_data["AVLAND"] / property_data["bldvol"]
property_data["r7"] = property_data["AVTOT"] / property_data["ltsize"]
property_data["r8"] = property_data["AVTOT"] / property_data["bldsize"]
property_data["r9"] = property_data["AVTOT"] / property_data["bldvol"]
ninevars = ["r1","r2","r3","r4","r5","r6","r7","r8","r9"]
zip5_mean = property_data.groupby("ZIP")[ninevars].mean()
zip3_mean = property_data.groupby("zip3")[ninevars].mean()
taxclass_mean = property_data.groupby("TAXCLASS")[ninevars].mean()
borough_mean = property_data.groupby("B")[ninevars].mean()
property_data = property_data.join(zip5_mean, on = "ZIP", rsuffix = '_zip5')
property_data = property_data.join(zip3_mean, on = "zip3", rsuffix = '_zip3')
property_data = property_data.join(taxclass_mean, on = "TAXCLASS", rsuffix = '_taxclass')
property_data = property_data.join(borough_mean, on = "B", rsuffix = '_boro')
rsuffix = ['_zip5', '_zip3' , '_taxclass' , '_boro' ]
for var in ninevars:
for r in rsuffix:
property_data[str(var)+r] = property_data[var] /property_data[str(var)+r]
property_data.describe()
RECORD | B | BLOCK | LOT | LTFRONT | LTDEPTH | STORIES | FULLVAL | AVLAND | AVTOT | ... | r9_taxclass | r1_boro | r2_boro | r3_boro | r4_boro | r5_boro | r6_boro | r7_boro | r8_boro | r9_boro | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | 1.046826e+06 | 1.046826e+06 | 1.046826e+06 | 1.046826e+06 | 1.046826e+06 | 1.046826e+06 | 1.046826e+06 | 1.046826e+06 | 1.046826e+06 | 1.046826e+06 | ... | 1.046826e+06 | 1.046826e+06 | 1.046826e+06 | 1.046826e+06 | 1.046826e+06 | 1.046826e+06 | 1.046826e+06 | 1.046826e+06 | 1.046826e+06 | 1.046826e+06 |
mean | 5.371221e+05 | 3.221716e+00 | 4.757325e+03 | 3.498403e+02 | 5.031814e+01 | 1.071913e+02 | 4.972769e+00 | 8.286345e+05 | 6.857326e+04 | 2.055624e+05 | ... | 1.000000e+00 | 1.000000e+00 | 1.000000e+00 | 1.000000e+00 | 1.000000e+00 | 1.000000e+00 | 1.000000e+00 | 1.000000e+00 | 1.000000e+00 | 1.000000e+00 |
std | 3.080878e+05 | 1.198952e+00 | 3.677504e+03 | 8.258228e+02 | 6.073837e+01 | 5.472731e+01 | 8.213881e+00 | 8.277944e+06 | 2.051986e+06 | 5.883099e+06 | ... | 5.627885e+00 | 2.365937e+00 | 2.002136e+00 | 2.542284e+00 | 6.308802e+00 | 1.714687e+01 | 1.604233e+01 | 7.276792e+00 | 2.193179e+01 | 1.572688e+01 |
min | 1.000000e+00 | 1.000000e+00 | 1.000000e+00 | 1.000000e+00 | 2.000000e+00 | 2.000000e+00 | 1.000000e+00 | 4.000000e+00 | 1.000000e+00 | 1.000000e+00 | ... | 4.896768e-06 | 2.317386e-06 | 1.323943e-06 | 3.128685e-07 | 9.337982e-07 | 4.413439e-06 | 9.937551e-07 | 3.875516e-07 | 8.187498e-06 | 2.777226e-06 |
25% | 2.731562e+05 | 3.000000e+00 | 1.542000e+03 | 2.300000e+01 | 2.100000e+01 | 9.900000e+01 | 2.000000e+00 | 3.180000e+05 | 9.698000e+03 | 1.892600e+04 | ... | 4.510961e-01 | 4.192226e-01 | 3.972038e-01 | 3.481865e-01 | 2.677718e-01 | 2.964940e-01 | 2.167336e-01 | 2.576625e-01 | 4.147249e-01 | 3.906492e-01 |
50% | 5.390825e+05 | 3.000000e+00 | 4.078000e+03 | 4.900000e+01 | 3.000000e+01 | 1.000000e+02 | 2.000000e+00 | 4.540000e+05 | 1.390400e+04 | 2.581500e+04 | ... | 8.398807e-01 | 8.383637e-01 | 9.485483e-01 | 9.177087e-01 | 6.692810e-01 | 7.748539e-01 | 6.965961e-01 | 5.143176e-01 | 6.964140e-01 | 6.729240e-01 |
75% | 8.027368e+05 | 4.000000e+00 | 6.920000e+03 | 1.390000e+02 | 5.500000e+01 | 1.100000e+02 | 4.000000e+00 | 6.280000e+05 | 2.008000e+04 | 4.704000e+04 | ... | 1.145319e+00 | 1.235402e+00 | 1.271407e+00 | 1.280152e+00 | 1.061934e+00 | 1.052615e+00 | 1.000643e+00 | 8.141981e-01 | 9.299754e-01 | 9.393327e-01 |
max | 1.070994e+06 | 5.000000e+00 | 1.635000e+04 | 9.502000e+03 | 9.999000e+03 | 9.619000e+03 | 1.190000e+02 | 5.279000e+09 | 1.792809e+09 | 4.668309e+09 | ... | 4.204082e+03 | 7.271440e+02 | 7.565810e+02 | 9.328696e+02 | 2.054622e+03 | 1.546654e+04 | 9.483223e+03 | 2.407601e+03 | 2.124127e+04 | 1.381933e+04 |
8 rows × 68 columns
t = property_data[['RECORD']]
property_data[['RECORD']]
RECORD | |
---|---|
0 | 1 |
1 | 2 |
2 | 9 |
3 | 10 |
4 | 11 |
... | ... |
1046821 | 1070990 |
1046822 | 1070991 |
1046823 | 1070992 |
1046824 | 1070993 |
1046825 | 1070994 |
1046826 rows × 1 columns
variables = property_data.loc[:,'r1':'r9_boro']
variables
r1 | r2 | r3 | r4 | r5 | r6 | r7 | r8 | r9 | r1_zip5 | ... | r9_taxclass | r1_boro | r2_boro | r3_boro | r4_boro | r5_boro | r6_boro | r7_boro | r8_boro | r9_boro | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 40.917782 | 3994.412456 | 723.985188 | 8.079350 | 788.709805 | 142.953243 | 18.413002 | 1797.485605 | 325.793334 | 0.096029 | ... | 4.715588 | 0.113902 | 7.712077 | 6.469193 | 0.241395 | 16.606462 | 13.685555 | 0.198288 | 14.042014 | 14.957255 |
1 | 57678.882190 | 36173.697854 | 6556.463990 | 4258.951518 | 2671.030012 | 484.122805 | 25955.496986 | 16278.164034 | 2950.408795 | 135.364878 | ... | 42.704717 | 160.560217 | 69.841147 | 58.585497 | 127.249178 | 56.239136 | 46.347248 | 279.513149 | 127.165526 | 135.454020 |
2 | 385.182796 | 736.308714 | 14.726174 | 62.110738 | 118.729804 | 2.374596 | 173.332268 | 331.338940 | 6.626779 | 0.903974 | ... | 0.095917 | 1.072230 | 1.421603 | 0.131586 | 1.855748 | 2.499883 | 0.227331 | 1.866605 | 2.588430 | 0.304237 |
3 | 556.662267 | 1064.105879 | 21.282118 | 89.761892 | 171.587267 | 3.431745 | 250.498069 | 478.847739 | 9.576955 | 1.306414 | ... | 0.138618 | 1.549576 | 2.054487 | 0.190167 | 2.681911 | 3.612808 | 0.328536 | 2.697598 | 3.740773 | 0.439680 |
4 | 1035.027624 | 1978.540749 | 39.570815 | 94.470682 | 180.588507 | 3.611770 | 465.762431 | 890.343337 | 17.806867 | 2.429076 | ... | 0.257740 | 2.881198 | 3.820001 | 0.353586 | 2.822600 | 3.802332 | 0.345771 | 5.015767 | 6.955390 | 0.817518 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1046821 | 60.833333 | 561.538462 | 224.615385 | 2.018889 | 18.635897 | 7.454359 | 3.239167 | 29.900000 | 11.960000 | 0.423922 | ... | 0.824086 | 0.398834 | 1.200624 | 0.879509 | 0.226228 | 1.008640 | 0.738326 | 0.150641 | 0.942288 | 0.685258 |
1046822 | 38.483305 | 653.846154 | 261.538462 | 0.658107 | 11.181490 | 4.472596 | 1.056098 | 17.943510 | 7.177404 | 0.268174 | ... | 0.494548 | 0.252303 | 1.397986 | 1.024085 | 0.073745 | 0.605181 | 0.442994 | 0.049115 | 0.565484 | 0.411235 |
1046823 | 33.174373 | 625.000000 | 312.500000 | 1.022185 | 19.257812 | 9.628906 | 1.587878 | 29.915365 | 14.957682 | 0.231178 | ... | 1.030636 | 0.217497 | 1.336310 | 1.223631 | 0.114542 | 1.042300 | 0.953706 | 0.073846 | 0.942772 | 0.857013 |
1046824 | 76.781674 | 808.035714 | 404.017857 | 1.751697 | 18.434524 | 9.217262 | 3.257494 | 34.281250 | 17.140625 | 0.535059 | ... | 1.181049 | 0.503394 | 1.727658 | 1.581981 | 0.196288 | 0.997741 | 0.912935 | 0.151494 | 1.080362 | 0.982087 |
1046825 | 118.250000 | 656.944444 | 328.472222 | 3.515250 | 19.529167 | 9.764583 | 5.598500 | 31.102778 | 15.551389 | 0.824034 | ... | 1.071545 | 0.775267 | 1.404611 | 1.286173 | 0.393904 | 1.056987 | 0.967145 | 0.260365 | 0.980193 | 0.891030 |
1046826 rows × 45 columns
ft = t.join(variables, how='outer')
ft.to_csv('NY_45variables.csv')
# round to 2 decimal point
stats = variables.describe().transpose().round(2)
#stats.to_csv('stats_on_vars.csv')