Skip to the content.

Csvdatabase_ipynb_2_ • 13 min read

import pandas as pd

# Absolute path to the career game-log CSV; change it to wherever your copy of the file lives
file_path = '/Users/Jayden.Chen/vscode/csp/modeldata/lebron_career.csv'

# Parse the CSV into a pandas DataFrame
lebron_career_df = pd.read_csv(filepath_or_buffer=file_path)

# Print the leading five rows as a quick sanity check that the load worked
print(lebron_career_df.head(n=5))
   game        date     age team  opp   result     mp  fg  fga    fgp  ...  \
0     1  2003-10-29  18-303  CLE  SAC  L (-14)  42:00  12   20  0.600  ...   
1     2  2003-10-30  18-304  CLE  PHO   L (-9)  41:00   8   17  0.471  ...   
2     3  2003-11-01  18-306  CLE  POR  L (-19)  39:00   3   12  0.250  ...   
3     4  2003-11-05  18-310  CLE  DEN   L (-4)  41:00   3   11  0.273  ...   
4     5  2003-11-07  18-312  CLE  IND   L (-1)  44:00   8   18  0.444  ...   

   orb  drb  trb  ast  stl  blk  tov  pts  game_score  minus_plus  
0    2    4    6    9    4    0    2   25        24.7          -9  
1    2   10   12    8    1    0    7   21        14.7          -3  
2    0    4    4    6    2    0    2    8         5.0         -21  
3    2    9   11    7    2    3    2    7        11.2          -3  
4    0    5    5    3    0    0    7   23         9.0          -7  

[5 rows x 26 columns]

Clean Data

# Columns retained in the cleaned frame
selected_columns = [
    'opp', 'mp', 'pts', 'fga', 'fgp',
    'ast', 'orb', 'drb', 'stl', 'blk', 'tov',
]
# Take an independent copy so the in-place cleanup below leaves lebron_career_df untouched
lebron_career_cleaned = lebron_career_df.loc[:, selected_columns].copy()

# Discard every row that has a missing value in any retained column
lebron_career_cleaned.dropna(axis=0, how='any', inplace=True)

# NOTE: minutes played live in the 'mp' column (there is no 'minutes_played' column).
# The raw preview shows clock-style strings such as '42:00'; pd.to_numeric(..., errors='coerce')
# would turn those into NaN, so split on ':' explicitly if a numeric value is needed.

# Further preprocessing (e.g. encoding the categorical 'opp' column) could be added here

print(lebron_career_cleaned.columns)
display(lebron_career_cleaned)

Index(['opp', 'mp', 'pts', 'fga', 'fgp', 'ast', 'orb', 'drb', 'stl', 'blk',
       'tov'],
      dtype='object')
opp mp pts fga fgp ast orb drb stl blk tov
0 SAC 2520 25 20 0.600 9 2 4 4 0 2
1 PHO 2460 21 17 0.471 8 2 10 1 0 7
2 POR 2340 8 12 0.250 6 0 4 2 0 2
3 DEN 2460 7 11 0.273 7 2 9 2 3 2
4 IND 2640 23 18 0.444 3 0 5 0 0 7
... ... ... ... ... ... ... ... ... ... ... ...
1127 PHO 1993 28 14 0.500 11 0 12 3 2 5
1128 POR 2465 35 25 0.600 6 0 14 2 3 6
1129 CHI 2393 33 26 0.577 12 1 12 1 2 6
1130 MIL 2401 40 29 0.552 10 5 7 2 1 6
1131 TOR 2364 35 19 0.579 17 1 6 1 0 0

1132 rows × 11 columns

Machine Learning

import pandas as pd

# Columns of interest for the matchup analysis.
# NOTE: this list is only defined here; the frame loaded below is not subset by it.
selected_columns = [
    'opp', 'mp', 'pts', 'ast', 'orb', 'drb',
    'stl', 'blk', 'tov', 'fg', 'fga',
]

# Read the full LeBron career game log into a DataFrame
lebron_career_df = pd.read_csv(
    filepath_or_buffer='/Users/Jayden.Chen/vscode/csp/modeldata/lebron_career.csv',
)

# Filter data for a specific opponent
def filter_opponent(data, opponent):
    """Return the rows of *data* whose 'opp' column equals *opponent*.

    The comparison is an exact match; row order and the original index
    labels are preserved. An opponent that never appears yields an empty
    frame with the same columns.
    """
    is_matchup = data['opp'].eq(opponent)
    return data.loc[is_matchup]

# Get LeBron's last 10 matchups against a specific opponent
def last_10_matchups(data, n=10):
    """Return the last *n* rows of *data* (10 by default) as an independent copy.

    Args:
        data: DataFrame of games, assumed to be in chronological order so
            that the trailing rows are the most recent matchups.
        n: Number of trailing rows to keep. If *data* has fewer than *n*
            rows, all of them are returned.

    Returns:
        A new DataFrame holding the selected rows. A copy is returned (rather
        than a slice of *data*) so callers can add or overwrite columns on the
        result without pandas emitting SettingWithCopyWarning.
    """
    return data.tail(n).copy()

# Ask for the opponent team. The 'opp' values shown in the data preview are
# upper-case abbreviations (e.g. 'SAC', 'PHO'), so strip stray whitespace and
# upper-case the input before matching.
opponent = input("Enter the opponent team: ").strip().upper()

# Filter data for the specified opponent
opponent_data = filter_opponent(lebron_career_df, opponent)

# An unknown abbreviation matches no rows; stop here with a clear message
# instead of going on to report NaN averages.
if opponent_data.empty:
    raise ValueError(f"No games found against opponent {opponent!r}")

# Get LeBron's last 10 matchups against the opponent
last_10_games = last_10_matchups(opponent_data)

# Collapse an 'mp' clock string into a single integer
def convert_to_minutes(time_str):
    """Turn a two-field ``"A:B"`` string into the integer ``A * 60 + B``.

    The 'mp' values in the data preview look like ``"42:00"``. Read as
    minutes:seconds, the result is a total number of *seconds* (2520 for
    ``"42:00"``), despite the function's name; divide by 60 to get minutes.

    Raises:
        ValueError: if *time_str* does not consist of exactly two
            ':'-separated integer fields.
    """
    leading, trailing = (int(field) for field in time_str.split(':'))
    return leading * 60 + trailing

# Work on an explicit copy: last_10_games is derived from a filtered frame, and
# assigning columns on such a slice makes pandas emit SettingWithCopyWarning.
last_10_games = last_10_games.copy()

# Replace each 'mp' clock string with convert_to_minutes' integer
# (first field * 60 + second field, i.e. seconds for a "MM:SS" value)
last_10_games['mp'] = last_10_games['mp'].apply(convert_to_minutes)

# Calculate rebounds as offensive and defensive combined
last_10_games['rebounds'] = last_10_games['drb'] + last_10_games['orb']

# Calculate per-game field goal percentage (made / attempted, as a percentage)
last_10_games['fg_percentage'] = (last_10_games['fg'] / last_10_games['fga']) * 100

# Calculate average stats for playing time, points, assists, rebounds, steals, blocks, turnovers, and field goal percentage
average_stats = last_10_games[['mp', 'pts', 'ast', 'rebounds', 'stl', 'blk', 'tov', 'fg_percentage']].mean()

# Round average stats to the nearest tenth value
average_stats_rounded = average_stats.round(1)

# Dividing the averaged 'mp' value by 60 gives minutes played (the "_hours"
# suffix in the variable name is historical). Use the unrounded mean so the
# value is rounded only once.
average_minutes_played_hours = round(average_stats['mp'] / 60, 1)

# Print average stats
print("Average stats for the last 10 matchups against", opponent, ":")
print("Minutes played:", average_minutes_played_hours)
print("Points:", average_stats_rounded['pts'])
print("Field Goal Percentage:", average_stats_rounded['fg_percentage'])
print("Assists:", average_stats_rounded['ast'])
print("Rebounds:", average_stats_rounded['rebounds'])
print("Steals:", average_stats_rounded['stl'])
print("Blocks:", average_stats_rounded['blk'])
print("Turnovers:", average_stats_rounded['tov'])

Average stats for the last 10 matchups against LAL :
Minutes played: 36.0
Points: 26.6
Field Goal Percentage: 51.5
Assists: 7.6
Rebounds: 8.2
Steals: 1.1
Blocks: 0.7
Turnovers: 3.0


/var/folders/3l/zzpb17n16zq0yrbx8c40rps40000gp/T/ipykernel_71181/1574377123.py:31: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  last_10_games['mp'] = last_10_games['mp'].apply(convert_to_minutes)
/var/folders/3l/zzpb17n16zq0yrbx8c40rps40000gp/T/ipykernel_71181/1574377123.py:34: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  last_10_games['rebounds'] = last_10_games['drb'] + last_10_games['orb']
/var/folders/3l/zzpb17n16zq0yrbx8c40rps40000gp/T/ipykernel_71181/1574377123.py:37: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  last_10_games['fg_percentage'] = (last_10_games['fg'] / last_10_games['fga']) * 100