import pandas as pd
# Absolute location of the LeBron career game-log CSV on the author's machine;
# point this at wherever the file lives locally.
file_path = '/Users/Jayden.Chen/vscode/csp/modeldata/lebron_career.csv'

# Read the whole CSV into a pandas DataFrame.
lebron_career_df = pd.read_csv(filepath_or_buffer=file_path)

# Preview the first five rows as a quick check that the load worked.
print(lebron_career_df.head(5))
game date age team opp result mp fg fga fgp ... \
0 1 2003-10-29 18-303 CLE SAC L (-14) 42:00 12 20 0.600 ...
1 2 2003-10-30 18-304 CLE PHO L (-9) 41:00 8 17 0.471 ...
2 3 2003-11-01 18-306 CLE POR L (-19) 39:00 3 12 0.250 ...
3 4 2003-11-05 18-310 CLE DEN L (-4) 41:00 3 11 0.273 ...
4 5 2003-11-07 18-312 CLE IND L (-1) 44:00 8 18 0.444 ...
orb drb trb ast stl blk tov pts game_score minus_plus
0 2 4 6 9 4 0 2 25 24.7 -9
1 2 10 12 8 1 0 7 21 14.7 -3
2 0 4 4 6 2 0 2 8 5.0 -21
3 2 9 11 7 2 3 2 7 11.2 -3
4 0 5 5 3 0 0 7 23 9.0 -7
[5 rows x 26 columns]
Clean Data
# Columns kept for the analysis: the opponent code, minutes played and the
# box-score statistics.
selected_columns = ['opp', 'mp', 'pts', 'fga', 'fgp', 'ast', 'orb', 'drb', 'stl', 'blk', 'tov']

# Take that subset and discard every row that has a missing value in any of
# the kept columns; dropna() returns a new DataFrame, so the full
# lebron_career_df is left untouched.
lebron_career_cleaned = lebron_career_df[selected_columns].dropna()

# NOTE(review): if 'mp' is stored as text such as '42:00' it still needs
# converting to a number before modelling, and 'opp' is categorical and may
# need encoding -- confirm against the actual column dtypes.

# Show the retained column names, then render the cleaned table
# (display() is supplied by the notebook environment).
print(lebron_career_cleaned.columns)
display(lebron_career_cleaned)
Index(['opp', 'mp', 'pts', 'fga', 'fgp', 'ast', 'orb', 'drb', 'stl', 'blk',
'tov'],
dtype='object')
| | opp | mp | pts | fga | fgp | ast | orb | drb | stl | blk | tov |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | SAC | 2520 | 25 | 20 | 0.600 | 9 | 2 | 4 | 4 | 0 | 2 |
1 | PHO | 2460 | 21 | 17 | 0.471 | 8 | 2 | 10 | 1 | 0 | 7 |
2 | POR | 2340 | 8 | 12 | 0.250 | 6 | 0 | 4 | 2 | 0 | 2 |
3 | DEN | 2460 | 7 | 11 | 0.273 | 7 | 2 | 9 | 2 | 3 | 2 |
4 | IND | 2640 | 23 | 18 | 0.444 | 3 | 0 | 5 | 0 | 0 | 7 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1127 | PHO | 1993 | 28 | 14 | 0.500 | 11 | 0 | 12 | 3 | 2 | 5 |
1128 | POR | 2465 | 35 | 25 | 0.600 | 6 | 0 | 14 | 2 | 3 | 6 |
1129 | CHI | 2393 | 33 | 26 | 0.577 | 12 | 1 | 12 | 1 | 2 | 6 |
1130 | MIL | 2401 | 40 | 29 | 0.552 | 10 | 5 | 7 | 2 | 1 | 6 |
1131 | TOR | 2364 | 35 | 19 | 0.579 | 17 | 1 | 6 | 1 | 0 | 0 |
1132 rows × 11 columns
Machine Learning
import pandas as pd
# Re-read the career game log so this cell can run on its own.
lebron_career_df = pd.read_csv(
    filepath_or_buffer='/Users/Jayden.Chen/vscode/csp/modeldata/lebron_career.csv'
)

# Columns the statistics below rely on.
# NOTE(review): this list is not referenced again in the visible part of this
# cell -- the full DataFrame is what gets filtered.
selected_columns = [
    'opp', 'mp', 'pts', 'ast', 'orb', 'drb',
    'stl', 'blk', 'tov', 'fg', 'fga',
]
# Keep only the games played against one opponent
def filter_opponent(data, opponent):
    """Return the rows of *data* whose 'opp' value equals *opponent*.

    The comparison is an exact equality test, so it is case-sensitive; an
    opponent that never appears yields an empty DataFrame. Row order and
    index labels of the matching rows are preserved.
    """
    is_match = data['opp'] == opponent
    return data.loc[is_match]
# Get LeBron's most recent matchups (10 by default) from an already-filtered frame
def last_10_matchups(data, n=10):
    """Return the last *n* rows of *data* as an independent DataFrame.

    Parameters
    ----------
    data : pandas.DataFrame
        Game rows; "last" means the final rows in the frame's existing order,
        so the frame is presumably sorted oldest-to-newest (verify against the
        CSV).
    n : int, default 10
        How many trailing rows to keep. If *data* has fewer than *n* rows,
        all of them are returned.

    Returns
    -------
    pandas.DataFrame
        A copy of the selected rows. Copying detaches the result from the
        frame it was sliced from, so callers can add or overwrite columns
        without pandas raising SettingWithCopyWarning.
    """
    return data.tail(n).copy()
# Ask for the opponent team. The opponent codes visible in the data preview
# are upper-case abbreviations (e.g. 'SAC', 'PHO'), and the filter is an exact
# match, so trim stray whitespace and upper-case what the user typed.
opponent = input("Enter the opponent team: ").strip().upper()

# Filter data for the specified opponent
opponent_data = filter_opponent(lebron_career_df, opponent)

# Fail with a clear message instead of going on to report NaN averages when
# the code matches no game at all.
if opponent_data.empty:
    raise ValueError(f"No games found against opponent {opponent!r}")

# Get LeBron's last 10 matchups against the opponent
last_10_games = last_10_matchups(opponent_data)
# Convert an 'mp' clock string into a single number
def convert_to_minutes(time_str):
    """Convert a two-part 'A:B' clock string to ``A * 60 + B``.

    The 'mp' values in the data look like '42:00' (minutes:seconds), so
    despite the function's name the result for such input is the total number
    of SECONDS played; callers divide by 60 to get back to minutes.

    Parameters
    ----------
    time_str : str
        Text with exactly two integer fields separated by ':'.

    Returns
    -------
    int or float
        ``A * 60 + B`` as an int, or NaN when *time_str* is missing
        (None/NaN), so that a column containing gaps can still be converted
        and averaged.

    Raises
    ------
    ValueError
        If the string does not contain exactly two integer fields.
    """
    # Missing cells arrive as None/NaN, which have no .split(); pass them
    # through as NaN rather than crashing the whole column conversion.
    if pd.isna(time_str):
        return float('nan')
    minutes, seconds = map(int, time_str.split(':'))
    return minutes * 60 + seconds
# Build the derived columns on a NEW DataFrame with assign() rather than
# writing into last_10_games in place: the frame is a slice of the filtered
# data, and in-place column assignment on it triggers SettingWithCopyWarning.
last_10_games = last_10_games.assign(
    # 'mp' strings such as '42:00' become first_field * 60 + second_field,
    # i.e. seconds played (convert_to_minutes is misleadingly named).
    mp=last_10_games['mp'].apply(convert_to_minutes),
    # Total rebounds = defensive + offensive.
    rebounds=last_10_games['drb'] + last_10_games['orb'],
    # Per-game field goal percentage on a 0-100 scale.
    fg_percentage=(last_10_games['fg'] / last_10_games['fga']) * 100,
)

# Average each reported statistic over the selected games. The field goal
# figure is the mean of the per-game percentages, not total makes / attempts.
average_stats = last_10_games[['mp', 'pts', 'ast', 'rebounds', 'stl', 'blk', 'tov', 'fg_percentage']].mean()

# Round average stats to the nearest tenth
average_stats_rounded = average_stats.round(1)

# 'mp' is held in seconds at this point, so dividing by 60 gives MINUTES
# played per game; the variable name says "hours" but the value is minutes.
average_minutes_played_hours = round(average_stats_rounded['mp'] / 60, 1)

# Print average stats
print("Average stats for the last 10 matchups against", opponent, ":")
print("Minutes played:", average_minutes_played_hours)
print("Points:", average_stats_rounded['pts'])
print("Field Goal Percentage:", average_stats_rounded['fg_percentage'])
print("Assists:", average_stats_rounded['ast'])
print("Rebounds:", average_stats_rounded['rebounds'])
print("Steals:", average_stats_rounded['stl'])
print("Blocks:", average_stats_rounded['blk'])
print("Turnovers:", average_stats_rounded['tov'])
Average stats for the last 10 matchups against LAL :
Minutes played: 36.0
Points: 26.6
Field Goal Percentage: 51.5
Assists: 7.6
Rebounds: 8.2
Steals: 1.1
Blocks: 0.7
Turnovers: 3.0
/var/folders/3l/zzpb17n16zq0yrbx8c40rps40000gp/T/ipykernel_71181/1574377123.py:31: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
last_10_games['mp'] = last_10_games['mp'].apply(convert_to_minutes)
/var/folders/3l/zzpb17n16zq0yrbx8c40rps40000gp/T/ipykernel_71181/1574377123.py:34: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
last_10_games['rebounds'] = last_10_games['drb'] + last_10_games['orb']
/var/folders/3l/zzpb17n16zq0yrbx8c40rps40000gp/T/ipykernel_71181/1574377123.py:37: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
last_10_games['fg_percentage'] = (last_10_games['fg'] / last_10_games['fga']) * 100