Csvdatabase_ipynb_2_

import pandas as pd

# Location of the career game-log CSV; edit this to wherever your copy of the file lives.
file_path = '/Users/Jayden.Chen/vscode/csp/modeldata/lebron_career.csv'

# Read the whole CSV into a DataFrame.
lebron_career_df = pd.read_csv(filepath_or_buffer=file_path)

# Sanity check: print the first five rows (the default for head()) to confirm the load.
print(lebron_career_df.head(n=5))

   game        date     age team  opp   result     mp  fg  fga    fgp  ...  \
   1  2003-10-29  18-303  CLE  SAC  L (-14)  42:00  12   20  0.600  ...   
   2  2003-10-30  18-304  CLE  PHO   L (-9)  41:00   8   17  0.471  ...   
   3  2003-11-01  18-306  CLE  POR  L (-19)  39:00   3   12  0.250  ...   
   4  2003-11-05  18-310  CLE  DEN   L (-4)  41:00   3   11  0.273  ...   
   5  2003-11-07  18-312  CLE  IND   L (-1)  44:00   8   18  0.444  ...   

   orb  drb  trb  ast  stl  blk  tov  pts  game_score  minus_plus  
  2    4    6    9    4    0    2   25        24.7          -9  
  2   10   12    8    1    0    7   21        14.7          -3  
  0    4    4    6    2    0    2    8         5.0         -21  
  2    9   11    7    2    3    2    7        11.2          -3  
  0    5    5    3    0    0    7   23         9.0          -7  

[5 rows x 26 columns]

Clean Data

# Columns carried into the cleaned table: opponent, minutes played and box-score stats.
selected_columns = [
    'opp', 'mp', 'pts', 'fga', 'fgp',
    'ast', 'orb', 'drb', 'stl', 'blk', 'tov',
]

# Work on an independent copy so the in-place clean-up below never touches
# the DataFrame that was loaded from disk.
lebron_career_cleaned = lebron_career_df.loc[:, selected_columns].copy()

# Discard every game (row) that has a missing value in any of the kept columns.
lebron_career_cleaned.dropna(axis=0, how='any', inplace=True)

# NOTE(review): no type conversion or categorical encoding is applied here;
# 'mp' and 'opp' are kept exactly as they were read from the CSV.

# Show the resulting column index, then the cleaned table itself.
print(lebron_career_cleaned.columns)
display(lebron_career_cleaned)

Index(['opp', 'mp', 'pts', 'fga', 'fgp', 'ast', 'orb', 'drb', 'stl', 'blk',
       'tov'],
      dtype='object')

	opp	mp	pts	fga	fgp	ast	orb	drb	stl	blk	tov
0	SAC	2520	25	20	0.600	9	2	4	4	0	2
1	PHO	2460	21	17	0.471	8	2	10	1	0	7
2	POR	2340	8	12	0.250	6	0	4	2	0	2
3	DEN	2460	7	11	0.273	7	2	9	2	3	2
4	IND	2640	23	18	0.444	3	0	5	0	0	7
...	...	...	...	...	...	...	...	...	...	...	...
1127	PHO	1993	28	14	0.500	11	0	12	3	2	5
1128	POR	2465	35	25	0.600	6	0	14	2	3	6
1129	CHI	2393	33	26	0.577	12	1	12	1	2	6
1130	MIL	2401	40	29	0.552	10	5	7	2	1	6
1131	TOR	2364	35	19	0.579	17	1	6	1	0	0

1132 rows × 11 columns

Machine Learning

import pandas as pd

# Read the career game log again, from the same CSV path used when the data
# was first loaded, so this section starts from the full set of columns.
lebron_career_df = pd.read_csv(
    '/Users/Jayden.Chen/vscode/csp/modeldata/lebron_career.csv'
)

# Columns relevant to the matchup averages computed below.
# NOTE(review): nothing in the visible code of this section reads this list;
# confirm whether a later cell depends on it before removing it.
selected_columns = [
    'opp', 'mp', 'pts', 'ast', 'orb', 'drb',
    'stl', 'blk', 'tov', 'fg', 'fga',
]

def filter_opponent(data, opponent):
    """Return the rows of *data* whose 'opp' value equals *opponent*.

    The comparison is an exact equality test, so case and surrounding
    whitespace matter. Row order and index labels of the matching rows are
    preserved; an empty DataFrame is returned when nothing matches.
    """
    is_matchup = data['opp'] == opponent
    return data[is_matchup]

def last_10_matchups(data):
    """Return the last 10 rows of *data* as an independent DataFrame.

    If *data* has fewer than 10 rows, all of them are returned. The result is
    an explicit copy rather than a slice of *data*: callers add and overwrite
    columns on it, and doing that on a slice makes pandas emit
    SettingWithCopyWarning. Because it is a copy, changes to the result never
    affect *data*.
    """
    return data.tail(10).copy()

# Ask which opponent to analyse. Surrounding whitespace is dropped and the
# text is upper-cased so that entries such as " lal " still match; the 'opp'
# values visible in the data preview are upper-case codes (SAC, PHO, POR, ...).
# NOTE(review): assumes every code in the file is upper-case — confirm.
opponent = input("Enter the opponent team: ").strip().upper()

# Keep only the games played against that opponent.
opponent_data = filter_opponent(lebron_career_df, opponent)

# Without this check an unknown or mistyped code flows through the rest of
# the cell and every average is printed as NaN, which hides the real problem.
if opponent_data.empty:
    raise ValueError(f"No games found against opponent {opponent!r}")

# The last (up to) 10 rows of those games, in file order.
last_10_games = last_10_matchups(opponent_data)

def convert_to_minutes(time_str):
    """Collapse a two-field 'A:B' string into the integer ``A * 60 + B``.

    *time_str* must contain exactly two integer fields separated by a colon;
    anything else raises ValueError.

    NOTE(review): the 'mp' values in the game log look like minutes:seconds
    (e.g. '42:00'), in which case the result is a total number of seconds
    despite the function's name — confirm against the data source.
    """
    major_text, minor_text = time_str.split(':')
    return int(major_text) * 60 + int(minor_text)

# Replace each 'A:B' playing-time string in 'mp' with the single number
# A * 60 + B produced by convert_to_minutes.
last_10_games['mp'] = last_10_games['mp'].apply(convert_to_minutes)

# Total rebounds per game: defensive plus offensive.
last_10_games['rebounds'] = last_10_games['drb'].add(last_10_games['orb'])

# Field goal percentage per game on a 0-100 scale: makes / attempts * 100.
last_10_games['fg_percentage'] = last_10_games['fg'].div(last_10_games['fga']).mul(100)

# Column-wise mean of every stat that appears in the report.
average_stats = last_10_games[
    [
        'mp', 'pts', 'ast', 'rebounds',
        'stl', 'blk', 'tov', 'fg_percentage',
    ]
].mean()

# Keep one decimal place for display.
average_stats_rounded = average_stats.round(1)

# Scale the averaged 'mp' figure down by 60 for display. Despite the
# variable's name, the report below labels this value "Minutes played".
average_minutes_played_hours = round(average_stats_rounded['mp'] / 60, 1)

# Report: header line, playing time, then one "Label: value" line per stat.
print("Average stats for the last 10 matchups against", opponent, ":")
print("Minutes played:", average_minutes_played_hours)
print("\n".join(
    label + " " + str(average_stats_rounded[column])
    for label, column in (
        ("Points:", 'pts'),
        ("Field Goal Percentage:", 'fg_percentage'),
        ("Assists:", 'ast'),
        ("Rebounds:", 'rebounds'),
        ("Steals:", 'stl'),
        ("Blocks:", 'blk'),
        ("Turnovers:", 'tov'),
    )
))

Average stats for the last 10 matchups against LAL :
Minutes played: 36.0
Points: 26.6
Field Goal Percentage: 51.5
Assists: 7.6
Rebounds: 8.2
Steals: 1.1
Blocks: 0.7
Turnovers: 3.0


/var/folders/3l/zzpb17n16zq0yrbx8c40rps40000gp/T/ipykernel_71181/1574377123.py:31: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  last_10_games['mp'] = last_10_games['mp'].apply(convert_to_minutes)
/var/folders/3l/zzpb17n16zq0yrbx8c40rps40000gp/T/ipykernel_71181/1574377123.py:34: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  last_10_games['rebounds'] = last_10_games['drb'] + last_10_games['orb']
/var/folders/3l/zzpb17n16zq0yrbx8c40rps40000gp/T/ipykernel_71181/1574377123.py:37: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  last_10_games['fg_percentage'] = (last_10_games['fg'] / last_10_games['fga']) * 100

Csvdatabase_ipynb_2_

August 2023 to June 2024

Csvdatabase_ipynb_2_ • 13 min read

Clean Data

Machine Learning