Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import numpy as np | |
| from tqdm.auto import tqdm | |
| import plotly.express as px | |
| import gradio as gr | |
| from gradio_client import Client | |
| import os | |
| import re | |
| from translate import translate_pa_outcome, translate_pitch_outcome, jp_pitch_to_en_pitch, jp_pitch_to_pitch_code, translate_pitch_outcome | |
# load game data: one row per game; game_pk must identify a game uniquely
game_df = pd.read_csv('game.csv').drop_duplicates()
assert game_df['game_pk'].is_unique

# load pa data: one CSV per game under pa/, stacked row-wise.
# pa_pk is forced to str so pandas does not parse it as a number.
pa_df = pd.concat(
    [
        pd.read_csv(os.path.join('pa', f'{game_pk}.csv'), dtype={'pa_pk': str})
        for game_pk in tqdm(game_df['game_pk'])
    ],
    axis='rows',
)

# load pitch data: same per-game layout under pitch/, same pa_pk dtype
pitch_df = pd.concat(
    [
        pd.read_csv(os.path.join('pitch', f'{game_pk}.csv'), dtype={'pa_pk': str})
        for game_pk in tqdm(game_df['game_pk'])
    ],
    axis='rows',
)

# load player data
player_df = pd.read_csv('player.csv')
# translate pa data

# Matches a runs-scored suffix such as "+1点". The plus sign is put in a
# character class because a bare leading "+" is a quantifier with nothing to
# repeat and makes `re` raise; the full-width "＋" is accepted as well in case
# the source data uses it (NOTE(review): confirm which form the CSVs contain).
_SCORE_SUFFIX = re.compile(r'[+＋]\d+点')


def _drop_score_suffix(item):
    """Return only the first token of `item` when it carries a score suffix.

    A description made of several whitespace-separated tokens that contains
    a "+<digits>点" marker is reduced to its first token; anything else is
    returned unchanged.
    """
    tokens = item.split()
    if len(tokens) > 1 and _SCORE_SUFFIX.search(item):
        return tokens[0]
    return item


# `_des` keeps the stripped, untranslated description alongside the
# translated `des` column produced below.
pa_df['_des'] = pa_df['des'].str.strip()
pa_df['des'] = pa_df['des'].str.strip()
pa_df['des_more'] = pa_df['des_more'].str.strip()

# Fall back to the detailed description where the short one is missing.
missing_des = pa_df['des'].isna()
pa_df.loc[missing_des, 'des'] = pa_df.loc[missing_des, 'des_more']

pa_df.loc[:, 'des'] = pa_df['des'].apply(_drop_score_suffix)

# Rows whose `des` is a pitch-level result (ball, called strike, swinging
# strike) or ends with a pickoff-attempt marker take their outcome from
# `des_more` instead.
non_home_plate_outcome = (pa_df['des'].isin(['ボール', '見逃し', '空振り'])) | (pa_df['des'].str.endswith('塁けん制'))
pa_df.loc[non_home_plate_outcome, 'des'] = pa_df.loc[non_home_plate_outcome, 'des_more']

pa_df['des'] = pa_df['des'].apply(translate_pa_outcome)
# translate pitch data


def _first_token_if_multiple(item):
    """Return the first whitespace-separated token when `item` has several.

    Values with zero or one token are returned unchanged.
    """
    tokens = item.split()
    return tokens[0] if len(tokens) > 1 else item


# Drop pitches without a recorded pitch name. The explicit copy makes the
# column assignments below write to an independent frame instead of a slice
# of the unfiltered data, which is what triggers SettingWithCopyWarning.
pitch_df = pitch_df[~pitch_df['pitch_name'].isna()].copy()

# Keep the Japanese name, then derive the English name and the pitch code
# from it. An unknown Japanese name raises KeyError from the lookup tables.
pitch_df['jp_pitch_name'] = pitch_df['pitch_name']
pitch_df['pitch_name'] = pitch_df['jp_pitch_name'].apply(lambda pitch_name: jp_pitch_to_en_pitch[pitch_name])
pitch_df['pitch_type'] = pitch_df['jp_pitch_name'].apply(lambda pitch_name: jp_pitch_to_pitch_code[pitch_name])

# Reduce multi-token descriptions to their first token, then translate.
pitch_df['description'] = pitch_df['description'].apply(_first_token_if_multiple)
pitch_df['description'] = pitch_df['description'].apply(translate_pitch_outcome)
# translate player data
# All Japanese names go to the hosted translator in a single request, one
# name per line; the reply is read back line by line in the same order.
client = Client("Ramos-Ramos/npb_name_translator")
jp_name_block = '\n'.join(player_df['name'].tolist())
en_names = client.predict(jp_names=jp_name_block, api_name="/predict")

# Preserve the original names, then overwrite `name` with the translations.
# A reply line that is literally 'nan' is stored as a missing value.
player_df['jp_name'] = player_df['name']
player_df['name'] = [
    np.nan if translated == 'nan' else translated
    for translated in en_names.splitlines()
]
# merge pitch and pa data
# Each pitch is joined to its plate appearance, then to the pitcher's player
# record (player_id is renamed so it lines up with the `pitcher` column).
# Both joins are inner joins, so unmatched rows are dropped.
_pitchers = player_df.rename(columns={'player_id': 'pitcher'})
df = pitch_df.merge(pa_df, how='inner', on=['game_pk', 'pa_pk'])
df = df.merge(_pitchers, how='inner', on='pitcher')

# Translated pitch-description codes that count as a whiff.
_WHIFF_DESCRIPTIONS = ['SS', 'K']
# Translated pitch-description codes that do NOT count as a swing; every
# other description is flagged as a swing.
_NON_SWING_DESCRIPTIONS = [
    'B', 'BB', 'LS', 'inv_K', 'bunt_K', 'HBP', 'SH', 'SH E', 'SH FC',
    'obstruction', 'illegal_pitch', 'defensive_interference',
]

df['whiff'] = df['description'].isin(_WHIFF_DESCRIPTIONS)
df['swing'] = ~df['description'].isin(_NON_SWING_DESCRIPTIONS)
# gradio function(s)
def get_usage(player):
    """Build a pie chart of the pitch types thrown by `player`.

    Args:
        player: Player name as it appears in the `name` column of the
            merged module-level frame `df`.

    Returns:
        A Plotly figure with one slice per pitch name, sized by the number
        of pitches of that type. A player with no pitches in `df` yields a
        figure with no slices.
    """
    # A boolean mask with a list of columns always returns a DataFrame. The
    # label lookup it replaces returned a bare string for a player with a
    # single recorded pitch and raised KeyError for a player with none.
    pitches = df.loc[df['name'] == player, ['pitch_name']]

    # With only `names` given, each row counts once toward its slice.
    fig = px.pie(pitches, names='pitch_name')
    fig.update_traces(texttemplate='%{percent:.1%}', hovertemplate=f'<b>{player}</b><br>' + 'threw a <b>%{label}</b><br><b>%{percent:.1%}</b> of the time<br>(<b>%{value}</b> pitches)')
    return fig
# demo
def _player_heading(player):
    """Format the selected player's name as a top-level Markdown heading."""
    return f'# {player}'


# Dropdown options: every player with a translated name, alphabetically.
_player_choices = sorted(player_df['name'].dropna().tolist())

with gr.Blocks(fill_height=True) as demo:
    gr.Markdown('''
# NPB data visualization demo
[Data from SportsNavi](https://sports.yahoo.co.jp/)
''')
    player = gr.Dropdown(choices=_player_choices, label='Player')
    player_info = gr.Markdown()
    usage = gr.Plot(label='Pitch Distribution')

    # Picking a player refreshes both the pitch-usage chart and the heading.
    player.input(get_usage, inputs=player, outputs=usage)
    player.input(_player_heading, inputs=player, outputs=player_info)

demo.launch(share=True)