InĀ [21]:
import pandas as pd
import os
from dotenv import load_dotenv
from lida import Manager, llm
from llmx import TextGenerationConfig 
import matplotlib.pyplot as plt
import gradio as gr
import seaborn as sns
from openai import OpenAI
import io
from PIL import Image

# Load environment variables from .env
load_dotenv()

# LLM + Manager setup
text_gen = llm(provider="openai", api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o-mini")
manager = Manager(text_gen=text_gen)
config = TextGenerationConfig(model="gpt-4o-mini", temperature=0.2)
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

50 Over Batting¶

InĀ [22]:
# Load the data from both Excel files
# df_regionalliga_batting = pd.read_excel('F50/BCV-Regionalliga-Bayern 2024 Batting Records - Bayerischer Cricket Verband e.V..xlsx')
df_verbandsliga_batting = pd.read_excel('F50/BCV-Verbandsliga-Bayern 2025 Batting Records - Bayerischer Cricket Verband e.V..xlsx')

# Standardize column names
# df_regionalliga_batting.columns = df_regionalliga_batting.columns.str.strip().str.lower()
df_verbandsliga_batting.columns = df_verbandsliga_batting.columns.str.strip().str.lower()

# Ensure both DataFrames have the same columns
# df_regionalliga_batting = df_regionalliga_batting[df_verbandsliga_batting.columns]

# Merge the two DataFrames along rows
df_combined_batting_odi = df_verbandsliga_batting.copy()  # Start with Verbandsliga data
InĀ [23]:
output_file = 'F50/Combined_BCV_Batting_Records_2025.xlsx'
df_combined_batting_odi.to_excel(output_file, index=False)

Top 20 by runs¶

InĀ [24]:
# Convert columns 'runs' and 'avg' to numeric, forcing errors to NaN (if any)
df_combined_batting_odi['runs'] = pd.to_numeric(df_combined_batting_odi['runs'], errors='coerce')
df_combined_batting_odi['avg'] = pd.to_numeric(df_combined_batting_odi['avg'], errors='coerce')

# Sort by 'runs' (descending)
top_20_by_runs = df_combined_batting_odi.sort_values(by=['runs'], ascending=[False]).head(20)
top_20_by_runs
Out[24]:
# player group team mat inns no runs 4's 6's 50's 100's hs sr avg points
0 1 Ashish Joshi South Centre - II BATCC-II 8 8 0 243 21 19 2 0 66 129.95 30.38 356
1 2 Avi Patel South Centre - I BATCC-I 8 8 0 176 16 7 2 0 65 92.63 22.00 233
2 3 Ankit Gupta South Centre - II BATCC-II 8 8 0 172 13 0 1 0 64 51.19 21.50 186
3 4 Sanjeev Sivaraman South Centre - I BATCC-I 6 6 0 167 18 0 1 0 59 61.17 27.83 198
4 5 Muhammad Talha South Centre - I BATCC-I 10 10 1 164 16 4 1 0 62 67.49 18.22 188
5 6 Mohammad Essa Haqbin South Centre - I BATCC-I 9 9 0 145 12 1 1 0 54 69.38 16.11 167
6 7 Sasi Kiran Alur South Centre - I BATCC-I 5 5 0 126 12 0 1 0 66 67.02 25.20 145
7 8 Rajesh Kumar Srivastava South Centre - I BATCC-I 10 10 0 122 6 3 0 0 41 61.00 12.20 124
8 9 Vineep Bhat South Centre - I BATCC-I 8 8 0 120 6 0 0 0 45 61.86 15.00 122
9 10 Bhavin Kantilal Solanki South Centre - II BATCC-II 7 7 0 113 8 4 0 0 27 58.25 16.14 128
10 11 Venkat Vinay Ayyagari South Centre - II BATCC-II 7 7 0 100 13 2 0 0 38 89.29 14.29 127
11 12 Zeeshan Sami South Centre - I BATCC-I 4 4 1 95 9 0 0 0 35 63.33 31.67 106
12 13 Haris Ali South Centre - I BATCC-I 8 8 3 89 8 1 0 0 22 65.44 17.80 98
13 14 Arjun Nadol South Centre - II BATCC-II 5 5 0 85 5 8 0 0 34 108.97 17.00 120
14 15 Rohit Verma South Centre - I BATCC-I 5 5 0 77 11 2 0 0 42 79.38 15.40 102
15 16 Satya Prakash Sahoo South Centre - I BATCC-I 5 5 0 74 3 2 0 0 32 54.81 14.80 83
16 17 Muhammad Omer Farooq South Centre - II BATCC-II 5 5 1 61 3 0 0 0 48 36.75 15.25 52
17 18 Deepak Kamal South Centre - II BATCC-II 7 7 0 59 6 0 0 0 39 46.09 8.43 57
18 19 Paras Jain South Centre - II BATCC-II 3 3 0 53 6 2 0 0 25 86.89 17.67 67
19 20 Sam Antony Vathezhath Judy South Centre - II BATCC-II 7 7 0 44 6 0 0 0 15 45.36 6.29 34

Top 20 by average. Only those considered who have played more matches than the team mean.¶

InĀ [25]:
# Calculate the mean of 'mat' column to use as 'n'
n = df_combined_batting_odi['mat'].mean()
print('Mean of matches across players:', n)
# Filter players who have played at least 'n' matches
filtered_df = df_combined_batting_odi[df_combined_batting_odi['mat'] >= n]

# Sort by 'avg' in descending order to find the top 10 players by average
top_20_by_avg = filtered_df.sort_values(by=['avg'], ascending=[False]).head(20)
top_20_by_avg
Mean of matches across players: 5.685714285714286
Out[25]:
# player group team mat inns no runs 4's 6's 50's 100's hs sr avg points
0 1 Ashish Joshi South Centre - II BATCC-II 8 8 0 243 21 19 2 0 66 129.95 30.38 356
3 4 Sanjeev Sivaraman South Centre - I BATCC-I 6 6 0 167 18 0 1 0 59 61.17 27.83 198
1 2 Avi Patel South Centre - I BATCC-I 8 8 0 176 16 7 2 0 65 92.63 22.00 233
2 3 Ankit Gupta South Centre - II BATCC-II 8 8 0 172 13 0 1 0 64 51.19 21.50 186
4 5 Muhammad Talha South Centre - I BATCC-I 10 10 1 164 16 4 1 0 62 67.49 18.22 188
12 13 Haris Ali South Centre - I BATCC-I 8 8 3 89 8 1 0 0 22 65.44 17.80 98
9 10 Bhavin Kantilal Solanki South Centre - II BATCC-II 7 7 0 113 8 4 0 0 27 58.25 16.14 128
5 6 Mohammad Essa Haqbin South Centre - I BATCC-I 9 9 0 145 12 1 1 0 54 69.38 16.11 167
8 9 Vineep Bhat South Centre - I BATCC-I 8 8 0 120 6 0 0 0 45 61.86 15.00 122
10 11 Venkat Vinay Ayyagari South Centre - II BATCC-II 7 7 0 100 13 2 0 0 38 89.29 14.29 127
7 8 Rajesh Kumar Srivastava South Centre - I BATCC-I 10 10 0 122 6 3 0 0 41 61.00 12.20 124
17 18 Deepak Kamal South Centre - II BATCC-II 7 7 0 59 6 0 0 0 39 46.09 8.43 57
19 20 Sam Antony Vathezhath Judy South Centre - II BATCC-II 7 7 0 44 6 0 0 0 15 45.36 6.29 34
21 22 Abdul Nasir Kunari South Centre - I BATCC-I 8 8 0 41 7 1 0 0 27 110.81 5.12 45
25 26 Muhammed Minhaj Madathodi South Centre - II BATCC-II 8 8 4 19 1 0 0 0 10 45.24 4.75 16
30 31 Amit Rajendra Desai South Centre - I BATCC-I 7 7 4 12 0 0 0 0 7 25.00 4.00 -3
31 32 Sankara N Muthukrishnan South Centre - I BATCC-I 6 5 1 8 1 0 0 0 6 20.00 2.00 -5

T20 Batting¶

InĀ [26]:
# 20 Over Batting
# Load the data from both Excel files
# df_regionalliga_batting = pd.read_excel('T20/T20 1.Verbandsliga - 2024 Batting Records - Bayerischer Cricket Verband e.V..xlsx')
df_verbandsliga_batting = pd.read_excel('T20/T20 2. Verbandsliga - 2025 Batting Records - Bayerischer Cricket Verband e.V..xlsx')

# Standardize column names
# df_regionalliga_batting.columns = df_regionalliga_batting.columns.str.strip().str.lower()
df_verbandsliga_batting.columns = df_verbandsliga_batting.columns.str.strip().str.lower()

# Ensure both DataFrames have the same columns
# df_regionalliga_batting = df_regionalliga_batting[df_verbandsliga_batting.columns]

# Merge the two DataFrames along rows
# df_combined_batting = pd.concat([df_regionalliga_batting, df_verbandsliga_batting], ignore_index=True)
df_combined_batting_t20 = df_verbandsliga_batting.copy()
InĀ [27]:
output_file = 'T20/Combined_BCV_Batting_Records_2025.xlsx'
df_combined_batting_t20.to_excel(output_file, index=False)

Top 20 by runs¶

InĀ [28]:
# Convert columns 'runs' and 'avg' to numeric, forcing errors to NaN (if any)
df_combined_batting_t20['runs'] = pd.to_numeric(df_combined_batting_t20['runs'], errors='coerce')
df_combined_batting_t20['avg'] = pd.to_numeric(df_combined_batting_t20['avg'], errors='coerce')

# Sort by 'runs' (descending) 
top_20_by_runs = df_combined_batting_t20.sort_values(by=['runs'], ascending=[False]).head(20)
top_20_by_runs
Out[28]:
# player group team mat inns no runs 4's 6's 50's 100's hs sr avg points
0 1 Rohit Verma South Centre - I BATCC-I 10 10 2 198 21 1 0 0 47 76.74 24.75 220
1 2 Ashish Joshi South Centre - I BATCC-I 10 10 1 173 16 12 0 0 42 132.06 19.22 256
2 3 Venkat Vinay Ayyagari South Centre - II BATCC-II 12 12 1 167 15 7 0 0 42 104.38 15.18 211
3 4 Mohammad Essa Haqbin South Centre - I BATCC-I 10 8 1 150 11 5 1 0 67 111.11 21.43 194
4 5 Rajesh Kumar Srivastava South Centre - II BATCC-II 12 12 0 140 21 0 0 0 35 80.92 11.67 159
5 6 Deepak Kamal South Centre - II BATCC-II 11 11 0 123 21 0 0 0 37 84.83 11.18 145
6 7 Vineep Bhat South Centre - I BATCC-I 10 8 1 114 12 0 0 0 44 82.01 16.29 128
7 8 Arjun Nadol South Centre - II BATCC-II 12 12 0 109 10 5 0 0 33 75.69 9.08 125
8 9 Ankit Gupta South Centre - II BATCC-II 10 10 4 109 4 1 0 0 31 49.10 18.17 97
9 10 Bhavin Kantilal Solanki South Centre - II BATCC-II 9 9 3 91 6 3 0 0 21 81.25 15.17 96
10 11 Sanjeev Sivaraman South Centre - II BATCC-II 4 4 0 87 8 1 0 0 45 83.65 21.75 104
11 12 Abdul Nasir Kunari South Centre - I BATCC-I 10 6 2 83 4 7 0 0 34 197.62 20.75 148
12 13 Sam Antony Vathezhath Judy South Centre - I BATCC-I 9 8 1 81 6 2 0 0 41 101.25 11.57 96
13 14 Sasi Kiran Alur South Centre - I BATCC-I 4 4 0 69 7 0 0 0 32 83.13 17.25 73
14 15 Satya Prakash Sahoo South Centre - I BATCC-I 8 5 0 55 4 1 0 0 33 76.39 11.00 63
15 16 Avi Patel South Centre - I BATCC-I 6 6 0 53 3 2 0 0 18 86.89 8.83 59
16 17 Muhammed Minhaj Madathodi South Centre - II BATCC-II 12 7 3 37 2 1 0 0 12 66.07 9.25 28
18 19 Mohammed Shakir South Centre - II BATCC-II 10 8 0 26 2 1 0 0 12 53.06 3.25 17
17 18 Mohammed Gouse Shaik South Centre - II BATCC-II 4 3 2 26 4 0 0 0 15 144.44 26.00 40
19 20 Dinesh Sasi Kumar South Centre - II BATCC-II 3 2 1 25 1 1 0 0 22 80.65 25.00 25

Top 20 by average. Only those conisdered who have played more matches than the team mean.¶

InĀ [29]:
# Calculate the mean of 'mat' column to use as 'n'
n = df_combined_batting_t20['mat'].mean()
print('Mean of matches across players:', n)
# Filter players who have played at least 'n' matches
filtered_df = df_combined_batting_t20[df_combined_batting_t20['mat'] >= n]

# Sort by 'avg' in descending order to find the top 10 players by average
top_20_by_avg = filtered_df.sort_values(by=['avg'], ascending=[False]).head(20)
top_20_by_avg
Mean of matches across players: 7.0
Out[29]:
# player group team mat inns no runs 4's 6's 50's 100's hs sr avg points
0 1 Rohit Verma South Centre - I BATCC-I 10 10 2 198 21 1 0 0 47 76.74 24.75 220
3 4 Mohammad Essa Haqbin South Centre - I BATCC-I 10 8 1 150 11 5 1 0 67 111.11 21.43 194
11 12 Abdul Nasir Kunari South Centre - I BATCC-I 10 6 2 83 4 7 0 0 34 197.62 20.75 148
1 2 Ashish Joshi South Centre - I BATCC-I 10 10 1 173 16 12 0 0 42 132.06 19.22 256
8 9 Ankit Gupta South Centre - II BATCC-II 10 10 4 109 4 1 0 0 31 49.10 18.17 97
6 7 Vineep Bhat South Centre - I BATCC-I 10 8 1 114 12 0 0 0 44 82.01 16.29 128
2 3 Venkat Vinay Ayyagari South Centre - II BATCC-II 12 12 1 167 15 7 0 0 42 104.38 15.18 211
9 10 Bhavin Kantilal Solanki South Centre - II BATCC-II 9 9 3 91 6 3 0 0 21 81.25 15.17 96
4 5 Rajesh Kumar Srivastava South Centre - II BATCC-II 12 12 0 140 21 0 0 0 35 80.92 11.67 159
12 13 Sam Antony Vathezhath Judy South Centre - I BATCC-I 9 8 1 81 6 2 0 0 41 101.25 11.57 96
5 6 Deepak Kamal South Centre - II BATCC-II 11 11 0 123 21 0 0 0 37 84.83 11.18 145
14 15 Satya Prakash Sahoo South Centre - I BATCC-I 8 5 0 55 4 1 0 0 33 76.39 11.00 63
16 17 Muhammed Minhaj Madathodi South Centre - II BATCC-II 12 7 3 37 2 1 0 0 12 66.07 9.25 28
7 8 Arjun Nadol South Centre - II BATCC-II 12 12 0 109 10 5 0 0 33 75.69 9.08 125
24 25 Abhijeesh Marol South Centre - II BATCC-II 7 5 3 17 2 0 0 0 11 43.59 8.50 8
20 21 Muhammad Talha South Centre - I BATCC-I 8 7 2 24 4 0 0 0 7 72.73 4.80 22
18 19 Mohammed Shakir South Centre - II BATCC-II 10 8 0 26 2 1 0 0 12 53.06 3.25 17
25 26 Ashish Satpathy South Centre - II BATCC-II 7 5 0 15 2 0 0 0 12 35.71 3.00 2
32 33 Amit Rajendra Desai South Centre - I BATCC-I 9 4 3 2 0 0 0 0 1 14.29 2.00 0
27 28 Apurva Sharma South Centre - II BATCC-II 7 6 0 11 0 0 0 0 6 33.33 1.83 1

Top 20 by strike rate. Only those conisdered who have scored more than the team mean.¶

InĀ [30]:
# Calculate the mean of 'mat' column to use as 'n'
n = df_combined_batting_t20['runs'].mean()
print('Mean of runs across players:', n)
# Filter players who have scored atleast n runs
filtered_df = df_combined_batting_t20[df_combined_batting_t20['runs'] >= n]

# Sort by 'avg' in descending order to find the top 10 players by strike rate
top_20_by_strikerate = filtered_df.sort_values(by=['sr'], ascending=[False]).head(20)
top_20_by_strikerate
Mean of runs across players: 61.11764705882353
Out[30]:
# player group team mat inns no runs 4's 6's 50's 100's hs sr avg points
11 12 Abdul Nasir Kunari South Centre - I BATCC-I 10 6 2 83 4 7 0 0 34 197.62 20.75 148
1 2 Ashish Joshi South Centre - I BATCC-I 10 10 1 173 16 12 0 0 42 132.06 19.22 256
3 4 Mohammad Essa Haqbin South Centre - I BATCC-I 10 8 1 150 11 5 1 0 67 111.11 21.43 194
2 3 Venkat Vinay Ayyagari South Centre - II BATCC-II 12 12 1 167 15 7 0 0 42 104.38 15.18 211
12 13 Sam Antony Vathezhath Judy South Centre - I BATCC-I 9 8 1 81 6 2 0 0 41 101.25 11.57 96
5 6 Deepak Kamal South Centre - II BATCC-II 11 11 0 123 21 0 0 0 37 84.83 11.18 145
10 11 Sanjeev Sivaraman South Centre - II BATCC-II 4 4 0 87 8 1 0 0 45 83.65 21.75 104
13 14 Sasi Kiran Alur South Centre - I BATCC-I 4 4 0 69 7 0 0 0 32 83.13 17.25 73
6 7 Vineep Bhat South Centre - I BATCC-I 10 8 1 114 12 0 0 0 44 82.01 16.29 128
9 10 Bhavin Kantilal Solanki South Centre - II BATCC-II 9 9 3 91 6 3 0 0 21 81.25 15.17 96
4 5 Rajesh Kumar Srivastava South Centre - II BATCC-II 12 12 0 140 21 0 0 0 35 80.92 11.67 159
0 1 Rohit Verma South Centre - I BATCC-I 10 10 2 198 21 1 0 0 47 76.74 24.75 220
7 8 Arjun Nadol South Centre - II BATCC-II 12 12 0 109 10 5 0 0 33 75.69 9.08 125
8 9 Ankit Gupta South Centre - II BATCC-II 10 10 4 109 4 1 0 0 31 49.10 18.17 97

50 Over Bowling¶

InĀ [31]:
# Load the data from both Excel files
# df_regionalliga_bowling = pd.read_excel('F50/BCV-Regionalliga-Bayern 2024 Bowling Records - Bayerischer Cricket Verband e.V..xlsx')
df_verbandsliga_bowling = pd.read_excel('F50/BCV-Verbandsliga-Bayern 2025 Bowling Records - Bayerischer Cricket Verband e.V..xlsx')

# Standardize column names
# df_regionalliga_bowling.columns = df_regionalliga_bowling.columns.str.strip().str.lower()
df_verbandsliga_bowling.columns = df_verbandsliga_bowling.columns.str.strip().str.lower()

# Ensure both DataFrames have the same columns
# df_regionalliga_bowling = df_regionalliga_bowling[df_verbandsliga_bowling.columns]

# Merge the two DataFrames along rows
# df_combined_bowling = pd.concat([df_regionalliga_bowling, df_verbandsliga_bowling], ignore_index=True)
df_combined_bowling_odi =df_verbandsliga_bowling.copy()
InĀ [32]:
output_file = 'F50/Combined_BCV_Bowling_Records_2025.xlsx'
df_combined_bowling_odi.to_excel(output_file, index=False)

Top 20 wicket takers¶

InĀ [33]:
# Convert columns 'wkts' and 'econ' to numeric, forcing errors to NaN (if any)
df_combined_bowling_odi['wkts'] = pd.to_numeric(df_combined_bowling_odi['wkts'], errors='coerce')
df_combined_bowling_odi['econ'] = pd.to_numeric(df_combined_bowling_odi['econ'], errors='coerce')
df_combined_bowling_odi['overs'] = pd.to_numeric(df_combined_bowling_odi['overs'], errors='coerce')

# Sort by 'wkts' (descending)
top_20_by_wkts = df_combined_bowling_odi.sort_values(by=['wkts'], ascending=[False]).head(20)
top_20_by_wkts
Out[33]:
# player group team mat inns overs runs wkts bbf ... dots econ avg sr hat-trick 4w 5w wides nb points
0 1 Abdul Nasir Kunari South Centre - I BATCC-I 8 8 50.0 116 18 9/Ā 4 ... 244 2.32 6.44 16.7 0 1 0 22 0 570
1 2 Muhammed Minhaj Madathodi South Centre - II BATCC-II 8 8 54.5 269 13 27/Ā 3 ... 238 4.91 20.69 25.3 0 0 0 62 9 388
2 3 Amit Rajendra Desai South Centre - I BATCC-I 7 7 37.2 138 12 28/Ā 3 ... 166 3.70 11.50 18.7 0 0 0 33 4 383
3 4 Mohammed Shakir South Centre - II BATCC-II 5 5 36.0 155 11 22/Ā 5 ... 145 4.31 14.09 19.6 0 0 1 32 1 351
4 5 Haris Ali South Centre - I BATCC-I 8 8 38.1 154 10 7/Ā 4 ... 175 4.03 15.40 22.9 0 1 0 45 4 332
5 6 Mohammad Essa Haqbin South Centre - I BATCC-I 9 5 21.5 58 8 4/Ā 3 ... 108 2.66 7.25 16.4 0 0 0 10 2 258
6 7 Rajesh Kumar Srivastava South Centre - I BATCC-I 10 8 30.2 142 8 51/Ā 3 ... 128 4.68 17.75 22.8 0 0 0 29 3 225
7 8 Paras Jain South Centre - II BATCC-II 3 3 15.0 83 8 5/Ā 5 ... 65 5.53 10.38 11.2 0 0 1 17 1 240
8 9 Bhavin Kantilal Solanki South Centre - II BATCC-II 7 7 34.5 194 8 27/Ā 3 ... 137 5.57 24.25 26.1 0 0 0 44 2 221
9 10 Vineep Bhat South Centre - I BATCC-I 8 6 30.0 67 7 16/Ā 4 ... 137 2.23 9.57 25.7 0 1 0 5 0 280
10 11 Nitin Madan South Centre - I BATCC-I 5 4 23.3 83 7 28/Ā 5 ... 103 3.53 11.86 20.1 0 0 1 5 1 224
11 12 Rohit Verma South Centre - I BATCC-I 5 5 16.3 58 6 23/Ā 3 ... 69 3.52 9.67 16.5 0 0 0 6 3 167
12 13 Satya Prakash Sahoo South Centre - I BATCC-I 5 5 27.0 63 5 10/Ā 2 ... 134 2.33 12.60 32.4 0 0 0 8 0 231
13 14 Venkat Vinay Ayyagari South Centre - II BATCC-II 7 7 41.1 194 5 45/Ā 2 ... 156 4.71 38.80 49.4 0 0 0 17 3 145
14 15 Ankit Gupta South Centre - II BATCC-II 8 6 19.0 114 5 6/Ā 2 ... 68 6.00 22.80 22.8 0 0 0 11 2 140
15 16 Moynul Hasan South Centre - I BATCC-I 4 3 11.0 62 4 23/Ā 2 ... 48 5.64 15.50 16.5 0 0 0 11 1 106
16 17 Muhammad Omer Farooq South Centre - II BATCC-II 5 2 6.2 37 3 34/Ā 2 ... 22 5.84 12.33 12.7 0 0 0 5 0 75
17 18 Shahroz Ismail South Centre - II BATCC-II 1 1 7.0 44 3 44/Ā 3 ... 27 6.29 14.67 14.0 0 0 0 7 0 76
18 19 Avi Patel South Centre - I BATCC-I 8 2 4.2 11 2 8/Ā 2 ... 19 2.54 5.50 13.0 0 0 0 2 0 62
19 20 Abhijeesh Maroli South Centre - II BATCC-II 3 2 9.0 83 2 33/Ā 1 ... 26 9.22 41.50 27.0 0 0 0 11 2 40

20 rows Ɨ 21 columns

Top 20 by economy rate. Only those conisdered who have bowled more than the team mean.¶

InĀ [34]:
# Calculate the mean of 'overs' column to use as 'n'
n = df_combined_bowling_odi['overs'].mean()
print('Mean of overs across players:', n)
# Filter players who have bowled min n overs
filtered_df = df_combined_bowling_odi[df_combined_bowling_odi['overs'] >= n]

# Sort by 'economy rate' in ascending order to find the top 10 players by economy
top_20_by_economy = filtered_df.sort_values(by=['econ'], ascending=[True]).head(20)
top_20_by_economy
Mean of overs across players: 23.39565217391304
Out[34]:
# player group team mat inns overs runs wkts bbf ... dots econ avg sr hat-trick 4w 5w wides nb points
9 10 Vineep Bhat South Centre - I BATCC-I 8 6 30.0 67 7 16/Ā 4 ... 137 2.23 9.57 25.7 0 1 0 5 0 280
0 1 Abdul Nasir Kunari South Centre - I BATCC-I 8 8 50.0 116 18 9/Ā 4 ... 244 2.32 6.44 16.7 0 1 0 22 0 570
12 13 Satya Prakash Sahoo South Centre - I BATCC-I 5 5 27.0 63 5 10/Ā 2 ... 134 2.33 12.60 32.4 0 0 0 8 0 231
2 3 Amit Rajendra Desai South Centre - I BATCC-I 7 7 37.2 138 12 28/Ā 3 ... 166 3.70 11.50 18.7 0 0 0 33 4 383
4 5 Haris Ali South Centre - I BATCC-I 8 8 38.1 154 10 7/Ā 4 ... 175 4.03 15.40 22.9 0 1 0 45 4 332
3 4 Mohammed Shakir South Centre - II BATCC-II 5 5 36.0 155 11 22/Ā 5 ... 145 4.31 14.09 19.6 0 0 1 32 1 351
6 7 Rajesh Kumar Srivastava South Centre - I BATCC-I 10 8 30.2 142 8 51/Ā 3 ... 128 4.68 17.75 22.8 0 0 0 29 3 225
13 14 Venkat Vinay Ayyagari South Centre - II BATCC-II 7 7 41.1 194 5 45/Ā 2 ... 156 4.71 38.80 49.4 0 0 0 17 3 145
1 2 Muhammed Minhaj Madathodi South Centre - II BATCC-II 8 8 54.5 269 13 27/Ā 3 ... 238 4.91 20.69 25.3 0 0 0 62 9 388
8 9 Bhavin Kantilal Solanki South Centre - II BATCC-II 7 7 34.5 194 8 27/Ā 3 ... 137 5.57 24.25 26.1 0 0 0 44 2 221

10 rows Ɨ 21 columns

T20 Bowling¶

InĀ [35]:
# Load the data from both Excel files
# df_regionalliga_bowling = pd.read_excel('T20/T20 1.Verbandsliga - 2024 Bowling Records - Bayerischer Cricket Verband e.V..xlsx')
df_verbandsliga_bowling = pd.read_excel('T20/T20 2. Verbandsliga - 2025 Bowling Records - Bayerischer Cricket Verband e.V..xlsx')

# Standardize column names
# df_regionalliga_bowling.columns = df_regionalliga_bowling.columns.str.strip().str.lower()
df_verbandsliga_bowling.columns = df_verbandsliga_bowling.columns.str.strip().str.lower()

# Ensure both DataFrames have the same columns
# df_regionalliga_bowling = df_regionalliga_bowling[df_verbandsliga_bowling.columns]

# Merge the two DataFrames along rows
# df_combined_bowling = pd.concat([df_regionalliga_bowling, df_verbandsliga_bowling], ignore_index=True)
df_combined_bowling_t20 = df_verbandsliga_bowling.copy()
InĀ [36]:
output_file = 'T20/Combined_BCV_Bowling_Records_2025.xlsx'
df_combined_bowling_t20.to_excel(output_file, index=False)

Top 20 wicket takers¶

InĀ [37]:
# Convert columns 'wkts' and 'econ' to numeric, forcing errors to NaN (if any)
df_combined_bowling_t20['wkts'] = pd.to_numeric(df_combined_bowling_t20['wkts'], errors='coerce')
df_combined_bowling_t20['econ'] = pd.to_numeric(df_combined_bowling_t20['econ'], errors='coerce')
df_combined_bowling_t20['overs'] = pd.to_numeric(df_combined_bowling_t20['overs'], errors='coerce')

# Sort by 'wkts' (descending)
top_20_by_wkts = df_combined_bowling_t20.sort_values(by=['wkts'], ascending=[False]).head(20)
top_20_by_wkts
Out[37]:
# player group team mat inns overs runs wkts bbf ... dots econ avg sr hat-trick 4w 5w wides nb points
0 1 Vineep Bhat South Centre - I BATCC-I 10 10 37.5 196 18 18/Ā 4 ... 121 5.18 10.89 12.6 0 1 0 23 0 456
1 2 Muhammed Minhaj Madathodi South Centre - II BATCC-II 12 12 44.0 253 17 16/Ā 3 ... 166 5.75 14.88 15.5 0 0 0 42 2 401
2 3 Abdul Nasir Kunari South Centre - I BATCC-I 10 10 36.0 169 14 10/Ā 4 ... 132 4.69 12.07 15.4 0 2 0 21 2 358
3 4 Satya Prakash Sahoo South Centre - I BATCC-I 8 8 30.0 133 13 15/Ā 3 ... 101 4.43 10.23 13.8 0 0 0 6 2 330
4 5 Amit Rajendra Desai South Centre - I BATCC-I 9 9 31.5 177 13 16/Ā 3 ... 116 5.56 13.62 14.7 0 0 0 17 1 325
5 6 Rajesh Kumar Srivastava South Centre - II BATCC-II 12 10 34.0 212 11 20/Ā 3 ... 125 6.24 19.27 18.5 0 0 0 30 0 293
6 7 Mohammed Shakir South Centre - II BATCC-II 10 10 35.0 191 9 11/Ā 2 ... 126 5.46 21.22 23.3 0 0 0 16 2 275
7 8 Bhavin Kantilal Solanki South Centre - II BATCC-II 9 8 27.4 158 8 20/Ā 3 ... 110 5.71 19.75 20.8 0 0 0 36 1 222
8 9 Venkat Vinay Ayyagari South Centre - II BATCC-II 12 11 34.0 196 8 15/Ā 2 ... 110 5.76 24.50 25.5 0 0 0 27 1 198
9 10 Nitin Madan South Centre - I BATCC-I 4 4 9.1 73 7 27/Ā 5 ... 26 7.96 10.43 7.9 0 0 1 3 0 156
10 11 Paras Jain South Centre - II BATCC-II 4 3 9.2 63 4 20/Ā 2 ... 34 6.75 15.75 14.0 0 0 0 8 0 86
13 14 Avi Patel South Centre - I BATCC-I 6 3 9.0 64 3 20/Ā 2 ... 29 7.11 21.33 18.0 0 0 0 3 0 73
11 12 Deepak Kamal South Centre - II BATCC-II 11 3 7.0 43 3 6/Ā 1 ... 19 6.14 14.33 14.0 0 0 0 2 0 67
12 13 Mohammad Essa Haqbin South Centre - I BATCC-I 10 7 14.4 96 3 9/Ā 1 ... 39 6.55 32.00 29.3 0 0 0 10 1 70
14 15 Haris Ali South Centre - I BATCC-I 4 4 10.0 52 2 12/Ā 1 ... 37 5.20 26.00 30.0 0 0 0 11 2 53
15 16 Abhijeesh Marol South Centre - II BATCC-II 7 4 11.0 66 2 12/Ā 1 ... 32 6.00 33.00 33.0 0 0 0 13 1 48
16 17 Deep Lamba South Centre - II BATCC-II 2 2 3.0 25 2 9/Ā 1 ... 9 8.33 12.50 9.0 0 0 0 3 1 39
17 18 Fahad Munawwar South Centre - I BATCC-I 2 2 6.0 51 2 38/Ā 2 ... 15 8.50 25.50 18.0 0 0 0 2 3 44
18 19 Moynul Hasan South Centre - I BATCC-I 1 1 2.0 4 1 4/Ā 1 ... 8 2.00 4.00 12.0 0 0 0 0 0 27
19 20 Ankit Gupta South Centre - II BATCC-II 10 4 10.0 64 1 26/Ā 1 ... 31 6.40 64.00 60.0 0 0 0 10 1 30

20 rows Ɨ 21 columns

Top 20 by economy rate. Only those conisdered who have bowled more than the team mean.¶

InĀ [38]:
# Calculate the mean of 'overs' column to use as 'n'
n = df_combined_bowling_t20['overs'].mean()
print('Mean of overs across players:', n)
# Filter players who have bowled min n overs
filtered_df = df_combined_bowling_t20[df_combined_bowling_t20['overs'] >= n]

# Sort by 'economy rate' in ascending order to find the top 10 players by economy
top_20_by_economy = filtered_df.sort_values(by=['econ'], ascending=[True]).head(20)
top_20_by_economy
Mean of overs across players: 18.413636363636364
Out[38]:
# player group team mat inns overs runs wkts bbf ... dots econ avg sr hat-trick 4w 5w wides nb points
3 4 Satya Prakash Sahoo South Centre - I BATCC-I 8 8 30.0 133 13 15/Ā 3 ... 101 4.43 10.23 13.8 0 0 0 6 2 330
2 3 Abdul Nasir Kunari South Centre - I BATCC-I 10 10 36.0 169 14 10/Ā 4 ... 132 4.69 12.07 15.4 0 2 0 21 2 358
0 1 Vineep Bhat South Centre - I BATCC-I 10 10 37.5 196 18 18/Ā 4 ... 121 5.18 10.89 12.6 0 1 0 23 0 456
6 7 Mohammed Shakir South Centre - II BATCC-II 10 10 35.0 191 9 11/Ā 2 ... 126 5.46 21.22 23.3 0 0 0 16 2 275
4 5 Amit Rajendra Desai South Centre - I BATCC-I 9 9 31.5 177 13 16/Ā 3 ... 116 5.56 13.62 14.7 0 0 0 17 1 325
7 8 Bhavin Kantilal Solanki South Centre - II BATCC-II 9 8 27.4 158 8 20/Ā 3 ... 110 5.71 19.75 20.8 0 0 0 36 1 222
1 2 Muhammed Minhaj Madathodi South Centre - II BATCC-II 12 12 44.0 253 17 16/Ā 3 ... 166 5.75 14.88 15.5 0 0 0 42 2 401
8 9 Venkat Vinay Ayyagari South Centre - II BATCC-II 12 11 34.0 196 8 15/Ā 2 ... 110 5.76 24.50 25.5 0 0 0 27 1 198
5 6 Rajesh Kumar Srivastava South Centre - II BATCC-II 12 10 34.0 212 11 20/Ā 3 ... 125 6.24 19.27 18.5 0 0 0 30 0 293

9 rows Ɨ 21 columns

Natual Language Statistics¶

InĀ [39]:
# Example: Summarize one dataset
summary = manager.summarize(df_combined_batting_odi, textgen_config=config)
print("Summary:", summary)

# Example: Ask LIDA to generate goals (questions it can answer)
goals = manager.goals(summary=summary, textgen_config=config)
print("Possible goals:", goals)

# Example: Visualize data to answer a specific goal
viz_list = manager.visualize(
    summary=summary,
    goal="Show top 5 run scorers in ODI batting",
    textgen_config=config,
    library="seaborn"
)

viz = viz_list[0]
chart_code = viz.code
# print("Generated code:\n", chart_code)

# Execute the code with your DataFrame as 'data'
exec(chart_code, {"data": df_combined_batting_odi, "plt": plt})

# Show the chart
plt.show()
Summary: {'name': '', 'file_name': '', 'dataset_description': '', 'fields': [{'column': '#', 'properties': {'dtype': 'number', 'std': 10, 'min': 1, 'max': 35, 'samples': [27, 14, 25], 'num_unique_values': 35, 'semantic_type': '', 'description': ''}}, {'column': 'player', 'properties': {'dtype': 'string', 'samples': ['Abhijeesh Maroli', 'Arjun Nadol', 'Nitin Madan'], 'num_unique_values': 35, 'semantic_type': '', 'description': ''}}, {'column': 'group', 'properties': {'dtype': 'category', 'samples': ['South Centre - I', 'South Centre - II'], 'num_unique_values': 2, 'semantic_type': '', 'description': ''}}, {'column': 'team', 'properties': {'dtype': 'category', 'samples': ['BATCC-I', 'BATCC-II'], 'num_unique_values': 2, 'semantic_type': '', 'description': ''}}, {'column': 'mat', 'properties': {'dtype': 'number', 'std': 2, 'min': 1, 'max': 10, 'samples': [1, 6], 'num_unique_values': 10, 'semantic_type': '', 'description': ''}}, {'column': 'inns', 'properties': {'dtype': 'number', 'std': 2, 'min': 1, 'max': 10, 'samples': [1, 6], 'num_unique_values': 10, 'semantic_type': '', 'description': ''}}, {'column': 'no', 'properties': {'dtype': 'number', 'std': 1, 'min': 0, 'max': 4, 'samples': [1, 4], 'num_unique_values': 4, 'semantic_type': '', 'description': ''}}, {'column': 'runs', 'properties': {'dtype': 'number', 'std': 61, 'min': 1, 'max': 243, 'samples': [74, 44], 'num_unique_values': 34, 'semantic_type': '', 'description': ''}}, {'column': "4's", 'properties': {'dtype': 'number', 'std': 5, 'min': 0, 'max': 21, 'samples': [21, 16], 'num_unique_values': 16, 'semantic_type': '', 'description': ''}}, {'column': "6's", 'properties': {'dtype': 'number', 'std': 3, 'min': 0, 'max': 19, 'samples': [7, 3], 'num_unique_values': 8, 'semantic_type': '', 'description': ''}}, {'column': "50's", 'properties': {'dtype': 'number', 'std': 0, 'min': 0, 'max': 2, 'samples': [2, 1], 'num_unique_values': 3, 'semantic_type': '', 'description': ''}}, {'column': "100's", 'properties': {'dtype': 'number', 'std': 0, 'min': 0, 'max': 0, 'samples': [0], 'num_unique_values': 1, 'semantic_type': '', 'description': ''}}, {'column': 'hs', 'properties': {'dtype': 'number', 'std': 20, 'min': 1, 'max': 66, 'samples': [2], 'num_unique_values': 29, 'semantic_type': '', 'description': ''}}, {'column': 'sr', 'properties': {'dtype': 'number', 'std': 27.225843793206867, 'min': 12.5, 'max': 129.95, 'samples': [35.29], 'num_unique_values': 35, 'semantic_type': '', 'description': ''}}, {'column': 'avg', 'properties': {'dtype': 'number', 'std': 8.261177912634594, 'min': 1.0, 'max': 31.67, 'samples': [4.0], 'num_unique_values': 32, 'semantic_type': '', 'description': ''}}, {'column': 'points', 'properties': {'dtype': 'number', 'std': 82, 'min': -5, 'max': 356, 'samples': [-5], 'num_unique_values': 32, 'semantic_type': '', 'description': ''}}], 'field_names': ['#', 'player', 'group', 'team', 'mat', 'inns', 'no', 'runs', "4's", "6's", "50's", "100's", 'hs', 'sr', 'avg', 'points']}
Possible goals: [Goal(question='What is the distribution of runs scored by players?', visualization='histogram of runs', rationale='This tells us about the performance of players in terms of runs scored, allowing us to identify trends, outliers, and the overall scoring distribution among players.', index=0), Goal(question='How do the average scores compare between different teams?', visualization='bar chart of avg grouped by team', rationale='This visualization will help us understand which team has the highest average score, providing insights into team performance and potential areas for improvement.', index=1), Goal(question='What is the relationship between the number of matches played and total runs scored?', visualization='scatter plot of mat vs runs', rationale='This will help us analyze if there is a correlation between the number of matches played and the total runs scored, indicating player consistency and performance over time.', index=2), Goal(question="How many players have scored 50's and 100's?", visualization="bar chart of count of players with 50's and 100's", rationale='This visualization will provide insights into the frequency of significant scoring milestones among players, highlighting standout performances.', index=3), Goal(question='What is the distribution of strike rates across players?', visualization='box plot of sr', rationale="This will help us understand the variability and central tendency of players' strike rates, identifying which players are more aggressive or conservative in their batting approach.", index=4)]
<string>:14: FutureWarning: 

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

<string>:17: UserWarning: No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
<string>:14: FutureWarning: 

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

<string>:17: UserWarning: No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
No description has been provided for this image

Gradio Interface¶

InĀ [40]:
# Datasets
datasets = {
    "Batting ODI": df_combined_batting_odi,
    "Batting T20": df_combined_batting_t20,
    "Bowling ODI": df_combined_bowling_odi,
    "Bowling T20": df_combined_bowling_t20
}

# Helper function to convert matplotlib figure to image
def plot_to_image(fig):
    buf = io.BytesIO()
    fig.savefig(buf, format="png", bbox_inches="tight")
    buf.seek(0)
    img = Image.open(buf)
    return img

# Helper function to generate text answer using OpenAI
def get_text_answer(df, question):
    # Convert DataFrame to CSV string for context (limit size if needed)
    csv_str = df.to_csv(index=False)
    prompt = f"Here is cricket data:\n{csv_str}\n\nQuestion: {question}\nAnswer in concise text."
    
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a cricket stats analyst."},
            {"role": "user", "content": prompt}
        ],
        temperature=0
        )
    answer = response.choices[0].message.content
    # print("Answer:", answer)
    return answer

# Gradio function
def ask(dataset_name, question, return_type):
    df = datasets[dataset_name]
    
    if return_type == "Text Answer":
        answer = get_text_answer(df, question)
        return answer, None  # second output (chart) is None
    
    elif return_type == "Chart":
        summary = manager.summarize(df, textgen_config=config)
        viz_list = manager.visualize(summary=summary, goal=question, textgen_config=config, library="seaborn")
        viz = viz_list[0]
        chart_code = viz.code
        
        plt.figure(figsize=(10,6))
        exec(chart_code, {"data": df, "plt": plt, "sns": sns})
        fig = plt.gcf()
        plt.close(fig)
        
        return "", plot_to_image(fig)  # first output empty text, second output chart image


# Gradio interface
with gr.Blocks(css="""
#input-row {background-color: #ffffff; padding: 20px; border-radius: 10px; box-shadow: 0 4px 8px rgba(0,0,0,0.1);}
#dataset-dropdown, #query-textbox, #return-type {background-color: #ffffff; border-radius: 8px; border: 1px solid #ccc;}
#ask-button {background-color: #1f77b4; color: #ffffff; border-radius: 8px;}
#text-output {background-color: #ffffff; border-radius: 8px; border: 1px solid #ccc;}
#chart-output {border: 2px solid #1f77b4; border-radius: 8px;}
""") as iface:
    
    # Page background
    gr.HTML("<style>body {background-color: #f0f4f8;}</style>")

    # Top section: inputs and button inside a styled column (like a card)
    with gr.Row(elem_id="input-row", variant="panel"):
        dataset_dropdown = gr.Dropdown(
            choices=list(datasets.keys()), label="Select Dataset", elem_id="dataset-dropdown"
        )
        query_textbox = gr.Textbox(
            label="Your Question", lines=2, elem_id="query-textbox"
        )
        return_type = gr.Radio(
            choices=["Text Answer", "Chart"], label="Return Type", elem_id="return-type"
        )
        submit_btn = gr.Button("Ask", elem_id="ask-button")

    # Text answer
    text_output = gr.Textbox(label="Answer", lines=5, elem_id="text-output")

    # Chart at bottom
    chart_output = gr.Image(label="Chart", elem_id="chart-output")

    # Connect function
    submit_btn.click(
        ask,
        inputs=[dataset_dropdown, query_textbox, return_type],
        outputs=[text_output, chart_output]
    )

# Launch interface without css parameter
iface.launch(share=True)
* Running on local URL:  http://127.0.0.1:7861
* Running on public URL: https://1c7f4610628da651b2.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
Out[40]:

Calculate Best Allrounder¶

InĀ [42]:
# ---- CREATE TOTAL RUNS PER PLAYER ----
df_batting_all = pd.concat(
    [df_combined_batting_odi, df_combined_batting_t20],
    ignore_index=True
)

df_batting_totals = (
    df_batting_all.groupby("player", as_index=False)["runs"]
    .sum()
    .rename(columns={"runs": "Total Runs"})
)


# ---- CREATE TOTAL WICKETS PER PLAYER ----
df_bowling_all = pd.concat(
    [df_combined_bowling_odi, df_combined_bowling_t20],
    ignore_index=True
)

df_bowling_totals = (
    df_bowling_all.groupby("player", as_index=False)["wkts"]
    .sum()
    .rename(columns={"wkts": "Total Wickets"})
)
InĀ [49]:
import matplotlib.pyplot as plt

# Sort and take top 10
top10 = df_bowling_totals.sort_values(
    by="Total Wickets", ascending=False
).head(10)

plt.figure(figsize=(10, 6))
plt.bar(top10["player"], top10["Total Wickets"])
plt.xticks(rotation=45, ha='right')
plt.xlabel("Player")
plt.ylabel("Total Wickets")
plt.title("Top 10 Players by Total Wickets (Combined ODI + T20)")
plt.tight_layout()
plt.show()
No description has been provided for this image
InĀ [50]:
top10_batting = df_batting_totals.sort_values(
    by="Total Runs", ascending=False
).head(10)

plt.figure(figsize=(10, 6))
plt.bar(top10_batting["player"], top10_batting["Total Runs"])
plt.xticks(rotation=45, ha='right')
plt.xlabel("Player")
plt.ylabel("Total Runs")
plt.title("Top 10 Players by Total Runs (Combined ODI + T20)")
plt.tight_layout()
plt.show()
No description has been provided for this image