Source code for BharatFinTrack.cagr
import pandas
import datetime
import typing
import os
from .helper import Helper
[docs]
class CAGR:
'''
Provide functionalities for computing and analyzing Compound Annual Growth Rate (CAGR).
'''
[docs]
def sort_since_inception(
self,
csv_file: str,
within_category: bool = False,
excel_file: typing.Optional[str] = None
) -> pandas.DataFrame:
'''
Sort securities in descending order by CAGR (%) since inception,
either across all securities or within each category.
Parameters
----------
csv_file : str
Path to the input CSV file generated by either
:meth:`BharatFinTrack.NSETRI.download_equity_close` or
:meth:`BharatFinTrack.NSEPRI.download_equity_close`.
within_category : bool, optional
If True, sort securities within each category; otherwise, sort across all securities.
Default is False.
excel_file : str, optional
Path to an Excel file to save the output DataFrame. Default is None.
Returns
-------
DataFrame
DataFrame sorted in descending order by CAGR (%) since launch.
'''
# Check static type of input variable origin
Helper()._validate_variable_origin_static_type(
vars_types=typing.get_type_hints(
obj=self.sort_since_inception
),
vars_values=locals()
)
# Read DataFrame
df = Helper()._csv_date_format(
csv_file=csv_file,
date_cols=['Base Date', 'Close Date']
)
# Computing CAGR
df = Helper()._df_cagr_column(
df=df
)
# Sort DataFrame
df = Helper()._cagr_sort_within_category(df=df) if within_category else Helper()._cagr_sort(df=df)
# Save DataFrame
if excel_file is not None:
# Validate output file path
Helper()._validate_file_path(
input_file=excel_file,
input_ext='.xlsx'
)
# Write DataFrame to the Excel file
with pandas.ExcelWriter(
path=excel_file,
engine='xlsxwriter'
) as excel_writer:
df.to_excel(
excel_writer=excel_writer,
index=False,
float_format='%.2f'
)
workbook = excel_writer.book
worksheet = excel_writer.sheets['Sheet1']
header_format = workbook.add_format(
{
'bold': True,
'align': 'center'
}
)
for idx, col in enumerate(df.columns):
if col == 'Index Name':
worksheet.set_column(idx, idx, 60)
elif col in ['ID', 'Y-M-D']:
worksheet.set_column(
idx, idx, 15,
workbook.add_format({'align': 'center'})
)
else:
worksheet.set_column(idx, idx, 15)
worksheet.write(0, idx, col, header_format)
# Color DataFrame for sorting within category
if within_category:
category_names = df['Category'].unique()
colors = [
'FFB6C1',
'FFD700',
'7CFC00',
'00FFFF'
]
end_col = len(df.columns) - 1
start_row = 1
for category, color in zip(category_names, colors):
end_row = start_row + sum(df['Category'] == category) - 1
worksheet.conditional_format(
start_row, 0, end_row, end_col,
{
'type': 'no_blanks',
'format': workbook.add_format({'bg_color': color})
}
)
start_row = end_row + 1
return df
[docs]
def yearly_return(
self,
csv_file: str,
excel_file: typing.Optional[str] = None
) -> pandas.DataFrame:
'''
Calculate the year-wise CAGR (%) for a given security. Here, year-wise refers to the
CAGR calculated for periods ending on the present date, going back one year, two years,
three years, and so on, up to the available data range.
Parameters
----------
csv_file : str
Path to the CSV file obtained from :meth:`BharatFinTrack.NSETRI.download_daily_data`.
excel_file : str, optional
Path to an Excel file to save the output DataFrame. Default is None.
Returns
-------
DataFrame
DataFrame containing the yearly CAGR (%).
'''
# Check static type of input variable origin
Helper()._validate_variable_origin_static_type(
vars_types=typing.get_type_hints(
obj=self.yearly_return
),
vars_values=locals()
)
# Read DataFrame
df = Helper()._csv_date_format(
csv_file=csv_file,
date_cols=['Date']
)
# Start and end dates
start_date = df['Date'].min()
end_date = df['Date'].max()
# Date difference
date_diff = Helper()._date_difference(
start_date=start_date,
end_date=end_date
)
year_diff = date_diff['years']
# CAGR DataFrame
cagr_df = pandas.DataFrame()
for idx in range(year_diff + 1):
if idx < year_diff:
cagr_year: float = idx + 1
cagr_start = end_date.replace(year=end_date.year - cagr_year)
while True:
if df['Date'].isin([cagr_start]).any():
break
else:
cagr_start = cagr_start - datetime.timedelta(days=1)
yi_df = df[df['Date'].isin([cagr_start])]
else:
cagr_year = round(year_diff + (date_diff['months'] / 12) + (date_diff['days'] / 365), 1)
yi_df = df.iloc[:1, :]
# Year-wise CAGR summary
cagr_df.loc[idx, 'Year'] = cagr_year
cagr_df.loc[idx, 'Start Date'] = yi_df.iloc[0, 0]
cagr_df.loc[idx, 'Start Value'] = yi_df.iloc[0, 1]
cagr_df.loc[idx, 'Close Date'] = end_date
cagr_df.loc[idx, 'Close Value'] = df.iloc[-1, -1]
cagr_df.loc[idx, 'Multiple(X)'] = df.iloc[-1, -1] / yi_df.iloc[0, 1]
cagr_df.loc[idx, 'CAGR(%)'] = 100 * (pow(df.iloc[-1, -1] / yi_df.iloc[0, 1], 1 / cagr_year) - 1)
# Drop duplicates row if any
cagr_df = cagr_df.drop_duplicates(ignore_index=True)
# Saving DataFrame
if excel_file is not None:
# Validate output file path
Helper()._validate_file_path(
input_file=excel_file,
input_ext='.xlsx'
)
# Write DataFrame to the Excel file
with pandas.ExcelWriter(
path=excel_file,
engine='xlsxwriter'
) as excel_writer:
cagr_df.to_excel(
excel_writer=excel_writer,
index=False
)
workbook = excel_writer.book
worksheet = excel_writer.sheets['Sheet1']
header_format = workbook.add_format(
{
'bold': True,
'align': 'center'
}
)
for idx, col in enumerate(cagr_df.columns):
worksheet.set_column(
idx, idx, 15,
workbook.add_format({'num_format': '#,##0.0'})
)
worksheet.write(0, idx, col, header_format)
return cagr_df
[docs]
def compare_performance(
self,
indices: list[str],
dir_path: str,
excel_file: str
) -> pandas.DataFrame:
'''
Generate two DataFrames that compare year-wise CAGR (%) and growth multiple (X)
of a yearly fixed investment across multiple securities. Here, year-wise refers to the
CAGR calculated for periods ending on the present date, going back one year, two years, and so on,
up to the available data range. The output DataFrames are saved to an Excel file,
where the cells with the best performance among securities for each year are highlighted
in green-yellow, and those with the worst performance are highlighted in sandy brown.
Additionally, a scoring mechanism is implemented for the securities based on their growth values.
For each year, securities are ranked in ascending order of growth, with the lowest value
receiving the lowest score (1), and the highest value receiving the highest score.
The total scores for each security are calculated by summing their yearly scores.
Securities are then sorted in descending order based on their total scores.
Parameters
----------
indices : list
A list of index (security) names to compare in the CAGR (%) and growth multiple (X).
dir_path : str
Path to the directory containing CSV files with historical data for each index.
Each CSV file must be named as ``{index}.csv`` corresponding to the index names
provided in the ``indices`` list. These files should be obtained from
:meth:`BharatFinTrack.NSETRI.download_daily_data` and
:meth:`BharatFinTrack.NSETRI.update_daily_data` methods.
excel_file : str
Path to an Excel file to save the output DataFrames.
Returns
-------
DataFrame
DataFrame containing the index names and their total scores.
'''
# Check static type of input variable origin
Helper()._validate_variable_origin_static_type(
vars_types=typing.get_type_hints(
obj=self.compare_performance
),
vars_values=locals()
)
# Validate output file path
Helper()._validate_file_path(
input_file=excel_file,
input_ext='.xlsx'
)
# Validate equal close date across all securities
Helper()._validate_same_end_date_in_dfs(
indices=indices,
dir_path=dir_path,
)
# Year-wise CAGR analysis for all securities
dfs = [
self.yearly_return(
csv_file=os.path.join(dir_path, f'{index}.csv')
) for index in indices
]
# Compare XIRR and Growth with score
rank_df = Helper()._indices_metric_comparison(
indices=indices,
dfs=dfs,
remove_cols=[
'Start Value',
'Close Value'
],
common_cols=[
'Year',
'Start Date',
'Close Date'
],
rename_col='CAGR(%)',
excel_file=excel_file,
sheet_names=[
'CAGR(%)',
'Growth(X)'
]
)
return rank_df