Source code for BharatFinTrack.cagr

import pandas
import datetime
import typing
import os
from .helper import Helper



[docs]
class CAGR:
    '''
    Provide functionalities for computing and analyzing Compound Annual Growth Rate (CAGR).
    '''


[docs]
    def sort_since_inception(
        self,
        csv_file: str,
        within_category: bool = False,
        excel_file: typing.Optional[str] = None
    ) -> pandas.DataFrame:
        '''
        Sort securities in descending order by CAGR (%) since inception,
        either across all securities or within each category.

        Parameters
        ----------
        csv_file : str
            Path to the input CSV file generated by either
            :meth:`BharatFinTrack.NSETRI.download_equity_close` or
            :meth:`BharatFinTrack.NSEPRI.download_equity_close`.

        within_category : bool, optional
            If True, sort securities within each category; otherwise, sort across all securities.
            Default is False.

        excel_file : str, optional
            Path to an Excel file to save the output DataFrame. Default is None.

        Returns
        -------
        DataFrame
            DataFrame sorted in descending order by CAGR (%) since launch.
        '''

        # Check static type of input variable origin
        Helper()._validate_variable_origin_static_type(
            vars_types=typing.get_type_hints(
                obj=self.sort_since_inception
            ),
            vars_values=locals()
        )

        # Read DataFrame
        df = Helper()._csv_date_format(
            csv_file=csv_file,
            date_cols=['Base Date', 'Close Date']
        )

        # Computing CAGR
        df = Helper()._df_cagr_column(
            df=df
        )

        # Sort DataFrame
        df = Helper()._cagr_sort_within_category(df=df) if within_category else Helper()._cagr_sort(df=df)

        # Save DataFrame
        if excel_file is not None:
            # Validate output file path
            Helper()._validate_file_path(
                input_file=excel_file,
                input_ext='.xlsx'
            )
            # Write DataFrame to the Excel file
            with pandas.ExcelWriter(
                path=excel_file,
                engine='xlsxwriter'
            ) as excel_writer:
                df.to_excel(
                    excel_writer=excel_writer,
                    index=False,
                    float_format='%.2f'
                )
                workbook = excel_writer.book
                worksheet = excel_writer.sheets['Sheet1']
                header_format = workbook.add_format(
                    {
                        'bold': True,
                        'align': 'center'
                    }
                )
                for idx, col in enumerate(df.columns):
                    if col == 'Index Name':
                        worksheet.set_column(idx, idx, 60)
                    elif col in ['ID', 'Y-M-D']:
                        worksheet.set_column(
                            idx, idx, 15,
                            workbook.add_format({'align': 'center'})
                        )
                    else:
                        worksheet.set_column(idx, idx, 15)
                    worksheet.write(0, idx, col, header_format)
                # Color DataFrame for sorting within category
                if within_category:
                    category_names = df['Category'].unique()
                    colors = [
                        'FFB6C1',
                        'FFD700',
                        '7CFC00',
                        '00FFFF'
                    ]
                    end_col = len(df.columns) - 1
                    start_row = 1
                    for category, color in zip(category_names, colors):
                        end_row = start_row + sum(df['Category'] == category) - 1
                        worksheet.conditional_format(
                            start_row, 0, end_row, end_col,
                            {
                                'type': 'no_blanks',
                                'format': workbook.add_format({'bg_color': color})
                            }
                        )
                        start_row = end_row + 1

        return df



[docs]
    def yearly_return(
        self,
        csv_file: str,
        excel_file: typing.Optional[str] = None
    ) -> pandas.DataFrame:

        '''
        Calculate the year-wise CAGR (%) for a given security. Here, year-wise refers to the
        CAGR calculated for periods ending on the present date, going back one year, two years,
        three years, and so on, up to the available data range.

        Parameters
        ----------
        csv_file : str
            Path to the CSV file obtained from the following methods:

            - :meth:`BharatFinTrack.NSETRI.download_daily_data`
            - :meth:`BharatFinTrack.NPS.scheme_historical_nav`

        excel_file : str, optional
            Path to an Excel file to save the output DataFrame. Default is None.

        Returns
        -------
        DataFrame
            DataFrame containing the yearly CAGR (%).
        '''

        # Check static type of input variable origin
        Helper()._validate_variable_origin_static_type(
            vars_types=typing.get_type_hints(
                obj=self.yearly_return
            ),
            vars_values=locals()
        )

        # Read DataFrame
        df = Helper()._csv_date_format(
            csv_file=csv_file,
            date_cols=['Date']
        )

        # Start and end dates
        start_date = df['Date'].min()
        end_date = df['Date'].max()

        # Date difference
        date_diff = Helper()._date_difference(
            start_date=start_date,
            end_date=end_date
        )
        year_diff = date_diff['years']

        # CAGR DataFrame
        cagr_df = pandas.DataFrame()
        for idx in range(year_diff + 1):
            if idx < year_diff:
                cagr_year: float = idx + 1
                cagr_start = end_date.replace(year=end_date.year - cagr_year)
                while True:
                    if df['Date'].isin([cagr_start]).any():
                        break
                    else:
                        cagr_start = cagr_start - datetime.timedelta(days=1)
                yi_df = df[df['Date'].isin([cagr_start])]
            else:
                cagr_year = round(year_diff + (date_diff['months'] / 12) + (date_diff['days'] / 365), 1)
                yi_df = df.iloc[:1, :]
            # Year-wise CAGR summary
            cagr_df.loc[idx, 'Year'] = cagr_year
            cagr_df.loc[idx, 'Start Date'] = yi_df.iloc[0, 0]
            cagr_df.loc[idx, 'Start Value'] = yi_df.iloc[0, 1]
            cagr_df.loc[idx, 'Close Date'] = end_date
            cagr_df.loc[idx, 'Close Value'] = df.iloc[-1, -1]
            cagr_df.loc[idx, 'Multiple(X)'] = df.iloc[-1, -1] / yi_df.iloc[0, 1]
            cagr_df.loc[idx, 'CAGR(%)'] = 100 * (pow(df.iloc[-1, -1] / yi_df.iloc[0, 1], 1 / cagr_year) - 1)

        # Drop duplicates row if any
        cagr_df = cagr_df.drop_duplicates(ignore_index=True)

        # Saving DataFrame
        if excel_file is not None:
            # Validate output file path
            Helper()._validate_file_path(
                input_file=excel_file,
                input_ext='.xlsx'
            )
            # Write DataFrame to the Excel file
            with pandas.ExcelWriter(
                path=excel_file,
                engine='xlsxwriter'
            ) as excel_writer:
                cagr_df.to_excel(
                    excel_writer=excel_writer,
                    index=False
                )
                workbook = excel_writer.book
                worksheet = excel_writer.sheets['Sheet1']
                header_format = workbook.add_format(
                    {
                        'bold': True,
                        'align': 'center'
                    }
                )
                for idx, col in enumerate(cagr_df.columns):
                    worksheet.set_column(
                        idx, idx, 15,
                        workbook.add_format({'num_format': '#,##0.0'})
                    )
                    worksheet.write(0, idx, col, header_format)

        return cagr_df



[docs]
    def compare_performance(
        self,
        indices: list[str],
        dir_path: str,
        excel_file: str
    ) -> pandas.DataFrame:
        '''
        Generate two DataFrames that compare year-wise CAGR (%) and growth multiple (X)
        of a yearly fixed investment across multiple securities. Here, year-wise refers to the
        CAGR calculated for periods ending on the present date, going back one year, two years, and so on,
        up to the available data range. The output DataFrames are saved to an Excel file,
        where the cells with the best performance among securities for each year are highlighted
        in green-yellow, and those with the worst performance are highlighted in sandy brown.

        Additionally, a scoring mechanism is implemented for the securities based on their growth values.
        For each year, securities are ranked in ascending order of growth, with the lowest value
        receiving the lowest score (1), and the highest value receiving the highest score.
        The total scores for each security are calculated by summing their yearly scores.
        Securities are then sorted in descending order based on their total scores.

        Parameters
        ----------
        indices : list
            A list of index (security) names to compare in the CAGR (%) and growth multiple (X).

        dir_path : str
            Path to the directory containing CSV files with historical data for each index.
            Each CSV file must be named as ``{index}.csv`` corresponding to the index names
            provided in the ``indices`` list. These files should be obtained from:

            - :meth:`BharatFinTrack.NSETRI.download_daily_data`
            - :meth:`BharatFinTrack.NPS.scheme_historical_nav`

        excel_file : str
            Path to an Excel file to save the output DataFrames.

        Returns
        -------
        DataFrame
            DataFrame containing the index names and their total scores.
        '''

        # Check static type of input variable origin
        Helper()._validate_variable_origin_static_type(
            vars_types=typing.get_type_hints(
                obj=self.compare_performance
            ),
            vars_values=locals()
        )

        # Validate output file path
        Helper()._validate_file_path(
            input_file=excel_file,
            input_ext='.xlsx'
        )

        # Validate equal close date across all securities
        Helper()._validate_same_end_date_in_dfs(
            indices=indices,
            dir_path=dir_path,
        )

        # Year-wise CAGR analysis for all securities
        dfs = [
            self.yearly_return(
                csv_file=os.path.join(dir_path, f'{index}.csv')
            ) for index in indices
        ]

        # Compare XIRR and Growth with score
        rank_df = Helper()._indices_metric_comparison(
            indices=indices,
            dfs=dfs,
            remove_cols=[
                'Start Value',
                'Close Value'
            ],
            common_cols=[
                'Year',
                'Start Date',
                'Close Date'
            ],
            rename_col='CAGR(%)',
            excel_file=excel_file,
            sheet_names=[
                'CAGR(%)',
                'Growth(X)'
            ]
        )

        return rank_df