diff --git a/casparser/process/__init__.py b/casparser/process/__init__.py index b58767b..ee81162 100644 --- a/casparser/process/__init__.py +++ b/casparser/process/__init__.py @@ -25,7 +25,7 @@ def process_cas_text(text, file_type: FileType = FileType.UNKNOWN) -> ProcessedC :param text: :return: """ - if file_type == FileType.NSDL: + if file_type in (FileType.NSDL, FileType.CDSL): return process_nsdl_text(text) cas_statement_type = detect_cas_type(text[:1000]) if cas_statement_type == CASFileType.DETAILED: diff --git a/casparser/process/nsdl_statement.py b/casparser/process/nsdl_statement.py index ecf844f..102505f 100644 --- a/casparser/process/nsdl_statement.py +++ b/casparser/process/nsdl_statement.py @@ -12,6 +12,7 @@ DEMAT_HEADER_RE, DEMAT_MF_HEADER_RE, DEMAT_MF_TYPE_RE, + DEMAT_STATEMENT_PERIOD_ALT_RE, DEMAT_STATEMENT_PERIOD_RE, NSDL_CDSL_HOLDINGS_RE, NSDL_EQ_RE, @@ -19,15 +20,25 @@ NSDL_MF_RE, ) +# Search window for statement period (CDSL/NSDL block order can put it after 1000 chars) +_HEADER_SEARCH_LEN = 5000 + def parse_header(text): """ Parse CAS header data. 
:param text: CAS text """ + search_text = text[:_HEADER_SEARCH_LEN] if m := re.search( DEMAT_STATEMENT_PERIOD_RE, - text, + search_text, + re.DOTALL | re.MULTILINE | re.I, + ): + return m.groupdict() + if m := re.search( + DEMAT_STATEMENT_PERIOD_ALT_RE, + search_text, re.DOTALL | re.MULTILINE | re.I, ): return m.groupdict() @@ -35,7 +46,7 @@ def parse_header(text): def process_nsdl_text(text): - hdr_data = parse_header(text[:1000]) + hdr_data = parse_header(text) statement_period = StatementPeriod(from_=hdr_data["from"], to=hdr_data["to"]) accounts = re.findall( DEMAT_HEADER_RE, diff --git a/casparser/process/regex.py b/casparser/process/regex.py index 373611c..be0237c 100644 --- a/casparser/process/regex.py +++ b/casparser/process/regex.py @@ -54,6 +54,11 @@ r"for\s+the\s+period\s+from\s+(?P<from>\d{2}-[a-zA-Z0-9]{2,3}-\d{4})" r"\s+to\s+(?P<to>\d{2}-[a-zA-Z0-9]{2,3}-\d{4})" ) +# Alternate wording used in some CDSL statements (e.g. inbound) +DEMAT_STATEMENT_PERIOD_ALT_RE = ( + r"Statement\s+for\s+the\s+period\s+from\s+(?P<from>\d{2}-[a-zA-Z0-9]{2,3}-\d{4})" + r"\s+to\s+(?P<to>\d{2}-[a-zA-Z0-9]{2,3}-\d{4})" +) DEMAT_HEADER_RE = ( r"((?:CDSL|NSDL)\s+demat\s+account)\s+(.+?)\s*DP\s*Id\s*:\s*(.+?)" r"\s*Client\s*Id\s*:\s*(\d+)\s+(\d+)\s+([\d,.]+)"