Skip to content

pfptcommunity/senderstats-core

Folders and files

Name
Last commit message
Last commit date

Latest commit

 

History

6 Commits
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 

Repository files navigation

senderstats-core

Core processing engine for SenderStats.

This library provides the underlying logic for analyzing outbound email traffic, identifying top senders, and generating structured reporting data from Proofpoint Smart Search exports or CSV datasets.


Installation

pip install senderstats-core

General Usage

from types import SimpleNamespace
from senderstats_core.common.defaults import *
from senderstats_core.data.data_source_type import DataSourceType
from senderstats_core.processing.config_manager import ConfigManager
from senderstats_core.processing.data_source_manager import DataSourceManager
from senderstats_core.processing.pipeline_manager import PipelineManager
from senderstats_core.processing.pipeline_processor import PipelineProcessor
from senderstats_core.reporting.pipeline_processor_report import PipelineProcessorReport

def process():
    """Run one end-to-end SenderStats pass: configure, process inputs, write the report.

    Builds an options namespace, wraps it in a ConfigManager, runs the
    processing pipeline over the configured data source, prints the filter
    summary, and writes the report to the configured output file.

    Raises:
        ValueError: If the configuration ends up with no input files.
    """
    # All options are supplied in a single call; the keyword order mirrors
    # the order in which the attributes would otherwise be assigned.
    args = SimpleNamespace(
        debug=True,
        source_type=DataSourceType.CSV,
        input_files=['file1.csv', 'file2.csv'],
        output_file='output.xlsx',
        # Field settings, taken from the DEFAULT_* values
        ip_field=DEFAULT_IP_FIELD,
        mfrom_field=DEFAULT_MFROM_FIELD,
        hfrom_field=DEFAULT_HFROM_FIELD,
        rcpts_field=DEFAULT_RCPTS_FIELD,
        rpath_field=DEFAULT_RPATH_FIELD,
        msgid_field=DEFAULT_MSGID_FIELD,
        subject_field=DEFAULT_SUBJECT_FIELD,
        msgsz_field=DEFAULT_MSGSZ_FIELD,
        date_field=DEFAULT_DATE_FIELD,
        # Optional behaviours: everything off except with_probability
        gen_hfrom=False,
        gen_rpath=False,
        gen_alignment=False,
        gen_msgid=False,
        expand_recipients=False,
        no_display=False,
        remove_prvs=False,
        decode_srs=False,
        normalize_bounces=False,
        normalize_entropy=False,
        no_empty_hfrom=False,
        sample_subject=False,
        with_probability=True,
        # Exclusion / restriction lists and related switches
        exclude_ips=[],
        exclude_domains=[],
        restrict_domains=[],
        exclude_senders=[],
        exclude_dup_msgids=False,
        date_format=DEFAULT_DATE_FORMAT,
        no_default_exclude_domains=False,
        no_default_exclude_ips=False,
    )

    config = ConfigManager(args)

    # Having nothing to read is reported as an error rather than a quiet return.
    if not config.input_files:
        raise ValueError("No input files to read. Please check the input files exist.")

    # Print the filtering options currently in effect.
    config.display_filter_criteria()

    # The data source manager can be extended to read streams, etc.;
    # this example selects the CSV source type above.
    sources = DataSourceManager(config)

    # The pipeline manager decides which filters, transforms and processors
    # are triggered, and in what order.
    pipeline = PipelineManager(config)

    # The pipeline processor works against the DataSource interface and keeps
    # reading until all data has been processed.
    runner = PipelineProcessor(sources, pipeline)
    runner.process_data()

    # Print a summary of the data that was processed.
    filter_manager = pipeline.get_filter_manager()
    filter_manager.display_summary()

    # Build the report from the reportable processing nodes.
    report_writer = PipelineProcessorReport(
        config.output_file,
        pipeline,
        config.with_probability,
    )
    report_writer.generate()

    # Closing the report saves the data to the output file.
    report_writer.close()

About

Core processing library for CSV and Proofpoint Smart Search data

Resources

License

Stars

Watchers

Forks

Packages

 
 
 

Contributors

Languages