Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .devcontainer
Submodule .devcontainer added at 0ddb12
11 changes: 11 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
---
version: 2
updates:
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "daily"
- package-ecosystem: "gitsubmodule"
directory: "/"
schedule:
interval: "daily"
19 changes: 19 additions & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
name: CI

on:
push:
branches: [ main ]
tags: [ v*.*.* ]

pull_request:
branches: [ main ]
types:
- synchronize
- opened
- reopened

jobs:
call_ci:
uses: EffectiveRange/ci-workflows/.github/workflows/python-ci.yaml@latest-python
with:
coverage-threshold: 90
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ share/python-wheels/
.installed.cfg
*.egg
MANIFEST
tests/generated/

# PyInstaller
# Usually these files are written by a python script from a template
Expand Down Expand Up @@ -187,6 +188,7 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. However, if you prefer,
# you could uncomment the following to ignore the entire vscode folder
# .vscode/
.idea

# Ruff stuff:
.ruff_cache/
Expand Down
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule ".devcontainer"]
path = .devcontainer
url = https://github.com/EffectiveRange/devcontainer-defs
25 changes: 24 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1 +1,24 @@
# table-differ
# table-differ

Creates a diff of two tabular data files (CSV, Excel, etc.) based on a specified key column.
It identifies the added, removed, and common rows between the two files, prints each set to standard output, and can optionally write each set to its own Excel file.

```bash
usage: table-differ.py [-h] --key KEY [--exclude [EXCLUDE ...]] [--added-xlsx ADDED_XLSX] [--removed-xlsx REMOVED_XLSX] [--common-xlsx COMMON_XLSX] old_file new_file

positional arguments:
old_file Path to old file
new_file Path to new file

options:
-h, --help show this help message and exit
--key KEY Key column name
--exclude [EXCLUDE ...]
Columns to exclude from comparison/output
--added-xlsx ADDED_XLSX
Path to write the added rows as an Excel file
--removed-xlsx REMOVED_XLSX
Path to write the removed rows as an Excel file
--common-xlsx COMMON_XLSX
Path to write the common rows as an Excel file
```
120 changes: 120 additions & 0 deletions bin/table-differ.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
#!/usr/bin/env python3

# SPDX-FileCopyrightText: 2024 Ferenc Nandor Janky <ferenj@effective-range.com>
# SPDX-FileCopyrightText: 2024 Attila Gombos <attila.gombos@effective-range.com>
# SPDX-License-Identifier: MIT

import argparse
from pathlib import Path
from typing import List, Optional

import pandas as pd


def _read_table(path: str) -> pd.DataFrame:
    """Load a tabular file into a DataFrame, picking the reader by extension.

    ``.csv`` and ``.tsv`` files are parsed as delimited text; any other
    extension is handed to ``pandas.read_excel``.
    """
    suffix = Path(path).suffix.lower()
    if suffix == ".csv":
        return pd.read_csv(path)
    if suffix == ".tsv":
        return pd.read_csv(path, sep="\t")
    return pd.read_excel(path)


def compare_excel(
    old_file: str,
    new_file: str,
    key: str,
    exclude: List[str],
    added_xlsx: Optional[str] = None,
    removed_xlsx: Optional[str] = None,
    common_xlsx: Optional[str] = None,
) -> None:
    """Print, and optionally export, the row-level difference of two tables.

    Rows are matched on the ``key`` column. Three sets are reported:

    * removed - rows of ``old_file`` whose key is absent from ``new_file``
    * added   - rows of ``new_file`` whose key is absent from ``old_file``
    * common  - rows of ``new_file`` whose key is also present in ``old_file``

    Every set keeps the row order of the file it was taken from, so the
    output is reproducible from run to run, and uses the column order of
    ``new_file``.

    Args:
        old_file: Path to the old table (Excel, ``.csv`` or ``.tsv``).
        new_file: Path to the new table (Excel, ``.csv`` or ``.tsv``).
        key: Name of the column used to match rows between the two tables.
        exclude: Column names to drop from both tables before comparing;
            names that do not exist are ignored.
        added_xlsx: Optional path of an Excel file to receive the added rows.
        removed_xlsx: Optional path of an Excel file to receive the removed rows.
        common_xlsx: Optional path of an Excel file to receive the common rows.

    Raises:
        ValueError: If ``key`` is missing from either table (including when
            it was listed in ``exclude``).
    """
    old_df = _read_table(old_file).drop(columns=exclude, errors="ignore")
    new_df = _read_table(new_file).drop(columns=exclude, errors="ignore")

    if key not in old_df.columns or key not in new_df.columns:
        raise ValueError(f"Key column '{key}' not found in both files")

    # The new file dictates the column layout of every reported set.
    column_order = new_df.columns.tolist()

    # Boolean masks instead of set arithmetic: membership is the same, but the
    # selected rows stay in file order rather than in arbitrary set order.
    old_key_in_new = old_df[key].isin(new_df[key])
    new_key_in_old = new_df[key].isin(old_df[key])

    def format_output(df: pd.DataFrame) -> pd.DataFrame:
        return df.reindex(columns=column_order).reset_index(drop=True)

    removed = format_output(old_df[~old_key_in_new])
    added = format_output(new_df[~new_key_in_old])
    common = format_output(new_df[new_key_in_old])

    for title, frame in (("Removed", removed), ("Added", added), ("Common", common)):
        print(f"\n=== {title} rows ===")
        print(frame.to_string(index=False))

    for target, frame in ((added_xlsx, added), (removed_xlsx, removed), (common_xlsx, common)):
        if target:
            # Create the destination directory on demand so nested paths work.
            Path(target).parent.mkdir(parents=True, exist_ok=True)
            frame.to_excel(target, index=False)


def main(argv: Optional[List[str]] = None) -> None:
    """Parse the command line and run the table comparison.

    Args:
        argv: Argument list to parse; ``None`` makes argparse fall back to
            ``sys.argv[1:]``.
    """
    cli = argparse.ArgumentParser()

    cli.add_argument("old_file", type=str, help="Path to old file")
    cli.add_argument("new_file", type=str, help="Path to new file")
    cli.add_argument("--key", required=True, help="Key column name")
    cli.add_argument("--exclude", nargs="*", default=[], help="Columns to exclude from comparison/output")

    # The three export switches differ only in flag name and help text.
    export_switches = (
        ("--added-xlsx", "Path to write the added rows as an Excel file"),
        ("--removed-xlsx", "Path to write the removed rows as an Excel file"),
        ("--common-xlsx", "Path to write the common rows as an Excel file"),
    )
    for flag, description in export_switches:
        cli.add_argument(flag, type=str, default=None, help=description)

    options = cli.parse_args(argv)

    compare_excel(
        old_file=options.old_file,
        new_file=options.new_file,
        key=options.key,
        exclude=options.exclude,
        added_xlsx=options.added_xlsx,
        removed_xlsx=options.removed_xlsx,
        common_xlsx=options.common_xlsx,
    )


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
58 changes: 58 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
[pack-python]
packaging =
wheel
fpm-deb
wheel-deb = -A=all

[mypy]
packages = bin
strict = True

[flake8]
exclude = build,dist,.venv,.eggs
max-line-length = 120
max-complexity = 10
count = True
statistics = True
show-source = True
per-file-ignores =
# F401: imported but unused
# F403: import * used; unable to detect undefined names
__init__.py: F401,F403

[tool:pytest]
addopts = --capture=no --verbose
python_files = *Test.py
python_classes = *Test

[coverage:run]
relative_files = true
branch = True
source = bin

[coverage:report]
; Regexes for lines to exclude from consideration
exclude_also =
; Don't complain about missing debug-only code:
def __repr__
if self\.debug

; Don't complain if tests don't hit defensive assertion code:
raise AssertionError
raise NotImplementedError

; Don't complain if non-runnable code isn't run:
if 0:
if __name__ == .__main__.:

; Don't complain about abstract methods, they aren't run:
@(abc\.)?abstractmethod

ignore_errors = True
skip_empty = True

[coverage:html]
directory = coverage/html

[coverage:json]
output = coverage/coverage.json
13 changes: 13 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from setuptools import setup

# Packaging metadata for the table-differ command-line tool.
setup(
    name="table-differ",
    # No hard-coded version: use_scm_version asks setuptools_scm to supply it.
    use_scm_version=True,
    setup_requires=["setuptools_scm"],
    description="Excel table difference generator tool",
    long_description="Excel table difference generator tool",
    author="Ferenc Nandor Janky & Attila Gombos",
    author_email="info@effective-range.com",
    # The tool ships as a plain script rather than an importable package.
    scripts=["bin/table-differ.py"],
    install_requires=["openpyxl", "pandas"],
)
Binary file added tests/expected/added.xlsx
Binary file not shown.
Binary file added tests/expected/common.xlsx
Binary file not shown.
74 changes: 74 additions & 0 deletions tests/expected/output.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@

=== Removed rows ===
refdes qty value mpn manufacturer
U503 1 MIC2876-5.25YMT-TR MIC2876-5.25YMT-TR Microchip Technology
D7 1 TS4148 RCG TS4148 RCG Taiwan Semiconductor Corporation
U5,U2,U4,U1,U3 5 74LVC1G07 SN74LVC1G07DPWR NaN
D602 1 PWR_ESD PESD0603-240 Littelfuse Inc.
FB601 1 FerriteBead BLE32PN260SH1L Murata Electronics
J101 1 VIN_2 691137710002 NaN
J2,J4 2 JMPRS 54202-T0804LF NaN

=== Added rows ===
refdes qty value mpn manufacturer
C522 1 39nF CC0402JRX7R9BB393 Murata Electronics
C210 1 200nF C1005X7R1E224K050BB NaN
U704 1 TMUX121NKGR TMUX121NKGR NaN
J102 1 VIN_2 1-2834011-2 NaN
U901 1 MC3416 MC3416 NaN
R601,R602,R603 3 0 ERJ-2GE0R00X Panasonic Electronic Components
C514,C518,C519,C520 4 47uF GRM21BR61A476ME15L Murata Electronics
U203 1 MCP3425A0T-E/CH MCP3425A0T-E/CH NaN
R511 1 365k ERJ-2RKF3653X Panasonic Electronic Components
U802 1 TPS22919DCKR TPS22919DCKR NaN
J803,J804 2 Conn_01x03 0533980371 NaN
U503 1 TPS61288RQQR TPS61288RQQR NaN
D801,D802,D804 3 D_Schottky_Filled CFSH05-20L TR PBFREE NaN
C521 1 100pF C0402C101J5RACAUTO Murata Electronics
R510 1 48.7k ERA-2AEB4872X Panasonic Electronic Components
L502 1 1.0001uH HPAL1V0630-1R0-R Murata Electronics
U601,U801 2 74LVC3G07GS_115 SN74LVC3G07DCUR NaN
R513 1 49.9k ERJ-2RKF4992X Panasonic Electronic Components
C517 1 2.2uF GRM155Z71A225KE44D Murata Electronics

=== Common rows ===
refdes qty value mpn manufacturer
L501 1 1uH DFE322520FD-1R0M=P2 Murata Electronics
Module301 1 ComputeModule5-CM5 10164227-1001A1RLF NaN
R604,R605,R610 3 5k1 ERA-2AEB512X Panasonic Electronic Components
Q201,Q801,Q802 3 Q_PMOS_GSD GSF3407 NaN
J501 1 BATT_CON 533750310 Molex
L201 1 2.2uH IHLP1210BZEZ2R2M5A Murata Electronics
D201 1 D_Zener BZX884B5V1 NaN
R202,R205,R301,R302,R401 5 2.2K 1% ERA-2AED222X NaN
U703 1 PIC18F16Q20 PIC18F16Q20-I/REB Microchip Technology
C203,C204 2 10uF CL31B106KBHNNNE Cal-Chip Electronics, Inc.
U702 1 SN74CBTLV3257RSVR SN74CBTLV3257RSVR Texas Instruments
R503,R504,R701,R704,R707,R708 6 1M ERJ-2RKF1004X Panasonic Electronic Components
C221,C703,C706,C707,C902 5 1nF GCM155R72A102KA37D Murata Electronics
R209,R505 2 887 ERJ-2RKF8870X NaN
U201 1 LM63635DQDRRRQ1 LM63635DQDRRRQ1 NaN
J401,J402,J801,J802 4 Conn_01x22_Female FH12-22S-0.5SH(55) NaN
U202 1 TPS2121RUXR TPS2121RUXR NaN
C217,C218 2 100uF GRM21BR60J107ME15L NaN
U602 1 FT230XQ FT230XQ-T FTDI, Future Technology Devices International Ltd
R807,R808,R809 3 62 ERJ-2RKF62R0X Panasonic Electronic Components
J101 1 VIN_2_ALT 533750210 NaN
R201,R203,R204,R206,R208,R210,R501,R502,R506,R507,R512,R609,R14,R702,R703,R705,R706,R709,R801,R802,R803,R804,R806,R811,R812,R817 26 10k ERJ-2RKF1002X Panasonic Electronic Components
BT301 1 Battery_Cell 3000 NaN
U501 1 BQ25622 BQ25622RYKR Texas Instruments
C604,C605 2 56pF GRT1555C1H560JA02J Murata Electronics
C201,C202,C205,C206,C212,C215,C216,C219,C502,C505,C507,C513,C516,C601,C606,C609,C610,C612,C701,C702,C704,C708,C709,C710,C801,C803,C804,C805,C901 29 100nF GRM155R71E104KE14J NaN
D602,D603 2 ESD ESDUD05BFX WeEn Semiconductors
R207 1 33k ERA-2AEB333X Panasonic Electronic Components
U502 1 NCP164CSN330T1G NCP164CSN330T1G NaN
U701,U803 2 GSF7002DW GSF7002DW Good-Ark Semiconductor
SW801 1 ~ 228HDSARBFR NaN
R606,R607 2 33 ERJ-2RKF33R0X Panasonic Electronic Components
R508 1 5k3 RT0402DRE075K3L Panasonic Electronic Components
C208,C209,C213,C214,C501,C503,C504,C506,C509,C510,C511,C802 12 22uF GRM21BR61E226ME44K NaN
C207,C220,C508,C512,C515,C607,C608,C611,C705 9 4.7uF GRM188R6YA475KE15D Murata Electronics
P101,P102 2 USB4105GFA120 USB4105-GF-A-120 NaN
R805,R810,R813,R814,R815,R816 6 1k ERJ-2RKF1001X NaN
R509 1 30k1 ERA-2AEB3012X Panasonic Electronic Components
BZ801 1 Buzzer OWMB-505020S-40-12D Ole Wolff Electronics Inc
Binary file added tests/expected/removed.xlsx
Binary file not shown.
Loading
Loading