Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 60 additions & 6 deletions src/concall_tools.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,19 @@
import fitz
import enchant

from speakers.extraction import Speaker

from speakers.extraction import (
Speaker,
Speaker_with_is_management
)
from speakers.extraction import (
get_speakers_from_text as _get_speakers_from_text,
get_speakers_in_bold as _get_speakers_in_bold,
get_speakers_capitals as _get_speakers_capitals,
get_lines as _get_lines,
remove_last_char_if_colon as _remove_last_char_if_colon
)
from speakers.extraction import get_speakers_in_bold as _get_speakers_in_bold



def get_speakers(pdf_name, algorithm="auto"):
    """Return the speakers found in the PDF *pdf_name*.

    Args:
        pdf_name: Path of the PDF. It is opened with ``fitz.open`` and is
            also handed, as a path, to ``_get_speakers_capitals``.
        algorithm: Selects which extractors run. One of

            - bold: return the result of ``_get_speakers_in_bold``.
            - plain: return the result of ``_get_speakers_from_text``.
            - auto: run ``_get_speakers_in_bold``, fall back to
              ``_get_speakers_from_text`` when that yields nothing, then
              reconcile with ``_get_speakers_capitals`` (see Returns).
            - Capitals: skip the bold/plain extractors and keep only the
              ``_get_speakers_capitals`` entries whose ``firm`` is set.

    Returns:
        For ``bold`` and ``plain``, whatever the selected extractor
        returned. Otherwise a list of the ``_get_speakers_capitals``
        entries whose ``name`` was also found by the bold/plain step or
        whose ``firm`` is not ``None``.

    Raises:
        ValueError: If *algorithm* is not one of the accepted values.
    """
    if algorithm not in ("bold", "plain", "auto", "Capitals"):
        raise ValueError(
            "algorithm must be one of 'bold', 'plain', 'auto', 'Capitals'"
        )

    speakers = []
    doc = fitz.open(pdf_name)
    try:
        if algorithm in ("auto", "bold"):
            speakers = _get_speakers_in_bold(doc)
        # Plain-text extraction is only a fallback when bold found nothing.
        if not speakers and algorithm in ("auto", "plain"):
            speakers = _get_speakers_from_text(doc)
    finally:
        # The document is only needed by the two extractors above.
        doc.close()

    if algorithm in ("bold", "plain"):
        return speakers

    # Only the name (first field) is needed, so index instead of unpacking;
    # this keeps working if a speaker tuple carries more than two fields.
    names_from_plain_bold = {speaker[0] for speaker in speakers}
    return [
        speaker
        for speaker in _get_speakers_capitals(pdf_name)
        if speaker.name in names_from_plain_bold or speaker.firm is not None
    ]

def get_conversations(pdf_name):
    """Pair every speaker turn in the PDF *pdf_name* with its text.

    The speakers come from ``get_speakers(pdf_name)`` and the lines from
    ``_get_lines(pdf_name)``. Each line is passed through
    ``_remove_last_char_if_colon``; a line equal to a speaker's name
    (the first field of the speaker) starts a new turn, and every other
    line is appended to the current turn followed by a single space.
    Lines that appear before the first speaker heading are discarded.

    Args:
        pdf_name: Path of the PDF, forwarded to ``get_speakers`` and
            ``_get_lines``.

    Returns:
        A list of ``(speaker, text)`` tuples in the order the turns occur.
        A turn with no text yields an empty string. If several entries
        returned by ``get_speakers`` share a name, one tuple per entry is
        emitted for each of that name's turns.
    """
    speakers_common = get_speakers(pdf_name)

    # name -> every speaker entry with that name, in their original order.
    speakers_by_name = {}
    for speaker in speakers_common:
        speakers_by_name.setdefault(speaker[0], []).append(speaker)

    # One (name, fragments) record per speaker heading. Keeping the text
    # next to its heading means a trailing heading with no text cannot
    # leave the two sequences misaligned.
    turns = []
    fragments = None
    for raw_line in _get_lines(pdf_name):
        line = _remove_last_char_if_colon(raw_line)
        if line in speakers_by_name:
            fragments = []
            turns.append((line, fragments))
        elif fragments is not None:
            fragments.append(line + " ")
        # else: text before the first speaker heading is dropped.

    conversation_with_speaker = []
    for name, parts in turns:
        text = "".join(parts)
        for speaker in speakers_by_name[name]:
            conversation_with_speaker.append((speaker, text))
    return conversation_with_speaker

Loading