-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy path5-makeVideo.py
More file actions
1136 lines (1016 loc) · 56.1 KB
/
5-makeVideo.py
File metadata and controls
1136 lines (1016 loc) · 56.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
import datetime, os, sys, argparse, glob, re, json, itertools
pathToUtils = "lecture-daemon_data"
###append the path to basic data files
sys.path.append(pathToUtils)
import fileUtils
import pandas as pd
import srt #python3 -m pip install srt
from moviepy.editor import VideoFileClip #python3 -m pip install moviepy
from moviepy.editor import *
from moviepy.audio.AudioClip import AudioArrayClip
from PIL import Image
from pydub import AudioSegment #python3 -m pip install pydub
import audiosegment as audiosegwrap #python3 -m pip install audiosegment
import ffmpeg #python3 -m pip install ffmpeg-python
import numpy as np
#brew install rubberband
import pyrubberband as pyrb #python3 -m pip install pyrubberband
# Filler image shown when the first timed entry does not begin at time zero.
theLeaderImage = "lecture-daemon_data/leaderImage.png"
# File extensions used to classify each slide entry by media type.
videoSuffixList = ['.mp4', '.m4v', '.mov']
audioSuffixList = ['.aiff', '.mp3', '.wav', ".m4a"]
imageSuffixList = ['.png','.jpg','.jpeg','.gif']
# Seconds added to text-overlay subtitle times to allow for the opening slate.
theSlateDuration = 3
# Seconds the final slide is held beyond the lecture stop time.
outroDuration = 5
def booleanCheck(theVariable):
    """Report whether a string spells "true", ignoring letter case.

    theVariable is assumed to be a string; every other spelling gives False.
    """
    return theVariable.lower() == "true"
def pathExistsMake(thePath, makeBool=False):
    """Check that thePath exists, optionally creating it, and return its absolute path.

    Parameters:
        thePath  -- file-system path to look for.
        makeBool -- when False (the default) a missing path ends the program
                    via sys.exit(); otherwise the missing directory is created.

    Returns:
        The absolute form of thePath.

    Raises:
        SystemExit when the path is missing and makeBool is False.
    """
    theFeedback=" Path '%s' found: %s"

    if os.path.exists(thePath):
        print(theFeedback % (thePath, "TRUE"))
        return os.path.abspath(thePath)

    print(theFeedback % (thePath, "FALSE"))
    # Equality (not truthiness) is kept on purpose so that callers passing
    # values other than True/False behave exactly as before.
    if makeBool == False:
        print(" Exiting.")
        sys.exit()

    print(" Creating dir '%s'" % (thePath))
    # makedirs also builds missing parent directories (os.mkdir raised
    # FileNotFoundError for nested paths); exist_ok tolerates the directory
    # appearing between the existence check above and this call.
    os.makedirs(thePath, exist_ok=True)
    return os.path.abspath(thePath)
def indexListMatchingElement(theList, theElement):
    """Return the positions in theList whose str() form is NOT equal to theElement.

    NOTE: despite the function name, the indices returned are those of the
    entries that differ from theElement (each entry is compared after being
    converted with str()).
    """
    return [
        position
        for position, entry in enumerate(theList)
        if str(entry) != theElement
    ]
def pydub_to_moviepy(theAudio, frameRate):
    """Convert a pydub-style audio segment into a moviepy AudioArrayClip.

    Parameters:
        theAudio  -- audio segment exposing a `channels` attribute.
        frameRate -- value handed to AudioArrayClip as its fps.

    Returns:
        An AudioArrayClip whose `end` has been set to its own duration.
    """
    if theAudio.channels < 2:
        # moviepy needs a two-channel array, so the mono track is duplicated.
        print(" Changing number of audio channels to two(2)...")
        wrappedAudio = audiosegwrap.from_mono_audiosegments(theAudio, theAudio)
    else:
        # Adding to an empty audiosegment wrapper yields the wrapper type,
        # which provides to_numpy_array().
        wrappedAudio = audiosegwrap.empty() + theAudio

    print(" Converting audio to numpy array...")
    sampleArray = wrappedAudio.to_numpy_array()

    # Scale the integer samples down for moviepy; the factor is ~1/32768,
    # presumably assuming 16-bit samples -- TODO confirm.
    print(" 32bit numpy array...")
    scaledArray = sampleArray * 3.05175781e-05

    theClip = AudioArrayClip(scaledArray, fps=frameRate)
    theClip.end = theClip.duration
    return theClip
# def parse_ass_positioning(theText):
# #look of '{\an#}'' where # is between 1 &9, inclusive
# #a failure will have a single element in the list (just text)
# #a success will return [#,string]
# #where # is the ASS keypad position on screen
# #in the event no # is found, return a default of 2
# theMatchedSplit = re.split('{an([1-9])}(.*)', theText)
# if len(theMatchedSplit) < 2:
# theMatchedSplit.insert(0,'')
# theMatchedSplit.insert(1,2)
# return theMatchedSplit
# def ass_position_to_imageMagick_position(theNumber):
# #http://docs.aegisub.org/3.2/ASS_Tags/
# #https://imagemagick.org/script/command-line-options.php
# #ASS number tags map like keypad #s
# #the gravity cardinal values in imagemagick seem e-w inverted
# #why?
# if theNumber == 1: return "SouthWest"
# elif theNumber == 2: return "South"
# elif theNumber == 3: return "SouthEast"
# elif theNumber == 4: return "West"
# elif theNumber == 5: return "Center"
# elif theNumber == 6: return "East"
# elif theNumber == 7: return "NorthWest"
# elif theNumber == 8: return "North"
# elif theNumber == 9: return "NorthEast"
# else: return "Center"
def resize(t,holdTime,theStartSizeW,theEndSizeW):
    """Return the scale factor to apply at time t.

    Before holdTime (converted with float()) the factor is 1.0; from holdTime
    onward it is theEndSizeW. theStartSizeW is accepted but not used.
    """
    return 1.0 if t < float(holdTime) else theEndSizeW
def _resolvePlacement(value, frameLength, clipLength, nearName, farName):
    """Turn one placement value into an offset along a single axis.

    The keywords nearName / 'center' / farName map to the offsets that align
    the clip's near edge, centre or far edge with the frame. Any other string
    is parsed as a number, because meta-data values arrive as text. Values
    that are not strings, or strings that are not numeric, are returned
    unchanged.
    """
    if not isinstance(value, str):
        return value
    if value == nearName:
        return 0
    if value == 'center':
        return frameLength / 2 - clipLength / 2
    if value == farName:
        return frameLength - clipLength
    try:
        return float(value)
    except ValueError:
        # Unknown keyword: hand it back untouched, as the original code did.
        return value


def move(t,holdTime,startX,endX,startY,endY,theSlateSize,theClipSize):
    """Return the (x, y) position of a clip that jumps from start to end at holdTime.

    Parameters:
        t            -- current time.
        holdTime     -- time before which the clip stays at the start position
                        (converted with float()).
        startX/endX  -- number, numeric string, or 'left' / 'center' / 'right'.
        startY/endY  -- number, numeric string, or 'top' / 'center' / 'bottom'.
        theSlateSize -- (width, height) of the frame.
        theClipSize  -- (width, height) of the clip.

    Returns:
        (startX, startY) while t < holdTime, otherwise (endX, endY), with
        keywords and numeric strings resolved to numbers.
    """
    holdTime = float(holdTime)
    frameW, frameH = theSlateSize[0], theSlateSize[1]
    clipW, clipH = theClipSize[0], theClipSize[1]

    if t < holdTime:
        return (
            _resolvePlacement(startX, frameW, clipW, 'left', 'right'),
            _resolvePlacement(startY, frameH, clipH, 'top', 'bottom'),
        )
    return (
        _resolvePlacement(endX, frameW, clipW, 'left', 'right'),
        _resolvePlacement(endY, frameH, clipH, 'top', 'bottom'),
    )


def calcPos(t,holdTime,startX,endX,startY,endY,theSlateSize,theClipSize):
    """Return the (x, y) position of a clip sliding in a straight line after holdTime.

    The clip rests at the start position until holdTime, then travels toward
    the end position. Horizontal speed is one clip width per time unit, and y
    follows the straight line joining start and end. A purely vertical move
    travels at one clip height per time unit. The position never overshoots
    the end point.

    Parameters are the same as for move().

    Returns:
        A tuple (x, y).
    """
    holdTime = float(holdTime)
    frameW, frameH = theSlateSize[0], theSlateSize[1]
    clipW, clipH = theClipSize[0], theClipSize[1]

    startX = _resolvePlacement(startX, frameW, clipW, 'left', 'right')
    endX = _resolvePlacement(endX, frameW, clipW, 'left', 'right')
    startY = _resolvePlacement(startY, frameH, clipH, 'top', 'bottom')
    endY = _resolvePlacement(endY, frameH, clipH, 'top', 'bottom')

    if t < holdTime:
        return (startX, startY)

    elapsed = t - holdTime
    run = endX - startX
    rise = endY - startY

    if run != 0:
        travelled = clipW * elapsed
        if run < 0:
            x = max(endX, startX - travelled)
        else:
            x = min(endX, startX + travelled)
        # Interpolate along the start->end line. The previous intercept
        # (startY - slope) only passed through the start point when
        # startX happened to equal 1.
        y = startY + rise * (x - startX) / run
        return (x, y)

    # Vertical (or zero-length) move: the slope is undefined, so step along y.
    travelled = clipH * elapsed
    if rise < 0:
        y = max(endY, startY - travelled)
    else:
        y = min(endY, startY + travelled)
    return (startX, y)
# def audio_duck(sound, position, duration, gain=-15.0, fade_duration=500):
# #stackoverflow.com/questions/33880261/bad-quality-after-multiple-fade-effect-with-pydub
# #retrieved 2020-0608
# """
# sound - an AudioSegment object
# position - how many milliseconds into the sound the duck should
# begin (this is where overlaid audio could begin, the fade down
# will happen before this point)
# duration - how long should the sound stay quiet (milliseconds)
# gain - how much quieter should the sound get (in dB)
# fade_duration - how long sound the fades last (in milliseconds)
# """
# # this part is from the beginning until the end of the ducked section
# print("duck duck goose")
# print(position)
# print(duration)
# print(gain)
# print(fade_duration)
# the_prefix = sound[0:position]
# the_ducked = sound[position:position+duration]
# the_suffix = sound[position+duration:]
# print(len(the_prefix))
# print(len(the_ducked))
# print(len(the_suffix))
# sys.exit()
# # first_part = sound[:position+duration]
# # first_part = first_part.fade(to_gain=gain, end=position, duration=fade_duration)
# # # this part begins where the fade_up happens (will just fade in)
# # second_part = sound[position+duration:]
# # second_part = second_part.fade(from_gain=gain, start=0, duration=fade_duration)
# # return first_part + second_part
def readStartStopTimes(theAlignmentFile, metaDataList):
    """Work out the lecture start and stop times from the alignment table.

    Parameters:
        theAlignmentFile -- table indexable by the column names 'meta',
                            'start' and 'stop' (the 'stop' column must
                            support .iat).
        metaDataList     -- ignored; the 'meta' column is re-read from the table.

    Returns:
        [lectureStartTime, lectureStopTime]. The start defaults to 0.0 and the
        stop defaults to the last value of the 'stop' column when the 'meta'
        column holds no 'start' / 'stop' marker.
    """
    # This fails if the last row has no "stop" value; the mp3 length might be
    # needed as a fallback.
    fallbackStop = theAlignmentFile['stop'].iat[-1]
    metaColumn = list(theAlignmentFile['meta'])

    if 'start' not in metaColumn:
        lectureStartTime = 0.0 #seconds
        print(" No explicit start. Start set to: %s" % lectureStartTime)
    else:
        startColumn = list(theAlignmentFile['start'])
        lectureStartTime = startColumn[metaColumn.index('start')]
        print(" Lecture start meta data found: %s" % lectureStartTime)

    if 'stop' not in metaColumn:
        lectureStopTime = fallbackStop
        print(" No explicit stop. Stop set to: %s" % lectureStopTime)
    else:
        stopColumn = list(theAlignmentFile['stop'])
        lectureStopTime = stopColumn[metaColumn.index('stop')]
        print(" Lecture stop meta data found: %s" % lectureStopTime)

    return [lectureStartTime, lectureStopTime]
def makeSRTVFile(theLectureName, theSRTVIndexList, theSlideList, theSlideDir, metaDataList, startDataList, stopDataList, lectureStartTime, lectureStopTime, theSRTVDir, theSRTDir):
    """Build the timed slide/media list for a lecture and write it to disk.

    Two passes are made:
      1. Every entry of metaDataList is wrapped in braces and parsed as JSON;
         entries holding a 'cut' object with a 'duration' become subtitles
         whose content is "na" and whose proprietary field is the parsed dict.
      2. Every index in theSRTVIndexList is classified by the file suffix of
         its slide entry (video, audio, text overlay, or anything else, which
         is treated as a still image) and given a start and stop time.

    Text overlays are collected separately and written to
    <theSRTDir>/<theLectureName>.srt (only when there is at least one).
    All other entries are written to <theSRTVDir>/<theLectureName>.srtv.

    Returns:
        The list of srt.Subtitle objects obtained by composing and re-parsing
        the .srtv content.

    NOTE(review): the source this block was recovered from had lost its
    indentation; the nesting below was reconstructed from the if/elif/else
    pairing and should be checked against the upstream file.
    """
    theSlideDir = os.path.join(theSlideDir,theLectureName)
    theSRTVList = []
    theSRTList = []
    #I should have made things a form of json form the start
    # :( STH 2020-0805
    ###some things will just be in the meta column and not have a corresponding
    ###item in the slide column
    jsonifier = '{%s}'  # unused below; the same template is written inline
    for i in range(len(metaDataList)):
        theStr = metaDataList[i]
        jsonText = '{%s}' % theStr
        try:
            jsonDict = json.loads(jsonText)
            if 'cut' in jsonDict.keys():
                if 'duration' in jsonDict['cut'].keys():
                    theDuration = float(jsonDict['cut']['duration'])
                    theStart = float(startDataList[i])
                    theStop = theStart+theDuration
                    theIndex = len(theSRTVList)+1
                    print(" Cut at %s --> %s" %(datetime.timedelta(seconds=theStart+float(lectureStartTime)), datetime.timedelta(seconds=theStop+float(lectureStartTime))))
                    theStart = datetime.timedelta(seconds=theStart) #convert to datetime
                    theStop = datetime.timedelta(seconds=theStop) #convert to datetime
                    theSRTVList.append(srt.Subtitle(theIndex,theStart,theStop,"na",str(jsonDict)))
        except:
            # Meta entries that are not JSON are skipped.
            # NOTE(review): this bare except also hides any other error raised
            # in the try body (bad numbers, index errors, ...).
            continue
    allMediaSuffixList = imageSuffixList + videoSuffixList + audioSuffixList
    for i in range(len(theSRTVIndexList)):
        theSlide = theSlideList[theSRTVIndexList[i]]
        theFileSuffix = os.path.splitext(theSlide)[1]
        if (theFileSuffix in allMediaSuffixList):
            # Media files live in the lecture's slide directory.
            theSlide = os.path.join(theSlideDir,theSlide)
            theSlide = os.path.relpath(theSlide)
        print(" %s" % (theSlide))
        theMeta = str(metaDataList[theSRTVIndexList[i]])
        theStart = float(startDataList[theSRTVIndexList[i]])
        theStop = ""  # empty string means "no stop time decided yet"
        theMetaList= [x.strip() for x in theMeta.split(';')]
        #print(theFileSuffix)
        if i == len(theSRTVIndexList)-1:
            #this just pads the ends so last slide stays up to the end
            lectureStopTime = float(lectureStopTime)
            theStop = float(lectureStopTime+(outroDuration))
            #print(theStop)
            #sys.exit()
        elif theFileSuffix in videoSuffixList:
            #read in the video clip
            theVideoClip = VideoFileClip(theSlide)
            #start parsing the meta data
            for aMetaArg in theMetaList:
                ###########################
                #split the arg and values
                theMetaArgList = [x.strip() for x in aMetaArg.split(':')]
                theArg = theMetaArgList[0]
                # NOTE(review): theValue keeps its previous value (or is
                # undefined) when the argument has no ':' part.
                if len(theMetaArgList)>1: theValue = theMetaArgList[1]
                ###########################
                if theArg == "loop":
                    #default the looping to stop when the next slide starts
                    theStop = float(startDataList[theSRTVIndexList[i+1]])
                if theArg == "duration":
                    #if an explicit duration is provided, use that
                    theStop = float(theStart+float(theValue))
                elif theStop == "":
                    #if no duration is provided, and if it's not looped, the duration should be the video length
                    theDuration = theVideoClip.duration #in seconds
                    theStop = float(theStart+theDuration)
                    #startDataList[theSRTVIndexList[i+1]]=theStop
        elif theFileSuffix in audioSuffixList:
            #this section is for audio replace
            theStop = float(stopDataList[theSRTVIndexList[i]])
        elif theFileSuffix == "" or (os.path.exists(os.path.join(theSlideDir,theSlideList[theSRTVIndexList[i]])) == False):
            #this section is for text overlays
            #need to have it written to a unique srt file
            #lectureStartTime = datetime.timedelta(seconds=lectureStartTime) #convert to datetime
            # Overlay times are made relative to the lecture start.
            theStart = float(startDataList[theSRTVIndexList[i]])-float(lectureStartTime)
            theStop = float(stopDataList[theSRTVIndexList[i]])-float(lectureStartTime)
            #convert theMeta to a list
            #theMetaList =theMeta.split(';')
            for aMetaArg in theMetaList:
                ###########################
                #split the arg and values
                theMetaArgList = [x.strip() for x in aMetaArg.split(':')]
                theArg = theMetaArgList[0]
                if len(theMetaArgList)>1: theValue = theMetaArgList[1]
                ###########################
                # for j in theMetaList:
                # theSublist = j.split(":")
                #if there is a defined duration, use that as theStop
                if theArg == "duration":
                    theTextDuration = float(theValue)
                    theStop = (float(startDataList[theSRTVIndexList[i]])+theTextDuration)-float(lectureStartTime)
            #print(theStart)
            #print(theStop)
            print(" %s --> %s\n" %(datetime.timedelta(seconds=theStart+float(lectureStartTime)), datetime.timedelta(seconds=theStop+float(lectureStartTime))))
            if theMeta == "nan": theMeta = ""
            # Shift by the slate length before storing the overlay times.
            theStart = datetime.timedelta(seconds=theStart+theSlateDuration) #convert to datetime
            theStop = datetime.timedelta(seconds=theStop+theSlateDuration) #convert to datetime
            theSRTList.append(srt.Subtitle(i,theStart,theStop,theSlide,theMeta))
            continue #move to the next item in theSRTVIndexList
        else:
            #this is sort of complicated
            #a slide image should end when the next slide OR video starts
            #it should not end when an audio clip begins
            #it should not end when a text overlay begins
            #so if the following entries are an audio clip OR a text overlay, ignore it
            ####NEW
            #start parsing the meta data
            for aMetaArg in theMetaList:
                ###########################
                #split the arg and values
                theMetaArgList = [x.strip() for x in aMetaArg.split(':')]
                theArg = theMetaArgList[0]
                if len(theMetaArgList)>1: theValue = theMetaArgList[1]
                ###########################
                if theArg == "duration":
                    #if an explicit duration is provided, use that
                    theStop = float(theStart+float(theValue))
            ############
            if theStop=="":
                # Skip forward over following audio clips and entries whose
                # file does not exist. Changing i here only affects the rest
                # of this iteration; the for loop resets it next time round.
                # NOTE(review): nothing guards i+1 against running past the
                # end of theSRTVIndexList.
                while (os.path.splitext(theSlideList[theSRTVIndexList[i+1]])[1] in audioSuffixList) or (
                    os.path.exists(os.path.relpath(os.path.join(theSlideDir,theSlideList[theSRTVIndexList[i+1]]))) == False):
                    #print(theSlideDir)
                    #print(theSlideList[theSRTVIndexList[i+1]])
                    #print(os.path.relpath(os.path.join(theSlideDir,theSlideList[theSRTVIndexList[i+1]])))
                    #print(os.path.isfile(os.path.relpath(os.path.join(theSlideDir,theSlideList[theSRTVIndexList[i+1]]))))
                    i=i+1
                    # NOTE(review): placement of this print inside the while
                    # loop is reconstructed -- confirm against upstream.
                    print(" ERROR: Slide may not exist")
                theStop = float(startDataList[theSRTVIndexList[i+1]])
                #print(theStop)
                #print(datetime.timedelta(seconds=theStop))
                #print(i)
        theStart = datetime.timedelta(seconds=theStart) #convert to datetime
        theStop = datetime.timedelta(seconds=theStop) #convert to datetime
        print(" %s --> %s\n" %(theStart, theStop))
        if theMeta == "nan": theMeta = ""
        theIndex = len(theSRTVList)+1
        theSRTVList.append(srt.Subtitle(theIndex,theStart,theStop,theSlide,theMeta))
    ###save an actual srt file
    if theSRTList != []:
        theFileName = theLectureName+".srt"
        theFileName = os.path.join(theSRTDir, theFileName)
        with open(theFileName, "w") as theFile:
            theFile.write(srt.compose(theSRTList))
        print(" SRT file generated\n")
    ###save the data as a modified version of a srt file
    theFileName = theLectureName+".srtv"
    theFileName = os.path.join(theSRTVDir, theFileName)
    with open(theFileName, "w") as theFile:
        theFile.write(srt.compose(theSRTVList))
    print(" Hacky SRT-video file generated\n")
    #need to reindex subtitles based on start time
    #there must be a better way, but this should work
    theSRTVList = srt.compose(theSRTVList)
    theSRTVList = list(srt.parse(theSRTVList))
    return theSRTVList
##########################################################################################
def processAudio(theSRTVList, theAudioDir, theLectureName, lectureStartTime, lectureStopTime):
    """Apply the audio edits described in theSRTVList to the lecture recording.

    The lecture audio is <theAudioDir>/<theLectureName>.mp3. For every entry:
      * when the entry's content is an audio file, that clip is overlaid on
        the lecture audio at the entry's start time ("replace" drops the
        lecture gain during the overlay; "duration" time-stretches the clip);
      * a "lecture: mute[, seconds]" meta argument silences the lecture audio
        from the entry's start for the entry's length (or the given seconds).
    Finally the audio is trimmed to lectureStartTime..lectureStopTime.

    Returns:
        The path of the audio file to use: the original path when no edit was
        made, otherwise the path of a new "EDITED - " copy exported as mp3.

    NOTE(review): the source this block was recovered from had lost its
    indentation; the nesting below was reconstructed and should be checked
    against the upstream file.
    """
    audioFilePath = os.path.join(theAudioDir, theLectureName+".mp3")
    theLectureAudio = "" #just a place holder that gives a logic check later
    theAudioClip = "" #just a place holder that gives a logic check later
    madeAnAudioEdit = False
    #I should have made things a form of json form the start
    # :( STH 2020-0805
    #for anEntry in theSRTVList:
    for i in theSRTVList:
        thePosition = ((i.start).total_seconds())*1000.0 #convert to millisec
        theMetaList = [x.strip() for x in i.proprietary.split(';')]
        theGain = 0 #no ducking
        #theSlide = anEntry.content
        theContent= i.content
        theFileSuffix = os.path.splitext(theContent)[1]
        if theFileSuffix in audioSuffixList:
            #################################
            #only load the lecture audio once
            if theLectureAudio == "":
                print(" Loading lecture audio file %s" % audioFilePath)
                theLectureAudio = AudioSegment.from_file(audioFilePath)
            #################################
            #only load the clip audio once
            #if theAudioClip == "":
            print(" Loading audio clip %s" % theContent)
            theAudioClip = AudioSegment.from_file(theContent)
            #################################
            theDuration = theAudioClip.duration_seconds
            for aMetaArg in theMetaList:
                ###########################
                #split the arg and values
                theMetaArgList = [x.strip() for x in aMetaArg.split(':')]
                theArg = theMetaArgList[0]
                theValue=""
                #print(theArg)
                if len(theMetaArgList)>1: theValue = theMetaArgList[1]
                ###########################
                if theArg == "duration":
                    theNewDuration = float(theValue) #this assumes the duration is less than the clip length.
                    #print(" Setting clip duration to %s" % (theDuration))
                    #theAudioClip = theAudioClip[:theDuration*1000]
                    #################################
                    #experiment with altering the duration of sound clips
                    #2021-0513 STH
                    theAudioClipArray = np.array(theAudioClip.get_array_of_samples())
                    sampleRate = theAudioClip.frame_rate
                    # Ratio > 1 speeds the clip up, < 1 slows it down.
                    tempoRatio=theDuration/theNewDuration
                    #print(tempoRatio)
                    if theNewDuration!=theDuration:
                        print(" Adjusting duration of clip to be %s seconds" % theNewDuration)
                        theAudioClipArray_fast = pyrb.time_stretch(theAudioClipArray, sampleRate, tempoRatio)
                        # Rebuild a 16-bit AudioSegment from the stretched samples.
                        # NOTE(review): the input array holds raw integer samples
                        # (possibly interleaved stereo) and the result is scaled
                        # by 2**15 -- confirm scaling and channel layout.
                        theAudioClipArray = np.int16(theAudioClipArray_fast * 2 ** 15)
                        channels = 2 if (theAudioClipArray_fast.ndim == 2 and theAudioClipArray_fast.shape[1] == 2) else 1
                        theAudioClip = AudioSegment(theAudioClipArray.tobytes(), frame_rate=sampleRate, sample_width=2, channels=channels)
                        #sys.exit()
                ###########################
                if theArg == "replace":
                    print(" Replacing audio at time %s seconds" % (thePosition/1000))
                    print(" Ducking audio file")
                    theGain = -100 #duck the audio if the meta data says replace
                ###########################
                if theArg == "overlay":
                    print(" Overlaying clip onto main audio")
            ################
            print(" Combining audio...")
            theLectureAudio = theLectureAudio.overlay(theAudioClip, position=(thePosition), gain_during_overlay=theGain)
            madeAnAudioEdit = True
        ##########################################################################################
        #audio ducking, especially for use with video insertion
        for aMetaArg in theMetaList:
            ###########################
            #split the arg and values
            theMetaArgList = [x.strip() for x in aMetaArg.split(':')]
            theArg = theMetaArgList[0]
            theValue = ""
            if len(theMetaArgList)>1: theValue = theMetaArgList[1]
            ###########################
            if theArg == "lecture":
                ###This allows for multiple sub metas after the :
                theSubValue = [x.strip() for x in theValue.split(',')]
                if theSubValue[0] == "mute":
                    thePosition = ((i.start).total_seconds())*1000.0 #convert to millisec
                    theStop = ((i.end).total_seconds())*1000.0 #convert to millisec
                    theDuration = theStop-thePosition
                    if len(theSubValue)>1:
                        # An explicit mute length (seconds) overrides the entry length.
                        #theStop = float(theSubValue[1])*1000.0 #convert to millisec
                        #theDuration = theStop-thePosition
                        theDuration = float(theSubValue[1])*1000.0 #convert to millisec
                    #duck the lecture if indicated
                    if madeAnAudioEdit == False: print(" Audio edits detected...")
                    if theLectureAudio == "":
                        print(" Loading audio file %s" % audioFilePath)
                        theLectureAudio = AudioSegment.from_file(audioFilePath)
                    theGain = -100 #duck the audio if the meta data says to mute the lecture audio
                    print(" Ducking audio at time %s seconds" % (thePosition/1000))
                    print(" Duration will be %s seconds" % (theDuration/1000))
                    #make a silent audio clip
                    theSilence= AudioSegment.silent(duration=theDuration)
                    # Overlaying silence with a large negative gain mutes the lecture.
                    theLectureAudio = theLectureAudio.overlay(theSilence, position=(thePosition), gain_during_overlay=theGain)
                    #the following might be a saner way to do this:
                    #theLectureAudio = theLectureAudio.fade(to_gain=theGain, start=thePosition, end=theStop)
                    madeAnAudioEdit = True
    ##########################################################################################
    # ##########################################################################################
    # #insert pad (silent) audio if needed
    # #will probably need to provide a way to insert into existing audio
    # #but for simplicity right now, just assume that pad gets put at the start
    # #STH 2020-0621
    # if 'lecture:insert' in theMetaList:
    # if madeAnAudioEdit == False: print(" 507 Audio edits detected...")
    # if theLectureAudio == "":
    # print(" Loading audio file %s" % audioFilePath)
    # theLectureAudio = AudioSegment.from_file(audioFilePath)
    # theDuration = 5.0 #just give it a default
    # for j in theMetaList:
    # theSublist = [x.strip() for x in j.split(':')]
    # if theSublist[0].strip() == "duration":
    # theDuration = float(theSublist[1]) #if there is a defined duration, use that
    # print(" Inserting pad silence at start. %s seconds" % (theDuration))
    # theLectureAudio = AudioSegment.silent(duration=theDuration*1000)+theLectureAudio #x1000 to convert sec to millisec
    # ##########################################################################################
    # #This section does things like insert the correct word if you screwed up in lecture
    # #The canonical example is saying "latitude" when you(I) mean "longitude"
    # if theFileSuffix in audioSuffixList:
    # if madeAnAudioEdit == False: print(" 523 Audio edits detected...")
    # #################################
    # #only load the lecture audio once
    # if theLectureAudio == "":
    # print(" Loading audio file %s" % audioFilePath)
    # theLectureAudio = AudioSegment.from_file(audioFilePath)
    # #################################
    # #################################
    # #only load the clip audio once
    # if theAudioClip == "":
    # print(" Loading audio clip %s" % theSlide)
    # theAudioClip = AudioSegment.from_file(theSlide)
    # #################################
    # print("!!!!!!!")
    # print(theArg)
    # #thePosition = ((anEntry.start).total_seconds()+allPadDuration)*1000.0 #convert to millisec
    # thePosition = ((anEntry.start).total_seconds())*1000.0 #convert to millisec
    # #theStop = ((anEntry.end).total_seconds())*1000.0 #convert to millisec
    # if theArg == 'replace':
    # #theAudioClip = AudioSegment.from_file(audioFilePath)
    # print(" Ducking audio file")
    # theGain = -100 #duck the audio if the meta data says replace
    # print(" Replacing audio at time %s seconds" % (thePosition/1000))
    # if theArg == 'duration':
    # print("do duration stuff")
    # ################
    # #experiment in duration
    # print("trimming clip to 10 seconds")
    # print(theAudioClip.duration_seconds)
    # theAudioClip = theAudioClip[:10000]
    # ################
    # theLectureAudio = theLectureAudio.overlay(theAudioClip, position=(thePosition), gain_during_overlay=theGain)
    # madeAnAudioEdit = True
    ##########################################################################################
    #trim the audio to fit lectureStartTime & lectureStopTime
    #multiply lectureStartTime & lectureStopTime by 1000 because pydub does things in milliseconds
    #This is important: this trim action is done at the end because the srtv file records the timing
    #of all events from start of audio, not from start point.
    #if you duck audio, the time is from start of audio, not from designated start
    #if you trim first the time of the duck is thrown off.
    #This can be fixed
    # NOTE(review): theLastTimePoint is the `end` of a subtitle (total_seconds()
    # is called on such values above, so it is a timedelta), while
    # lectureStopTime is converted with float() below. The != comparison is
    # therefore probably always true, which would make the trim run every
    # time -- confirm.
    theLastTimePoint = theSRTVList[-1].end
    if (lectureStartTime != 0.0) or (lectureStopTime != theLastTimePoint):
        if madeAnAudioEdit == False: print(" Audio edits detected...")
        if theLectureAudio == "":
            print(" Loading audio file %s" % audioFilePath)
            theLectureAudio = AudioSegment.from_file(audioFilePath)
        print(" Editing audio length")
        lectureStopTime = float(lectureStopTime)
        #############
        #weird bug where _sometimes_ the start time is seen as a string
        # print(type(lectureStartTime))
        # print(type(lectureStopTime))
        lectureStartTime = float(lectureStartTime)
        #############
        theLectureAudio = theLectureAudio[int(lectureStartTime*1000.0):int(lectureStopTime*1000.0)]
        madeAnAudioEdit = True
    ##########################################################################################
    if madeAnAudioEdit==True:
        # The original recording is left untouched; the edit is saved beside it.
        print(" Saving edited audio as a new copy...")
        (thePath, theName) = os.path.split(audioFilePath)
        theName="EDITED - "+theName
        audioFilePath = os.path.join(thePath,theName)
        theLectureAudio.export(audioFilePath, format="mp3")
    return audioFilePath
def makeVideoFromSRTVList(theSRTVList, theSlideDir, audioFilePath, theLectureName, theCandidateVideoDir):
theSlideList = []
theCompositList = []
theTextClipList = []
cutList = []
#####insert the slate at the start
theSlideDir = os.path.join(theSlideDir,theLectureName)
theSlate = os.path.join(theSlideDir,"Slide0.png")
theCandidateVideoPath = os.path.join(theCandidateVideoDir,theLectureName+".mp4")
if os.path.exists(theSlate):
theSlateSize = Image.open(theSlate).size #This will be used to resize videos later
################################################################################
#load in the lecture audio file
#need to load this in first in case you need to duck lecture audio
print(" Loading audio file %s" % audioFilePath)
theLectureAudio = AudioSegment.from_file(audioFilePath)
theAudioFrameRate = theLectureAudio.frame_rate #needed later to convert numpy array
################################################################################
###There can be a situation where there are no slides immediately after the slate
###This inserts a black filler slide
###Do this once to make a leader if needed
if ((theSRTVList[0].start).total_seconds() != 0.0) and ("start" not in theSRTVList[0].proprietary):
#theDuration = theSRTVList[0].start.total_seconds()-theSlateDuration #subtract out the duration of the start slate
theDuration = theSRTVList[0].start.total_seconds() #no need to subtract the slate duration if slate gets inserted at the end of the process
theSlideList.append(ImageClip(theLeaderImage).set_duration(theDuration))
for i in theSRTVList:
overlayClip = False
theMetaList = [x.strip() for x in i.proprietary.split(';')]
if "start" in theMetaList:
theStartOffset = (i.start).total_seconds()
if "stop" in theMetaList:break
theContent= i.content
theDuration = (i.end-i.start).total_seconds()
theFileSuffix = os.path.splitext(theContent)[1]
if theFileSuffix in videoSuffixList:
print(" Generating video clip...")
theVideoClip = VideoFileClip(theContent)
# resize (keep aspect ratio)
#theVideoClip = theVideoClip.fx(vfx.resize, width=theSlateSize[0]*0.8)
if theVideoClip.w>theSlateSize[0]:
print(" Video is too wide. Resizing...")
theVideoClip = theVideoClip.fx(vfx.resize, width=theSlateSize[0])
if theVideoClip.h>theSlateSize[1]:
print(" Video is too high. Resizing...")
#theVideoClip = theVideoClip.fx(vfx.resize, height=theSlateSize[1]*0.8)
theVideoClip = theVideoClip.fx(vfx.resize, height=theSlateSize[1])
for aMetaArg in theMetaList:
###########################
#split the arg and values
theMetaArgList = [x.strip() for x in aMetaArg.split(':')]
theArg = theMetaArgList[0]
if len(theMetaArgList)>1: theValue = theMetaArgList[1]
###########################
# print(" Generating video clip...")
# theVideoClip = VideoFileClip(theContent)
# # resize (keep aspect ratio)
# #theVideoClip = theVideoClip.fx(vfx.resize, width=theSlateSize[0]*0.8)
# if theVideoClip.w>theSlateSize[0]:
# print(" Video is too wide. Resizing...")
# theVideoClip = theVideoClip.fx(vfx.resize, width=theSlateSize[0])
# if theVideoClip.h>theSlateSize[1]:
# print(" Video is too high. Resizing...")
# #theVideoClip = theVideoClip.fx(vfx.resize, height=theSlateSize[1]*0.8)
# theVideoClip = theVideoClip.fx(vfx.resize, height=theSlateSize[1])
###########################
if theArg == "overlay" in theMetaList:
#default location placement should be slide center
#This should probably be abstracted out to a function.
#it's going to be used in multiple locations probably
SWC = int(theSlateSize[0]/2) #SlideWidthCenter (SWC)
SHC = int(theSlateSize[1]/2) #SlideHeightCenter (SHC)
CWC = int(theVideoClip.w/2) #ClipWidthCenter (CWC)
CHC = int(theVideoClip.h/2) #ClipHeightCenter (CHC)
#theVideoClip=theVideoClip.set_position((SWC-CWC, SHC-CHC))
theVideoClip=theVideoClip.set_position((SWC-CWC, SHC-CHC))
overlayClip = True
if theArg == "video":
if theValue == "mute":
#theVideoClip = VideoFileClip(theContent, audio=False)
theVideoClip = theVideoClip.without_audio()
#else:
#theVideoClip = VideoFileClip(theContent, audio=True)
if theArg == "loop":
theVideoClip = theVideoClip.fx(vfx.loop, duration=theDuration)
if theArg == "duration":
theDuration = float(theValue)
if theDuration > theVideoClip.duration:
#the specified duration is longer than the video. Loop it
theVideoClip = theVideoClip.fx(vfx.loop, duration=theDuration)
if theDuration < theVideoClip.duration:
#the specified duration is less than the video. Trim it
theVideoClip = theVideoClip.set_end(theDuration)
if theArg == "resize":
theParams = [x.strip() for x in theValue.split(',')]
holdTime = float(theParams[0])
theSizeRatio = float(theParams[1])
###
theStartSizeW = theVideoClip.w
theEndSizeW = theStartSizeW*theSizeRatio
theEndSizeW = theSizeRatio
orgX = theVideoClip.pos(0)[0]
orgY = theVideoClip.pos(0)[1]
###
theVideoClip = theVideoClip.resize(lambda t : (resize(t,holdTime,theStartSizeW,theEndSizeW)))
#after you resize, make sure the xy location remains the same
#theVideoClip = theVideoClip.set_position((orgX,orgY))
if theArg == "move":
theMoveParams = [x.strip() for x in theValue.split(',')]
holdTime = theMoveParams[0]
endX = theMoveParams[1]
endY = theMoveParams[2]
###
startX = theVideoClip.pos(0)[0] #what is the X position of the clip at time 0
startY = theVideoClip.pos(0)[1] #what is the Y position of the clip at time 0
theClipSize = theVideoClip.size
theVideoClip = theVideoClip.set_position(lambda t:(move(t,holdTime,startX,endX,startY,endY,theSlateSize,theClipSize)))
if theArg == "animove":
#does not work great
#STH 2020-0803
theMoveParams = [x.strip() for x in theValue.split(',')]
holdTime = theMoveParams[0]
endX = theMoveParams[1]
endY = theMoveParams[2]
startX = theVideoClip.pos(0)[0] #what is the X position of the clip at time 0
startY = theVideoClip.pos(0)[1] #what is the Y position of the clip at time 0
theClipSize = theVideoClip.size
theVideoClip = theVideoClip.set_position(lambda t:(calcPos(t,holdTime,startX,endX,startY,endY,theSlateSize,theClipSize)))
if theArg == "aniresize":
#does not work great
#STH 2020-0803
#this needs to be fixed to make it more like move (allowing for hold time)
#note: the size value parsed below is not used by the resize call; the shrink rate is hard-coded
theSize = float(theValue)
theVideoClip = theVideoClip.resize(lambda t : 1-0.02*t)
###########################
if overlayClip == True:
theVideoClip=theVideoClip.set_start(i.start.total_seconds()-theStartOffset+theSlateDuration)
# #default location placement should be slide center
# #This should probably be abstracted out to a function.
# #it's going to be used in multiple locations probably
# SWC = int(theSlateSize[0]/2) #SlideWidthCenter (SWC)
# SHC = int(theSlateSize[1]/2) #SlideHeightCenter (SHC)
# CWC = int(theVideoClip.w/2) #ClipWidthCenter (CWC)
# CHC = int(theVideoClip.h/2) #ClipHeightCenter (CHC)
# #theVideoClip=theVideoClip.set_position((SWC-CWC, SHC-CHC))
# theVideoClip=theVideoClip.set_position((SWC-CWC, SHC-CHC))
theCompositList.append(theVideoClip)
else:
theSlideList.append(theVideoClip)
########################################################
elif theFileSuffix in audioSuffixList:
print(" Generating audio clip...")
if "replace" in theMetaList:
bla=1
elif theFileSuffix in imageSuffixList:
print(" Generating image clip...")
aSlide = ImageClip(theContent).set_duration(theDuration)
################################################
###Experimenting with audio over the start slate
###STH 2021-1010
###See also code @ ~895
# if i.index==1:
# print(" Clip is start slide. Shortening for slate")
# aSlide = ImageClip(theContent).set_duration(theDuration-theSlateDuration)
# else:
# aSlide = ImageClip(theContent).set_duration(theDuration)
###Turned off because it was messing up subtitles
###STH 2021-1201
################################################
if aSlide.w>theSlateSize[0]:
print(" Image is too wide. Resizing...")
#aSlide = aSlide.fx(vfx.resize, width=theSlateSize[0]*0.9)
aSlide = aSlide.fx(vfx.resize, width=theSlateSize[0])
if aSlide.h>theSlateSize[1]:
print(" Image is too high. Resizing...")
#aSlide = aSlide.fx(vfx.resize, width=theSlateSize[1]*0.9)
aSlide = aSlide.fx(vfx.resize, height=theSlateSize[1])
theSlideList.append(aSlide)
else:
print(" Not video, image, or audio...")
for aMetaArg in theMetaList:
#print("************************")
#print("the start offset is %s" % theStartOffset)
try:
aMetaArg = aMetaArg.replace("'", '"')
jsonDict = json.loads(aMetaArg)
if 'cut' in jsonDict.keys():
if 'duration' in jsonDict['cut'].keys():
#cutting is done after removing any unwanted lead in
#and after adding the slate
#so remove the startOffset and add the slate duration
theStart = (i.start).total_seconds() - theStartOffset
#theStart = (i.start).total_seconds()-theSlateDuration
#theStop = (i.end).total_seconds()
#theStop = (i.end).total_seconds()-theSlateDuration
theStop = (i.end).total_seconds() + theSlateDuration - theStartOffset
cutList.append([theStart,theStop])
#print(cutList)
except:
continue
################################################################################
#insert the slate at the start
theSlateClip = ImageClip(theSlate).set_duration(theSlateDuration)
theSlideList.insert(0,theSlateClip)
################################################################################
#with length editing, the slate should be inserted right at the end, after subclip is made
print(" Concatenating image and video clips...")
catedVideo = concatenate_videoclips(theSlideList, method="compose")
#Set the size back to slate size
#there is a bug related to clip resizing I have not tracked down yet
#STH 0802-2020
catedVideo = catedVideo.fx(vfx.resize, width=theSlateSize[0])
#catedVideo = catedVideo.fx(vfx.resize, (theSlateSize[0],theSlateSize[1]))
###Turned off as part of text-over-start slate changes ~line 835
################################################################################
#insert the initial silence for the opening slate
if theSlateDuration > 0:
print(" Adding slate intro buffer...")
theLectureAudio = AudioSegment.silent(duration=theSlateDuration*1000)+theLectureAudio #x1000 to convert sec to millisec
################################################################################
#convert the pydub audio into something moviepy can use
theLectureAudio = pydub_to_moviepy(theLectureAudio, theAudioFrameRate)
################################################################################
#composite the video audio with the lecture audio
#print(" Combining video and lecture audio...")
#print(vars(video.audio))
#print(vars(theLectureAudio))
################################################################################
if catedVideo.audio != None:
theMoviePyAudio = catedVideo.audio
theCompositeAudio = CompositeAudioClip([theMoviePyAudio, theLectureAudio])
else:
theCompositeAudio = theLectureAudio
################################################################################
finalVideo = catedVideo.set_audio(theCompositeAudio)
if theCompositList!=[]:
#use wisely
#doing this is a slow process.
#theCompositList.append(finalVideo)
theCompositList.insert(0,finalVideo)
#finalVideo = CompositeVideoClip([finalVideo,theCompositList[0].set_start(2)])
finalVideo = CompositeVideoClip(theCompositList)
################################################################################
finalVideo.write_videofile(theCandidateVideoPath, fps=12, audio=True, write_logfile=False, threads=4)
print(cutList)
return cutList
else:
###should be moved up with other initial checks
theFeedback=" Path '%s' found: %s"
print(theFeedback % (theSlate, "FALSE"))
print(" Exiting.")
sys.exit()
if __name__ == '__main__':
timerStartTime = datetime.datetime.now()
parser = argparse.ArgumentParser(description='Who wants some popcorn?')
parser.add_argument('--mksrt', metavar='', dest='mksrt', default=True, required=False, help='make srt & srvt files? Default True' )
parser.add_argument('--editaudio', metavar='', dest='editaudio', default=True, required=False, help='make edits to audio according to alignment file? Default True' )
parser.add_argument('--mkvideo', metavar='', dest='mkvideo', default=True, required=False, help='generate the final video? Default True' )
parser.add_argument('--addsrt', metavar='', dest='addsrt', default=True, required=False, help='add text overlays from srt(if exists)? Default True' )
parser.add_argument('--alignmentDir', metavar='', dest='theAlignmentDir', default='intermediate/lecture_alignments', required=False, help='path to alignment file directory')
parser.add_argument('--srtvDir', metavar='', dest='theSRTVDir', default='intermediate/lecture_srtv', required=False, help='path to SRTV file directory')
parser.add_argument('--srtDir', metavar='', dest='theSRTDir', default='intermediate/lecture_srt', required=False, help='path to SRT file directory')
parser.add_argument('--audioDir', metavar='', dest='theAudioDir', default='intermediate/processed_audio/transcribed', required=False, help='path to folder containing processed mp3s' )
parser.add_argument('--slideDir', metavar='', dest='theSlideDir', default='intermediate/lecture_slides', required=False, help='general path to slide images')
parser.add_argument('--videoOut', metavar='', dest='theCandidateVideoDir', default='output/candidate_video', required=False, help='path to the folder videos will be written to')
args = parser.parse_args()
print(args)
#check and see if boolean things are boolean
if isinstance(args.mksrt, str):
args.mksrt = booleanCheck(args.mksrt)
if isinstance(args.editaudio, str):
args.editaudio = booleanCheck(args.editaudio)
if isinstance(args.mkvideo, str):
args.mkvideo = booleanCheck(args.mkvideo)
if isinstance(args.addsrt, str):
args.addsrt = booleanCheck(args.addsrt)
theSlideDir = fileUtils.pathExistsMake(args.theSlideDir)
theAlignmentDir = fileUtils.pathExistsMake(args.theAlignmentDir)
theAudioDir = fileUtils.pathExistsMake(args.theAudioDir)
theSRTDir = fileUtils.pathExistsMake(args.theSRTDir, True)
theSRTVDir = fileUtils.pathExistsMake(args.theSRTVDir, True)
theCandidateVideoDir = fileUtils.pathExistsMake(args.theCandidateVideoDir, True)
theLeaderImage = fileUtils.pathExistsMake(theLeaderImage)
###Start reading in alignment files from the alignment directory
tempVar = os.path.join(theAlignmentDir, "*.csv")
for theFileName in glob.glob(tempVar):
theLectureName = os.path.basename(theFileName)
print(" File found. Opening '%s'" % theLectureName)
theLectureName = os.path.splitext(theLectureName)[0]
theAlignmentFile = pd.read_csv(theFileName, header=None, names=['word','start','stop','token','slide','meta'], usecols=[1,2,3,4,5,6], encoding = "ISO-8859-1")
startDataList = list(theAlignmentFile['start'])
stopDataList = list(theAlignmentFile['stop'])
theSlideList = list(theAlignmentFile['slide'])
metaDataList = list(theAlignmentFile['meta'])
###Read the alignment file and try to get lecture start/stop data
lectureStartTime, lectureStopTime = readStartStopTimes(theAlignmentFile, metaDataList)