diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..a0418a6
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,5 @@
+OPENAI_API_KEY=
+LANGCHAIN_PROJECT=
+LANGCHAIN_API_KEY=
+LANGCHAIN_TRACING_V2=
+ELEVENLABS_KEY=
\ No newline at end of file
diff --git a/README.md b/README.md
index c9ef56d..5b2233a 100644
--- a/README.md
+++ b/README.md
@@ -1,23 +1,10 @@
-# Shortrocity
+# short-videos
+Generate short videos using unstructured, LangChain, cv2, ElevenLabs, and OpenAI.
+# What is the difference?
+ 1. This app uses LangChain.
+ 2. It uses AI models to extract the narration and image descriptions instead of heuristics.
+ 3. It includes some tests that check the narration and image counts match.
+ 4. It uses LangSmith for monitoring.
+ 5. The app accepts a URL for any HTML page instead of requiring text to be copied manually.
+ 6. [future] Deploy as a REST API using LangServe.
-Shortrocity is a tool for making AI generated short videos ("shorts" or "reels") with a ChatGPT generated script, narrated by ElevenLabs or OpenAI text-to-speech. DALL-E 3 generated background images are also added to the background.
-
-## Quick Start
-
-First, add your API-keys to the environment:
-
-```console
-$ export OPENAI_API_KEY=YOUR_OPENAI_API_KEY
-$ export ELEVENLABS_API_KEY=YOUR_ELEVENLABS_API_KEY
-```
-
-Then, put your source content in a file, for example `source.txt` and run the `main.py`:
-
-```console
-$ ./main.py source.txt
-Generating script...
-Generating narration...
-Generating images...
-Generating video...
-DONE! Here's your video: shorts/1701788183/short.avi
-``````
diff --git a/app/__init__.py b/app/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/app/audio.py b/app/audio.py
new file mode 100644
index 0000000..685b810
--- /dev/null
+++ b/app/audio.py
@@ -0,0 +1,33 @@
+"""
+create a narration audio out of a text
+"""
+from data_parser import parse_data
+from templates import template_images , template_narrator
+from utils import create_dict_pairs
+from elevenlabs import set_api_key , generate , save
+from load_dotenv import load_dotenv
+load_dotenv()
+import os
+set_api_key(os.getenv("ELEVENLABS_KEY"))
+
+def concatenate_text():
+    """Build the full narration text that is sent to text-to-speech.
+
+    Runs ``parse_data`` once with the narrator template and once with the
+    images template, pairs the two lists with ``create_dict_pairs`` and
+    joins the ``"text"`` entry of every pair.
+
+    Returns:
+        The narration segments in order, each followed by two newlines.
+    """
+    narrations = parse_data(template_narrator)
+    image_descriptions = parse_data(template_images)
+    pairs = create_dict_pairs(narrations, image_descriptions)
+    return "".join(pair["text"] + "\n\n" for pair in pairs)
+
+def generate_audio(text):
+    """Synthesize ``text`` with ElevenLabs and save it to ``./data/audio.mp3``.
+
+    Args:
+        text: Narration to convert to speech.
+    """
+    tts_request = {
+        "text": text,
+        "voice": "T7QGPtToiqH4S8VlIkMJ",  # presumably an ElevenLabs voice ID - confirm
+        "model": "eleven_multilingual_v2",
+    }
+    speech = generate(**tts_request)
+    save(audio=speech, filename="./data/audio.mp3")
+
+
+# Executed whenever this module is run or imported: builds the narration
+# text and writes the synthesized speech to ./data/audio.mp3.
+generate_audio(
+ text=concatenate_text()
+)
\ No newline at end of file
diff --git a/app/data_parser.py b/app/data_parser.py
new file mode 100644
index 0000000..bb8aa13
--- /dev/null
+++ b/app/data_parser.py
@@ -0,0 +1,55 @@
+from langchain.output_parsers import CommaSeparatedListOutputParser
+from langchain.prompts import PromptTemplate , SystemMessagePromptTemplate , ChatPromptTemplate
+from langchain_community.chat_models import ChatOpenAI
+from narration import call
+from langchain.schema.messages import SystemMessage
+from utils import clean
+from templates import template_images , template_narrator
+
+# context = call()
+# output_parser = CommaSeparatedListOutputParser()
+# format_instructions = output_parser.get_format_instructions()
+# print(format_instructions)
+
+# prompt = PromptTemplate(
+# template=template,
+# input_variables=["context"],
+# )
+
+# chat_template = ChatPromptTemplate.from_messages(
+# [SystemMessagePromptTemplate.from_template(template=template_images)])
+
+# model = ChatOpenAI(temperature=0 ,model_name="gpt-3.5-turbo-1106" , )
+
+# input = chat_template.format_messages(context=context)
+
+# messages = [
+# SystemMessage(_input)
+# ]
+# print(input)
+
+# chain = chat_template | model
+# output = chain.invoke({"context":context}).content
+# clean(data=output)
+# print(f"model output is\n\n {output} , \nfirst element is \n {output[0]} , \n type is {type(output)}")
+# print(output.split("\n")[0])
+# text = output_parser.parse(output)
+# print(context)
+# print("\n\n***********\n\n")
+# print(
+
+
+
+def parse_data(template: str, context=None) -> list:
+    """Extract a list of values from the generated script with an LLM.
+
+    Args:
+        template: System-prompt template containing a ``{context}``
+            placeholder (e.g. ``template_narrator`` or ``template_images``).
+        context: Script text to extract from.  When omitted, a script is
+            generated with ``call()``.  Pass the same script to every
+            extraction whose results must line up (narrations vs. images);
+            otherwise each invocation works on a separately generated
+            script.
+
+    Returns:
+        The model output split into lines and normalised by ``clean``.
+    """
+    if context is None:
+        context = call()
+
+    chat_template = ChatPromptTemplate.from_messages(
+        [SystemMessagePromptTemplate.from_template(template=template)]
+    )
+    # temperature=0 keeps the extraction as repeatable as the model allows.
+    model = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-1106")
+
+    # The chain formats the prompt from the input dict before calling the model.
+    chain = chat_template | model
+    output = chain.invoke({"context": context}).content
+    return clean(data=output)
+
\ No newline at end of file
diff --git a/app/generate_images.py b/app/generate_images.py
new file mode 100644
index 0000000..c709c43
--- /dev/null
+++ b/app/generate_images.py
@@ -0,0 +1,45 @@
+"""
+a text to image module .
+responsible for generating images by using DALLE-3 openai model
+"""
+import base64
+from data_parser import parse_data
+from templates import template_images , template_narrator
+from utils import create_dict_pairs
+import os
+
+
+def get_images_descriptions():
+    """Return the image descriptions extracted from the generated script.
+
+    Runs ``parse_data`` with the narrator and the images templates, pairs
+    the results with ``create_dict_pairs`` and keeps the ``"image"`` entry
+    of every pair.
+    """
+    narrations = parse_data(template_narrator)
+    descriptions = parse_data(template_images)
+    return [pair["image"] for pair in create_dict_pairs(narrations, descriptions)]
+
+def generate_images(images=None):
+    """Generate one DALL-E 3 image per description and store it on disk.
+
+    Args:
+        images: Iterable of prompt strings.  When omitted, the descriptions
+            are obtained from ``get_images_descriptions()`` at call time.
+
+    Each image is requested as base64 JSON, decoded and written to
+    ``./data/images/image_<index>.webp``; the directory is created if it
+    does not exist.
+    """
+    from openai import OpenAI
+
+    # Resolve the default here: as a default-argument expression the
+    # LLM-backed lookup would run once, when the function is defined.
+    if images is None:
+        images = get_images_descriptions()
+
+    client = OpenAI()
+
+    output_dir = "./data/images/"
+    os.makedirs(output_dir, exist_ok=True)
+
+    for i, img in enumerate(images):
+        response = client.images.generate(
+            model="dall-e-3",
+            prompt=img,
+            size="1024x1024",
+            quality="standard",
+            n=1,
+            response_format="b64_json",
+        )
+        image_b64 = response.data[0].b64_json
+
+        with open(f"{output_dir}image_{i}.webp", "wb") as f:
+            f.write(base64.b64decode(image_b64))
+
+
+
+# Executed whenever this module is run or imported: generates and saves
+# the images for the current script.
+generate_images()
\ No newline at end of file
diff --git a/app/main.py b/app/main.py
new file mode 100644
index 0000000..fe4424f
--- /dev/null
+++ b/app/main.py
@@ -0,0 +1,12 @@
+"""
+main file to handle all process
+it should call all the main functions that handle the following scenario
+1.generating data
+2.clean the data generated
+3.get structured data
+4.generate audio
+5.generate images
+6.create the overall video
+"""
+from app.data_parser import parse_data
+from app.templates import *
\ No newline at end of file
diff --git a/app/narration.py b/app/narration.py
new file mode 100644
index 0000000..04ca614
--- /dev/null
+++ b/app/narration.py
@@ -0,0 +1,31 @@
+"""
+create a narration text out of a raw text from a given website or an article
+ideas:
+1. scrape a website's text, filter it and generate an ad out of it.
+2. create a youtube shorts app generator
+"""
+from load_dotenv import load_dotenv
+load_dotenv()
+import os
+from utils import load_html_text
+from langchain.chat_models import ChatOpenAI
+from langchain.prompts.chat import ChatPromptTemplate
+from templates import template
+
+def call():
+    """Generate the advertisement script for the scraped page.
+
+    Loads the page text with ``load_html_text()``, sends it through the
+    ``template`` system prompt to ``ChatOpenAI``, stores the reply in
+    ``./data/response.txt`` and prints it.
+
+    Returns:
+        The generated script text.
+    """
+    context = load_html_text()
+    chat_prompt = ChatPromptTemplate.from_messages([
+        ("system", template)
+    ])
+    chain = chat_prompt | ChatOpenAI()
+    response = chain.invoke({"context": context})
+
+    # Explicit UTF-8: the script can contain characters such as "®" or curly
+    # quotes that a platform-default encoding may be unable to encode.
+    with open("./data/response.txt", "w", encoding="utf-8") as f:
+        f.write(response.content)
+    print(response.content)
+    return response.content
+
+# print(call())
\ No newline at end of file
diff --git a/app/templates.py b/app/templates.py
new file mode 100644
index 0000000..6789637
--- /dev/null
+++ b/app/templates.py
@@ -0,0 +1,89 @@
+from langchain.prompts import PromptTemplate
+
+from langchain.prompts.chat import ChatPromptTemplate
+
+# Prompt for generating the advertisement script.  ``{context}`` is the only
+# placeholder; it is filled with the source text the ad should be based on.
+template = """
+Craft a compelling advertisement script as if you were a seasoned content creator expert.
+Your task is to create a persuasive and engaging promotional piece tailored to a specific context provided.
+Consider the target audience, key messaging, and the overall tone to captivate and drive interest effectively.
+Dive into the realm of creativity, utilizing your expertise to seamlessly blend innovation and consumer appeal into a seamless promotional narrative.
+Remember to provide details that showcase the uniqueness of the product or service while delivering a memorable and impactful call-to-action.
+###############################
+your script will be used as a short reel/video along with images that describes the text .
+your script will be passed to a text to speech model to convert it into an audio
+use the following examples as a refrence
+don't output music indicator like [Upbeat music playing] ,[Upbeat music fades out]'
+**********************************************
+examples :
+
+Example Pair 1:
+
+Image Description 1:
+"A vibrant can of Bolt Boost energy drink surrounded by dynamic lightning bolts, symbolizing energy and power."
+Ad Text 1:
+"Unleash the Power Within! Introducing Bolt Boost – the energy drink that fuels your ambition. Tackle your day with vitality and focus. Time to elevate your energy game!"
+
+
+Image Description 2:
+"An energetic individual conquering challenges with a glowing aura, holding a can of Bolt Boost, surrounded by a vibrant, active environment."
+Ad Text 2:
+"Revitalize your day with Bolt Boost! Packed with natural ingredients and a burst of flavor, this energy elixir keeps you at your peak. Elevate your performance, embrace the Bolt Boost experience!"
+
+Image Description 3:
+"A creative workspace with Bolt Boost cans scattered around, featuring a laptop with artistic tools, showcasing the synergy between the energy drink and creative endeavors."
+Ad Text 3:
+"Fuel Your Passion! Bolt Boost, the ultimate energy companion for creators. Whether you're a designer, writer, or artist, power up your creativity and break through boundaries. Unleash your potential!"
+
+
+Image Description 4:
+"A visually stunning scene of a creative mind at work, surrounded by Bolt Boost cans and a burst of vibrant colors, highlighting the fusion of creativity and energy."
+Ad Text 4:
+"Create, Energize, Repeat! Bolt Boost – the choice of innovators. Sip on inspiration and crush creative blocks. Elevate your craft with the energy that matches your ambition."
+
+************************************************
+context:{context}
+"""
+
+# Extraction prompt: asks the model to return only the narrator lines of the
+# script passed in as ``{context}``, one per line.
+# NOTE: this is not a raw string, so every ``\n`` below becomes a real newline
+# and ``\'`` becomes ``'`` in the text the model receives.
+template_narrator = """
+scrape all the narrator text from the following context
+use the examples blow as a refrence
+examples :
+\n\nNarrator: "Passion. Quality. Commitment. At McDonald\'s, we\'re passionate about our food, always striving to provide you with the best dining experience possible.
+"\n\n[Images of fresh ingredients being prepared and cooked]
+\n\nNarrator: "From our balanced options in the Happy Meal to our Quarter Pounder burgers made with 100% fresh beef cooked to order, we\'re committed to serving you quality food.
+"\n\n[Close-up shots of various menu items]
+ your output should be like below :
+
+Passion. Quality. Commitment. At McDonald\'s, we\'re passionate about our food, always striving to provide you with the best dining experience possible.\n
+From our balanced options in the Happy Meal to our Quarter Pounder burgers made with 100% fresh beef cooked to order, we\'re committed to serving you quality food.\n
+context:{context}.
+Your response should be single values seperated by a new line \n
+append \n to every value you parse
+don't forget any narration
+the count of narrations extracted should be the same as image descriptions
+"""
+
+# Extraction prompt: asks the model to return only the bracketed image
+# descriptions of the script passed in as ``{context}``, one per line.
+# NOTE: this is not a raw string, so every ``\n`` below becomes a real newline
+# and ``\'`` becomes ``'`` in the text the model receives.
+template_images = """
+scrape all the images description from the following context
+images description are always enclosed in square brackets []
+every Narrator text is followed by an image description . please scrape all the images
+use the examples blow as a refrence
+examples :
+\n\nNarrator: "Passion. Quality. Commitment. At McDonald\'s, we\'re passionate about our food, always striving to provide you with the best dining experience possible.
+"\n\n[Images of fresh ingredients being prepared and cooked]
+\n\nNarrator: "From our balanced options in the Happy Meal to our Quarter Pounder burgers made with 100% fresh beef cooked to order, we\'re committed to serving you quality food.
+"\n\n[Close-up shots of various menu items]
+
+ your output should be like below :
+
+fresh ingredients being prepared and cooked\n
+Close-up shots of various menu items\n
+
+context:{context}.
+Your response should be single values seperated by a new line \n
+append \n to every value you parse
+don't enclude any image indicator in the output . just extract all the image description.
+don't add any text to it
+the count of narrations extracted should be the same as image descriptions
+don't forget any image.
+"""
\ No newline at end of file
diff --git a/app/utils.py b/app/utils.py
new file mode 100644
index 0000000..a5230cc
--- /dev/null
+++ b/app/utils.py
@@ -0,0 +1,52 @@
+
+def download_html_from_url(url, timeout=30):
+    """Download ``url`` and store its HTML in ``./data/file.html``.
+
+    Args:
+        url: Address of the page to fetch.
+        timeout: Seconds to wait for the server before ``requests`` raises a
+            timeout error instead of blocking indefinitely.
+
+    On a non-200 response nothing is written (a file left by an earlier run
+    stays in place) and the status code is printed.
+    """
+    import requests
+
+    response = requests.get(url, timeout=timeout)
+    if response.status_code != 200:
+        print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
+        return
+
+    # Explicit UTF-8 so non-ASCII page text does not depend on the
+    # platform's default encoding.
+    with open("./data/file.html", "w", encoding="utf-8") as f:
+        f.write(response.text)
+
+
+def load_html_text(url: str = "https://www.mcdonalds.com/us/en-us/about-our-food.html"):
+    """Download a web page and return its extracted text.
+
+    The page is saved with ``download_html_from_url``, parsed with
+    ``UnstructuredHTMLLoader`` and the extracted text is also written to
+    ``./data/file.txt``.
+
+    Args:
+        url: Address of the HTML page to scrape.
+
+    Returns:
+        The ``page_content`` string of the first loaded document.
+    """
+    from langchain.document_loaders import UnstructuredHTMLLoader
+
+    download_html_from_url(url)
+    loader = UnstructuredHTMLLoader(file_path="./data/file.html")
+    page_text = loader.load()[0].page_content
+
+    # Explicit UTF-8: page text may contain characters such as "®" that a
+    # platform-default encoding cannot represent.
+    with open("./data/file.txt", "w", encoding="utf-8") as f:
+        f.write(page_text)
+    return page_text
+
+# load_html_text()
+
+# load_html_text(url="https://www.mcdonalds.com/us/en-us/about-our-food.html")
+
+def clean(data: str) -> list:
+    """Split model output into lines and strip quoting noise from each one.
+
+    Every line is whitespace-stripped and then has all double quotes,
+    newlines and square brackets removed.  Empty lines are kept as empty
+    strings.  Each cleaned line and the final list are printed.
+
+    Args:
+        data: Raw text returned by the model.
+
+    Returns:
+        The cleaned lines, in their original order.
+    """
+    unwanted = str.maketrans("", "", '"\n[]')
+    cleaned_lines = []
+    for raw_line in data.split("\n"):
+        cleaned = raw_line.strip().translate(unwanted)
+        cleaned_lines.append(cleaned)
+        print(cleaned)
+    print(cleaned_lines)
+    return cleaned_lines
+
+def create_dict_pairs(text: list, images: list) -> list:
+    """Pair narration texts with image descriptions by position.
+
+    Args:
+        text: Narration strings.
+        images: Image-description strings, matched to ``text`` by index.
+
+    Returns:
+        A list of ``{"text": ..., "image": ...}`` dicts, one for every
+        position where both values are non-empty.  Pairing stops at the end
+        of the shorter input.  The resulting list is also printed.
+    """
+    # Positions with an empty side are dropped: consumers index both keys
+    # of every dict unconditionally.
+    pairs = [
+        {"text": tex, "image": img}
+        for tex, img in zip(text, images)
+        if tex and img
+    ]
+    print(pairs)
+    return pairs
+
\ No newline at end of file
diff --git a/data/audio.mp3 b/data/audio.mp3
new file mode 100644
index 0000000..8b9a938
Binary files /dev/null and b/data/audio.mp3 differ
diff --git a/data/file.html b/data/file.html
new file mode 100644
index 0000000..54349c1
--- /dev/null
+++ b/data/file.html
@@ -0,0 +1,3370 @@
+
+
+
+
Our Terms and Conditions have changed. Please take a moment to review the new McDonald’s Terms and Conditions by clicking on the link. These include updates relating to Mobile Order & Pay, MyMcDonald’s Rewards, dispute resolution, and arbitration. By continuing to use our website, you are indicating that you have reviewed and agree to these Terms and Conditions. You are also indicating that you are agreeing to a legally binding contract and intend to do so.
From adding more balanced options to our Happy Meal®, to serving up fresh beef Quarter Pounder® burgers that are cooked when you order, we’re always finding ways to show our commitment to our customers and our food.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Commitment to Quality
+
+
+
We're dedicated to improving the way we prepare our quality food and the ingredients that go into it.
Our Quarter Pounder®* patty is made with 100% fresh beef and cooked right when you order. It’s hot and deliciously juicy and full of flavor.
*Weight before cooking 4 oz. At participating McDonald's. Fresh Beef available at most restaurants in contiguous US. Not available in Alaska, Hawaii, and US Territories.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/data/images/image_0.webp b/data/images/image_0.webp
new file mode 100644
index 0000000..88e482d
Binary files /dev/null and b/data/images/image_0.webp differ
diff --git a/data/images/image_1.webp b/data/images/image_1.webp
new file mode 100644
index 0000000..5f3cf49
Binary files /dev/null and b/data/images/image_1.webp differ
diff --git a/data/images/image_2.webp b/data/images/image_2.webp
new file mode 100644
index 0000000..d6cbf55
Binary files /dev/null and b/data/images/image_2.webp differ
diff --git a/data/images/image_3.webp b/data/images/image_3.webp
new file mode 100644
index 0000000..11de826
Binary files /dev/null and b/data/images/image_3.webp differ
diff --git a/data/res.py b/data/res.py
new file mode 100644
index 0000000..79954ee
--- /dev/null
+++ b/data/res.py
@@ -0,0 +1,21 @@
+d = [
+"Are you passionate about your food? At McDonald's, we share your passion! We're constantly finding ways to improve our menu and provide you with the best quality ingredients. Get ready to discover the McDonald's difference!",
+
+"Commitment to quality is at the heart of everything we do. We're dedicated to improving the way we prepare our food and the ingredients that go into it. From adding more balanced options to our Happy Meal®, to serving up fresh beef Quarter Pounder® burgers that are cooked when you order.",
+
+"What's in your food? We believe in transparency. That's why we want you to know what makes our ingredients special. Visit our website to find out more about the high-quality ingredients we use in our menu items.",
+
+"At McDonald's, we have a food philosophy that drives us. We're passionate about our food, and we're committed to always evolving what matters to you. Because your satisfaction is our top priority.",
+
+"Curious about the nutrition content of your favorite McDonald's meals? Our nutrition calculator has all the information you're looking for. Discover the nutritional value of your favorite menu items and make informed choices that fit your lifestyle.",
+
+"Our food experts are the backbone of our commitment to quality. From our chefs to our dieticians and suppliers, they care deeply about the food you eat. Rest assured, you're in good hands.",
+
+"And let's not forget about our iconic Quarter Pounder®! Made with 100% fresh beef and cooked right when you order, it's hot, juicy, and full of flavor. Experience the difference of fresh beef.",
+
+"We also understand the importance of providing balanced options for your little ones. With our Happy Meal®, you can enjoy a meal that's not only delicious but also nutritious. When your kids are happy, you're happy too!",
+
+"When it comes to variety, we've got you covered. From classic favorites to new and exciting menu items, there's something for everyone at McDonald's. Explore our menu and discover your next delicious meal.",
+
+"So, join us at McDonald's and experience the passion we have for our food. We're constantly striving to provide you with the best quality ingredients and menu options. Your satisfaction is our mission. Come and taste the McDonald's difference today!"
+]
\ No newline at end of file
diff --git a/data/response.txt b/data/response.txt
new file mode 100644
index 0000000..0c76b09
--- /dev/null
+++ b/data/response.txt
@@ -0,0 +1,37 @@
+[Upbeat music playing]
+
+Narrator: We're passionate about our food. At McDonald's, we believe in continuously improving our menu to provide you with more balanced options and quality ingredients.
+
+[Images of fresh vegetables and fruits, along with a variety of McDonald's menu items, appear on the screen]
+
+Narrator: Our commitment to quality is unwavering. We carefully select the ingredients that go into our food, ensuring that each bite is filled with flavor and nutrition.
+
+[Images of McDonald's food experts, including chefs and dieticians, appear on the screen]
+
+Narrator: Our food experts, from our chefs to our dieticians and suppliers, care deeply about the food you eat. They work tirelessly to ensure that every meal meets our high standards of taste and quality.
+
+[Image of a sizzling Quarter Pounder burger cooking on a grill]
+
+Narrator: Take our Quarter Pounder burger, for example. Made with 100% fresh beef, it's cooked right when you order, ensuring that it's hot, juicy, and bursting with flavor.
+
+[Image of a Happy Meal box with a smiling child]
+
+Narrator: And when it comes to our Happy Meals, we believe in providing delicious and balanced options for your kids. Because when they enjoy their meal, you can enjoy it too.
+
+[Image of a variety of McDonald's menu items, including salads, wraps, and grilled chicken]
+
+Narrator: We understand that everyone has different preferences and dietary needs. That's why we offer a variety of choices, so you can find something that suits your taste and lifestyle.
+
+[Image of a nutrition calculator with the McDonald's logo]
+
+Narrator: Want to know the nutrition information of your favorite meals? Our nutrition calculator has got you covered. Simply enter your order, and you'll get all the details you need.
+
+[Image of the McDonald's logo]
+
+Narrator: So, whether you're craving a classic Big Mac or looking for a healthier option, McDonald's is here for you. We're committed to evolving what matters to you, because we're passionate about our food.
+
+[Upbeat music fades out]
+
+Narrator: Visit our website to learn more about our commitment to quality, our food philosophy, and our menu options. McDonald's – serving you the food you love, with a side of passion.
+
+[McDonald's logo appears on the screen with the tagline "I'm lovin' it"]
\ No newline at end of file
diff --git a/images.py b/images.py
deleted file mode 100644
index 60c2058..0000000
--- a/images.py
+++ /dev/null
@@ -1,33 +0,0 @@
-from openai import OpenAI
-import base64
-import os
-
-client = OpenAI()
-
-def create_from_data(data, output_dir):
- if not os.path.exists(output_dir):
- os.makedirs(output_dir)
-
- image_number = 0
- for element in data:
- if element["type"] != "image":
- continue
- image_number += 1
- image_name = f"image_{image_number}.webp"
- generate(element["description"] + ". Vertical image, fully filling the canvas.", os.path.join(output_dir, image_name))
-
-def generate(prompt, output_file, size="1024x1792"):
- response = client.images.generate(
- model="dall-e-3",
- prompt=prompt,
- size=size,
- quality="standard",
- response_format="b64_json",
- n=1,
- )
-
- image_b64 = response.data[0].b64_json
-
- with open(output_file, "wb") as f:
- f.write(base64.b64decode(image_b64))
-
diff --git a/main.py b/main.py
deleted file mode 100755
index 9e63867..0000000
--- a/main.py
+++ /dev/null
@@ -1,93 +0,0 @@
-#!/usr/bin/env python3
-
-from openai import OpenAI
-import time
-import json
-import sys
-import os
-
-import narration
-import images
-import video
-
-client = OpenAI()
-
-if len(sys.argv) < 2:
- print(f"USAGE: {sys.argv[0]} SOURCE_FILENAME")
- sys.exit(1)
-
-with open(sys.argv[1]) as f:
- source_material = f.read()
-
-short_id = str(int(time.time()))
-output_file = "short.avi"
-
-basedir = os.path.join("shorts", short_id)
-if not os.path.exists(basedir):
- os.makedirs(basedir)
-
-print("Generating script...")
-
-response = client.chat.completions.create(
- model="gpt-4",
- messages=[
- {
- "role": "system",
- "content": """You are a YouTube short narration generator. You generate 30 seconds to 1 minute of narration. The shorts you create have a background that fades from image to image as the narration is going on.
-
-You will need to generate descriptions of images for each of the sentences in the short. They will be passed to an AI image generator. DO NOT IN ANY CIRCUMSTANCES use names of celebrities or people in the image descriptions. It is illegal to generate images of celebrities. Only describe persons without their names. Do not reference any real person or group in the image descriptions. Don't mention the female figure or other sexual content in the images because they are not allowed.
-
-You are however allowed to use any content, including real names in the narration. Only image descriptions are restricted.
-
-Note that the narration will be fed into a text-to-speech engine, so don't use special characters.
-
-Respond with a pair of an image description in square brackets and a narration below it. Both of them should be on their own lines, as follows:
-
-###
-
-[Description of a background image]
-
-Narrator: "One sentence of narration"
-
-[Description of a background image]
-
-Narrator: "One sentence of narration"
-
-[Description of a background image]
-
-Narrator: "One sentence of narration"
-
-###
-
-The short should be 6 sentences maximum.
-
-You should add a description of a fitting backround image in between all of the narrations. It will later be used to generate an image with AI.
-"""
- },
- {
- "role": "user",
- "content": f"Create a YouTube short narration based on the following source material:\n\n{source_material}"
- }
- ]
-)
-
-response_text = response.choices[0].message.content
-response_text.replace("’", "'").replace("`", "'").replace("…", "...").replace("“", '"').replace("”", '"')
-
-with open(os.path.join(basedir, "response.txt"), "w") as f:
- f.write(response_text)
-
-data, narrations = narration.parse(response_text)
-with open(os.path.join(basedir, "data.json"), "w") as f:
- json.dump(data, f, ensure_ascii=False)
-
-print(f"Generating narration...")
-narration.create(data, os.path.join(basedir, "narrations"))
-
-print("Generating images...")
-images.create_from_data(data, os.path.join(basedir, "images"))
-
-print("Generating video...")
-video.create(narrations, basedir, output_file)
-
-print(f"DONE! Here's your video: {os.path.join(basedir, output_file)}")
diff --git a/narration.py b/narration.py
deleted file mode 100644
index 4df6fc8..0000000
--- a/narration.py
+++ /dev/null
@@ -1,58 +0,0 @@
-from elevenlabs import generate, set_api_key, save, RateLimitError
-import openai
-import os
-
-elevenlabs_key = os.getenv("ELEVENLABS_API_KEY")
-
-if elevenlabs_key:
- set_api_key(elevenlabs_key)
-
-narration_api = "elevenlabs" # (or "openai")
-
-def parse(narration):
- data = []
- narrations = []
- lines = narration.split("\n")
- for line in lines:
- if line.startswith('Narrator: '):
- text = line.replace('Narrator: ', '')
- data.append({
- "type": "text",
- "content": text.strip('"'),
- })
- narrations.append(text.strip('"'))
- elif line.startswith('['):
- background = line.strip('[]')
- data.append({
- "type": "image",
- "description": background,
- })
- return data, narrations
-
-def create(data, output_folder):
- if not os.path.exists(output_folder):
- os.makedirs(output_folder)
-
- n = 0
- for element in data:
- if element["type"] != "text":
- continue
-
- n += 1
- output_file = os.path.join(output_folder, f"narration_{n}.mp3")
-
- if narration_api == "openai":
- audio = openai.audio.speech.create(
- input=element["content"],
- model="tts-1",
- voice="alloy",
- )
-
- audio.stream_to_file(output_file)
- else:
- audio = generate(
- text=element["content"],
- voice="Michael",
- model="eleven_monolingual_v1"
- )
- save(audio, output_file)
diff --git a/requirements.txt b/requirements.txt
index 723770f..6e327ad 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,91 @@
-opencv-python
-elevenlabs
-openai
-pydub
-numpy
+aiohttp==3.9.1
+aiosignal==1.3.1
+annotated-types==0.6.0
+anyio==4.2.0
+asttokens==2.4.1
+async-timeout==4.0.3
+attrs==23.1.0
+backoff==2.2.1
+beautifulsoup4==4.12.2
+certifi==2023.11.17
+chardet==5.2.0
+charset-normalizer==3.3.2
+click==8.1.7
+cohere==4.39
+dataclasses-json==0.6.3
+decorator==5.1.1
+distro==1.9.0
+elevenlabs==0.2.27
+emoji==2.9.0
+exceptiongroup==1.2.0
+executing==2.0.1
+fastavro==1.9.2
+filetype==1.2.0
+frozenlist==1.4.1
+greenlet==3.0.3
+h11==0.14.0
+httpcore==1.0.2
+httpx==0.26.0
+idna==3.6
+importlib-metadata==6.11.0
+iniconfig==2.0.0
+ipython==8.19.0
+jedi==0.19.1
+joblib==1.3.2
+jsonpatch==1.33
+jsonpath-python==1.0.6
+jsonpointer==2.4
+langchain==0.0.352
+langchain-community==0.0.6
+langchain-core==0.1.3
+langdetect==1.0.9
+langsmith==0.0.75
+load-dotenv==0.1.0
+lxml==4.9.4
+marshmallow==3.20.1
+matplotlib-inline==0.1.6
+multidict==6.0.4
+mypy-extensions==1.0.0
+nltk==3.8.1
+numpy==1.26.2
+openai==1.6.1
+packaging==23.2
+parso==0.8.3
+pexpect==4.9.0
+pluggy==1.3.0
+prompt-toolkit==3.0.43
+ptyprocess==0.7.0
+pure-eval==0.2.2
+pydantic==2.5.3
+pydantic_core==2.14.6
+Pygments==2.17.2
+pytest==7.4.3
+python-dateutil==2.8.2
+python-dotenv==1.0.0
+python-iso639==2023.12.11
+python-magic==0.4.27
+PyYAML==6.0.1
+rapidfuzz==3.6.0
+regex==2023.12.25
+requests==2.31.0
+six==1.16.0
+sniffio==1.3.0
+soupsieve==2.5
+SQLAlchemy==2.0.24
+stack-data==0.6.3
+tabulate==0.9.0
+tenacity==8.2.3
+tiktoken==0.5.2
+tomli==2.0.1
+tqdm==4.66.1
+traitlets==5.14.0
+typing-inspect==0.9.0
+typing_extensions==4.9.0
+unstructured==0.11.6
+unstructured-client==0.15.1
+urllib3==2.1.0
+wcwidth==0.2.12
+websockets==12.0
+wrapt==1.16.0
+yarl==1.9.4
+zipp==3.17.0
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_parsed_data.py b/tests/test_parsed_data.py
new file mode 100644
index 0000000..b5babf6
--- /dev/null
+++ b/tests/test_parsed_data.py
@@ -0,0 +1,13 @@
+from app.data_parser import parse_data
+from app.templates import template_images , template_narrator
+from app.utils import create_dict_pairs
+# Module-level setup shared by the tests below; both extractions run once,
+# when this test module is imported.
+parsed_narrations = parse_data(template_narrator)
+parsed_images = parse_data(template_images)
+
+def test_parsed_data_length_match():
+    """The narrator and image extractions must yield the same number of items."""
+    narration_count = len(parsed_narrations)
+    image_count = len(parsed_images)
+    assert narration_count == image_count
+
+def test_list_length_of_images_narrations():
+    """Pairing must produce exactly one entry per parsed image description."""
+    pairs = create_dict_pairs(parsed_narrations, parsed_images)
+    expected_count = len(parsed_images)
+    assert len(pairs) == expected_count
+
diff --git a/text.py b/text.py
deleted file mode 100644
index 58e8d91..0000000
--- a/text.py
+++ /dev/null
@@ -1,105 +0,0 @@
-from pydub import AudioSegment
-import subprocess
-import math
-import cv2
-import os
-
-offset = 50
-
-def get_audio_duration(audio_file):
- return len(AudioSegment.from_file(audio_file))
-
-def write_text(text, frame, video_writer):
- font = cv2.FONT_HERSHEY_SIMPLEX
- white_color = (255, 255, 255)
- black_color = (0, 0, 0)
- thickness = 10
- font_scale = 3
- border = 5
-
- # Calculate the position for centered text
- text_size = cv2.getTextSize(text, font, font_scale, thickness)[0]
- text_x = (frame.shape[1] - text_size[0]) // 2 # Center horizontally
- text_y = (frame.shape[0] + text_size[1]) // 2 # Center vertically
- org = (text_x, text_y) # Position of the text
-
- frame = cv2.putText(frame, text, org, font, font_scale, black_color, thickness + border * 2, cv2.LINE_AA)
- frame = cv2.putText(frame, text, org, font, font_scale, white_color, thickness, cv2.LINE_AA)
-
- video_writer.write(frame)
-
-def add_narration_to_video(narrations, input_video, output_dir, output_file):
- # Open the video file
- cap = cv2.VideoCapture(input_video)
-
- # Define the codec and create a VideoWriter object to save the output video
- fourcc = cv2.VideoWriter_fourcc(*'XVID')
- temp_video = os.path.join(output_dir, "with_transcript.avi")
- out = cv2.VideoWriter(temp_video, fourcc, 30, (int(cap.get(3)), int(cap.get(4))))
-
- full_narration = AudioSegment.empty()
-
- for i, narration in enumerate(narrations):
- audio = os.path.join(output_dir, "narrations", f"narration_{i+1}.mp3")
- duration = get_audio_duration(audio)
- narration_frames = math.floor(duration / 1000 * 30)
-
- full_narration += AudioSegment.from_file(audio)
-
- char_count = len(narration.replace(" ", ""))
- ms_per_char = duration / char_count
-
- frames_written = 0
- words = narration.split(" ")
- for w, word in enumerate(words):
- word_ms = len(word) * ms_per_char
-
- if i == 0 and w == 0:
- word_ms -= offset
- if word_ms < 0:
- word_ms = 0
-
- for _ in range(math.floor(word_ms/1000*30)):
- ret, frame = cap.read()
- if not ret:
- break
- write_text(word, frame, out)
- frames_written += 1
-
- for _ in range(narration_frames - frames_written):
- ret, frame = cap.read()
- out.write(frame)
-
- while out.isOpened():
- ret, frame = cap.read()
- if not ret:
- break
- out.write(frame)
-
- temp_narration = os.path.join(output_dir, "narration.mp3")
- full_narration.export(temp_narration, format="mp3")
-
- # Release the VideoCapture and VideoWriter objects
- cap.release()
- out.release()
-
- # Close all OpenCV windows (if any)
- cv2.destroyAllWindows()
-
- ffmpeg_command = [
- 'ffmpeg',
- '-y',
- '-i', temp_video,
- '-i', temp_narration,
- '-map', '0:v', # Map video from the first input
- '-map', '1:a', # Map audio from the second input
- '-c:v', 'copy', # Copy video codec
- '-c:a', 'aac', # AAC audio codec
- '-strict', 'experimental',
- os.path.join(output_dir, output_file)
- ]
-
- subprocess.run(ffmpeg_command, capture_output=True)
-
- os.remove(temp_video)
- os.remove(temp_narration)
diff --git a/video.py b/video.py
deleted file mode 100644
index 378c37b..0000000
--- a/video.py
+++ /dev/null
@@ -1,83 +0,0 @@
-from pydub import AudioSegment
-import numpy as np
-import math
-import cv2
-import os
-
-import text
-
-def get_audio_duration(audio_file):
- return len(AudioSegment.from_file(audio_file))
-
-def resize_image(image, width, height):
- # Calculate the aspect ratio of the original image
- aspect_ratio = image.shape[1] / image.shape[0]
-
- # Calculate the new dimensions to fit within the desired size while preserving aspect ratio
- if aspect_ratio > (width / height):
- new_width = width
- new_height = int(width / aspect_ratio)
- else:
- new_height = height
- new_width = int(height * aspect_ratio)
-
- # Resize the image to the new dimensions without distorting it
- return cv2.resize(image, (new_width, new_height))
-
-def create(narrations, output_dir, output_filename):
- # Define the dimensions and frame rate of the video
- width, height = 1080, 1920 # Change as needed for your vertical video
- frame_rate = 30 # Adjust as needed
-
- fade_time = 1000
-
- # Create a VideoWriter object to save the video
- fourcc = cv2.VideoWriter_fourcc(*'XVID') # You can change the codec as needed
- temp_video = os.path.join(output_dir, "temp_video.avi") # Output video file name
- out = cv2.VideoWriter(temp_video, fourcc, frame_rate, (width, height))
-
- # List of image file paths to use in the video
- image_paths = os.listdir(os.path.join(output_dir, "images")) # Replace with your image paths
- image_count = len(image_paths)
-
- # Load images and perform the transition effect
- for i in range(image_count):
- image1 = cv2.imread(os.path.join(output_dir, "images", f"image_{i+1}.webp"))
-
- if i+1 < image_count:
- image2 = cv2.imread(os.path.join(output_dir, "images", f"image_{i+2}.webp"))
- else:
- image2 = cv2.imread(os.path.join(output_dir, "images", f"image_1.webp"))
-
- image1 = resize_image(image1, width, height)
- image2 = resize_image(image2, width, height)
-
- narration = os.path.join(output_dir, "narrations", f"narration_{i+1}.mp3")
- duration = get_audio_duration(narration)
-
- if i > 0:
- duration -= fade_time
-
- if i == image_count-1:
- duration -= fade_time
-
- for _ in range(math.floor(duration/1000*30)):
- vertical_video_frame = np.zeros((height, width, 3), dtype=np.uint8)
- vertical_video_frame[:image1.shape[0], :] = image1
-
- out.write(vertical_video_frame)
-
- for alpha in np.linspace(0, 1, math.floor(fade_time/1000*30)):
- blended_image = cv2.addWeighted(image1, 1 - alpha, image2, alpha, 0)
- vertical_video_frame = np.zeros((height, width, 3), dtype=np.uint8)
- vertical_video_frame[:image1.shape[0], :] = blended_image
-
- out.write(vertical_video_frame)
-
- # Release the VideoWriter and close the window if any
- out.release()
- cv2.destroyAllWindows()
-
- text.add_narration_to_video(narrations, temp_video, output_dir, output_filename)
-
- os.remove(temp_video)