Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
submissions/Nannan_Aravazhi/venv/
44 changes: 44 additions & 0 deletions submissions/Nannan_Aravazhi/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
Wikipedia Reading-Check

This project takes a Wikipedia topic as input and fetches the article text.

It then summarizes it using a lightweight model and generates one short reading-check question based on the summary.

Runs fully on CPU. No GPU required.

How to Run
1. Create and activate a virtual environment
python3 -m venv submissions/<your_name>/venv
source submissions/<your_name>/venv/bin/activate

2. Install dependencies
pip install -r submissions/<your_name>/requirements.txt

3. Run the program
python submissions/<your_name>/summarizer_qa.py


Then enter a topic, e.g.:

Enter a Wikipedia topic: cristiano ronaldo
or
lionel messi

Example Output
=== Summary ===
Cristiano Ronaldo dos Santos Aveiro is a Portuguese international footballer...

=== Questions ===
1. How many goals has Cristiano Ronaldo scored in the Champions League?

Models Used

Summarizer: sshleifer/distilbart-cnn-12-6

Question Generator: iarfmoose/t5-base-question-generator

Model Info:
First run downloads the model weights (~300MB summarizer, ~900MB QG).
After the first run, everything is cached locally in:

~/.cache/huggingface/hub/
5 changes: 5 additions & 0 deletions submissions/Nannan_Aravazhi/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
transformers
wikipedia-api
torch
sentencepiece
protobuf<5
119 changes: 119 additions & 0 deletions submissions/Nannan_Aravazhi/summarizer_qa.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
import wikipediaapi
from transformers import pipeline
from typing import List, Dict


# constants
WIKI_CHAR_LIMIT = 1000 # limiting fetched article text to this many characters to avoid overflow
SUM_MODEL = "sshleifer/distilbart-cnn-12-6" # summarization checkpoint; heard it was decent + lightweight
# passed as max_length / min_length to the summarization pipeline
SUM_MAX_LEN = 200
SUM_MIN_LEN = 50
QG_MODEL = "iarfmoose/t5-base-question-generator" # model for question generation
QG_NUM_QUESTIONS = 1 # asking a single reading-check question; also used as num_return_sequences and num_beams
USER_AGENT = "NannanAravazhi-AIatGT-AR-Fall2025/1.0 (contact: naravazhi3@gatech.edu)" # sent with every Wikipedia request



# initializing both pipelines here, once at import time, so huggingface runs locally
# and summarize_text / generate_questions share the same loaded models
summarizer_pipeline = pipeline("summarization", model=SUM_MODEL)
qg_pipeline = pipeline("text2text-generation", model=QG_MODEL)




# core functions
def fetch_wikipedia_content(topic: str) -> str:
    """
    look up `topic` on English Wikipedia and return the start of its text

    the page summary is preferred; the full page text is only read when the
    summary is blank. the result is cut to at most WIKI_CHAR_LIMIT characters.

    raises ValueError when the page does not exist or has no non-blank text
    """
    client = wikipediaapi.Wikipedia(language="en", user_agent=USER_AGENT)
    article = client.page(topic)
    if not article.exists():
        raise ValueError(f"Can't find this topic: '{topic}'.")

    # first non-blank source wins; `or` short-circuits so the full text is
    # only requested when the summary is empty
    body = (article.summary or "").strip() or (article.text or "").strip()
    if body:
        return body[:WIKI_CHAR_LIMIT]
    raise ValueError(f"No readable content on this topic: '{topic}'.")


def summarize_text(content: str) -> str:
    """
    create a short summary of `content` with the module-level summarizer pipeline

    Args:
        content: text to summarize; must contain at least one non-whitespace
            character.

    Returns:
        the generated summary with surrounding whitespace stripped.

    Raises:
        ValueError: if `content` is empty or whitespace-only.
    """
    if not content or not content.strip():
        raise ValueError("Empty content; cannot summarize.")
    result = summarizer_pipeline(
        content,
        max_length=SUM_MAX_LEN,
        min_length=SUM_MIN_LEN,
        do_sample=False,  # no sampling, so repeated runs give the same summary
        # callers are not required to pre-trim their text: let the tokenizer
        # clip over-long input instead of failing inside the model
        truncation=True,
    )
    return result[0]["summary_text"].strip()


def generate_questions(summary: str) -> List[str]:
    """
    generate up to QG_NUM_QUESTIONS quiz-style questions about `summary`

    the input is formatted as "<answer> ... <context> ..." rather than as a
    free-form instruction, since the question-generation checkpoint is not an
    instruction-following model. the first sentence of the summary is used as
    the answer span and the whole summary as the context.
    NOTE(review): input format taken from the QG model card - confirm against
    the card for the checkpoint named in QG_MODEL.

    Args:
        summary: text the questions should be about.

    Returns:
        distinct, non-empty generated questions in generation order, at most
        QG_NUM_QUESTIONS of them; an empty list when `summary` is empty or
        whitespace-only.
    """
    import re  # local import: only needed here for sentence splitting

    if not summary or not summary.strip():
        return []

    context = summary.strip()
    # first sentence = everything up to the first ., ! or ? followed by whitespace
    answer = re.split(r"(?<=[.!?])\s+", context, maxsplit=1)[0]
    prompt = f"<answer> {answer} <context> {context}"

    out = qg_pipeline(
        prompt,
        max_length=64,
        do_sample=True,  # sampling
        top_p=0.92,
        top_k=50,
        temperature=0.9,
        num_return_sequences=QG_NUM_QUESTIONS,
        num_beams=QG_NUM_QUESTIONS,
    )

    candidates = (item["generated_text"].strip() for item in out)
    # dict.fromkeys drops duplicates while keeping first-seen order
    unique = list(dict.fromkeys(q for q in candidates if q))
    return unique[:QG_NUM_QUESTIONS]




# main method
def main() -> None:
    """
    interactive entry point: ask for a topic, then print its summary and questions

    a blank answer falls back to "Georgia Institute of Technology". any error
    raised while fetching, summarizing or generating questions is printed as
    "[Error] ..." instead of propagating.
    """
    print("=== AI@GT: Wikipedia Summarizer and QGen ===")
    topic = input("Enter a Wikipedia topic: ").strip()
    if not topic:
        topic = "Georgia Institute of Technology"

    try:
        article = fetch_wikipedia_content(topic)
        summary = summarize_text(article)

        # everything produced for this topic, kept together in one mapping
        report = {
            "topic": topic,
            "summary": summary,
            "questions": generate_questions(summary),
        }

        print("\n=== Summary ===\n" + report["summary"])
        print("\n=== Questions ===")
        if not report["questions"]:
            print("(No questions generated)")
        for number, question in enumerate(report["questions"], start=1):
            print(f"{number}. {question}")
    except Exception as err:
        print(f"\n[Error] {err}")


if __name__ == "__main__":
    main()