Skip to content

Commit f99fe2f

Browse files
committed
Add Option To Add Predefined Tags To Parse From The Quizzes
Signed-off-by: Tal Jacob <taljacob2@gmail.com>
1 parent 00fd112 commit f99fe2f

1 file changed

Lines changed: 50 additions & 1 deletion

File tree

nextstep-backend/src/services/job_quizzes_service.ts

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,48 @@ import { CompanyModel } from '../models/company_model';
55
import { CompanyData, ICompany, QuizData } from 'types/company_types';
66
import { Document } from 'mongoose';
77

8+
/**
 * Predefined tags to match against quizzes.
 *
 * Each entry is looked up in the quiz title and content with `matchTags`
 * (case-insensitive, whole word), and every hit is appended to the quiz tags.
 *
 * Entries are unique: a repeated tag would add nothing to the result (matches
 * are deduplicated) and would only cost an extra scan of the text.
 */
const PREDEFINED_TAGS: string[] = [
    // Example tags, replace/add as needed
    'QA', 'Software Engineer', 'Backend', 'Frontend', 'DevOps', 'Data Scientist', 'Algorithm', 'Student', 'Intern', 'Manager',
    'Automation', 'Validation', 'Verification', 'UI', 'C++', 'Java', 'Python', 'SQL', 'Security', 'Malware', 'Firmware',
    'Embedded', 'Web', 'Mobile', 'Cloud', 'Networking', 'Support', 'Technical', 'Test', 'Interview', 'Assessment', 'Coding',
    'Logic', 'HR', 'Personal', 'Group Dynamics', 'Exam', 'Assessment Center', 'Online Test', 'Phone Interview',
    'Onsite Interview', 'Technical Interview', 'Behavioral Interview', 'Case Study', 'Presentation', 'Assignment', 'Project',
    'Challenge', 'Simulation', 'Scenario', 'Task', 'Exercise', 'Problem', 'Solution', 'Tips', 'Advice', 'Preparation',
    'Experience', 'Feedback', 'Review', 'Summary', 'Report', 'Result', 'Score', 'Grade', 'Pass', 'Fail', 'Success', 'Failure',
    'Mistake', 'Error', 'Bug', 'Fix', 'Patch', 'Update', 'Upgrade', 'Release', 'Deployment', 'Integration', 'Testing',
    'Debugging', 'Troubleshooting', 'Maintenance', 'Customer', 'Client', 'User', 'Stakeholder', 'Partner', 'Vendor', 'Supplier',
    'Contractor', 'Consultant', 'Advisor', 'Mentor', 'Coach', 'Trainer', 'Teacher', 'Instructor', 'Lecturer', 'Professor',
    'Researcher', 'Scientist', 'Engineer', 'Developer', 'Programmer', 'Coder', 'Designer', 'Architect', 'Analyst', 'Specialist',
    'Expert', 'Professional', 'Practitioner', 'Technician', 'Operator', 'Administrator', 'Director', 'VP', 'C-level', 'CEO',
    'CTO', 'CIO', 'COO', 'CFO', 'CMO', 'CSO', 'CHRO', 'Board', 'Committee', 'Team', 'Group', 'Department', 'Division', 'Unit',
    'Section', 'Branch', 'Office', 'Site', 'Location', 'Region', 'Country', 'City', 'Area', 'Zone', 'District', 'Territory',
    'Market', 'Segment', 'Industry', 'Sector', 'Field', 'Domain', 'Discipline', 'Subject', 'Topic', 'Category', 'Type', 'Class',
    // 'Grade' is already listed above with the result/score tags
    'Level', 'Rank', 'Position', 'Role', 'Title', 'Function', 'Responsibility', 'Duty', 'Activity', 'Operation',
    'Process', 'Procedure', 'Method', 'Technique', 'Tool', 'Instrument', 'Device', 'Equipment', 'Machine', 'System', 'Platform',
    'Application', 'Software', 'Hardware', 'Network', 'Database', 'Server', 'Interface', 'Protocol', 'Standard', 'Specification',
    'Requirement', 'Constraint', 'Limitation', 'Condition', 'Assumption', 'Risk', 'Issue', 'Opportunity', 'Threat', 'Weakness',
    'Strength', 'Advantage', 'Disadvantage', 'Benefit', 'Cost', 'Price', 'Value', 'Quality', 'Performance', 'Efficiency',
    'Effectiveness', 'Productivity', 'Reliability', 'Availability', 'Scalability', 'Flexibility', 'Adaptability', 'Maintainability',
    // 'Security' is already listed above with the technology tags
    'Usability', 'Accessibility', 'Privacy', 'Confidentiality', 'Integrity', 'Authenticity', 'Accountability',
    'Compliance', 'Regulation', 'Law', 'Policy', 'Rule', 'Guideline', 'Best Practice', 'Lesson Learned',
    // Add more as needed
];
36+
37+
/** Compiled matchers keyed by tag, so each pattern is built once instead of on every call. */
const TAG_PATTERN_CACHE = new Map<string, RegExp>();

/**
 * Returns the case-insensitive, whole-word matcher for a single tag.
 *
 * A `\b` assertion is only added on a side where the tag itself starts/ends
 * with a word character. An unconditional `\b` after a tag such as `C++`
 * would demand a word character right after the final `+`, so the tag could
 * never match ordinary text like "C++ developer".
 */
function getTagPattern(tag: string): RegExp {
    let pattern = TAG_PATTERN_CACHE.get(tag);
    if (pattern === undefined) {
        // Escape regex metacharacters so the tag is matched literally.
        const escaped = tag.replace(/[-/\\^$*+?.()|[\]{}]/g, '\\$&');
        const leading = /^\w/.test(tag) ? '\\b' : '';
        const trailing = /\w$/.test(tag) ? '\\b' : '';
        // No `g`/`y` flag: `test` stays stateless, so the cached instance is safe to reuse.
        pattern = new RegExp(`${leading}${escaped}${trailing}`, 'i');
        TAG_PATTERN_CACHE.set(tag, pattern);
    }
    return pattern;
}

/**
 * Helper to match tags in a string (case-insensitive, whole word).
 *
 * @param text - Text to search (e.g. a quiz title or its HTML content).
 * @param tags - Candidate tags; blank entries are ignored.
 * @returns The tags found in `text`, deduplicated, in the order they appear in
 *          `tags` and with the casing used in `tags`.
 */
function matchTags(text: string, tags: readonly string[]): string[] {
    const found = new Set<string>();
    for (const tag of tags) {
        // A blank tag would compile to a pattern that matches almost any text.
        if (tag.trim() === '') {
            continue;
        }
        if (getTagPattern(tag).test(text)) {
            found.add(tag);
        }
    }
    return Array.from(found);
}
49+
850
const companyToCompanyData = (company: Document<unknown, {}, ICompany> & ICompany): CompanyData => {
951
return {
1052
...company.toJSON(),
@@ -64,12 +106,19 @@ const parseJobQuizzesFromJobHuntHtml = (htmlPath: string): CompanyData[] => {
64106
const content = article.find('.faq-content').html() || '';
65107
const forum_link = article.find('.meta-faq-data-fields a[href]').attr('href') || '';
66108
// Generate tags: company, job role, technology, year, etc. Deduplicate.
67-
const quiz_tags = Array.from(new Set([
109+
let quiz_tags = Array.from(new Set([
68110
company_en_final,
69111
company_he_final,
70112
...quiz_title.split(/\s+/),
71113
].filter(Boolean)));
72114

115+
// Add matched predefined tags from title and content
116+
const matched_tags = new Set([
117+
...matchTags(quiz_title, PREDEFINED_TAGS),
118+
...matchTags(content, PREDEFINED_TAGS),
119+
]);
120+
quiz_tags = Array.from(new Set([...quiz_tags, ...matched_tags]));
121+
73122
quizzes.push({
74123
title: quiz_title,
75124
quiz_id,

0 commit comments

Comments
 (0)