@@ -5,6 +5,48 @@ import { CompanyModel } from '../models/company_model';
55import { CompanyData , ICompany , QuizData } from 'types/company_types' ;
66import { Document } from 'mongoose' ;
77
/**
 * Predefined tags that quiz titles and content are matched against.
 *
 * Entries are matched case-insensitively as whole words/phrases, so the
 * casing used here is the casing that ends up in the resulting tag list.
 * Every entry must be unique: a duplicate adds no information and only costs
 * an extra regex evaluation per matched text.
 *
 * Example tags, replace/add as needed.
 */
const PREDEFINED_TAGS: string[] = [
    'QA', 'Software Engineer', 'Backend', 'Frontend', 'DevOps', 'Data Scientist', 'Algorithm', 'Student', 'Intern', 'Manager',
    'Automation', 'Validation', 'Verification', 'UI', 'C++', 'Java', 'Python', 'SQL', 'Security', 'Malware', 'Firmware',
    'Embedded', 'Web', 'Mobile', 'Cloud', 'Networking', 'Support', 'Technical', 'Test', 'Interview', 'Assessment', 'Coding',
    'Logic', 'HR', 'Personal', 'Group Dynamics', 'Exam', 'Assessment Center', 'Online Test', 'Phone Interview',
    'Onsite Interview', 'Technical Interview', 'Behavioral Interview', 'Case Study', 'Presentation', 'Assignment', 'Project',
    'Challenge', 'Simulation', 'Scenario', 'Task', 'Exercise', 'Problem', 'Solution', 'Tips', 'Advice', 'Preparation',
    'Experience', 'Feedback', 'Review', 'Summary', 'Report', 'Result', 'Score', 'Grade', 'Pass', 'Fail', 'Success', 'Failure',
    'Mistake', 'Error', 'Bug', 'Fix', 'Patch', 'Update', 'Upgrade', 'Release', 'Deployment', 'Integration', 'Testing',
    'Debugging', 'Troubleshooting', 'Maintenance', 'Customer', 'Client', 'User', 'Stakeholder', 'Partner', 'Vendor', 'Supplier',
    'Contractor', 'Consultant', 'Advisor', 'Mentor', 'Coach', 'Trainer', 'Teacher', 'Instructor', 'Lecturer', 'Professor',
    'Researcher', 'Scientist', 'Engineer', 'Developer', 'Programmer', 'Coder', 'Designer', 'Architect', 'Analyst', 'Specialist',
    'Expert', 'Professional', 'Practitioner', 'Technician', 'Operator', 'Administrator', 'Director', 'VP', 'C-level', 'CEO',
    'CTO', 'CIO', 'COO', 'CFO', 'CMO', 'CSO', 'CHRO', 'Board', 'Committee', 'Team', 'Group', 'Department', 'Division', 'Unit',
    'Section', 'Branch', 'Office', 'Site', 'Location', 'Region', 'Country', 'City', 'Area', 'Zone', 'District', 'Territory',
    'Market', 'Segment', 'Industry', 'Sector', 'Field', 'Domain', 'Discipline', 'Subject', 'Topic', 'Category', 'Type', 'Class',
    // 'Grade' is already listed above with the result/score tags.
    'Level', 'Rank', 'Position', 'Role', 'Title', 'Function', 'Responsibility', 'Duty', 'Activity', 'Operation',
    'Process', 'Procedure', 'Method', 'Technique', 'Tool', 'Instrument', 'Device', 'Equipment', 'Machine', 'System', 'Platform',
    'Application', 'Software', 'Hardware', 'Network', 'Database', 'Server', 'Interface', 'Protocol', 'Standard', 'Specification',
    'Requirement', 'Constraint', 'Limitation', 'Condition', 'Assumption', 'Risk', 'Issue', 'Opportunity', 'Threat', 'Weakness',
    'Strength', 'Advantage', 'Disadvantage', 'Benefit', 'Cost', 'Price', 'Value', 'Quality', 'Performance', 'Efficiency',
    'Effectiveness', 'Productivity', 'Reliability', 'Availability', 'Scalability', 'Flexibility', 'Adaptability', 'Maintainability',
    // 'Security' is already listed above with the technology tags.
    'Usability', 'Accessibility', 'Privacy', 'Confidentiality', 'Integrity', 'Authenticity', 'Accountability',
    'Compliance', 'Regulation', 'Law', 'Policy', 'Rule', 'Guideline', 'Best Practice', 'Lesson Learned',
    // Add more as needed
];
36+
// Compiled matcher per tag. The regex depends only on the tag text, so it is
// built once instead of on every call for every tag.
const TAG_REGEX_CACHE = new Map<string, RegExp>();

/**
 * Finds which of the given tags occur in a piece of text.
 *
 * Matching is case-insensitive and anchored on word boundaries, so `Java`
 * does not match inside `JavaScript`. A `\b` anchor is only placed on an edge
 * of the tag that is itself a word character: for a tag such as `C++` a
 * trailing `\b` would demand a word character right after the `+`, which
 * made `"C++ developer"` and a trailing `"C++"` impossible to match.
 *
 * The text is matched as-is. Markup is not stripped, so when HTML is passed
 * in, element and attribute names (e.g. `class`, `title`) can match tags too.
 *
 * @param text - Text to search. An empty string yields no matches.
 * @param tags - Candidate tags. Empty entries are ignored.
 * @returns The matched tags, de-duplicated, in the order they appear in `tags`
 *          and with the casing used in `tags`.
 */
function matchTags(text: string, tags: string[]): string[] {
    const found = new Set<string>();
    if (!text) {
        return [];
    }
    for (const tag of tags) {
        if (!tag) {
            // An empty pattern would match any text.
            continue;
        }
        let regex = TAG_REGEX_CACHE.get(tag);
        if (regex === undefined) {
            // Escape regex metacharacters so the tag is matched literally.
            const escaped = tag.replace(/[-/\\^$*+?.()|[\]{}]/g, '\\$&');
            const leading = /^\w/.test(tag) ? '\\b' : '';
            const trailing = /\w$/.test(tag) ? '\\b' : '';
            // No `g` flag: `test` must stay stateless because the regex is reused.
            regex = new RegExp(`${leading}${escaped}${trailing}`, 'i');
            TAG_REGEX_CACHE.set(tag, regex);
        }
        if (regex.test(text)) {
            found.add(tag);
        }
    }
    return Array.from(found);
}
49+
850const companyToCompanyData = ( company : Document < unknown , { } , ICompany > & ICompany ) : CompanyData => {
951 return {
1052 ...company . toJSON ( ) ,
@@ -64,12 +106,19 @@ const parseJobQuizzesFromJobHuntHtml = (htmlPath: string): CompanyData[] => {
64106 const content = article . find ( '.faq-content' ) . html ( ) || '' ;
65107 const forum_link = article . find ( '.meta-faq-data-fields a[href]' ) . attr ( 'href' ) || '' ;
66108 // Generate tags: company, job role, technology, year, etc. Deduplicate.
67- const quiz_tags = Array . from ( new Set ( [
109+ let quiz_tags = Array . from ( new Set ( [
68110 company_en_final ,
69111 company_he_final ,
70112 ...quiz_title . split ( / \s + / ) ,
71113 ] . filter ( Boolean ) ) ) ;
72114
115+ // Add matched predefined tags from title and content
116+ const matched_tags = new Set ( [
117+ ...matchTags ( quiz_title , PREDEFINED_TAGS ) ,
118+ ...matchTags ( content , PREDEFINED_TAGS ) ,
119+ ] ) ;
120+ quiz_tags = Array . from ( new Set ( [ ...quiz_tags , ...matched_tags ] ) ) ;
121+
73122 quizzes . push ( {
74123 title : quiz_title ,
75124 quiz_id,
0 commit comments