ethos-link · fidalgo · Apr 28, 2026 · Apr 28, 2026
diff --git a/lib/crawlscope/rules/structured_data.rb b/lib/crawlscope/rules/structured_data.rb
@@ -3,6 +3,8 @@
 module Crawlscope
   module Rules
     class StructuredData
+      CAREER_DETAIL_PATH = %r{/careers/[^/]+/?\z} # path ending in /careers/<segment> with an optional trailing slash; not anchored at the start
+
       attr_reader :code
 
       def initialize
@@ -65,6 +67,51 @@ def validate_page(page, issues, schema_registry)
             details: {errors: errors, source: source}
           )
         end
+
+        validate_job_posting_count(page, items, issues)
+      end
+
+      # Checks how many structured data items declare the JobPosting type.
+      # Exactly one is accepted silently; more than one is reported as a warning,
+      # and none is reported only when the page URL is a career detail page.
+      def validate_job_posting_count(page, items, issues)
+        posting_count = items.count { |item| structured_data_types(item.data).include?("JobPosting") }
+        return if posting_count == 1
+        return if posting_count.zero? && !career_detail_page?(page.url)
+
+        code, message, details =
+          if posting_count > 1
+            [:multiple_job_postings, "multiple JobPosting structured data blocks found", {count: posting_count}]
+          else
+            [:missing_job_posting, "career detail page missing JobPosting structured data", {expected_type: "JobPosting"}]
+          end
+
+        issues.add(
+          code: code,
+          severity: :warning,
+          category: :structured_data,
+          url: page.url,
+          message: message,
+          details: details
+        )
+      end
+
+      # Lists every "@type" value (as strings) declared on a JSON-LD node and,
+      # recursively, on the entries of its "@graph" array; non-Hash input gives [].
+      def structured_data_types(data)
+        return [] unless data.is_a?(Hash)
+
+        own_types = Array(data["@type"]).map(&:to_s)
+        graph = data["@graph"]
+        return own_types unless graph.is_a?(Array)
+
+        own_types + graph.flat_map { |node| structured_data_types(node) }
+      end
+
+      def career_detail_page?(url) # true when url's path matches CAREER_DETAIL_PATH
+        URI(url).path.to_s.match?(CAREER_DETAIL_PATH) # path is nil for opaque URIs (e.g. mailto:), hence to_s
+      rescue URI::InvalidURIError # an unparsable URL cannot be a career detail page
+        false
       end
     end
   end

diff --git a/lib/crawlscope/schemas.rb b/lib/crawlscope/schemas.rb
@@ -330,6 +330,56 @@ class Schemas
       }
     }.freeze
 
+    JOB_POSTING = { # JSON Schema for schema.org JobPosting items; registered under "JobPosting" in .schemas
+      type: "object",
+      additionalProperties: true, # top-level properties not listed below are accepted as-is
+      required: ["@type", "title", "description", "datePosted", "hiringOrganization"],
+      properties: { # NOTE(review): keys mix Strings ("@type") and Symbols (:title) while `required` uses Strings — confirm the validator normalizes keys
+        "@context" => {enum: ["https://schema.org", "https://schema.org/"]},
+        "@type" => {const: "JobPosting"},
+        :title => {type: "string"},
+        :description => {type: "string"},
+        :identifier => {type: "object"},
+        :datePosted => {type: "string"},
+        :validThrough => {type: "string"},
+        :employmentType => { # one string or a non-empty array of strings
+          anyOf: [
+            {type: "string"},
+            {type: "array", minItems: 1, items: {type: "string"}}
+          ]
+        },
+        :directApply => {type: "boolean"},
+        :hiringOrganization => { # must be typed "Organization" and carry a name; sameAs and logo are optional strings declared with format "uri"
+          type: "object",
+          required: ["@type", "name"],
+          properties: {
+            "@type" => {const: "Organization"},
+            :name => {type: "string"},
+            :sameAs => {type: "string", format: "uri"},
+            :logo => {type: "string", format: "uri"}
+          }
+        },
+        :applicantLocationRequirements => { # one object or a non-empty array of objects
+          anyOf: [
+            {type: "object"},
+            {type: "array", minItems: 1, items: {type: "object"}}
+          ]
+        },
+        :jobLocationType => {type: "string"},
+        :jobLocation => { # one object or a non-empty array of objects
+          anyOf: [
+            {type: "object"},
+            {type: "array", minItems: 1, items: {type: "object"}}
+          ]
+        },
+        :baseSalary => {type: "object"}
+      },
+      anyOf: [ # location rule: either jobLocation, or jobLocationType together with applicantLocationRequirements
+        {required: ["jobLocation"]},
+        {required: ["jobLocationType", "applicantLocationRequirements"]}
+      ]
+    }.freeze # freeze is shallow: the nested hashes and arrays stay mutable
+
     def self.schemas
       {
         "FAQPage" => FAQ_PAGE,
@@ -348,7 +398,8 @@ def self.schemas
         "Recipe" => RECIPE,
         "Event" => EVENT,
         "VideoObject" => VIDEO_OBJECT,
-        "WebPage" => WEB_PAGE
+        "WebPage" => WEB_PAGE,
+        "JobPosting" => JOB_POSTING
       }
     end
   end

diff --git a/test/crawlscope/structured_data_rule_test.rb b/test/crawlscope/structured_data_rule_test.rb
@@ -79,6 +79,97 @@ def test_reports_missing_structured_data_for_html_pages
     assert_equal ["json-ld", "microdata"], issues.to_a.first.details[:expected_sources]
   end
 
+  def test_validates_job_posting_markup # a JobPosting using jobLocationType + applicantLocationRequirements on a /careers/<slug> URL yields no issues
+    issues = Crawlscope::IssueCollection.new
+    rule = Crawlscope::Rules::StructuredData.new
+    page = page(
+      url: "https://example.com/careers/sales-partner",
+      body: <<~HTML
+        <html>
+          <head>
+            <script type="application/ld+json">
+              {
+                "@context":"https://schema.org/",
+                "@type":"JobPosting",
+                "title":"Sales Partner",
+                "description":"A real role description.",
+                "datePosted":"2026-04-28",
+                "hiringOrganization":{"@type":"Organization","name":"Example","sameAs":"https://example.com/","logo":"https://example.com/icon.png"},
+                "jobLocationType":"TELECOMMUTE",
+                "applicantLocationRequirements":[{"@type":"Country","name":"South Africa"}]
+              }
+            </script>
+          </head>
+          <body><h1>Sales Partner</h1></body>
+        </html>
+      HTML
+    )
+
+    rule.call(
+      urls: [page.url],
+      pages: [page],
+      issues: issues,
+      context: {schema_registry: Crawlscope::SchemaRegistry.default}
+    )
+
+    assert_empty issues.to_a # neither a schema error nor a missing/multiple JobPosting warning
+  end
+
+  def test_reports_schema_errors_for_invalid_job_posting_markup # a JobPosting carrying only a title must fail schema validation
+    issues = Crawlscope::IssueCollection.new
+    rule = Crawlscope::Rules::StructuredData.new
+    page = page(
+      url: "https://example.com/careers/sales-partner",
+      body: <<~HTML
+        <html>
+          <head>
+            <script type="application/ld+json">
+              {"@context":"https://schema.org","@type":"JobPosting","title":"Sales Partner"}
+            </script>
+          </head>
+          <body><h1>Sales Partner</h1></body>
+        </html>
+      HTML
+    )
+
+    rule.call(
+      urls: [page.url],
+      pages: [page],
+      issues: issues,
+      context: {schema_registry: Crawlscope::SchemaRegistry.default}
+    )
+
+    assert_equal [:structured_data_schema_error], issues.to_a.map(&:code) # exactly one issue: one JobPosting is present, so no count warning is expected
+    assert_includes issues.to_a.first.message, "description" # "description" is one of the required keys absent from the fixture
+  end
+
+  def test_reports_missing_job_posting_for_career_detail_pages # a /careers/<slug> page whose only JSON-LD item is a WebPage gets :missing_job_posting
+    issues = Crawlscope::IssueCollection.new
+    rule = Crawlscope::Rules::StructuredData.new
+    page = page(
+      url: "https://example.com/careers/sales-partner",
+      body: <<~HTML
+        <html>
+          <head>
+            <script type="application/ld+json">
+              {"@context":"https://schema.org","@type":"WebPage","name":"Sales Partner"}
+            </script>
+          </head>
+          <body><h1>Sales Partner</h1></body>
+        </html>
+      HTML
+    )
+
+    rule.call(
+      urls: [page.url],
+      pages: [page],
+      issues: issues,
+      context: {schema_registry: Crawlscope::SchemaRegistry.default}
+    )
+
+    assert_equal [:missing_job_posting], issues.to_a.map(&:code) # the WebPage item itself is expected to validate cleanly
+  end
+
   private
 
   def page(url:, body:)