Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 2 additions & 10 deletions ckanext/dcat/configuration_processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -488,11 +488,7 @@ class FormatFilter(BaseConfigProcessor):

@staticmethod
def check_config(config_obj):
if 'format_filter_include' in config_obj \
and 'format_filter_exclude' in config_obj:
raise ValueError('Harvest configuration cannot contain both '
'format_filter_include and format_filter_exclude')
for key in ['format_filter_include', 'format_filter_exclude']:
for key in ['format_filter_exclude', 'format_filter_include']:
if key in config_obj:
formats_list = config_obj[key]
if not isinstance(formats_list, list):
Expand All @@ -510,11 +506,7 @@ class TagFilter(BaseConfigProcessor):

@staticmethod
def check_config(config_obj):
if 'tag_filter_include' in config_obj \
and 'tag_filter_exclude' in config_obj:
raise ValueError('Harvest configuration cannot contain both '
'tag_filter_include and tag_filter_exclude')
for key in ['tag_filter_include', 'tag_filter_exclude']:
for key in ['tag_filter_exclude', 'tag_filter_include']:
if key in config_obj:
tags_list = config_obj[key]
if not isinstance(tags_list, list):
Expand Down
28 changes: 14 additions & 14 deletions ckanext/dcat/harvesters/_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,17 +48,17 @@ def _get_guids_and_datasets(self, content):
else:
raise ValueError('Wrong JSON object')

# Filter in/out datasets from particular organizations
# Filter datasets from particular organizations
org_filter_include = self.config.get('organizations_filter_include', [])
org_filter_exclude = self.config.get('organizations_filter_exclude', [])

# Filter in/out datasets with particular formats
format_filter_include = self.config.get('format_filter_include', [])
# Filter datasets with particular formats
format_filter_exclude = self.config.get('format_filter_exclude', [])
format_filter_include = self.config.get('format_filter_include', [])

# Filter in/out datasets with particular tags
tag_filter_include = self.config.get('tag_filter_include', [])
# Filter datasets with particular tags
tag_filter_exclude = self.config.get('tag_filter_exclude', [])
tag_filter_include = self.config.get('tag_filter_include', [])

for dataset in datasets:
# Get the organization name for the dataset
Expand All @@ -80,27 +80,27 @@ def _get_guids_and_datasets(self, content):
if dcat_publisher_name in org_filter_exclude:
continue

# Include/exclude dataset based on particular formats
if format_filter_include or format_filter_exclude:
# Exclude/include dataset based on particular formats
if format_filter_exclude or format_filter_include:
resource_formats = [
dist.get('format', '').lower()
for dist in dataset.get('distribution', [])
if dist.get('format')
]
if format_filter_exclude:
if any(fmt in resource_formats for fmt in format_filter_exclude):
continue
if format_filter_include:
if not any(fmt in resource_formats for fmt in format_filter_include):
continue
elif format_filter_exclude:
if any(fmt in resource_formats for fmt in format_filter_exclude):
continue

# Include/exclude dataset based on particular tags
# Exclude/include dataset based on particular tags
if tag_filter_exclude:
if any(tag in dataset.get('keyword', []) for tag in tag_filter_exclude):
continue
if tag_filter_include:
if not any(tag in dataset.get('keyword', []) for tag in tag_filter_include):
continue
elif tag_filter_exclude:
if any(tag in dataset.get('keyword', []) for tag in tag_filter_exclude):
continue

as_string = json.dumps(dataset)

Expand Down
36 changes: 18 additions & 18 deletions ckanext/dcat/tests/test_configuration_processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -935,31 +935,31 @@ class TestFormatFilter:

def test_validation_correct_format(self):
config = {
"format_filter_include": [
"CSV",
"GeoJSON"
"format_filter_exclude": [
"PDF"
]
}
try:
self.processor.check_config(config)
assert config["format_filter_include"] == ["csv", "geojson"]
assert config["format_filter_exclude"] == ["pdf"]
except ValueError:
assert False

config = {
"format_filter_exclude": [
"PDF"
"format_filter_include": [
"CSV",
"GeoJSON"
]
}
try:
self.processor.check_config(config)
assert config["format_filter_exclude"] == ["pdf"]
assert config["format_filter_include"] == ["csv", "geojson"]
except ValueError:
assert False

def test_validation_wrong_format(self):
config = {
"format_filter_include": "CSV, GeoJSON"
"format_filter_exclude": "PDF"
}
try:
self.processor.check_config(config)
Expand All @@ -968,7 +968,7 @@ def test_validation_wrong_format(self):
assert True

config = {
"format_filter_exclude": "PDF"
"format_filter_include": "CSV, GeoJSON"
}
try:
self.processor.check_config(config)
Expand All @@ -983,9 +983,9 @@ class TestTagFilter:

def test_validation_correct_format(self):
config = {
"tag_filter_include": [
"Climate",
"Water"
"tag_filter_exclude": [
"Application",
"Software"
]
}
try:
Expand All @@ -994,9 +994,9 @@ def test_validation_correct_format(self):
assert False

config = {
"tag_filter_exclude": [
"Application",
"Software"
"tag_filter_include": [
"Climate",
"Water"
]
}
try:
Expand All @@ -1006,16 +1006,16 @@ def test_validation_correct_format(self):

def test_validation_wrong_format(self):
config = {
"tag_filter_include": "Climate, Water"
"tag_filter_exclude": "Application, Software"
}
try:
self.processor.check_config(config)
assert False
except ValueError:
assert True

config = {
"tag_filter_exclude": "Application, Software"
"tag_filter_include": "Climate, Water"
}
try:
self.processor.check_config(config)
Expand Down