diff --git a/.gitignore b/.gitignore index 23b99e089..3c63c5e62 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,7 @@ +.venv/ __pycache__/ -bibliovenv/ -Bibenv/ -.idea/ \ No newline at end of file +*.pyc + .DS_Store +**/.ipynb_checkpoints/ +__pycache__/ +.venv/ diff --git a/app.py b/app.py index f0891f894..aac018ec0 100644 --- a/app.py +++ b/app.py @@ -854,7 +854,124 @@ def indicator_types_ui_all(): ), with ui.nav_panel("None", value="API"): - ui.h3("🚧 Warning: API is under construction 🚧") + ui.h3("🔌 API Query", style="color: #5567BB;") + ui.p("Retrieve data directly from OpenAlex or PubMed via API, without manual download.") + + api_result = reactive.Value(None) + + with ui.layout_sidebar(fillable=False, fill=False): + with ui.sidebar(id="sidebar_api", position="right"): + ui.h5("Query Parameters", style="color: #5567BB;") + ui.input_text("api_query", "Search Query", placeholder="es. machine learning") + ui.input_select( + "api_platform", + "Platform", + {"openalex": "OpenAlex", "pubmed": "PubMed"} + ) + ui.input_numeric("api_total", "Number of records", value=200, min=10, max=1000) + ui.input_action_button("run_api", "Run Query", icon=ICONS["play"]) + + @reactive.effect + @reactive.event(input.run_api) + def run_api_pipeline(): + def loading_modal(): + phrases = [ + "⏳ Loading... Please wait.", + "🌐 Querying the API...", + "📦 Retrieving records...", + "🔄 Standardizing data...", + "✅ Validating output...", + "✨ Almost there! 
Preparing your dataset...", + ] + modal = ui.modal( + ui.div( + ui.img( + src="https://cisslaboral.laleynext.es/Img/loader-circle.gif", + height="150px", + style="display: block; margin: 0 auto; text-align: center;", + ), + ui.h4( + phrases[0], + id="loading-phrase", + style="font-size: 15px; text-align: center; margin-top: 20px; color: gray;", + ), + ), + easy_close=False, + footer=None, + ) + js = f""" + + """ + return ui.HTML(str(modal) + js) + + ui.modal_show(loading_modal()) + try: + query = input.api_query() + platform = input.api_platform() + total = input.api_total() + + if not query: + ui.notification_show("⚠️ Please enter a search query.", type="warning", duration=5) + return + + records = retrieve(query=query, platform=platform, total=total) + df_api = standardize(records, source=platform) + df_api = validate(df_api) + + api_result.set(df_api) + df.set(df_api) # rende i dati disponibili a tutta la dashboard + + ui.notification_show( + f"✅ {len(df_api)} records retrieved from {platform.capitalize()} and loaded successfully.", + duration=5, + close_button=False + ) + except Exception as e: + ui.notification_show(f"❌ Error: {str(e)}", type="error", duration=10) + finally: + ui.modal_remove() + + ui.h4("Description", style="color: #5567BB;") + ui.p( + "This section allows you to retrieve bibliographic data directly via API " + "from OpenAlex or PubMed, without manually downloading any file. " + "The data is automatically standardized to the WoS schema and validated, " + "then loaded into the dashboard for analysis." 
+ ) + + @render.ui + def show_api_result(): + result = api_result.get() + if result is None: + return ui.div( + ui.p( + "Enter a query, select a platform and click Run Query.", + style="text-align: center; color: #666; font-size: 16px;" + ), + style=( + "height: 300px; display: flex; flex-direction: column; " + "justify-content: center; align-items: center; " + "border: 2px dashed #ddd; border-radius: 10px; margin: 20px;" + ) + ) + return ui.div( + ui.h5(f"✅ Dataset loaded: {len(result)} records, {len(result.columns)} columns", style="color: #5567BB;"), + ui.HTML(DT(result[['AU', 'TI', 'PY', 'SO', 'TC', 'DB', 'SR']].head(10), style="width:100%;")), + ui.p("Navigate to any analysis section from the sidebar to explore the data.", style="color: gray; font-size: 13px;") + ) + with ui.nav_panel("None", value="collections"): ui.h3("🚧 Warning: Merge Collection is under construction 🚧") @@ -1174,7 +1291,7 @@ def table_informations(): data['Average_Citations_per_Doc'][0] ] }) - return ui.HTML(DT(df_box, style="width=100%;")) + return ui.HTML(DT(df_box, style="width:100%;")) # --- Annual Scientific Production Section --- with ui.nav_panel("None", value="annual_scientific_production"): @@ -1228,7 +1345,7 @@ def show_annual_production(): @render.ui def table_annual_production(): _, publications_per_year = annual_informations() - return ui.HTML(DT(publications_per_year, style="width=100%;")) + return ui.HTML(DT(publications_per_year, style="width:100%;")) # AI bot Gemini Chat Integration # --- Floating Chat Button --- @@ -1382,7 +1499,7 @@ def show_average_citations(): @render.ui def table_average_citations(): _, avg_citations = average_citations() - return ui.HTML(DT(avg_citations, style="width=100%;")) + return ui.HTML(DT(avg_citations, style="width:100%;")) # --- Three-Field Plot Section --- with ui.nav_panel("None", value="three_field_plot"): @@ -1636,7 +1753,7 @@ def table_relevant_sources(): style="height: 400px; display: flex; flex-direction: column; justify-content: 
center; border: 2px dashed #ddd; border-radius: 10px; margin: 20px;" ) _, relevant_sources_tab = result - return ui.HTML(DT(relevant_sources_tab, style="width=100%;")) + return ui.HTML(DT(relevant_sources_tab, style="width:100%;")) # --- Most Local Cited Sources Section --- with ui.nav_panel("None", value="most_local_cited_sources"): @@ -1780,7 +1897,7 @@ def table_local_cited_sources(): style="height: 400px; display: flex; flex-direction: column; justify-content: center; border: 2px dashed #ddd; border-radius: 10px; margin: 20px;" ) _, local_cited_sources_tab = result - return ui.HTML(DT(local_cited_sources_tab, style="width=100%;")) + return ui.HTML(DT(local_cited_sources_tab, style="width:100%;")) # --- Bradford's Law Section --- with ui.nav_panel("None", value="bradfords_law"): @@ -1834,7 +1951,7 @@ def show_bradford_law(): @render.ui def table_bradford_law(): _, bradford_law_tab = bradford_law() - return ui.HTML(DT(bradford_law_tab, style="width=100%;")) + return ui.HTML(DT(bradford_law_tab, style="width:100%;")) # --- Sources' Local Impact Section --- with ui.nav_panel("None", value="sources_local_impact"): @@ -1980,7 +2097,7 @@ def table_sources_local_impact(): style="height: 400px; display: flex; flex-direction: column; justify-content: center; border: 2px dashed #ddd; border-radius: 10px; margin: 20px;" ) _, sources_local_impact_tab = result - return ui.HTML(DT(sources_local_impact_tab, style="width=100%;")) + return ui.HTML(DT(sources_local_impact_tab, style="width:100%;")) # --- Sources' Production --- with ui.nav_panel("None", value="sources_production"): @@ -2126,7 +2243,7 @@ def table_sources_production(): style="height: 400px; display: flex; flex-direction: column; justify-content: center; border: 2px dashed #ddd; border-radius: 10px; margin: 20px;" ) _, sources_production_tab = result - return ui.HTML(DT(sources_production_tab, style="width=100%;")) + return ui.HTML(DT(sources_production_tab, style="width:100%;")) # --- Most Relevant Authors Section 
--- with ui.nav_panel("None", value="most_relevant_authors"): @@ -2273,7 +2390,7 @@ def table_relevant_authors(): style="height: 400px; display: flex; flex-direction: column; justify-content: center; border: 2px dashed #ddd; border-radius: 10px; margin: 20px;" ) _, relevant_authors_tab = result - return ui.HTML(DT(relevant_authors_tab, style="width=100%;")) + return ui.HTML(DT(relevant_authors_tab, style="width:100%;")) # --- Most Local Cited Authors Section --- with ui.nav_panel("None", value="most_local_cited_authors"): @@ -2421,7 +2538,7 @@ def table_local_cited_authors(): style="height: 400px; display: flex; flex-direction: column; justify-content: center; border: 2px dashed #ddd; border-radius: 10px; margin: 20px;" ) _, local_cited_authors_tab = result - return ui.HTML(DT(local_cited_authors_tab, style="width=100%;")) + return ui.HTML(DT(local_cited_authors_tab, style="width:100%;")) # --- Authors' Production over Time Section --- with ui.nav_panel("None", value="authors_production"): @@ -2566,7 +2683,7 @@ def table_authors_production(): style="height: 400px; display: flex; flex-direction: column; justify-content: center; border: 2px dashed #ddd; border-radius: 10px; margin: 20px;" ) _, table_authors_production, _ = result - return ui.HTML(DT(table_authors_production, style="width=100%;")) + return ui.HTML(DT(table_authors_production, style="width:100%;")) with ui.nav_panel("Table - Documents"): @render.ui @@ -2584,7 +2701,7 @@ def table_documents(): table_documents['DOI'] = table_documents['DOI'].apply( lambda x: f'{x}' if x != "N/A" else x ) - return ui.HTML(DT(table_documents, style="width=100%;")) + return ui.HTML(DT(table_documents, style="width:100%;")) # AI bot Gemini Chat Integration # --- Floating Chat Button --- @render.express() @@ -2736,7 +2853,7 @@ def show_lotka_law(): @render.ui def table_lotka_law(): _, lotka_law_tab = lotka_law() - return ui.HTML(DT(lotka_law_tab, style="width=100%;")) + return ui.HTML(DT(lotka_law_tab, style="width:100%;")) # 
--- Authors' Local Impact Section --- with ui.nav_panel("None", value="authors_local_impact"): @@ -2883,7 +3000,7 @@ def table_authors_local_impact(): style="height: 400px; display: flex; flex-direction: column; justify-content: center; border: 2px dashed #ddd; border-radius: 10px; margin: 20px;" ) _, authors_local_impact_tab = result - return ui.HTML(DT(authors_local_impact_tab, style="width=100%;")) + return ui.HTML(DT(authors_local_impact_tab, style="width:100%;")) # --- Most Relevant Affiliations Section --- with ui.nav_panel("None", value="most_relevant_affiliations"): @@ -3030,7 +3147,7 @@ def table_relevant_affiliations(): style="height: 400px; display: flex; flex-direction: column; justify-content: center; border: 2px dashed #ddd; border-radius: 10px; margin: 20px;" ) _, relevant_affiliations_tab = result - return ui.HTML(DT(relevant_affiliations_tab, style="width=100%;")) + return ui.HTML(DT(relevant_affiliations_tab, style="width:100%;")) # --- Affiliations' Production over Time Section --- with ui.nav_panel("None", value="affiliations_production"): @@ -3172,7 +3289,7 @@ def table_affiliations_production(): style="height: 400px; display: flex; flex-direction: column; justify-content: center; border: 2px dashed #ddd; border-radius: 10px; margin: 20px;" ) _, table_affiliations_production = result - return ui.HTML(DT(table_affiliations_production, style="width=100%;")) + return ui.HTML(DT(table_affiliations_production, style="width:100%;")) # --- Affiliations' Local Impact Section --- with ui.nav_panel("None", value="corresponding_authors"): @@ -3316,7 +3433,7 @@ def table_countries_collaboration(): style="height: 400px; display: flex; flex-direction: column; justify-content: center; border: 2px dashed #ddd; border-radius: 10px; margin: 20px;" ) _, countries_table = result - return ui.HTML(DT(countries_table, style="width=100%;")) + return ui.HTML(DT(countries_table, style="width:100%;")) # --- Countries' Scientific Production Section --- with 
ui.nav_panel("None", value="countries_scientific_production"): @@ -3422,7 +3539,7 @@ def show_countries_production(): @render.ui def table_countries_production(): _, countries_table = countries_production() - return ui.HTML(DT(countries_table, style="width=100%;")) + return ui.HTML(DT(countries_table, style="width:100%;")) # --- Countries' Production over Time Section --- with ui.nav_panel("None", value="countries_production_over_time"): @@ -3566,7 +3683,7 @@ def table_countries_over_time(): style="height: 400px; display: flex; flex-direction: column; justify-content: center; border: 2px dashed #ddd; border-radius: 10px; margin: 20px;" ) _, countries_table = result - return ui.HTML(DT(countries_table, style="width=100%;")) + return ui.HTML(DT(countries_table, style="width:100%;")) # --- Most Cited Countries Section --- with ui.nav_panel("None", value="most_cited_countries"): @@ -3712,7 +3829,7 @@ def table_cited_countries(): style="height: 400px; display: flex; flex-direction: column; justify-content: center; border: 2px dashed #ddd; border-radius: 10px; margin: 20px;" ) _, cited_countries_tab = result - return ui.HTML(DT(cited_countries_tab, style="width=100%;")) + return ui.HTML(DT(cited_countries_tab, style="width:100%;")) # --- Most Global Cited Documents Section --- with ui.nav_panel("None", value="most_global_cited_documents"): @@ -3852,7 +3969,7 @@ def table_cited_documents(): style="height: 400px; display: flex; flex-direction: column; justify-content: center; border: 2px dashed #ddd; border-radius: 10px; margin: 20px;" ) _, cited_documents_tab = result - return ui.HTML(DT(cited_documents_tab, style="width=100%;")) + return ui.HTML(DT(cited_documents_tab, style="width:100%;")) # --- Most Local Cited Documents Section --- with ui.nav_panel("None", value="most_local_cited_documents"): @@ -3998,7 +4115,7 @@ def table_local_cited_documents(): style="height: 400px; display: flex; flex-direction: column; justify-content: center; border: 2px dashed #ddd; 
border-radius: 10px; margin: 20px;" ) _, local_cited_documents_tab = result - return ui.HTML(DT(local_cited_documents_tab, style="width=100%;")) + return ui.HTML(DT(local_cited_documents_tab, style="width:100%;")) # --- Most Local Cited References Section --- with ui.nav_panel("None", value="most_local_cited_references"): @@ -4144,7 +4261,7 @@ def table_local_cited_refs(): style="height: 400px; display: flex; flex-direction: column; justify-content: center; border: 2px dashed #ddd; border-radius: 10px; margin: 20px;" ) _, local_cited_refs_tab = result - return ui.HTML(DT(local_cited_refs_tab, style="width=100%;")) + return ui.HTML(DT(local_cited_refs_tab, style="width:100%;")) # --- References Spectroscopy Section --- with ui.nav_panel("None", value="references_spectroscopy"): @@ -4294,7 +4411,7 @@ def table_references_rpy(): style="height: 400px; display: flex; flex-direction: column; justify-content: center; border: 2px dashed #ddd; border-radius: 10px; margin: 20px;" ) _, ref_rpy_tab, _ = result - return ui.HTML(DT(ref_rpy_tab, style="width=100%;")) + return ui.HTML(DT(ref_rpy_tab, style="width:100%;")) with ui.nav_panel("Table - Cited References"): @render.ui @@ -4306,7 +4423,7 @@ def table_references_spectroscopy(): style="height: 400px; display: flex; flex-direction: column; justify-content: center; border: 2px dashed #ddd; border-radius: 10px; margin: 20px;" ) _, _, ref_spectroscopy_tab = result - return ui.HTML(DT(ref_spectroscopy_tab, style="width=100%;")) + return ui.HTML(DT(ref_spectroscopy_tab, style="width:100%;")) # --- Most Frequent Words --- with ui.nav_panel("None", value="most_frequent_words"): @@ -4524,7 +4641,7 @@ def table_frequent_words(): style="height: 400px; display: flex; flex-direction: column; justify-content: center; border: 2px dashed #ddd; border-radius: 10px; margin: 20px;" ) _, frequent_words_tab = result - return ui.HTML(DT(frequent_words_tab, style="width=100%;")) + return ui.HTML(DT(frequent_words_tab, style="width:100%;")) # --- 
WordCloud Section --- with ui.nav_panel("None", value="wordcloud"): @@ -4742,7 +4859,7 @@ def table_wordcloud(): style="height: 400px; display: flex; flex-direction: column; justify-content: center; border: 2px dashed #ddd; border-radius: 10px; margin: 20px;" ) _, wordcloud_tab = result - return ui.HTML(DT(wordcloud_tab, style="width=100%;")) + return ui.HTML(DT(wordcloud_tab, style="width:100%;")) # --- TreeMap Section --- with ui.nav_panel("None", value="treemap"): @@ -4960,7 +5077,7 @@ def table_treemap(): style="height: 400px; display: flex; flex-direction: column; justify-content: center; border: 2px dashed #ddd; border-radius: 10px; margin: 20px;" ) _, treemap_tab = result - return ui.HTML(DT(treemap_tab, style="width=100%;")) + return ui.HTML(DT(treemap_tab, style="width:100%;")) # --- References Spectroscopy Section --- with ui.nav_panel("None", value="words_frequency_over_time"): @@ -5895,7 +6012,12 @@ def table_co_occurrence_network(): result = co_occurrence_network_results.get() if result is not None: _, _, co_occurrence_network_tab, _ = result - return ui.HTML(DT(co_occurrence_network_tab, style="width=100%;")) + if co_occurrence_network_tab is None or co_occurrence_network_tab.empty: + return ui.div( + ui.p("No data available for this network.", style="text-align: center; color: #999; font-size: 16px;"), + style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 300px; border: 2px dashed #ddd; border-radius: 10px; margin: 20px;" + ) + return ui.HTML(DT(co_occurrence_network_tab, style="width:100%;")) else: return ui.div( ui.p("Click the Run Analysis button to run co-occurrence network", style="text-align: center; color: #999; font-size: 16px;"), @@ -6116,7 +6238,7 @@ def table_thematic_map(): result = thematic_map_results.get() if result is not None: _, _, thematic_map_table, _, _ = result - return ui.HTML(DT(thematic_map_table, style="width=100%;")) + return ui.HTML(DT(thematic_map_table, 
style="width:100%;")) else: return ui.div( ui.p("Click the Run Analysis button to run thematic map", style="text-align: center; color: #999; font-size: 16px;"), @@ -6129,7 +6251,7 @@ def clusters_thematic_map(): result = thematic_map_results.get() if result is not None: _, _, _, thematic_map_cluster, _ = result - return ui.HTML(DT(thematic_map_cluster, style="width=100%;")) + return ui.HTML(DT(thematic_map_cluster, style="width:100%;")) else: return ui.div( ui.p("Click the Run Analysis button to run thematic map", style="text-align: center; color: #999; font-size: 16px;"), @@ -6142,7 +6264,7 @@ def documents_thematic_map(): result = thematic_map_results.get() if result is not None: _, _, _, _, thematic_map_documents = result - return ui.HTML(DT(thematic_map_documents, maxBytes="10MB", style="width=100%;")) + return ui.HTML(DT(thematic_map_documents, maxBytes="10MB", style="width:100%;")) else: return ui.div( ui.p("Click the Run Analysis button to run thematic map", style="text-align: center; color: #999; font-size: 16px;"), @@ -6444,7 +6566,7 @@ def table_thematic_evolution(): result = thematic_evolution_results.get() if result is not None: _, thematic_evolution_table, _ = result - return ui.HTML(DT(thematic_evolution_table, style="width=100%;")) + return ui.HTML(DT(thematic_evolution_table, style="width:100%;")) else: return ui.div( ui.p("Click the Run Analysis button to run thematic evolution", style="text-align: center; color: #999; font-size: 16px;"), @@ -6483,7 +6605,7 @@ def table_thematic_evolution_2(): if result is not None: _, _, TM = result if len(TM) > 0: - return ui.HTML(DT(TM[0]["words"], style="width=100%;")) + return ui.HTML(DT(TM[0]["words"], style="width:100%;")) return ui.div( ui.p("Click the Run Analysis button to run thematic evolution", style="text-align: center; color: #999; font-size: 16px;"), style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 300px; border: 2px dashed #ddd; border-radius: 
10px; margin: 20px;" @@ -6496,7 +6618,7 @@ def clusters_thematic_evolution_2(): if result is not None: _, _, TM = result if len(TM) > 0: - return ui.HTML(DT(TM[0]["clusters"], style="width=100%;")) + return ui.HTML(DT(TM[0]["clusters"], style="width:100%;")) return ui.div( ui.p("Click the Run Analysis button to run thematic evolution", style="text-align: center; color: #999; font-size: 16px;"), style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 300px; border: 2px dashed #ddd; border-radius: 10px; margin: 20px;" @@ -6509,7 +6631,7 @@ def documents_thematic_evolution_2(): if result is not None: _, _, TM = result if len(TM) > 0: - return ui.HTML(DT(TM[0]["documentToClusters"], maxBytes="10MB", style="width=100%;")) + return ui.HTML(DT(TM[0]["documentToClusters"], maxBytes="10MB", style="width:100%;")) return ui.div( ui.p("Click the Run Analysis button to run thematic evolution", style="text-align: center; color: #999; font-size: 16px;"), style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 300px; border: 2px dashed #ddd; border-radius: 10px; margin: 20px;" @@ -6547,7 +6669,7 @@ def table_thematic_evolution_3(): if result is not None: _, _, TM = result if len(TM) > 1: - return ui.HTML(DT(TM[1]["words"], style="width=100%;")) + return ui.HTML(DT(TM[1]["words"], style="width:100%;")) return ui.div( ui.p("Click the Run Analysis button to run thematic evolution", style="text-align: center; color: #999; font-size: 16px;"), style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 300px; border: 2px dashed #ddd; border-radius: 10px; margin: 20px;" @@ -6560,7 +6682,7 @@ def clusters_thematic_evolution_3(): if result is not None: _, _, TM = result if len(TM) > 1: - return ui.HTML(DT(TM[1]["clusters"], style="width=100%;")) + return ui.HTML(DT(TM[1]["clusters"], style="width:100%;")) return ui.div( ui.p("Click the Run Analysis button to run 
thematic evolution", style="text-align: center; color: #999; font-size: 16px;"), style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 300px; border: 2px dashed #ddd; border-radius: 10px; margin: 20px;" @@ -6573,7 +6695,7 @@ def documents_thematic_evolution_3(): if result is not None: _, _, TM = result if len(TM) > 1: - return ui.HTML(DT(TM[1]["documentToClusters"], maxBytes="10MB", style="width=100%;")) + return ui.HTML(DT(TM[1]["documentToClusters"], maxBytes="10MB", style="width:100%;")) return ui.div( ui.p("Click the Run Analysis button to run thematic evolution", style="text-align: center; color: #999; font-size: 16px;"), style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 300px; border: 2px dashed #ddd; border-radius: 10px; margin: 20px;" @@ -6611,7 +6733,7 @@ def table_thematic_evolution_4(): if result is not None: _, _, TM = result if len(TM) > 2: - return ui.HTML(DT(TM[2]["words"], style="width=100%;")) + return ui.HTML(DT(TM[2]["words"], style="width:100%;")) return ui.div( ui.p("Click the Run Analysis button to run thematic evolution", style="text-align: center; color: #999; font-size: 16px;"), style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 300px; border: 2px dashed #ddd; border-radius: 10px; margin: 20px;" @@ -6624,7 +6746,7 @@ def clusters_thematic_evolution_4(): if result is not None: _, _, TM = result if len(TM) > 2: - return ui.HTML(DT(TM[2]["clusters"], style="width=100%;")) + return ui.HTML(DT(TM[2]["clusters"], style="width:100%;")) return ui.div( ui.p("Click the Run Analysis button to run thematic evolution", style="text-align: center; color: #999; font-size: 16px;"), style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 300px; border: 2px dashed #ddd; border-radius: 10px; margin: 20px;" @@ -6637,7 +6759,7 @@ def documents_thematic_evolution_4(): if result 
is not None: _, _, TM = result if len(TM) > 2: - return ui.HTML(DT(TM[2]["documentToClusters"], maxBytes="10MB", style="width=100%;")) + return ui.HTML(DT(TM[2]["documentToClusters"], maxBytes="10MB", style="width:100%;")) return ui.div( ui.p("Click the Run Analysis button to run thematic evolution", style="text-align: center; color: #999; font-size: 16px;"), style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 300px; border: 2px dashed #ddd; border-radius: 10px; margin: 20px;" @@ -6675,7 +6797,7 @@ def table_thematic_evolution_5(): if result is not None: _, _, TM = result if len(TM) > 3: - return ui.HTML(DT(TM[3]["words"], style="width=100%;")) + return ui.HTML(DT(TM[3]["words"], style="width:100%;")) return ui.div( ui.p("Click the Run Analysis button to run thematic evolution", style="text-align: center; color: #999; font-size: 16px;"), style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 300px; border: 2px dashed #ddd; border-radius: 10px; margin: 20px;" @@ -6688,7 +6810,7 @@ def clusters_thematic_evolution_5(): if result is not None: _, _, TM = result if len(TM) > 3: - return ui.HTML(DT(TM[3]["clusters"], style="width=100%;")) + return ui.HTML(DT(TM[3]["clusters"], style="width:100%;")) return ui.div( ui.p("Click the Run Analysis button to run thematic evolution", style="text-align: center; color: #999; font-size: 16px;"), style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 300px; border: 2px dashed #ddd; border-radius: 10px; margin: 20px;" @@ -6701,7 +6823,7 @@ def documents_thematic_evolution_5(): if result is not None: _, _, TM = result if len(TM) > 3: - return ui.HTML(DT(TM[3]["documentToClusters"], maxBytes="10MB", style="width=100%;")) + return ui.HTML(DT(TM[3]["documentToClusters"], maxBytes="10MB", style="width:100%;")) return ui.div( ui.p("Click the Run Analysis button to run thematic evolution", 
style="text-align: center; color: #999; font-size: 16px;"), style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 300px; border: 2px dashed #ddd; border-radius: 10px; margin: 20px;" @@ -6739,7 +6861,7 @@ def table_thematic_evolution_6(): if result is not None: _, _, TM = result if len(TM) > 4: - return ui.HTML(DT(TM[4]["words"]), style="width=100%;") + return ui.HTML(DT(TM[4]["words"]), style="width:100%;") return ui.div( ui.p("Click the Run Analysis button to run thematic evolution", style="text-align: center; color: #999; font-size: 16px;"), style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 300px; border: 2px dashed #ddd; border-radius: 10px; margin: 20px;" @@ -6752,7 +6874,7 @@ def clusters_thematic_evolution_6(): if result is not None: _, _, TM = result if len(TM) > 4: - return ui.HTML(DT(TM[4]["clusters"], style="width=100%;")) + return ui.HTML(DT(TM[4]["clusters"], style="width:100%;")) return ui.div( ui.p("Click the Run Analysis button to run thematic evolution", style="text-align: center; color: #999; font-size: 16px;"), style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 300px; border: 2px dashed #ddd; border-radius: 10px; margin: 20px;" @@ -6765,7 +6887,7 @@ def documents_thematic_evolution_6(): if result is not None: _, _, TM = result if len(TM) > 4: - return ui.HTML(DT(TM[4]["documentToClusters"], maxBytes="10MB", style="width=100%;")) + return ui.HTML(DT(TM[4]["documentToClusters"], maxBytes="10MB", style="width:100%;")) return ui.div( ui.p("Click the Run Analysis button to run thematic evolution", style="text-align: center; color: #999; font-size: 16px;"), style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 300px; border: 2px dashed #ddd; border-radius: 10px; margin: 20px;" @@ -6946,7 +7068,10 @@ def loading_modal(): try: # Run analysis with current parameters 
field = input.wordmap_field() - ngram = input.ngram_fa() if field in ["TI", "AB"] else 1 + try: + ngram = input.ngram_fa() if field in ["TI", "AB"] else 1 + except Exception: + ngram = 1 file_upload_terms_wm = None file_upload_synonyms_wm = None @@ -6998,12 +7123,14 @@ def loading_modal(): result = get_factorial_analysis(df, ngram, field, terms_data_wm, synonyms_data_wm, n_terms, n_clusters, num_documents, method, dimX, dimY, topWordPlot, threshold, labelsize, size) factorial_analysis_results.set(result) except Exception as e: - ui.notification_show(f"❌ Error in analysis: {str(e)}", type="error", duration=10) + import traceback + traceback.print_exc() + ui.notification_show(f"❌ Error in analysis: {str(e)} ({type(e).__name__})", type="error", duration=10) finally: ui.modal_remove() # Show modal for terms/synonyms after main processing if needed - if modal_content_wm: + if 'modal_content_wm' in dir() and modal_content_wm: file_modal_wm = ui.modal( *modal_content_wm, easy_close=True, @@ -7051,7 +7178,7 @@ def show_words_by_cluster(): result = factorial_analysis_results.get() if result is not None: _, _, words_by_cluster, _ = result - return ui.HTML(DT(words_by_cluster, style="width=100%;")) + return ui.HTML(DT(words_by_cluster, style="width:100%;")) else: return ui.div( ui.p("Click the Run Analysis button to run factorial analysis", style="text-align: center; color: #999; font-size: 16px;"), @@ -7064,7 +7191,7 @@ def show_articles_by_cluster(): result = factorial_analysis_results.get() if result is not None: _, _, _, articles_by_cluster = result - return ui.HTML(DT(articles_by_cluster, style="width=100%;")) + return ui.HTML(DT(articles_by_cluster, style="width:100%;")) else: return ui.div( ui.p("Click the Run Analysis button to run factorial analysis", style="text-align: center; color: #999; font-size: 16px;"), @@ -7345,7 +7472,7 @@ def show_cocitation_table(): result = co_citation_network_results.get() if result is not None: _, _, cocit_table, _ = result - return 
ui.HTML(DT(cocit_table, style="width=100%;")) + return ui.HTML(DT(cocit_table, style="width:100%;")) else: return ui.div( ui.p("Click the Run Analysis button to generate the co-citation table.", style="text-align: center; color: #666; font-size: 16px;"), @@ -7560,7 +7687,7 @@ def show_hist_table(): result = historiograph_results.get() if result is not None: _, hist_tab, _ = result - return ui.HTML(DT(hist_tab, style="width=100%;")) + return ui.HTML(DT(hist_tab, style="width:100%;")) else: return ui.div( ui.p("Click the Run Analysis button to generate the historiograph table.", style="text-align: center; color: #666; font-size: 16px;"), @@ -7865,7 +7992,7 @@ def show_collaboration_table(): result = collaboration_network_results.get() if result is not None: _, _, collab_table, _ = result - return ui.HTML(DT(collab_table, style="width=100%;")) + return ui.HTML(DT(collab_table, style="width:100%;")) else: return ui.div( ui.p("Click the Run Analysis button to generate the collaboration table.", style="text-align: center; color: #666; font-size: 16px;"), @@ -8045,7 +8172,7 @@ def show_world_map_collaboration_table(): result = countries_collaboration_network_results.get() if result is not None: _, world_map_table = result - return ui.HTML(DT(world_map_table, style="width=100%;")) + return ui.HTML(DT(world_map_table, style="width:100%;")) else: return ui.div( ui.p("Click the Run Analysis button to generate the world map collaboration table.", style="text-align: center; color: #666; font-size: 16px;"), @@ -8637,4 +8764,4 @@ def _(): @reactive.effect @reactive.event(input.go_settings_2) def _(): - ui.update_navs("hidden_tabs", selected="settings") + ui.update_navs("hidden_tabs", selected="settings") \ No newline at end of file diff --git a/audit.md b/audit.md new file mode 100644 index 000000000..877df92fc --- /dev/null +++ b/audit.md @@ -0,0 +1,1388 @@ +# Bibliometrix-Python Codebase Audit + +## Purpose +This document maps every file in services/ and functions/ to the 
columns it depends on and any hardcoded WoS logic it contains. +It is used to verify that our ETL pipeline produces all required columns and to track which files need patching. + +--- + +## www/services/ +1) what it does +2) dependencies +3) columns used +4) WoS-specific logic +5) issues found +6) relevant for ETL: yes/no + +### biblionetwork.py +1) Takes the bibliographic DataFrame and builds a matrix showing how items (authors, sources, references, countries) are connected to each other. For example, two authors are "connected" if they cite the same references. It's the core function for generating all network analyses in the dashboard. +2) **utils.py, cocmatrix.py**. +3) **AU, CR, SO, ID, DE, DB** and derived ones. +4) It has **db_name == "SCOPUS"**. +5) If any of the above columns are absent (AU, CR, etc.), cocMatrix() will fail because it will try to read that column from the DataFrame without finding it. Python will throw a KeyError and the whole thing crashes. +6) **Yes**. Our ETL is responsible for producing the DataFrame that gets fed into functions like this one; if it fails to include AU, CR or whatever other column in the output (even as an empty list []) this function crashes immediately. + +### cocmatrix.py +1) Takes the bibliographic DataFrame and a column name (like AU or CR), and builds a matrix where rows are articles and columns are unique items (authors, keywords, references etc.). Each cell is 1 if that article contains that item, 0 otherwise. It's the building block that biblionetwork.py calls to create all its networks. +2) **utils.py**. +3) **SR, CR, AU, ID, DE, TI, AB** and derived ones. +4) No explicit DB checks.
+5) It will crash if SR is missing (M.index = M["SR"] throws a KeyError immediately) and if the requested Field column is missing, it just prints a message and returns None (which then causes biblionetwork.py to crash when it tries to use that None because there is no error handling between the two functions: biblionetwork.py calls cocMatrix() and stores the result in WA; if the column is missing, cocMatrix() prints a message and returns None; biblionetwork.py doesn't check if WA is None — it immediately uses it in crossprod(WA, WA); crossprod tries to do matrix multiplication on None, which crashes with a TypeError). +6) **Yes**, SR must be present and correctly computed. + +### couplingmap.py +1) Builds and visualizes a "coupling map" — a bubble chart where clusters of related documents, authors, or sources are plotted by centrality vs impact. It combines network analysis, citation scoring, and cluster labeling into one visualization. It's one of the more complex files — it orchestrates many other services together. +2) **utils.py, cocmatrix.py, biblionetwork.py, termextraction.py, networkplot.py, histnetwork.py, metatagextraction.py, tabletag.py**. +3) **SR, AU, TC, DI, PY, DE, ID, TI, AB, SO**. +4) No explicit DB checks. +5) It will crash if SR (crashes immediately at metaTagExtraction(df, "SR")), TC (crashes in localCitations() at M['TC'].fillna(0)), AU (crashes in localCitations() at M['AU'].explode()), DI and PY (crashes when building the LCS output DataFrame) are missing. +6) **Yes**. SR, TC, AU, DI, PY, SO must all be present and correctly typed. + +### format_functions.py +1) It takes raw bibliographic data from any supported source (WoS, Scopus, PubMed, Dimensions, Lens, Cochrane) and converts it into a standardized dictionary with WoS-style column names. It has one formatting function per column (format_au_column, format_cr_column etc.) and a main entry point process_single_file() that calls all of them and assembles the final output. 
**This is the most important file for our ETL, it's basically a rough draft of what the ETL needs to be.** The project spec asks us to build a clean, robust version of what this file is already attempting. So rather than starting from scratch, for our ETL we should: study this file carefully to understand the existing column mappings; replace the fragile direct access (entry['Abstract']) with safe .get() calls; ensure null handling throughout (empty string "" or [] instead of None); make sure SR is always correctly computed. +2) **utils.py, parsers.py**. +3) **AB, AF, AU, AU_UN, AU1_UN, BP, EP, CR, C1, DB, DE, DI, DT, EM, FU, FX, IS, JI, ID, LA, OA, OI, PMID, PU, PY, RP, SC, SN, SO, SR, TC, TI, UT, VL** +4) **Yes**, every single formatting function branches on source (Web_of_Science, Scopus, PubMed, Dimensions, The_Lens, Cochrane) and file_type. This is basically the dispatcher that the spec asks us to build. +5) Yes, several functions access raw source columns directly without safety checks (e.g. entry['Abstract'], entry['Author full names']) which will crash with a KeyError if the raw file has different column names than expected. +6) **Yes**. + +### histnetwork.py +1) Builds a historical citation network. It figures out which papers in the dataset cite other papers in the same dataset (called "Local Citation Score" or LCS). It has two separate implementations: one for WoS and one for Scopus, and returns a network matrix plus citation statistics. +2) **utils.py, cocmatrix.py**. +3) **DB, DI, CR, TC, PY, SR, SR_FULL, TI, DE, ID, AU, BP, EP, LCS** +4) **Yes**. It explicitly checks **db == "Web_of_Science"** or **db == "Scopus"** and calls completely different functions for each. If DB contains anything else (e.g. "PUBMED", "DIMENSIONS"), it prints "Database not compatible" and returns None, meaning it silently fails for any source other than WoS and Scopus. 
+5) It will crash if: CR is missing (returns None immediately); SR_FULL is missing (crashes in the WoS branch when building LABEL); PY, AU, BP, EP are missing (crashes in the Scopus branch during merges). +6) **Yes**. DB values must exactly match "Web_of_Science" or "Scopus" for this function to work at all, and CR, PY, SR, TC must all be correctly populated. + +### histplot.py +1) Takes the output of histNetwork() and draws a historical citation network chart: papers are plotted as bubbles positioned by publication year on the x-axis, with edges showing which papers cite which. It's purely a visualization function, it doesn't touch the raw DataFrame directly. +2) **utils.py, networkplot.py** +3) **None directly** from the bibliographic DataFrame, it only reads from histResults which is the output of histNetwork(). Internally it uses histResults['NetMatrix'] and histResults['histData'] which contain Paper, Title, Author_Keywords, KeywordsPlus. +4) **No**. +5) **Only indirectly**, if histNetwork() failed to produce a proper NetMatrix or histData, this function will crash. But that's histNetwork()'s problem, not yours. +6) **No**. This is a pure visualization layer, it never reads our standardized DataFrame directly. + +### htmldownload.py +1) Takes an HTML file, renders it to a PNG screenshot using a headless Chrome browser, then overlays the bibliometrix logo on the bottom right. It's a utility for exporting visualizations as images. +2) **utils.py**. +3) **None**. +4) **No**. +5) **No**. +6) **No**. + +### igraph2vis.py +1) Converts an igraph graph object into an interactive vis.js network visualization, saves it as an HTML file, and returns the path. It handles node sizing, coloring by cluster, edge styling, and label overlap removal. Pure visualization utility. +2) **utils.py**. +3) **None**. +4) **No**. +5) **No**. +6) **No**. + +### metatagextraction.py +1) Computes derived columns that other functions need but that aren't in the raw data. 
Given a Field parameter, it generates one of: SR (short reference key), CR_AU (authors from cited references), CR_SO (sources from cited references), AU_CO (countries from affiliations), AU1_CO (first author's country), AU_UN (universities from affiliations). This is the file that generates most of the derived columns, which shouldn't be our responsibility based on the project specs (**must ask**). +2) **utils.py**. +3) **AU, JI, SO, PY, DB, CR, C1, RP**. +4) **Yes**, in multiple places: SR() checks db == "scopus" to format author names differently; CR_SO() checks db != "SCOPUS" to parse references differently; AU_UN() checks db in ["ISI", "OPENALEX"] for university extraction. +5) SR() crashes if AU, JI, SO, or PY are missing. AU_CO() and AU1_CO() crash if C1 and RP are both missing. CR_AU() and CR_SO() crash if CR is missing or not a list. +6) **Yes**. AU, JI, SO, PY, C1, RP must all be present and correctly typed for SR generation to work +**N.B.** This file is what *generates SR*, which is in our target schema and is required by almost every other function. So while we don't need to generate AU_CO, CR_AU etc., we do need to ensure AU, JI, SO, PY, C1, RP are correctly populated so that SR() inside this file can run without crashing. Our ETL feeds this function indirectly. + +### networkplot.py +1) Takes a co-occurrence/coupling matrix (the output of biblionetwork.py) and builds an interactive network graph from it — handling clustering, layout, node sizing, edge weights, and color assignment. It's the core visualization engine for all network analyses in the dashboard. +2) **utils.py, cocmatrix.py**. +3) None directly from the bibliographic DataFrame, it only receives a pre-built NetMatrix as input. +4) **No**. +5) **No**. +6) **No**. If our ETL produces correct columns so that biblionetwork.py and cocmatrix.py can build the matrix successfully, this function will work automatically. 
+ +### parsers.py +1) Contains three raw file parsers, one each for Web of Science (parse_wos_data), PubMed (parse_pubmed_data), and Cochrane (parse_cochrane_data). Each parser reads a raw text file line by line and returns a list of dictionaries, one per article, with raw field tags as keys. This is the **Extract phase of the ETL**, it turns raw files into Python data structures before any column renaming or type enforcement happens. +2) **utils.py**. +3) **None**, these functions produce raw dictionaries from files, they don't read a DataFrame. +4) **Yes**, parse_wos_data is specifically built around the WoS plaintext format (two-letter tags, ER record separators, continuation lines starting with two spaces). The other parsers handle their own formats independently. +5) **No**, although parse_pubmed_data has a minor bug: if a continuation line appears before any key is set, key will be undefined and it will crash with a NameError. +6) **Yes**, these are our Extract phase building blocks, especially parse_wos_data and parse_pubmed_data. + +### plotlydownload.py +1) Takes an existing Plotly figure, adds the bibliometrix logo and a title, scales it up to high resolution, and exports it as a PNG image. Pure export utility. +2) **utils.py**. +3) **None**. +4) **No**. +5) **No**. +6) **No**. + +### savereport.py +1) Saves analysis results (tables and plots) into a formatted Excel file with multiple sheets. Each sheet contains a styled table and the corresponding visualization. It's the reporting/export layer of the dashboard. +2) **utils.py, plotlydownload.py, htmldownload.py**. +3) **None**. +4) **No**. +5) **No**. +6) **No**. + +### tabletag.py +1) Takes a specified column from the DataFrame, extracts all individual terms from it, counts their frequency, and returns a sorted dictionary of term → count. Used for word frequency analysis, keyword counts, citation counts etc. For AB and TI fields it first runs text mining to extract meaningful terms before counting. 
+2) **utils.py, termextraction.py**. +3) **SR, CR, DE, ID, C1, AB, TI (whichever is passed as tag parameter)** +4) **No** explicit DB checks. +5) If SR is missing it crashes immediately on drop_duplicates(subset=["SR"]); if whatever column is passed as tag is missing it crashes when trying to process it. +6) **Yes**, SR must always be present and all the tag columns (CR, DE, ID, AB, TI, C1) must exist and contain properly formatted lists for this function to work correctly. + +### termextraction.py +1) Takes a text column (TI or AB), cleans it, removes stopwords, optionally applies stemming, and extracts n-grams using scikit-learn's CountVectorizer. Stores the result as a new column TI_TM or AB_TM. Called by tabletag.py before word frequency counting. +2) **utils.py**. +3) **TI** (default), **AB** (passed in by tabletag.py). +4) **No** +5) It crashes at M[field].astype(str) if whichever column is passed as field is absent. +6) **Yes**, both TI and AB must be present and populated as strings. + +### thematicmap.py +1) Builds a thematic map, a bubble chart plotting research clusters by their "centrality" vs "density". It combines keyword co-occurrence network analysis, community detection, and cluster characterization into one visualization. One of the most complex files in the codebase, it orchestrates biblionetwork, termextraction, and networkplot together. +2) **utils.py, igraph2vis.py, termextraction.py, biblionetwork.py**. +3) **ID, DE, TI, AB, SR, TC, PY, DI, AU, SO**. +4) **No** explicit DB check, but heavily assumes WoS-style keyword fields (ID, DE) are properly populated. +5) If SR is missing, it crashes in cluster_assignment() immediately. If TC or PY missing, it crashes in cluster_assignment() when computing TCpY. If ID or DE missing, it crashes when building the network matrix via biblionetwork(). +6) **Yes**. 
ID, DE, TC, PY, DI, AU, SO, SR must all be present and correctly populated + +### utils.py +1) Central imports file for the entire services layer — every other service file starts with from .utils import *. It also defines two important shared things: the columns list (the master list of all expected DataFrame columns) and the ICONS dictionary for the UI. Think of it as the shared foundation the whole codebase builds on. +2) // +3) Defines the master columns list: AB, AF, AU, AU1_UN, AU_UN, BP, C1, CR, DB, DE, DI, DT, EM, EP, FU, FX, ID, IS, JI, LA, OA, OI, PMID, PU, PY, RP, SC, SN, SO, SR, TC, TI, UT, VL. +4) **No**. +5) **No**, it's an imports file. +6) **Yes**. This columns list is used in format_functions.py to add extra columns to each entry, so our ETL output must at minimum cover what's in the target schema. +**N.B.** The columns list defined here is our ground truth for what columns the codebase expects. Cross-referencing it with the target schema from the exam spec: +- Columns in utils.py but not in the target schema are AU1_UN, AU_UN, EM, FU, FX, OA, OI, PU, SC, SN. These are extra columns the codebase uses but our ETL doesn't need to guarantee; +- Column in the target schema but not in utils.py is SR_FULL, generated by metatagextraction.py as a derived column. 
+ +## Master Column Dependency Table — services/ + +### Columns from target schema + +| Column | Used by | +|--------|---------| +| `DB` | biblionetwork, histnetwork, metatagextraction, cocmatrix | +| `SR` | cocmatrix, histnetwork, tabletag, thematicmap, metatagextraction | +| `AU` | biblionetwork, histnetwork, metatagextraction, thematicmap | +| `CR` | biblionetwork, cocmatrix, histnetwork, metatagextraction | +| `TI` | histnetwork, termextraction, thematicmap | +| `AB` | termextraction, thematicmap | +| `DE` | biblionetwork, thematicmap | +| `ID` | biblionetwork, thematicmap | +| `SO` | biblionetwork, histnetwork, metatagextraction | +| `JI` | metatagextraction | +| `PY` | histnetwork, thematicmap, metatagextraction | +| `TC` | histnetwork, thematicmap | +| `DI` | histnetwork, thematicmap | +| `C1` | metatagextraction | +| `RP` | metatagextraction | +| `AF` | format_functions | +| `BP` | histnetwork | +| `EP` | histnetwork | +| `VL` | format_functions | +| `IS` | format_functions | +| `LA` | format_functions | +| `DT` | format_functions | +| `PMID` | format_functions | +| `UT` | format_functions | + + +### Key takeaways + +- `SR` is the most critical column — used by almost everything, computed from `AU`, `JI`, `PY`, `SO` +- `DB` must exactly match `"Web_of_Science"` or `"Scopus"` where branch logic exists +- `CR` must be a parsed Python list, not a raw semicolon-separated string +- `AU` must also be a parsed Python list +- `SR`, `TC`, `PY` have no crash protection — must always be present and correctly typed + + +--- + +## functions/ +1) what it does +2) dependencies +3) columns used +4) WoS-specific logic +5) issues found +6) relevant for ETL: yes/no + + + +### get_affiliationproductionovertime.py +1) Counts cumulative publications per institution over time and draws a line chart for the top-k institutions. +2) **www.services**. +3) **AU_UN**, **PY**. 
+4) **Indirectly yes**: the function itself has no explicit WoS condition, but it depends on `AU_UN`, which is an internal/derived affiliation column usually built during WoS-oriented preprocessing. +5) Crashes if `AU_UN` is a plain string instead of a list, or if `PY` contains nulls. +6) **Yes**. `AU_UN` must be a `list[str]` per row, `PY` must be non-null and numeric. The ETL must build `AU_UN` from `C1` for non-WoS sources. + +### get_annualproduction.py +1) Counts how many papers were published each year and draws a line chart. +2) **www.services**. +3) **PY**. +4) **No**. +5) Crashes if `PY` is missing, non-numeric, or contains nulls. +6) **Yes**. `PY` must be present, non-null, and numeric. + + +### get_authorlocalimpact.py +1) Calculates impact scores (h-index, g-index, m-index, total citations) for each author and draws a bubble chart of the top authors. +2) **www.services**. +3) **AU**, **TC**, **PY**. +4) **No**. +5) Crashes if `AU` is not a list. Index calculations may produce wrong results due to incorrect use of `transform`. +6) **Yes**. `AU` must be a `list[str]`, `TC` and `PY` must be non-null and numeric. + + +### get_authorproductionovertime.py +1) Counts publications and citations per author per year and draws a scatter plot for the top-k authors. +2) **www.services**. +3) **AU**, **PY**, **TC** (core); **TI**, **SO**, **DI** (secondary — used for the document table, missing ones handled with a warning). +4) **No**, but the fallback author splitting uses a comma which is WoS-specific. +5) Wrong author names for non-WoS sources due to comma-based splitting. Missing `DI` silently returns an empty document table. +6) **Yes**. `AU` must be a `list[str]`, `PY` and `TC` must be numeric, `TI`, `SO`, `DI` must be present as strings. + + +### get_averagecitations.py +1) Calculates average citations per year and draws a line chart. +2) **www.services**. +3) **PY**, **TC**. +4) **No**. +5) Crashes if `PY` or `TC` are missing or non-numeric. 
Division by zero possible if `PY` equals the current year. +6) **Yes**. `PY` and `TC` must be present, non-null, and numeric. + + +### get_bradfordlaw.py +1) Applies Bradford's Law to rank journals by publications, divides them into three zones, and draws a log-scale chart highlighting the core journals. +2) **www.services**. +3) **SO**. +4) **No**. +5) Crashes if `SO` is missing. Null values in `SO` are silently ignored, potentially skewing zone boundaries. +6) **Yes**. `SO` must be present, non-null, and a string. + + +### get_citedcountries.py +1) Ranks countries by total or average citations and draws a dot chart of the top-k countries. +2) **www.services**. +3) **TC** (core); **C1** or **RP** (secondary — needed by `metaTagExtraction` to extract the country). +4) **Yes**. `metaTagExtraction` is built for WoS-style affiliation strings. +5) If `C1` or `RP` are missing or wrongly formatted, the chart will be empty with no clear error. `TC` non-numeric values will cause a crash. +6) **Yes**. `TC` must be numeric and non-null. `C1` or `RP` must be populated correctly for country extraction to work. + + +### get_citeddocuments.py +1) Ranks papers by total citations or citations per year and draws a dot chart of the top-k documents. +2) **www.services**. +3) **SR**, **TC**, **PY** (core); **DI** (secondary — included in the output table). +4) **No**, but `SR` is expected in WoS format. +5) Empty chart with no error if `SR` is missing. Crashes if `TC` or `PY` are non-numeric. Division by zero possible if `PY` equals current year. +6) **Yes**. `SR` must be correctly built by the ETL, `TC` and `PY` must be numeric, `DI` should be present as a string. + + +### get_clusteringcoupling.py +1) Groups papers or authors into clusters based on shared references or keywords and draws an interactive network. Saves the result as an HTML file. +2) **www.services**; **couplingMap**, **avoid_net_overlaps**. +3) **None directly** — all column access is delegated to `couplingMap`. 
+4) **Yes**. `couplingMap` is built for WoS-style data, especially `SR` and `CR`. +5) No validation on the network returned by `couplingMap` — a broken network causes a hard crash. Temporary HTML file is never deleted. +6) **Indirect**. The ETL must ensure `SR`, `CR`, `AU`, `TC`, `PY`, `DE`, `ID` are correctly formatted for `couplingMap` to work. + +### get_co_occurence_network.py +1) Builds a word or keyword co-occurrence network, plus a density heatmap, a statistics table, and a degree distribution plot. +2) **www.services**; **biblionetwork**, **network_plot**, **term_extraction**, **cocMatrix**, **avoid_net_overlaps**, **field_by_year**. +3) **None directly** — all column access delegated to internal functions. **PY** is accessed directly inside `field_by_year`. +4) **Yes**. Field names `ID`, `DE`, `TI`, `AB`, `WC` are WoS tags — non-WoS sources will produce an empty network. +5) If no field condition matches, the function silently returns nothing. Cluster colors are random on every run. Temporary HTML file is never deleted. +6) **Indirect**. The ETL must ensure `ID`, `DE`, `TI`, `AB`, `WC`, and `PY` are all present and correctly formatted. + +### get_cocitation.py +1) Builds a co-citation network — meaning it finds which references, authors, or sources are cited together most often across papers, and draws an interactive network where each bubble is a reference/author/source and lines show how often they are cited together. Also produces a density heatmap, a cluster statistics table, and a degree distribution plot. +2) **www.services**. +3) **None directly** — all column access is delegated to `biblionetwork` and `metaTagExtraction`. `CR`, `CR_AU`, and `CR_SO` are checked for existence but not read directly. +4) **Yes**. `biblionetwork` and `metaTagExtraction` are built for WoS-style reference strings. Non-WoS sources with differently formatted references will produce empty or broken networks. 
+5) If `biblionetwork` returns an empty network the function crashes with no clear error. Cluster colors are randomly generated on every run. Temporary HTML file is never deleted. +6) **Indirect**. The ETL must ensure `CR` is present as a properly split list of reference strings, and `CR_AU`/`CR_SO` can be derived from it if needed. + +### get_collaborationnetwork.py +1) Generates a collaboration network between authors, universities, or countries from a bibliographic DataFrame. It builds a graph via biblionetwork(), then produces four outputs: an interactive PyVis HTML network, a density heatmap, a cluster statistics table, and a normalized degree plot. +2) **www.services** +3) **AU**, then AU_UN, AU_CO. +4) **Yes**. There are two: metaTagExtraction() is called to derive AU_UN and AU_CO, this function is known to have hardcoded WoS parsing logic (affiliation string formats, country extraction patterns), so if affiliations from Scopus/PubMed are formatted differently it will silently produce empty or wrong values; biblionetwork() likely expects AU, AU_UN, AU_CO in WoS delimiter/format (semicolon-separated strings or lists). +5) **Yes**, metaTagExtraction() has hardcoded WoS affiliation parsing, so AU_UN and AU_CO will silently produce empty or wrong values for non-WoS sources. +6) **Indirectly**. The function itself is downstream of the ETL, but the pipeline must guarantee that AU is a proper list[str] and C1 is a list[str] with standardized affiliation strings so metaTagExtraction() can correctly extract AU_UN and AU_CO. + +### get_correspondingauthorcountries.py +1) Extracts the corresponding author's country (AU1_CO) and all author countries (AU_CO) via metaTagExtraction(), then counts articles, single-country publications (SCP), and multi-country publications (MCP) per country. Returns a horizontal bar chart and a summary table. +2) **www.services** +3) AU1_CO (derived), AU_CO (derived), AU, C1, RP (implicitly required by metaTagExtraction()). +4) **Yes**. 
Both metaTagExtraction(Field="AU_CO") and metaTagExtraction(Field="AU1_CO") rely on WoS-style affiliation parsing of C1 and RP, as flagged in metatagextraction.py. Non-WoS sources will silently produce empty or wrong country values. +5) Issues: data.dropna(subset=["AU1_CO", "AU_CO"]) silently drops all rows if metaTagExtraction() fails to parse affiliations from non-WoS sources, producing an empty DataFrame with no error; no validation that C1 or RP exist before calling metaTagExtraction(), mirroring the crash pattern flagged in metatagextraction.py; top_k_countries is applied after sorting but the earlier top_country_names already takes all countries — the filtering step is redundant and misleading. +6) **Yes**. C1 and RP must be present and correctly formatted as list[str] with standardized affiliation strings so metaTagExtraction() can correctly derive AU_CO and AU1_CO. Without this, the function silently returns an empty result. + +### get_countriesproduction.py +1) Extracts author countries via metaTagExtraction(), counts publication frequency per country, downloads world boundary geodata, and produces an interactive choropleth map and a summary table of scientific production by country. +2) **www.services** +3) AU_CO (derived), C1 (implicitly required by metaTagExtraction()). +4) **Yes**. metaTagExtraction(Field="AU_CO") relies on WoS-style affiliation parsing of C1, as flagged in metatagextraction.py. +5) No validation that C1 exists before calling metaTagExtraction(), mirroring the crash pattern flagged in metatagextraction.py. Country name normalization only corrects "USA" → "UNITED STATES OF AMERICA"; all other country name mismatches between the source data and the shapefile silently result in unmatched rows and zero counts. dropna is never called on AU_CO after explode(), so empty list entries produce NaN rows that pollute the country counts. +6) **Yes**. 
C1 must be present and correctly formatted as list[str] with standardized affiliation strings so metaTagExtraction() can correctly derive AU_CO. Country name formatting in C1 should also conform to WoS conventions to maximize matches against the shapefile. + +### get_countriesproductionovertime.py +1) Extracts author countries via metaTagExtraction(), pairs each country with its publication year, computes cumulative article counts over time, and returns a line chart of the top-k countries' production over time plus the underlying DataFrame. +2) **www.services**. +3) AU_CO (derived), PY, C1 (implicitly required by metaTagExtraction()) +4) **Yes**. metaTagExtraction(Field="AU_CO") relies on WoS-style affiliation parsing of C1, as flagged in metatagextraction.py. Non-WoS sources will silently produce empty or wrong country values. +5) Issues: no validation that C1 or PY exist before use, mirroring the crash pattern flagged in metatagextraction.py and thematicmap.py; years = data["PY"].repeat(nAFF).values[:len(affiliations)] silently misaligns years with affiliations if any AU_CO entry was NaN and got dropped by dropna() — the repeat is based on the full DataFrame length but AFF has already dropped rows; PY is never cast to a numeric type before astype(int) — if PY contains empty strings (as our ETL schema allows), this will crash. +6) **Yes**. C1 must be present and correctly formatted as list[str] so metaTagExtraction() can derive AU_CO. PY must be present, non-empty, and castable to integer for the year alignment logic to work correctly. + + +### get_data.py +1) Handles file upload from the Shiny dashboard UI. Depending on the selected mode, it processes one or more bibliographic files via biblio_json() or process_multiple_files(), loads the result into the reactive DataFrame df, and returns a status message to display in the UI. +2) **www.services**. +3) **No**. +4) **Indirectly**. 
biblio_json() and process_multiple_files() are the functions that actually parse and standardize the data — if those have WoS-specific assumptions (as flagged in parsers.py), the DataFrame loaded here will reflect those issues. +5) // +6) **Yes**. This is the entry point where our ETL must be plugged in. The "1B" path in particular must be routed through the standardization pipeline rather than calling pd.read_excel() directly, to ensure all downstream functions receive a correctly typed and validated DataFrame. + +### get_database.py +1) Maps the user's UI selection to a human-readable database name string. Reads two Shiny input controls, input.select() (which tab is active) and input.database() (which source was chosen), and returns a plain string like "Web of Science" or "Scopus". +2) **www.services**. +3) **No**. +4) **None directly**. However this function is the gatekeeper that sets the DB value downstream. The string it returns must match whatever the ETL pipeline uses as the DB column value. +5) Two: DB value mismatch, the exam spec requires DB to hold standardised identifiers like "WEB_OF_SCIENCE" or "SCOPUS" while this function returns display strings ("Web of Science", "Scopus"), which are not the same - if DB is populated from this output, the contract is broken; UnboundLocalError risk, if input.select() returns anything outside "1A", "1B", "1C", the function reaches return database without ever assigning it, so it needs an else branch or a default. +6) **Yes**. Either this function's return values must be updated to match the schema DB identifiers, or the ETL Transform phase must normalise the returned string into the correct DB value before writing to the DataFrame. + +### get_factorialanalysis.py +1) Builds a 2D interactive word map for conceptual structure analysis. 
It takes a DataFrame and a field (ID, DE, TI, AB), constructs a document-term matrix, runs a dimensionality reduction method (MCA, CA, or MDS), clusters the resulting term coordinates with hierarchical clustering, and returns an annotated Plotly scatter figure plus coordinate/cluster DataFrames. Also contains helpers: _to_seq (flatten values to list), eig_correction (Benzecri eigenvalue correction), avoidOverlaps (label deduplication — currently commented out), and assign_consistent_colors. +2) **www.services**. +3) **ID, DE, TI, AB**. +4) field="ID" default. ID (Keywords Plus) is a WoS-exclusive field — it does not exist in Scopus, PubMed, or Dimensions exports. Using ID as the default silently produces an empty or broken analysis on non-WoS data. +5) // +6) **Yes**. The ETL must ensure that: ID is a list[str] (WoS Keywords Plus) and, for non-WoS sources that lack ID, populate it as [] per the null contract — but also ensure the UI defaults field to DE (author keywords) for those sources, since an all-empty ID column will produce no usable analysis; DE, TI, AB are correctly typed (list[str] for DE, str for TI/AB). + +### get_filters.py +1) Two functions. get_filters() enriches the DataFrame with computed filter metadata: min/max publication year, average citations per year, and Bradford's Law zone assignment per source journal. get_filtered_table() applies user-driven UI filters (year range, language, document type, avg citations, Bradford zone) to the enriched DataFrame, then passes the result to get_table() for display. +2) **www.services**. +3) **PY, TC, SO, LA, DT**. +4) **Yes**. LA and DT value sets are implicitly WoS-formatted. The UI populates filter options from whatever values exist in these columns. WoS uses "English" and "Article"; Scopus may use "English" but "Journal Article" for DT. If not normalised by ETL, the filter checkboxes will show mixed values and users may filter out valid records unintentionally. 
|| Bradford zone logic assumes SO is a clean, standardised journal name. WoS and Scopus capitalise journal names differently, so the same journal can appear as two separate sources, splitting its frequency and producing wrong zone assignments. +5) Division by zero in Average_Citations_Per_Year. If PY == current_year, Years_Since_Publication = 1 — safe. But if PY > current_year (malformed data), the denominator goes negative. No guard exists. ETL should clamp PY to <= current_year. || TC nulls not handled. If TC contains NaN (not coerced to 0 by ETL), Average_Citations_Per_Year will be NaN, silently breaking the citations slider filter in get_filtered_table(). +6) **Yes**. The ETL must: Cast TC to int, nulls → 0 || Cast PY to int, no nulls, clamped to valid range || Normalise SO to a consistent casing (uppercase) across sources || Normalise DT to a controlled vocabulary (e.g. "Article", "Review") so UI filters work identically regardless of source || Normalise LA to a consistent format (e.g. "ENGLISH"). + +### get_frequentwords.py +1) Two functions. get_frequent_words() produces a lollipop scatter chart and full frequency table of the most common words/keywords in a chosen field. It supports n-grams (for TI/AB), custom stopword removal, and synonym merging. table_tag() is the core extraction engine: it deduplicates by SR, routes to either term_extraction() (for free text fields TI/AB) or direct column access (for keyword fields DE/ID), then counts terms using Counter. +2) **www.services**. +3) SR, and one of DE, ID, TI, AB depending on word_type +4) **Yes**. ID (Keywords Plus) is WoS-exclusive. Selecting word_type="ID" on non-WoS data will operate on an empty or absent column with no error. || SR deduplication assumes SR is always populated. SR is a calculated field ("FirstAuthor, Year, Journal") generated by the WoS pipeline. If ETL does not produce it, drop_duplicates(subset='SR') will raise a KeyError. 
|| eval(x) on DE/ID strings assumes the column was serialised as a Python list literal (e.g. "['kw1', 'kw2']"), which is a WoS/internal serialisation convention. Scopus CSV exports use semicolon-delimited strings, causing eval() to raise a SyntaxError or return the wrong structure. +5) remove_terms only applied for DE/ID, not for TI/AB. The guard if remove_terms and tag in ['DE', 'ID'] means stopword removal is silently skipped when analysing titles or abstracts, which is likely unintentional. || Missing SR crashes with an uninformative error. If SR is absent, drop_duplicates(subset='SR') raises KeyError with no informative message to the user. +6) **Yes**. The ETL must: Populate SR for all rows (non-empty string). || Ensure DE and ID are list[str], not raw strings — this eliminates the eval() hazard entirely. || Ensure ID is [] for non-WoS sources so the function degrades gracefully rather than crashing. || TI and AB must be str, not NaN/None. + +### get_historiograph.py +1) Builds an interactive historiographic network map showing citation relationships between key papers over time. It calls metaTagExtraction() and histNetwork() from services to construct the citation graph, then histPlot() for the initial layout. It then rebuilds the graph with networkx, optionally removes isolated nodes, positions nodes on a timeline (x = year, y = cluster), computes node sizes from local citation scores (LCS), and renders an interactive pyvis HTML network saved to a temp file. Returns the plot object, a metadata DataFrame, and the temp HTML filename. +2) **www.services**. +3) **SR, CR, DOI, TI, DE, ID, PY**. +4) **Yes**. histNetwork() parses CR using WoS reference string format ("Author, Year, Journal, Vol, Page"). This is the most WoS-specific dependency in the entire codebase. Non-WoS CR strings will produce zero or wrong citation matches, resulting in an empty or disconnected graph. || metaTagExtraction(df, "SR") regenerates SR from WoS-style author/year/journal fields.
If SR was not correctly populated by ETL, this call may produce malformed node identifiers that break edge matching. || node_label="ID" and node_label="DE" are swapped. The code maps "ID" → row.get("Author_Keywords") and "DE" → row.get("KeywordsPlus"), which is the reverse of the standard schema (DE = author keywords, ID = Keywords Plus). This is a WoS internal naming artefact from histNetwork() output columns. +5) DE/ID label mapping is inverted (as noted above). A user selecting node_label="DE" gets Keywords Plus, not author keywords. Needs a one-line swap or renaming in histNetwork() output. || eval() used again for DE/ID node labels (same pattern as get_frequentwords.py). Unsafe and redundant if ETL guarantees list[str]. +6) **Yes, high priority**. The ETL must: Populate SR correctly as "FirstAuthor, Year, Journal" — it is the primary node key for the entire graph. || Normalise CR entries to WoS reference string format, as histNetwork() depends on it for edge construction. This is the single highest-risk dependency in the project for non-WoS sources. || Ensure DOI is str, empty string "" if missing (not NaN). || Ensure DE and ID are list[str] to eliminate the eval() calls. + + +### get_localcitedauthors.py +1) Finds which authors are most cited within the dataset itself (not globally), ranks them by local citation count, and draws a dot chart of the top-k authors. +2) **www.services**. +3) **AU**, **TC** (core); **SR** (must already exist or be built by `metaTagExtraction` before use). +4) **No** explicit DB checks, but `metaTagExtraction` and `histNetwork` are built for WoS-style data. +5) `AU` is exploded without checking if it is a proper list — plain strings will produce wrong results. If `histNetwork` returns an empty result the function crashes immediately. `SR` is rebuilt here by `metaTagExtraction`, which should instead already be present from the ETL. +6) **Yes**. 
`AU` must be a `list[str]`, `TC` must be non-null and numeric, and `SR` must be correctly built by the ETL pipeline. + +### get_localciteddocuments.py +1) Finds which papers in the dataset are most cited by other papers in the same dataset (local citations), ranks them, and draws a dot chart of the top-k documents. Also returns a table with local citations, global citations, and normalized metrics per document. +2) **www.services**. +3) **SR**, **TC**, **DI**, **PY** (core). +4) **No** explicit DB checks, but `SR` is expected in WoS format and `histNetwork` is built for WoS-style data. +5) `SR` is rebuilt here internally instead of being taken from the ETL pipeline. If `TC` contains nulls, `fillna(0)` handles it, but the LC/GC ratio calculation will produce division by zero for papers with zero global citations. If `histNetwork` returns an empty result the function crashes immediately. +6) **Yes**. `SR` must be correctly built by the ETL, `TC` and `PY` must be non-null and numeric, and `DI` must be present as a string. + +### get_localcitedreferences.py +1) Counts how many times each reference is cited across all papers in the dataset, ranks them, and draws a dot chart of the most cited references. Unlike global citation counts, this only looks at citations within the dataset itself. +2) **www.services**. +3) **CR** only. +4) **No** explicit DB checks, but the fallback string splitting uses the user-provided separator, which means the function can handle non-WoS sources if `CR` is correctly formatted as a list or delimited string. +5) Crashes if `CR` is missing entirely. If `CR` is an empty list or all nulls the chart will be empty with no clear error. The check `isinstance(data["CR"].iloc[0], list)` will crash if the DataFrame is empty. +6) **Yes**. `CR` must be present and correctly formatted as a `list[str]` where each element is an individual reference string. 
+ +### get_localcitedsources.py +1) Counts how many times each journal or source is cited across all papers in the dataset, ranks them, and draws a dot chart of the most locally cited sources. The source names are extracted from the cited references using `metaTagExtraction`. +2) **www.services**. +3) **CR** (needed by `metaTagExtraction` to extract `CR_SO`); **CR_SO** (derived column, used directly for counting). +4) **Yes**. `metaTagExtraction` parses source names from WoS-style reference strings. Non-WoS sources with differently formatted references will likely produce empty or wrong results. +5) Crashes if `CR_SO` is missing or empty. The check `isinstance(data["CR_SO"].iloc[0], list)` will crash if the DataFrame is empty. If `metaTagExtraction` fails silently, the chart will be empty with no clear error. +6) **Yes**. `CR` must be present as a properly formatted list of reference strings so that `metaTagExtraction` can correctly extract the source names into `CR_SO`. + +### get_lotkalaw.py +1) Applies Lotka's Law to measure author productivity — it counts how many authors wrote exactly 1, 2, 3... papers, compares the observed distribution against the theoretical one, and draws a line chart showing both curves side by side. +2) **www.services**. +3) **AU** only. +4) **No** explicit DB checks. +5) Crashes if `AU` is missing or not a list — the list flattening `[author for sublist in data['AU'] for author in sublist]` will fail if any row is a plain string or null. If all authors wrote only one paper, `np.polyfit` may produce unreliable results with no warning. +6) **Yes**. `AU` must be present and correctly formatted as a `list[str]` per row. + +### get_maininformations.py +1) Computes a comprehensive set of summary statistics for the dataset and adds them as new columns to the DataFrame. 
Metrics include: publication year range, unique sources, annual growth rate (CAGR), unique authors, single-authored documents, international co-authorship percentage, co-authors per document, unique author keywords, references per document, average document age, and average citations per document. Returns the enriched DataFrame. This is the main "overview" function used to populate the summary panel of the dashboard. +2) **www.services**. +3) **PY**, **SO**, **AU**, **TC**, **CR**, **DE** (core); **AU_CO** (derived — extracted by `metaTagExtraction()` if not already present). +4) `metaTagExtraction(df, "AU_CO")` is called to extract country information from WoS-style affiliation strings if `AU_CO` is missing. Non-WoS sources with differently formatted affiliations will produce wrong or empty country counts, causing the international co-authorship metric to be zero or incorrect. +5) `AU` is iterated as a list without a null guard — if any row contains a plain string instead of a list, the flattening `[author for sublist in AU_list for author in sublist]` will iterate over characters and produce wrong author counts silently. Same issue applies to `DE` and `CR`. CAGR calculation divides by `ny = max - min` which will be zero if all papers are from the same year, causing a `ZeroDivisionError`. +6) **Yes, high priority.** Ensure `AU`, `DE`, and `CR` are all `list[str]` — this function iterates over them directly and will silently produce wrong results if they are plain strings. Ensure `PY` is non-null and numeric to avoid crashes in year-range and age calculations. Ensure `TC` is numeric with nulls replaced by `0`. Ensure `C1` or `RP` are correctly populated so that `metaTagExtraction()` can extract `AU_CO` if needed. 
+ +### get_referencesspectroscopy.py +1) Generates a Reference Publication Year Spectroscopy (RPYS) analysis — a technique that identifies which historical years had the most influence on a research field by counting how often papers from each year are cited in the dataset's reference lists. It extracts publication years from each cited reference string, counts citations per year, computes a 5-year moving median deviation to highlight anomalous peaks, and returns an interactive dual-line chart, a year-level summary table, and a reference-level table with Google Scholar links. +2) **www.services**. +3) **CR** only. +4) Year extraction from reference strings uses the regex `r'\b\d{4},'` which matches a 4-digit year followed by a comma — this is the WoS reference string format ("Author, Year, Journal, Vol, Page"). Non-WoS reference formats that place the year differently (e.g. PubMed, Scopus) will produce zero year matches, resulting in an empty chart. +5) `df['CR'].apply(lambda x: [i for i in x])` assumes `CR` is already a list — if it arrives as a plain string it will iterate over characters and produce garbage silently. If `CR` is entirely empty or null the `year_seq.min()` call will crash. The year regex silently assigns `0` to references where no year is found, which then pollutes the year distribution if not filtered out. +6) **Yes, high priority.** Ensure `CR` is a `list[str]` where each element is a properly formatted reference string. The year regex `r'\b\d{4},'` requires the year to be followed by a comma — ETL must ensure CR entries follow the WoS format "Author, Year, Journal, Vol, Page" for year extraction to work correctly across all sources. References with no detectable year should be filtered out rather than assigned year `0`. + +### get_relevantaffiliations.py +1) Ranks institutions by number of publications and draws a dot chart of the top-k affiliations. 
Depending on the `disambiguation` parameter, it either uses `AU_UN` (a cleaned and disambiguated university name field) or the raw `C1` affiliation strings. Returns the chart and a summary table. +2) **www.services**. +3) **AU_UN** or **C1** depending on the `disambiguation` parameter — only one is used per call. +4) `AU_UN` is a WoS-derived column that contains disambiguated university names — it does not exist natively in non-WoS sources and must be built by the ETL from `C1`. If `disambiguation == "no"`, `C1` is used directly, which is more portable across sources. +5) Crashes immediately if `AU_UN` is missing when `disambiguation == "yes"`, or if `C1` is missing when `disambiguation == "no"` — no guard exists for either case. Both columns are expected to be `list[str]` per row — plain strings will produce wrong results after `explode()`. The docstring mentions `num_of_authors` and `frequency` as parameter names but the actual parameters are `num_of_affiliations` and `disambiguation`, indicating copy-paste drift. +6) **Yes.** Ensure `C1` is present as a `list[str]` of affiliation strings — it is the primary input when `disambiguation == "no"` and the source for building `AU_UN` when `disambiguation == "yes"`. Ensure `AU_UN` is derived from `C1` during the ETL Transform phase and stored as a `list[str]` of cleaned university names. + +### get_relevantauthors.py +1) Ranks authors by number of publications, percentage of documents, or fractionalized count (where each author of a multi-authored paper gets a fractional credit), and draws a dot chart of the top-k authors. Returns the chart and a full ranking table. +2) **www.services**. +3) **AU** only. +4) **No** explicit DB checks, but `AU` is expected in WoS author format. The fallback `lambda x: x if isinstance(x, list) else []` silently replaces non-list values with an empty list instead of trying to parse them, which means authors from non-WoS sources arriving as delimited strings will be completely ignored. 
+5) Non-list `AU` values are silently dropped rather than parsed, so non-WoS sources that store authors as semicolon-delimited strings will produce an empty chart with no error. The `frequency` parameter values in the docstring (`"N. of Documents"`, `"Percentage"`, `"Fractionalized"`) do not match the actual values checked in the code (`"percentage"`, `"freq_measure"`), meaning the default `"N. of Documents"` always falls through to the raw count branch regardless of user selection. +6) **Yes.** Ensure `AU` is present and correctly formatted as a `list[str]` per row — non-list values are silently ignored, producing wrong author counts. Ensure author names follow a consistent format (e.g. `"Surname, Firstname"`) across all sources to avoid duplicate entries for the same author. + +### get_relevantsources.py +1) Ranks journals or sources by number of publications and draws a dot chart of the top-k sources. Returns the chart and a full ranking table. +2) **www.services**. +3) **SO** only. +4) **No** explicit DB checks, but `SO` is the WoS tag for journal/source name. Sources using a different column name will crash immediately. +5) Crashes if `SO` is missing entirely. No check is performed on whether `SO` values are plain strings — if they arrive as lists the `value_counts()` will produce wrong results. No guard against an empty dataset after `dropna()`. +6) **Yes.** Ensure `SO` is present, non-null, and a plain string representing the journal or source name. Standardize casing consistently across sources (e.g. always uppercase) to avoid the same journal appearing multiple times under different capitalizations. + +### get_sourceslocalimpact.py +1) Calculates impact scores (h-index, g-index, m-index, total citations, number of papers) for each journal or source, ranks them by the chosen metric, and draws a horizontal bar chart of the top-k sources. Returns the chart and the full ranking table. +2) **www.services**. +3) **SO**, **TC**, **PY**. 
+4) **No** explicit DB checks, but `SO`, `TC`, and `PY` are all WoS column tags. Sources using different names will crash immediately. +5) `h_calc` and `g_calc` are applied via `transform` instead of `agg`, which calls them once per row rather than once per group — this produces incorrect index values silently. `TC` and `PY` are cast with `errors='coerce'` and rows with nulls are dropped, but no warning is raised if a large fraction of rows is lost. Division by zero is possible in `m_index` if `today == PY_start - 1`, though extremely unlikely. +6) **Yes.** Ensure `SO` is present as a non-null string, `TC` is numeric with nulls replaced by `0`, and `PY` is a valid 4-digit year. The `h_calc` and `g_calc` functions need to be fixed to use `agg` instead of `transform` to produce correct index values — this is a bug in the function itself that the ETL cannot work around. + +### get_sourcesproduction.py +1) Computes annual or cumulative publication counts per journal over time, selects the top-k sources by total output, and draws a multi-line chart showing each source's production trajectory. Returns the chart and the year-by-source matrix. +2) **www.services**. +3) **SO**, **PY** — both accessed directly and also passed to `cocMatrix()` internally. +4) **No** explicit DB checks, but `SO` and `PY` are WoS column tags. `cocMatrix()` is also built assuming WoS-style input. +5) `PY` is cast to `str` before `cocMatrix()` and back to `int` after — if `PY` contains nulls or non-numeric values this double cast will crash. If all papers belong to a single source `WSO.shape[1] == 1` is handled, but if `SO` is entirely missing `cocMatrix()` will crash with no clear error. No guard against `num_of_sources_production` being zero. +6) **Yes.** Ensure `SO` is present as a non-null string and `PY` is a valid 4-digit integer — both are cast and used in matrix operations that will crash or silently produce wrong results if the types are incorrect.
+ +### get_status.py +1) Two small utility functions: `get_status()` converts a list of missing-value percentages into human-readable status labels (Excellent, Good, Acceptable, Poor, Critical, Completely missing), and `get_status_color()` maps each status label to a CSS background color for dashboard display. Used to give a quick visual quality assessment of the dataset columns. +2) **www.services**. +3) **None** — this file does not access any DataFrame column. It only processes a list of percentages passed in as a parameter. +4) **No**. +5) No input validation on `missing_percentage` — if a non-numeric value is passed, the comparisons will fail silently and return `"Unknown"`. The two functions are tightly coupled by string labels but there is no shared constant, so a typo in one function would break the other silently. +6) **No direct ETL relevance.** This is a pure utility file for dashboard display. The ETL pipeline does not need to produce any specific column for this function to work. + +### get_table.py +1) Generates a metadata completeness report for the loaded dataset. It counts missing values, empty strings, and empty lists for every column, calculates the percentage of missing data per column, assigns a quality status (Excellent, Good, Acceptable, Poor, Critical, Completely missing), and displays the results as both a Plotly table and an interactive HTML data table with export buttons. This is the main data quality dashboard panel — it gives users an immediate overview of which columns are well populated and which need attention. +2) **www.services**; **get_status** (imported explicitly for status label and color functions). +3) **All columns present in the DataFrame** — it iterates over every column to compute missing value counts. The `column_descriptions` dictionary defines a fixed set of expected columns: `AB, AU, AU_UN, DB, DE, DT, LA, PU, PY, RP, SC, SO, SR, TC, TI, UT, C1, CR, OI, AU1_UN, EM, DI, BP, EP, SN, VL, ID, FU, FX, JI, OA, IS, PMID`. 
+4) **No** explicit DB checks, but the `column_descriptions` dictionary is entirely based on WoS field tags. Non-WoS columns not in this dictionary will still appear in the table but with no human-readable description. +5) The status color mapping in `create_plotly_table` uses `"Fair"` and `"Poor"` as keys, but `get_status()` never produces `"Fair"` — it produces `"Acceptable"` instead. This means the color for `"Acceptable"` rows will always fall through to `"white"`, losing the intended visual warning. Missing values are counted as NaN, empty string, single space, or empty list — but not `None`, which may slip through undetected. +6) **Yes.** The ETL must ensure all mandatory columns defined in the schema are present in the DataFrame — even if empty — so this function can report their completeness status correctly. Columns populated with `None` instead of `""` or `[]` will be undercounted in the missing value report, giving a false "Excellent" status. + +### get_thematicevolution.py +1) Tracks how research themes evolve over time by splitting the dataset into user-defined time periods, running a full thematic map analysis on each period, and then computing inclusion, weighted inclusion, and stability indices to measure how strongly themes from one period carry over into the next. The results are visualised as an interactive network where nodes are research clusters and directed edges show thematic continuity between periods. Also returns a summary table of cluster transitions and the raw thematic map results per period. One of the most complex files in the codebase — it internally calls `thematic_map()`, `timeslice()`, and `plot_thematic_evolution()`. +2) **www.services** +3) **None directly** — all column access is delegated to `thematic_map()` and `timeslice()`. `PY` is the only column accessed directly inside `timeslice()`. +4) **Yes**. The field names `ID`, `DE`, `TI`, `AB` are WoS column tags passed to `thematic_map()` internally. 
Non-WoS sources using different names will produce empty results. `thematic_map()` also assumes WoS-style keyword formatting for `ID` and `DE`. +5) If `years` is not provided the function raises a `ValueError` immediately — no default is computed. If any time period produces zero clusters, the function prints a message and returns early with no chart and no clear error to the user. The `thematic_map()` return value is assumed to be a tuple but is also checked for being a dict — this inconsistency suggests the internal API is unstable and may break silently depending on the version. Temporary HTML file is never deleted. +6) **Yes, high priority.** Ensure `PY` is non-null and numeric — it is the only column used directly by `timeslice()` to split the data into periods, and wrong values will produce empty or misaligned time slices. Ensure `ID` and `DE` are `list[str]` — they are the primary inputs to `thematic_map()` for keyword network construction. For non-WoS sources that lack `ID`, populate it as `[]` per the null contract, but ensure the UI defaults the field to `DE` for those sources since an all-empty `ID` column will produce no usable analysis. + +### get_thematicmap.py +1) A thin wrapper around the internal `thematic_map()` function. It passes all parameters directly to `thematic_map()`, which builds a keyword co-occurrence network, detects research clusters, and positions them on a centrality vs. density bubble chart. Returns the map figure, the HTML network file path, and three DataFrames: term-level data, cluster-level data, and document-to-cluster assignments. +2) **www.services**. +3) **None directly** — all column access is delegated entirely to `thematic_map()`. +4) **Yes**. The field names `ID`, `DE`, `TI`, `AB` are WoS column tags passed through to `thematic_map()`. Non-WoS sources using different names will produce empty results. 
+5) This file has no error handling of its own — if `thematic_map()` crashes or returns unexpected output, the exception propagates directly to the caller with no useful context. The return value assumes `thematic_map()` always returns exactly 5 values — if the internal API changes this will break silently. +6) **Yes.** Ensure `ID` and `DE` are `list[str]` — they are the primary inputs to `thematic_map()`. For non-WoS sources that lack `ID`, populate it as `[]` and ensure the UI defaults the field to `DE`, since an all-empty `ID` column will produce no usable analysis. Ensure `TI` and `AB` are non-null strings if those fields are selected. + +### get_threefieldplot.py +1) Generates a Sankey diagram showing relationships between three user-selected bibliographic fields (e.g. authors → keywords → journals). For each field it builds a document-attribute matrix, computes co-occurrence counts between adjacent fields, and draws the flows as proportional bands connecting the three columns. Optionally derives extra columns like `CR_SO`, `AU_CO`, `AB_TM`, `TI_TM` via internal functions before building the matrices. +2) **www.services**; **textwrap**. +3) **None directly** — all column access is delegated to `cocMatrix()`, `metaTagExtraction()`, and `term_extraction()`. The actual columns consumed depend entirely on which fields the user selects. +4) **Yes**. All field names (`AU`, `DE`, `ID`, `SO`, `CR`, `TI`, `AB`, `WC`, `AU_CO`, `CR_SO`) are WoS column tags passed to `cocMatrix()`. Non-WoS sources using different names will produce empty matrices and a blank Sankey diagram with no error. +5) If any of the three `cocMatrix()` calls returns an empty matrix, the dot product for edge computation will silently produce an empty edge list and the diagram will render blank with no explanation. If `metaTagExtraction()` fails to extract `CR_SO` or `AU_CO`, those fields will be missing and `cocMatrix()` will crash immediately. 
+6) **Yes.** Ensure all potential field columns (`AU`, `DE`, `ID`, `SO`, `CR`, `TI`, `AB`, `C1`, `WC`) are present and correctly typed — `list[str]` for multi-value fields and `str` for scalar fields. Ensure `C1` is populated correctly so that `metaTagExtraction()` can derive `AU_CO` and `CR_SO` when those fields are selected. + +### get_treemap.py +1) Counts the most frequent words or keywords in a selected field, and displays them as an interactive treemap where each rectangle's size represents the word's frequency. For title (`TI`) and abstract (`AB`) fields it first runs text mining to extract meaningful terms before counting. Also returns a full frequency table. Contains an internal helper function `table_tag()` that handles the actual word extraction and counting. +2) **www.services**. +3) **SR** (used inside `table_tag()` for deduplication); **DE**, **ID**, **TI**, **AB** (whichever is passed as `word_type`). +4) **No** explicit DB checks, but field names `DE`, `ID`, `TI`, `AB` are all WoS tags. Non-WoS sources using different names will produce empty results. +5) `SR` must be present for deduplication — if missing, `drop_duplicates(subset='SR')` crashes immediately. For `DE` and `ID`, `eval()` is called on string values — this is unsafe if the column contains arbitrary text instead of a properly formatted list string, and redundant if the ETL already guarantees `list[str]`. If `word_type` is not one of the handled cases, `text_data` will be an unprocessed column and the word extraction will silently produce wrong results. +6) **Yes.** Ensure `SR` is present and non-null. Ensure `DE` and `ID` are `list[str]` to eliminate the unsafe `eval()` call. Ensure `TI` and `AB` are non-null strings if those fields are selected. + +### get_trendtopics.py +1) Identifies which words or keywords were most prominent in each time period by computing the median publication year for each term and plotting them as a bubble chart (term vs. year, bubble size = frequency). 
For title and abstract fields it first runs text mining before counting. Also returns the full trend data table. Contains an internal helper `field_by_year()` that builds the co-occurrence matrix and computes year quantiles per term. +2) **www.services**. +3) **PY** (accessed directly inside `field_by_year()`); **DE**, **ID**, **TI**, **AB**, or any derived field like `TI_TM`, `AB_TM` depending on `field_tt`. +4) **No** explicit DB checks, but field names are all WoS tags. Non-WoS sources using different names will produce empty results. +5) `PY` is used directly in `np.repeat(df['PY'], x)` without null checks — missing or non-numeric values will cause a crash. If the selected field is empty or missing, `cocMatrix()` will return an empty matrix and `np.quantile()` will crash on an empty array. If `term_extraction()` fails, the derived `TI_TM` or `AB_TM` column will be missing and the function crashes immediately. +6) **Yes.** Ensure `PY` is non-null and numeric — it is used directly in quantile calculations per term. Ensure `DE`, `ID`, `TI`, `AB` are correctly typed (`list[str]` for `DE`/`ID`, `str` for `TI`/`AB`) depending on the selected field. + +### get_wordcloud.py +1) Generates an interactive word cloud rendered as a pyvis HTML network where each word is a text-only node, sized and coloured by frequency. It calls table_tag() (defined locally, identical to the one in get_frequentwords.py) to count terms, places nodes at random polar coordinates within a compact radius, applies ForceAtlas2 physics for slight jitter, saves the result to a temp HTML file, and returns the filename plus a full frequency table. +2) **www.services**. +3) **SR, and one of DE, ID, TI, AB**. +4) ID is WoS-exclusive, same as in get_frequentwords.py and get_wordfrequency.py. || SR deduplication assumes SR is always populated, same as get_frequentwords.py. || eval() on DE/ID strings, same unsafe pattern as get_frequentwords.py. 
+5) remove_terms silently not applied for TI/AB, inherited from table_tag() — same bug as in get_frequentwords.py. +6) **Yes**. Same requirements as get_frequentwords.py: SR must be populated for all rows. || DE and ID must be list[str] to eliminate eval(). || ID must be [] for non-WoS sources. || TI and AB must be str, not NaN/None. + +### get_wordfrequency.py +1) Plots word/keyword frequency over time as a multi-line chart, one line per term. It calls term_extraction() for free-text fields (TI/AB) or reads keyword columns directly (DE/ID), then passes data to keyword_growth() which builds a year × term frequency DataFrame (cumulative or per-year). Two helpers are defined locally: trim_years() (fills a year range with observed frequencies and optionally cumulates) and keyword_growth() (parses terms, applies synonym merging and stopword removal, selects top-N terms, and assembles the final time series). +2) **www.services**. +3) **PY, and one of DE, ID, TI, AB depending on field_wf** +4) ID is WoS-exclusive (Keywords Plus). Same risk as in get_frequentwords.py — passing field_wf="ID" on non-WoS data silently operates on an empty column. || keyword_growth() splits on sep=";" by default, which matches WoS keyword serialisation. Scopus uses "; " (with trailing space) so terms may arrive with leading spaces (e.g. " MACHINE LEARNING") that survive the .upper() call and prevent correct term matching or synonym replacement. +5) data['Year'].min() and data['Year'].max() in keyword_growth() will raise a ValueError if PY is empty after dropna. No guard exists for empty DataFrames after filtering. || Leading/trailing whitespace in terms not stripped before Counter/groupby. Terms like " MACHINE LEARNING" and "MACHINE LEARNING" are counted separately, fragmenting frequencies. +6) **Yes**. The ETL must: Cast PY to int with no nulls — required by keyword_growth() for year range construction.
|| Ensure DE and ID are list[str] so the isinstance(x, str) branch in keyword_growth() is never taken, avoiding semicolon-split issues entirely. || Ensure ID is [] for non-WoS sources. || TI and AB must be str, not NaN/None. + +### get_worldmapcollaboration.py +1) Builds an interactive historiographic network map showing citation relationships between key papers over time. It calls metaTagExtraction() and histNetwork() from services to construct the citation graph, then histPlot() for the initial layout. It then rebuilds the graph with networkx, optionally removes isolated nodes, positions nodes on a timeline (x = year, y = cluster), computes node sizes from local citation scores (LCS), and renders an interactive pyvis HTML network saved to a temp file. Returns the plot object, a metadata DataFrame, and the temp HTML filename. +2) **www.services**. +3) **SR, CR, DOI, AU, TI, DE, ID, PY** +4) histNetwork() parses CR using WoS reference string format ("Author, Year, Journal, Vol, Page"). This is the most WoS-specific dependency in the entire codebase. Non-WoS CR strings will produce zero or wrong citation matches, resulting in an empty or disconnected graph. || metaTagExtraction(df, "SR") regenerates SR from WoS-style author/year/journal fields. If SR was not correctly populated by ETL, this call may produce malformed node identifiers that break edge matching. || node_label="ID" and node_label="DE" are swapped. The code maps "ID" → row.get("Author_Keywords") and "DE" → row.get("KeywordsPlus"), which is the reverse of the standard schema (DE = author keywords, ID = Keywords Plus). This is a WoS internal naming artefact from histNetwork() output columns. +5) DE/ID label mapping is inverted (as noted above). A user selecting node_label="DE" gets Keywords Plus, not author keywords. Needs a one-line swap or renaming in histNetwork() output. || eval() used again for DE/ID node labels (same pattern as get_frequentwords.py). Unsafe and redundant if ETL guarantees list[str]. 
|| hist_data["GCS"] cast to int in tooltip without null guard — if GCS is NaN, int(row.get('GCS', 0)) will raise a ValueError because int(float('nan')) fails in Python. +6) **Yes, high priority**. Populate SR correctly as "FirstAuthor, Year, Journal" — it is the primary node key for the entire graph. || Normalise CR entries to WoS reference string format, as histNetwork() depends on it for edge construction. This is the single highest-risk dependency in the project for non-WoS sources. || Ensure DOI is str, empty string "" if missing (not NaN). || Ensure DE and ID are list[str] to eliminate the eval() calls. + + + +--- + +## Summary + +### All columns required across the entire codebase +| Column | Used by | +|--------|---------| +| AU | biblionetwork.py, get_authorlocalimpact.py (core direct dependency), get_authorproductionovertime.py (core direct dependency), get_clusteringcoupling.py (indirect dependency via couplingMap), get_collaborationnetwork.py (core direct dependency), get_correspondingauthorcountries.py (indirect dependency via metaTagExtraction), get_historiograph.py (secondary direct dependency), get_localcitedauthors.py (core direct dependency), get_lotkalaw.py (core direct dependency), get_maininformations.py (core direct dependency), get_relevantauthors.py (core direct dependency), get_table.py (schema-level expected column), get_threefieldplot.py (indirect dependency via cocMatrix), get_worldmapcollaboration.py (secondary direct dependency) | +| AB | get_co_occurence_network.py (indirect dependency), get_factorialanalysis.py (core direct dependency), get_frequentwords.py (conditional direct dependency), get_table.py (schema-level expected column), get_thematicmap.py (indirect dependency via thematic_map), get_threefieldplot.py (indirect dependency via cocMatrix / term_extraction), get_treemap.py (conditional direct dependency), get_trendtopics.py (conditional direct dependency), get_wordcloud.py (conditional direct dependency), get_wordfrequency.py 
(conditional direct dependency) | +| TI | get_authorproductionovertime.py (secondary direct dependency), get_co_occurence_network.py (indirect dependency), get_factorialanalysis.py (core direct dependency), get_frequentwords.py (conditional direct dependency), get_historiograph.py (secondary direct dependency), get_table.py (schema-level expected column), get_thematicevolution.py (indirect dependency via thematic_map), get_thematicmap.py (indirect dependency via thematic_map), get_threefieldplot.py (indirect dependency via cocMatrix / term_extraction), get_treemap.py (conditional direct dependency), get_trendtopics.py (conditional direct dependency), get_wordcloud.py (conditional direct dependency), get_wordfrequency.py (conditional direct dependency), get_worldmapcollaboration.py (secondary direct dependency)| +| PY | get_affiliationproductionovertime.py (core direct dependency), get_annualproduction.py (core direct dependency), get_authorlocalimpact.py (core direct dependency), get_authorproductionovertime.py (core direct dependency), get_averagecitations.py (core direct dependency), get_citeddocuments.py (core direct dependency), get_clusteringcoupling.py (indirect dependency via couplingMap), get_co_occurence_network.py (indirect dependency), get_countriesproductionovertime.py (core direct dependency), get_filters.py (core direct dependency), get_historiograph.py (core direct dependency), get_localciteddocuments.py (core direct dependency), get_maininformations.py (core direct dependency), get_sourceslocalimpact.py (core direct dependency), get_sourcesproduction.py (core direct dependency), get_table.py (schema-level expected column), get_thematicevolution.py (core direct dependency via timeslice), get_trendtopics.py (core direct dependency), get_wordfrequency.py (core direct dependency), get_worldmapcollaboration.py (core direct dependency)| +| AU_UN | get_affiliationproductionovertime.py (core direct dependency), get_collaborationnetwork.py (indirect derived 
dependency via metaTagExtraction), get_relevantaffiliations.py (conditional core direct dependency, used when disambiguation == "yes"), get_table.py (schema-level expected column) | +| TC | get_authorlocalimpact.py (core direct dependency), get_authorproductionovertime.py (core direct dependency), get_averagecitations.py (core direct dependency), get_citedcountries.py (core direct dependency), get_citeddocuments.py (core direct dependency), get_clusteringcoupling.py (indirect dependency via couplingMap), get_filters.py (core direct dependency), get_localcitedauthors.py (core direct dependency), get_localciteddocuments.py (core direct dependency), get_maininformations.py (core direct dependency), get_sourceslocalimpact.py (core direct dependency), get_table.py (schema-level expected column) | +| SO | get_authorproductionovertime.py (secondary direct dependency), get_bradfordlaw.py (core direct dependency), get_filters.py (core direct dependency), get_maininformations.py (core direct dependency), get_relevantsources.py (core direct dependency), get_sourceslocalimpact.py (core direct dependency), get_sourcesproduction.py (core direct dependency), get_table.py (schema-level expected column), get_threefieldplot.py (indirect dependency via cocMatrix) | +| C1 | get_citedcountries.py (indirect dependency via metaTagExtraction), get_collaborationnetwork.py (indirect dependency via metaTagExtraction), get_correspondingauthorcountries.py (indirect dependency via metaTagExtraction), get_countriesproduction.py (indirect dependency via metaTagExtraction), get_countriesproductionovertime.py (indirect dependency via metaTagExtraction), get_maininformations.py (indirect dependency via metaTagExtraction, needed only if AU_CO must be derived), get_relevantaffiliations.py (conditional core direct dependency, used when disambiguation == "no"; also source field for deriving AU_UN), get_table.py (schema-level expected column), get_threefieldplot.py (indirect dependency via 
metaTagExtraction for AU_CO / CR_SO derivation) | +| RP | get_citedcountries.py (indirect dependency via metaTagExtraction), get_correspondingauthorcountries.py (indirect dependency via metaTagExtraction), get_maininformations.py (indirect dependency via metaTagExtraction, needed only if AU_CO must be derived), get_table.py (schema-level expected column)| +| SR | get_citeddocuments.py (core direct dependency), get_clusteringcoupling.py (indirect dependency via couplingMap), get_frequentwords.py (core direct dependency), get_historiograph.py (core direct dependency / also regenerated via metaTagExtraction), get_localcitedauthors.py (indirect / regenerated via metaTagExtraction), get_localciteddocuments.py (core direct dependency / also regenerated internally), get_table.py (schema-level expected column), get_treemap.py (core direct dependency for deduplication), get_wordcloud.py (core direct dependency for deduplication), get_worldmapcollaboration.py (core direct dependency / also regenerated via metaTagExtraction) | +| DI | get_authorproductionovertime.py (secondary direct dependency), get_citeddocuments.py (secondary direct dependency), get_localciteddocuments.py (secondary direct dependency), get_table.py (schema-level expected column)| +| DOI | get_historiograph.py (secondary direct dependency), get_worldmapcollaboration.py (secondary direct dependency) | +| CR | get_clusteringcoupling.py (indirect dependency via couplingMap), get_cocitation.py (indirect dependency via biblionetwork), get_historiograph.py (core direct dependency via histNetwork; WoS-style parsing dependency), get_localcitedreferences.py (core direct dependency), get_localcitedsources.py (indirect dependency via metaTagExtraction), get_maininformations.py (core direct dependency), get_referencesspectroscopy.py (core direct dependency), get_table.py (schema-level expected column), get_threefieldplot.py (indirect dependency via cocMatrix / metaTagExtraction), get_worldmapcollaboration.py (core 
direct dependency via histNetwork; WoS-style parsing dependency)| +| DE | get_clusteringcoupling.py (indirect dependency via couplingMap), get_co_occurence_network.py (indirect dependency), get_factorialanalysis.py (core direct dependency), get_frequentwords.py (conditional direct dependency), get_historiograph.py (secondary direct dependency), get_maininformations.py (core direct dependency), get_table.py (schema-level expected column), get_thematicevolution.py (indirect dependency via thematic_map), get_thematicmap.py (indirect dependency via thematic_map), get_threefieldplot.py (indirect dependency via cocMatrix), get_treemap.py (conditional direct dependency), get_trendtopics.py (conditional direct dependency), get_wordcloud.py (conditional direct dependency), get_wordfrequency.py (conditional direct dependency), get_worldmapcollaboration.py (secondary direct dependency; DE/ID mapping inversion noted) | +| ID | get_clusteringcoupling.py (indirect dependency via couplingMap), get_co_occurence_network.py (indirect dependency), get_factorialanalysis.py (core direct dependency; default WoS-specific field), get_frequentwords.py (conditional direct dependency), get_historiograph.py (secondary direct dependency), get_table.py (schema-level expected column), get_thematicevolution.py (indirect dependency via thematic_map; WoS-default field), get_thematicmap.py (indirect dependency via thematic_map), get_threefieldplot.py (indirect dependency via cocMatrix), get_treemap.py (conditional direct dependency), get_trendtopics.py (conditional direct dependency), get_wordcloud.py (conditional direct dependency; WoS-specific field), get_wordfrequency.py (conditional direct dependency; WoS-specific field), get_worldmapcollaboration.py (secondary direct dependency; DE/ID mapping inversion noted) | +| WC | get_co_occurence_network.py (indirect dependency), get_threefieldplot.py (indirect dependency via cocMatrix) | +| CR_AU | get_cocitation.py (indirect derived dependency via 
metaTagExtraction / biblionetwork) | +| CR_SO | get_cocitation.py (indirect derived dependency via metaTagExtraction / biblionetwork), get_localcitedsources.py (indirect derived dependency via metaTagExtraction; then used directly for counting), get_threefieldplot.py (indirect derived dependency via metaTagExtraction) | +| AU_CO | get_collaborationnetwork.py (indirect derived dependency via metaTagExtraction), get_correspondingauthorcountries.py (indirect derived dependency via metaTagExtraction), get_countriesproduction.py (indirect derived dependency via metaTagExtraction), get_countriesproductionovertime.py (indirect derived dependency via metaTagExtraction), get_maininformations.py (indirect derived dependency via metaTagExtraction), get_threefieldplot.py (indirect derived dependency via metaTagExtraction) | +| AU1_CO |get_correspondingauthorcountries.py (indirect derived dependency via metaTagExtraction), get_table.py (schema-level expected column) | +| DB | get_database.py (core direct dependency), get_table.py (schema-level expected column) | +| LA | get_filters.py (core direct dependency), get_table.py (schema-level expected column) | +| DT | get_filters.py (core direct dependency), get_table.py (schema-level expected column) | +| PU | get_table.py (schema-level expected column) | +| SC | get_table.py (schema-level expected column) | +| UT | get_table.py (schema-level expected column) | +| OI | get_table.py (schema-level expected column) | +| EM | get_table.py (schema-level expected column) | +| BP | get_table.py (schema-level expected column) | +| EP | get_table.py (schema-level expected column) | +| SN | get_table.py (schema-level expected column) | +| VL | get_table.py (schema-level expected column) | +| FU | get_table.py (schema-level expected column) | +| FX | get_table.py (schema-level expected column) | +| JI | get_table.py (schema-level expected column) | +| OA | get_table.py (schema-level expected column) | +| IS | get_table.py (schema-level 
expected column) | +| PMID | get_table.py (schema-level expected column) | +| TI_TM | get_threefieldplot.py (indirect derived dependency via term_extraction) | +| AB_TM | get_threefieldplot.py (indirect derived dependency via term_extraction) | + +### Files that need patching +| File | Line | Issue | +|------|------|-------| +| histnetwork.py | 37 | if db == "Web_of_Science" | +| biblionetwork.py | 94 | if db == "web_of_science" | +| format_functions.py | multiple | if source == "Web_of_Science" | +| couplingmap.py | multiple | Root dependency: assumes WoS-style SR/CR reconstruction through metaTagExtraction(), biblionetwork(), and histNetwork(); breaks bibliographic coupling on non-WoS sources | +| get_authorproductionovertime.py | 28 | fallback str(x).split(",") assumes WoS comma-separated author format | +| get_citedcountries.py | 17 | metaTagExtraction(df, "AU1_CO") assumes WoS-style affiliation parsing | +| get_citeddocuments.py | 17 | metaTagExtraction(df, "SR") rebuilds SR from WoS-style fields | +| get_clusteringcoupling.py | 10 | couplingMap() built for WoS-style SR and CR | +| get_co_occurence_network.py | 38 | field names ID, DE, TI, AB, WC are WoS tags — ID is WoS-exclusive | +| get_cocitation.py | 47 | metaTagExtraction(M, Field="CR_AU") and metaTagExtraction(M, Field="CR_SO") parse WoS-style reference strings | +| get_collaborationnetwork.py | 55, 63 | metaTagExtraction(M, Field="AU_UN") and metaTagExtraction(M, Field="AU_CO") assume WoS-style affiliation strings | +| get_correspondingauthorcountries.py | 16, 17 | metaTagExtraction(df, Field="AU_CO") and metaTagExtraction(df, Field="AU1_CO") assume WoS-style affiliation parsing | +| get_countriesproduction.py | 15 | metaTagExtraction(df, "AU_CO") assumes WoS-style affiliation parsing | +| get_countriesproductionovertime.py | 15 | metaTagExtraction(df, "AU_CO") assumes WoS-style affiliation parsing | +| get_database.py | 18-29 | database = "Web of Science" returns display string instead of standardized identifier (e.g. 
"WEB_OF_SCIENCE") | +| get_factorialanalysis.py | 42 | field="ID" default assumes WoS Keywords Plus — field does not exist in non-WoS sources | +| get_filters.py | 77-78 | LA and DT filter values assume WoS vocabulary ("Article", "English") — non-WoS sources may use different values | +| get_frequentwords.py | 106 -119 | drop_duplicates(subset='SR') assumes SR always populated — crashes with KeyError if SR missing, eval(x) on DE/ID assumes WoS-style Python list serialization — breaks with Scopus semicolon-delimited strings | +| get_historiograph.py | 30, 153-159 | metaTagExtraction(df, "SR") rebuilds SR from WoS-style author/year/journal fields eval() on Author_Keywords / KeywordsPlus — unsafe; DE/ID label mapping is inverted (WoS naming artefact) | +| get_localcitedauthors.py | 22, 29 | metaTagExtraction(df, "SR") rebuilds SR from WoS-style author/year/journal fields / histNetwork() parses CR assuming WoS format Author, Year, Journal, Vol, Page| +| get_localciteddocuments.py | 16, 29 | metaTagExtraction(df, "SR") rebuilds SR from WoS-style fields / histNetwork() parses CR assuming WoS format Author, Year, Journal, Vol, Page | +| get_localcitedsources.py | 17 | metaTagExtraction(df, "CR_SO") parses source names from WoS-style reference strings | +| get_maininformations.py | 101 | metaTagExtraction(df, "AU_CO") assumes WoS-style affiliation parsing (C1/RP) to derive country per author | +| get_referencesspectroscopy.py | 35 | regex r'\b\d{4},' extracts year assuming WoS reference format Author, Year, Journal, Vol, Page — non-WoS formats produce zero year matches | +| get_relevantaffiliations.py | 20 | data["AU_UN"] is a WoS-derived column — does not exist natively in non-WoS sources and must be built by ETL from C1 | +| get_relevantauthors.py | 22 | fallback else [] silently drops non-list AU values — non-WoS sources with semicolon-delimited strings produce empty results | +| get_table.py | 91-125 | column_descriptions dictionary contains only WoS field tags — 
non-WoS columns appear without human-readable description | +| get_thematicevolution.py | 4, 98 | field="ID" default assumes WoS Keywords Plus — thematic_map() produces empty results on non-WoS sources | +| get_thematicmap.py | 4 | field="ID" default assumes WoS Keywords Plus — thematic_map() produces empty results on non-WoS sources | +| get_threefieldplot.py | 24, 26 | metaTagExtraction(df, "CR_SO") and metaTagExtraction(df, "AU_CO") assume WoS-style reference strings and affiliation parsing | +| get_treemap.py | 81, 91 | drop_duplicates(subset='SR') assumes SR always populated / eval(x) on DE/ID assumes WoS-style Python list serialization | +| get_trendtopics.py | 40, 105 | field_tt="ID" routed directly to cocMatrix() — WoS-exclusive field produces empty results on non-WoS sources / np.repeat(df['PY'], x) used without null check | +| get_wordcloud.py | 112, 125 | drop_duplicates(subset='SR') assumes SR always populated / eval(x) on DE/ID assumes WoS-style Python list serialization | +| get_wordfrequency.py | 135 | x.split(sep) with default sep=";" assumes WoS keyword serialization — Scopus uses "; " producing terms with leading spaces | +| get_worldmapcollaboration.py | 12 | metaTagExtraction(df, "AU_CO") assumes WoS-style affiliation parsing (C1/RP) to derive country per author | + +## Notes +- `metatagextraction.py` is the primary root dependency for most WoS-specific parsing issues. + It reconstructs derived fields such as `AU_CO`, `AU1_CO`, `AU_UN`, `CR_SO`, `CR_AU`, and `SR`. + Patching this file first resolves cascading failures across many caller modules. + +- `couplingmap.py` is a secondary root dependency for bibliographic coupling workflows. + It relies on WoS-style `SR` and `CR` normalization through `metaTagExtraction()`, + `biblionetwork()`, and `histNetwork()`. + Caller-side fixes alone are insufficient if coupling normalization remains WoS-dependent. 
+ + +# Bibliometrix-Python Codebase Audit + +## Purpose +This document maps every file in services/ and functions/ to the columns it depends on and any hardcoded WoS logic it contains. +It is used to verify that our ETL pipeline produces all required columns and to track which files need patching. + +--- + +## www/services/ +1) what it does +2) dependencies +3) columns used +4) WoS-specific logic +5) issues found +6) relevant for ETL: yes/no + +### biblionetwork.py +1) Takes the bibliographic DataFrame and builds a matrix showing how items (authors, sources, references, countries) are connected to each other. For example, two authors are "connected" if they cite the same references. It's the core function for generating all network analyses in the dashboard. +2) **utils.py, cocmatrix.py**. +3) **AU, CR, SO, ID, DE, DB** and derived ones. +4) It has **db_name == "SCOPUS"**. +5) If any of the above columns are absent (AU, CR, etc.), cocMatrix() will fail because it will try to read that column from the DataFrame without finding it. Python will throw a KeyError and the whole thing crashes. +6) **Yes**. Our ETL is responsible for producing the DataFrame that gets fed into functions like this one; if it fails to include AU, CR or whatever other column in the output (even as an empty list []), this function crashes immediately. + +### cocmatrix.py +1) Takes the bibliographic DataFrame and a column name (like AU or CR), and builds a matrix where rows are articles and columns are unique items (authors, keywords, references etc.). Each cell is 1 if that article contains that item, 0 otherwise. It's the building block that biblionetwork.py calls to create all its networks. +2) **utils.py**. +3) **SR, CR, AU, ID, DE, TI, AB** and derived ones. +4) No explicit DB checks. 
+5) It will crash if SR is missing (M.index = M["SR"] throws a KeyError immediately) and if the requested Field column is missing, it just prints a message and returns None (which then causes biblionetwork.py to crash when it tries to use that None because there is no error handling between the two functions: biblionetwork.py calls cocMatrix() and stores the result in WA; if the column is missing, cocMatrix() prints a message and returns None; biblionetwork.py doesn't check if WA is None — it immediately uses it in crossprod(WA, WA); crossprod tries to do matrix multiplication on None, which crashes with a TypeError). +6) **Yes**, SR must be present and correctly computed. + +### couplingmap.py +1) Builds and visualizes a "coupling map" — a bubble chart where clusters of related documents, authors, or sources are plotted by centrality vs impact. It combines network analysis, citation scoring, and cluster labeling into one visualization. It's one of the more complex files — it orchestrates many other services together. +2) **utils.py, cocmatrix.py, biblionetwork.py, termextraction.py, networkplot.py, histnetwork.py, metatagextraction.py, tabletag.py**. +3) **SR, AU, TC, DI, PY, DE, ID, TI, AB, SO**. +4) No explicit DB checks. +5) It will crash if SR (crashes immediately at metaTagExtraction(df, "SR")), TC (crashes in localCitations() at M['TC'].fillna(0)), AU (crashes in localCitations() at M['AU'].explode()), DI and PY (crashes when building the LCS output DataFrame) are missing. +6) **Yes**. SR, TC, AU, DI, PY, SO must all be present and correctly typed. + +### format_functions.py +1) It takes raw bibliographic data from any supported source (WoS, Scopus, PubMed, Dimensions, Lens, Cochrane) and converts it into a standardized dictionary with WoS-style column names. It has one formatting function per column (format_au_column, format_cr_column etc.) and a main entry point process_single_file() that calls all of them and assembles the final output. 
**This is the most important file for our ETL, it's basically a rough draft of what the ETL needs to be.** The project specs asks us to build a clean, robust version of what this file is already attempting. So rather than starting from scratch, for our ETL we should: study this file carefully to understand the existing column mappings; replace the fragile direct access (entry['Abstract']) with safe .get() calls; ensure null handling throughout (empty string "" or [] instead of None); make sure SR is always correctly computed. +2) **utils.py, parsers.py**. +3) **AB, AF, AU, AU_UN, AU1_UN, BP, EP, CR, C1, DB, DE, DI, DT, EM, FU, FX, IS, JI, ID, LA, OA, OI, PMID, PU, PY, RP, SC, SN, SO, SR, TC, TI, UT, VL** +4) **Yes**, every single formatting function branches on source (Web_of_Science, Scopus, PubMed, Dimensions, The_Lens, Cochrane) and file_type. This is basically the dispatcher that the specs asks us to build. +5) Yes, several functions access raw source columns directly without safety checks (e.g. entry['Abstract'], entry['Author full names']) which will crash with a KeyError if the raw file has different column names than expected. +6) **Yes**. + +### histnetwork.py +1) Builds a historical citation network. It figures out which papers in the dataset cite other papers in the same dataset (called "Local Citation Score" or LCS). It has two separate implementations: one for WoS and one for Scopus, and returns a network matrix plus citation statistics. +2) **utils.py, cocmatrix.py**. +3) **DB, DI, CR, TC, PY, SR, SR_FULL, TI, DE, ID, AU, BP, EP, LCS** +4) **Yes**. It explicitly checks **db == "Web_of_Science"** or **db == "Scopus"** and calls completely different functions for each. If DB contains anything else (e.g. "PUBMED", "DIMENSIONS"), it prints "Database not compatible" and returns None, meaning it silently fails for any source other than WoS and Scopus. 
+5) It will crash if: CR is missing (returns None immediately); SR_FULL is missing (crashes in the WoS branch when building LABEL); PY, AU, BP, EP are missing (crashes in the Scopus branch during merges). +6) **Yes**. DB values must exactly match "Web_of_Science" or "Scopus" for this function to work at all, and CR, PY, SR, TC must all be correctly populated. + +### histplot.py +1) Takes the output of histNetwork() and draws a historical citation network chart: papers are plotted as bubbles positioned by publication year on the x-axis, with edges showing which papers cite which. It's purely a visualization function, it doesn't touch the raw DataFrame directly. +2) **utils.py, networkplot.py** +3) **None directly** from the bibliographic DataFrame, it only reads from histResults which is the output of histNetwork(). Internally it uses histResults['NetMatrix'] and histResults['histData'] which contain Paper, Title, Author_Keywords, KeywordsPlus. +4) **No**. +5) **Only indirectly**, if histNetwork() failed to produce a proper NetMatrix or histData, this function will crash. But that's histNetwork()'s problem, not yours. +6) **No**. This is a pure visualization layer, it never reads our standardized DataFrame directly. + +### htmldownload.py +1) Takes an HTML file, renders it to a PNG screenshot using a headless Chrome browser, then overlays the bibliometrix logo on the bottom right. It's a utility for exporting visualizations as images. +2) **utils.py**. +3) **None**. +4) **No**. +5) **No**. +6) **No**. + +### igraph2vis.py +1) Converts an igraph graph object into an interactive vis.js network visualization, saves it as an HTML file, and returns the path. It handles node sizing, coloring by cluster, edge styling, and label overlap removal. Pure visualization utility. +2) **utils.py**. +3) **None**. +4) **No**. +5) **No**. +6) **No**. + +### metatagextraction.py +1) Computes derived columns that other functions need but that aren't in the raw data. 
Given a Field parameter, it generates one of: SR (short reference key), CR_AU (authors from cited references), CR_SO (sources from cited references), AU_CO (countries from affiliations), AU1_CO (first author's country), AU_UN (universities from affiliations). This is the file that generates most of the derived columns that, based on the project specs, should not be our responsibility (**must ask**). +2) **utils.py**. +3) **AU, JI, SO, PY, DB, CR, C1, RP**. +4) **Yes**, in multiple places: SR() checks db == "scopus" to format author names differently; CR_SO() checks db != "SCOPUS" to parse references differently; AU_UN() checks db in ["ISI", "OPENALEX"] for university extraction. +5) SR() crashes if AU, JI, SO, or PY are missing. AU_CO() and AU1_CO() crash if C1 and RP are both missing. CR_AU() and CR_SO() crash if CR is missing or not a list. +6) **Yes**. AU, JI, SO, PY, C1, RP must all be present and correctly typed for SR generation to work. +**N.B.** This file is what *generates SR*, which is in our target schema and is required by almost every other function. So while we don't need to generate AU_CO, CR_AU etc., we do need to ensure AU, JI, SO, PY, C1, RP are correctly populated so that SR() inside this file can run without crashing. Our ETL feeds this function indirectly. + +### networkplot.py +1) Takes a co-occurrence/coupling matrix (the output of biblionetwork.py) and builds an interactive network graph from it — handling clustering, layout, node sizing, edge weights, and color assignment. It's the core visualization engine for all network analyses in the dashboard. +2) **utils.py, cocmatrix.py**. +3) None directly from the bibliographic DataFrame, it only receives a pre-built NetMatrix as input. +4) **No**. +5) **No**. +6) **No**. If our ETL produces correct columns so that biblionetwork.py and cocmatrix.py can build the matrix successfully, this function will work automatically. 
+ +### parsers.py +1) Contains three raw file parsers, one each for Web of Science (parse_wos_data), PubMed (parse_pubmed_data), and Cochrane (parse_cochrane_data). Each parser reads a raw text file line by line and returns a list of dictionaries, one per article, with raw field tags as keys. This is the **Extract phase of the ETL**, it turns raw files into Python data structures before any column renaming or type enforcement happens. +2) **utils.py**. +3) **None**, these functions produce raw dictionaries from files, they don't read a DataFrame. +4) **Yes**, parse_wos_data is specifically built around the WoS plaintext format (two-letter tags, ER record separators, continuation lines starting with two spaces). The other parsers handle their own formats independently. +5) **No** major issues, although parse_pubmed_data has a minor bug: if a continuation line appears before any key is set, key will be undefined and it will crash with a NameError. +6) **Yes**, these are our Extract phase building blocks, especially parse_wos_data and parse_pubmed_data. + +### plotlydownload.py +1) Takes an existing Plotly figure, adds the bibliometrix logo and a title, scales it up to high resolution, and exports it as a PNG image. Pure export utility. +2) **utils.py**. +3) **None**. +4) **No**. +5) **No**. +6) **No**. + +### savereport.py +1) Saves analysis results (tables and plots) into a formatted Excel file with multiple sheets. Each sheet contains a styled table and the corresponding visualization. It's the reporting/export layer of the dashboard. +2) **utils.py, plotlydownload.py, htmldownload.py**. +3) **None**. +4) **No**. +5) **No**. +6) **No**. + +### tabletag.py +1) Takes a specified column from the DataFrame, extracts all individual terms from it, counts their frequency, and returns a sorted dictionary of term → count. Used for word frequency analysis, keyword counts, citation counts etc. For AB and TI fields it first runs text mining to extract meaningful terms before counting. 
+2) **utils.py, termextraction.py**. +3) **SR, CR, DE, ID, C1, AB, TI (whichever is passed as tag parameter)** +4) **No** explicit DB checks. +5) If SR is missing it crashes immediately on drop_duplicates(subset=["SR"]); if whichever column is passed as tag is missing it crashes when trying to process it. +6) **Yes**, SR must always be present and all the tag columns (CR, DE, ID, AB, TI, C1) must exist and contain properly formatted lists for this function to work correctly. + +### termextraction.py +1) Takes a text column (TI or AB), cleans it, removes stopwords, optionally applies stemming, and extracts n-grams using scikit-learn's CountVectorizer. Stores the result as a new column TI_TM or AB_TM. Called by tabletag.py before word frequency counting. +2) **utils.py**. +3) **TI** (default), **AB** (passed in by tabletag.py). +4) **No** +5) It crashes at M[field].astype(str) if whichever column is passed as field is absent. +6) **Yes**, both TI and AB must be present and populated as strings. + +### thematicmap.py +1) Builds a thematic map, a bubble chart plotting research clusters by their "centrality" vs "density". It combines keyword co-occurrence network analysis, community detection, and cluster characterization into one visualization. One of the most complex files in the codebase, it orchestrates biblionetwork, termextraction, and networkplot together. +2) **utils.py, igraph2vis.py, termextraction.py, biblionetwork.py**. +3) **ID, DE, TI, AB, SR, TC, PY, DI, AU, SO**. +4) **No** explicit DB check, but heavily assumes WoS-style keyword fields (ID, DE) are properly populated. +5) If SR is missing, it crashes in cluster_assignment() immediately. If TC or PY are missing, it crashes in cluster_assignment() when computing TCpY. If ID or DE are missing, it crashes when building the network matrix via biblionetwork(). +6) **Yes**. 
ID, DE, TC, PY, DI, AU, SO, SR must all be present and correctly populated + +### utils.py +1) Central imports file for the entire services layer — every other service file starts with from .utils import *. It also defines two important shared things: the columns list (the master list of all expected DataFrame columns) and the ICONS dictionary for the UI. Think of it as the shared foundation the whole codebase builds on. +2) // +3) Defines the master columns list: AB, AF, AU, AU1_UN, AU_UN, BP, C1, CR, DB, DE, DI, DT, EM, EP, FU, FX, ID, IS, JI, LA, OA, OI, PMID, PU, PY, RP, SC, SN, SO, SR, TC, TI, UT, VL. +4) **No**. +5) **No**, it's an imports file. +6) **Yes**. This columns list is used in format_functions.py to add extra columns to each entry, so our ETL output must at minimum cover what's in the target schema. +**N.B.** The columns list defined here is our ground truth for what columns the codebase expects. Cross-referencing it with the target schema from the exam spec: +- Columns in utils.py but not in the target schema are AU1_UN, AU_UN, EM, FU, FX, OA, OI, PU, SC, SN. These are extra columns the codebase uses but our ETL doesn't need to guarantee; +- Column in the target schema but not in utils.py is SR_FULL, generated by metatagextraction.py as a derived column. 
+ +## Master Column Dependency Table — services/ + +### Columns from target schema + +| Column | Used by | +|--------|---------| +| `DB` | biblionetwork, histnetwork, metatagextraction, cocmatrix | +| `SR` | cocmatrix, histnetwork, tabletag, thematicmap, metatagextraction | +| `AU` | biblionetwork, histnetwork, metatagextraction, thematicmap | +| `CR` | biblionetwork, cocmatrix, histnetwork, metatagextraction | +| `TI` | histnetwork, termextraction, thematicmap | +| `AB` | termextraction, thematicmap | +| `DE` | biblionetwork, thematicmap | +| `ID` | biblionetwork, thematicmap | +| `SO` | biblionetwork, histnetwork, metatagextraction | +| `JI` | metatagextraction | +| `PY` | histnetwork, thematicmap, metatagextraction | +| `TC` | histnetwork, thematicmap | +| `DI` | histnetwork, thematicmap | +| `C1` | metatagextraction | +| `RP` | metatagextraction | +| `AF` | format_functions | +| `BP` | histnetwork | +| `EP` | histnetwork | +| `VL` | format_functions | +| `IS` | format_functions | +| `LA` | format_functions | +| `DT` | format_functions | +| `PMID` | format_functions | +| `UT` | format_functions | + + +### Key takeaways + +- `SR` is the most critical column — used by almost everything, computed from `AU`, `JI`, `PY`, `SO` +- `DB` must exactly match `"Web_of_Science"` or `"Scopus"` where branch logic exists +- `CR` must be a parsed Python list, not a raw semicolon-separated string +- `AU` must also be a parsed Python list +- `SR`, `TC`, `PY` have no crash protection — must always be present and correctly typed + + +--- + +## functions/ +1) what it does +2) dependencies +3) columns used +4) WoS-specific logic +5) issues found +6) relevant for ETL: yes/no + + + +### get_affiliationproductionovertime.py +1) Counts cumulative publications per institution over time and draws a line chart for the top-k institutions. +2) **www.services**. +3) **AU_UN**, **PY**. 
+4) **Indirectly yes**: the function itself has no explicit WoS condition, but it depends on `AU_UN`, which is an internal/derived affiliation column usually built during WoS-oriented preprocessing. +5) Crashes if `AU_UN` is a plain string instead of a list, or if `PY` contains nulls. +6) **Yes**. `AU_UN` must be a `list[str]` per row, `PY` must be non-null and numeric. The ETL must build `AU_UN` from `C1` for non-WoS sources. + +### get_annualproduction.py +1) Counts how many papers were published each year and draws a line chart. +2) **www.services**. +3) **PY**. +4) **No**. +5) Crashes if `PY` is missing, non-numeric, or contains nulls. +6) **Yes**. `PY` must be present, non-null, and numeric. + + +### get_authorlocalimpact.py +1) Calculates impact scores (h-index, g-index, m-index, total citations) for each author and draws a bubble chart of the top authors. +2) **www.services**. +3) **AU**, **TC**, **PY**. +4) **No**. +5) Crashes if `AU` is not a list. Index calculations may produce wrong results due to incorrect use of `transform`. +6) **Yes**. `AU` must be a `list[str]`, `TC` and `PY` must be non-null and numeric. + + +### get_authorproductionovertime.py +1) Counts publications and citations per author per year and draws a scatter plot for the top-k authors. +2) **www.services**. +3) **AU**, **PY**, **TC** (core); **TI**, **SO**, **DI** (secondary — used for the document table, missing ones handled with a warning). +4) **No**, but the fallback author splitting uses a comma which is WoS-specific. +5) Wrong author names for non-WoS sources due to comma-based splitting. Missing `DI` silently returns an empty document table. +6) **Yes**. `AU` must be a `list[str]`, `PY` and `TC` must be numeric, `TI`, `SO`, `DI` must be present as strings. + + +### get_averagecitations.py +1) Calculates average citations per year and draws a line chart. +2) **www.services**. +3) **PY**, **TC**. +4) **No**. +5) Crashes if `PY` or `TC` are missing or non-numeric. 
Division by zero possible if `PY` equals the current year. +6) **Yes**. `PY` and `TC` must be present, non-null, and numeric. + + +### get_bradfordlaw.py +1) Applies Bradford's Law to rank journals by publications, divides them into three zones, and draws a log-scale chart highlighting the core journals. +2) **www.services**. +3) **SO**. +4) **No**. +5) Crashes if `SO` is missing. Null values in `SO` are silently ignored, potentially skewing zone boundaries. +6) **Yes**. `SO` must be present, non-null, and a string. + + +### get_citedcountries.py +1) Ranks countries by total or average citations and draws a dot chart of the top-k countries. +2) **www.services**. +3) **TC** (core); **C1** or **RP** (secondary — needed by `metaTagExtraction` to extract the country). +4) **Yes**. `metaTagExtraction` is built for WoS-style affiliation strings. +5) If `C1` or `RP` are missing or wrongly formatted, the chart will be empty with no clear error. `TC` non-numeric values will cause a crash. +6) **Yes**. `TC` must be numeric and non-null. `C1` or `RP` must be populated correctly for country extraction to work. + + +### get_citeddocuments.py +1) Ranks papers by total citations or citations per year and draws a dot chart of the top-k documents. +2) **www.services**. +3) **SR**, **TC**, **PY** (core); **DI** (secondary — included in the output table). +4) **No**, but `SR` is expected in WoS format. +5) Empty chart with no error if `SR` is missing. Crashes if `TC` or `PY` are non-numeric. Division by zero possible if `PY` equals current year. +6) **Yes**. `SR` must be correctly built by the ETL, `TC` and `PY` must be numeric, `DI` should be present as a string. + + +### get_clusteringcoupling.py +1) Groups papers or authors into clusters based on shared references or keywords and draws an interactive network. Saves the result as an HTML file. +2) **www.services**; **couplingMap**, **avoid_net_overlaps**. +3) **None directly** — all column access is delegated to `couplingMap`. 
+4) **Yes**. `couplingMap` is built for WoS-style data, especially `SR` and `CR`. +5) No validation on the network returned by `couplingMap` — a broken network causes a hard crash. Temporary HTML file is never deleted. +6) **Indirect**. The ETL must ensure `SR`, `CR`, `AU`, `TC`, `PY`, `DE`, `ID` are correctly formatted for `couplingMap` to work. + +### get_co_occurence_network.py +1) Builds a word or keyword co-occurrence network, plus a density heatmap, a statistics table, and a degree distribution plot. +2) **www.services**; **biblionetwork**, **network_plot**, **term_extraction**, **cocMatrix**, **avoid_net_overlaps**, **field_by_year**. +3) **None directly** — all column access delegated to internal functions. **PY** is accessed directly inside `field_by_year`. +4) **Yes**. Field names `ID`, `DE`, `TI`, `AB`, `WC` are WoS tags — non-WoS sources will produce an empty network. +5) If no field condition matches, the function silently returns nothing. Cluster colors are random on every run. Temporary HTML file is never deleted. +6) **Indirect**. The ETL must ensure `ID`, `DE`, `TI`, `AB`, `WC`, and `PY` are all present and correctly formatted. + +### get_cocitation.py +1) Builds a co-citation network — meaning it finds which references, authors, or sources are cited together most often across papers, and draws an interactive network where each bubble is a reference/author/source and lines show how often they are cited together. Also produces a density heatmap, a cluster statistics table, and a degree distribution plot. +2) **www.services**. +3) **None directly** — all column access is delegated to `biblionetwork` and `metaTagExtraction`. `CR`, `CR_AU`, and `CR_SO` are checked for existence but not read directly. +4) **Yes**. `biblionetwork` and `metaTagExtraction` are built for WoS-style reference strings. Non-WoS sources with differently formatted references will produce empty or broken networks. 
+5) If `biblionetwork` returns an empty network the function crashes with no clear error. Cluster colors are randomly generated on every run. Temporary HTML file is never deleted. +6) **Indirect**. The ETL must ensure `CR` is present as a properly split list of reference strings, and `CR_AU`/`CR_SO` can be derived from it if needed. + +### get_collaborationnetwork.py +1) Generates a collaboration network between authors, universities, or countries from a bibliographic DataFrame. It builds a graph via biblionetwork(), then produces four outputs: an interactive PyVis HTML network, a density heatmap, a cluster statistics table, and a normalized degree plot. +2) **www.services** +3) **AU**, then AU_UN, AU_CO. +4) **Yes**. There are two: metaTagExtraction() is called to derive AU_UN and AU_CO, this function is known to have hardcoded WoS parsing logic (affiliation string formats, country extraction patterns), so if affiliations from Scopus/PubMed are formatted differently it will silently produce empty or wrong values; biblionetwork() likely expects AU, AU_UN, AU_CO in WoS delimiter/format (semicolon-separated strings or lists). +5) **Yes**, metaTagExtraction() has hardcoded WoS affiliation parsing, so AU_UN and AU_CO will silently produce empty or wrong values for non-WoS sources. +6) **Indirectly**. The function itself is downstream of the ETL, but the pipeline must guarantee that AU is a proper list[str] and C1 is a list[str] with standardized affiliation strings so metaTagExtraction() can correctly extract AU_UN and AU_CO. + +### get_correspondingauthorcountries.py +1) Extracts the corresponding author's country (AU1_CO) and all author countries (AU_CO) via metaTagExtraction(), then counts articles, single-country publications (SCP), and multi-country publications (MCP) per country. Returns a horizontal bar chart and a summary table. +2) **www.services** +3) AU1_CO (derived), AU_CO (derived), AU, C1, RP (implicitly required by metaTagExtraction()). +4) **Yes**. 
Both metaTagExtraction(Field="AU_CO") and metaTagExtraction(Field="AU1_CO") rely on WoS-style affiliation parsing of C1 and RP, as flagged in metatagextraction.py. Non-WoS sources will silently produce empty or wrong country values. +5) Issues: data.dropna(subset=["AU1_CO", "AU_CO"]) silently drops all rows if metaTagExtraction() fails to parse affiliations from non-WoS sources, producing an empty DataFrame with no error; no validation that C1 or RP exist before calling metaTagExtraction(), mirroring the crash pattern flagged in metatagextraction.py; top_k_countries is applied after sorting but the earlier top_country_names already takes all countries — the filtering step is redundant and misleading. +6) **Yes**. C1 and RP must be present and correctly formatted as list[str] with standardized affiliation strings so metaTagExtraction() can correctly derive AU_CO and AU1_CO. Without this, the function silently returns an empty result. + +### get_countriesproduction.py +1) Extracts author countries via metaTagExtraction(), counts publication frequency per country, downloads world boundary geodata, and produces an interactive choropleth map and a summary table of scientific production by country. +2) **www.services** +3) AU_CO (derived), C1 (implicitly required by metaTagExtraction()). +4) **Yes**. metaTagExtraction(Field="AU_CO") relies on WoS-style affiliation parsing of C1, as flagged in metatagextraction.py. +5) No validation that C1 exists before calling metaTagExtraction(), mirroring the crash pattern flagged in metatagextraction.py. Country name normalization only corrects "USA" → "UNITED STATES OF AMERICA"; all other country name mismatches between the source data and the shapefile silently result in unmatched rows and zero counts. dropna is never called on AU_CO after explode(), so empty list entries produce NaN rows that pollute the country counts. +6) **Yes**. 
C1 must be present and correctly formatted as list[str] with standardized affiliation strings so metaTagExtraction() can correctly derive AU_CO. Country name formatting in C1 should also conform to WoS conventions to maximize matches against the shapefile. + +### get_countriesproductionovertime.py +1) Extracts author countries via metaTagExtraction(), pairs each country with its publication year, computes cumulative article counts over time, and returns a line chart of the top-k countries' production over time plus the underlying DataFrame. +2) **www.services**. +3) AU_CO (derived), PY, C1 (implicitly required by metaTagExtraction()) +4) **Yes**. metaTagExtraction(Field="AU_CO") relies on WoS-style affiliation parsing of C1, as flagged in metatagextraction.py. Non-WoS sources will silently produce empty or wrong country values. +5) Issues: no validation that C1 or PY exist before use, mirroring the crash pattern flagged in metatagextraction.py and thematicmap.py; years = data["PY"].repeat(nAFF).values[:len(affiliations)] silently misaligns years with affiliations if any AU_CO entry was NaN and got dropped by dropna() — the repeat is based on the full DataFrame length but AFF has already dropped rows; PY is never cast to a numeric type before astype(int) — if PY contains empty strings (as our ETL schema allows), this will crash. +6) **Yes**. C1 must be present and correctly formatted as list[str] so metaTagExtraction() can derive AU_CO. PY must be present, non-empty, and castable to integer for the year alignment logic to work correctly. + + +### get_data.py +1) Handles file upload from the Shiny dashboard UI. Depending on the selected mode, it processes one or more bibliographic files via biblio_json() or process_multiple_files(), loads the result into the reactive DataFrame df, and returns a status message to display in the UI. +2) **www.services**. +3) **No**. +4) **Indirectly**. 
biblio_json() and process_multiple_files() are the functions that actually parse and standardize the data — if those have WoS-specific assumptions (as flagged in parsers.py), the DataFrame loaded here will reflect those issues. +5) // +6) **Yes**. This is the entry point where our ETL must be plugged in. The "1B" path in particular must be routed through the standardization pipeline rather than calling pd.read_excel() directly, to ensure all downstream functions receive a correctly typed and validated DataFrame. + +### get_database.py +1) Maps the user's UI selection to a human-readable database name string. Reads two Shiny input controls, input.select() (which tab is active) and input.database() (which source was chosen), and returns a plain string like "Web of Science" or "Scopus". +2) **www.services**. +3) **No**. +4) **None directly**. However this function is the gatekeeper that sets the DB value downstream. The string it returns must match whatever the ETL pipeline uses as the DB column value. +5) Two: DB value mismatch, the exam spec requires DB to hold standardised identifiers like "WEB_OF_SCIENCE" or "SCOPUS" while this function returns display strings ("Web of Science", "Scopus"), which are not the same - if DB is populated from this output, the contract is broken; UnboundLocalError risk, if input.select() returns anything outside "1A", "1B", "1C", the function reaches return database without ever assigning it, so it needs an else branch or a default. +6) **Yes**. Either this function's return values must be updated to match the schema DB identifiers, or the ETL Transform phase must normalise the returned string into the correct DB value before writing to the DataFrame. + +### get_factorialanalysis.py +1) Builds a 2D interactive word map for conceptual structure analysis. 
It takes a DataFrame and a field (ID, DE, TI, AB), constructs a document-term matrix, runs a dimensionality reduction method (MCA, CA, or MDS), clusters the resulting term coordinates with hierarchical clustering, and returns an annotated Plotly scatter figure plus coordinate/cluster DataFrames. Also contains helpers: _to_seq (flatten values to list), eig_correction (Benzecri eigenvalue correction), avoidOverlaps (label deduplication — currently commented out), and assign_consistent_colors. +2) **www.services**. +3) **ID, DE, TI, AB**. +4) field="ID" default. ID (Keywords Plus) is a WoS-exclusive field — it does not exist in Scopus, PubMed, or Dimensions exports. Using ID as the default silently produces an empty or broken analysis on non-WoS data. +5) // +6) **Yes**. The ETL must ensure that: ID is a list[str] (WoS Keywords Plus) and, for non-WoS sources that lack ID, is populated as [] per the null contract — but also ensure the UI defaults field to DE (author keywords) for those sources, since an all-empty ID column will produce no usable analysis; DE, TI, AB are correctly typed (list[str] for DE, str for TI/AB). + +### get_filters.py +1) Two functions. get_filters() enriches the DataFrame with computed filter metadata: min/max publication year, average citations per year, and Bradford's Law zone assignment per source journal. get_filtered_table() applies user-driven UI filters (year range, language, document type, avg citations, Bradford zone) to the enriched DataFrame, then passes the result to get_table() for display. +2) **www.services**. +3) **PY, TC, SO, LA, DT**. +4) **Yes**. LA and DT value sets are implicitly WoS-formatted. The UI populates filter options from whatever values exist in these columns. WoS uses "English" and "Article"; Scopus may use "English" but "Journal Article" for DT. If not normalised by ETL, the filter checkboxes will show mixed values and users may filter out valid records unintentionally. 
|| Bradford zone logic assumes SO is a clean, standardised journal name. WoS and Scopus capitalise journal names differently, so the same journal can appear as two separate sources, splitting its frequency and producing wrong zone assignments. +5) Division by zero or negative denominator in Average_Citations_Per_Year. If PY == current_year, Years_Since_Publication = 1 — safe. But if PY > current_year (malformed data), the denominator becomes zero (PY == current_year + 1) or negative (PY later than that). No guard exists. ETL should clamp PY to <= current_year. || TC nulls not handled. If TC contains NaN (not coerced to 0 by ETL), Average_Citations_Per_Year will be NaN, silently breaking the citations slider filter in get_filtered_table(). +6) **Yes**. The ETL must: Cast TC to int, nulls → 0 || Cast PY to int, no nulls, clamped to valid range || Normalise SO to a consistent casing (uppercase) across sources || Normalise DT to a controlled vocabulary (e.g. "Article", "Review") so UI filters work identically regardless of source || Normalise LA to a consistent format (e.g. "ENGLISH"). + +### get_frequentwords.py +1) Two functions. get_frequent_words() produces a lollipop scatter chart and full frequency table of the most common words/keywords in a chosen field. It supports n-grams (for TI/AB), custom stopword removal, and synonym merging. table_tag() is the core extraction engine: it deduplicates by SR, routes to either term_extraction() (for free text fields TI/AB) or direct column access (for keyword fields DE/ID), then counts terms using Counter. +2) **www.services**. +3) SR, and one of DE, ID, TI, AB depending on word_type +4) **Yes**. ID (Keywords Plus) is WoS-exclusive. Selecting word_type="ID" on non-WoS data will operate on an empty or absent column with no error. || SR deduplication assumes SR is always populated. SR is a calculated field ("FirstAuthor, Year, Journal") generated by the WoS pipeline. If ETL does not produce it, drop_duplicates(subset='SR') will raise a KeyError. 
|| eval(x) on DE/ID strings assumes the column was serialised as a Python list literal (e.g. "['kw1', 'kw2']"), which is a WoS/internal serialisation convention. Scopus CSV exports use semicolon-delimited strings, causing eval() to raise a SyntaxError or return the wrong structure. +5) remove_terms is only applied for DE/ID, not for TI/AB. The guard if remove_terms and tag in ['DE', 'ID'] means stopword removal is silently skipped when analysing titles or abstracts, which is likely unintentional. || Missing SR crashes without a clear message. If SR is absent, drop_duplicates(subset='SR') raises KeyError with no informative message to the user. +6) **Yes**. The ETL must: Populate SR for all rows (non-empty string). || Ensure DE and ID are list[str], not raw strings — this eliminates the eval() hazard entirely. || Ensure ID is [] for non-WoS sources so the function degrades gracefully rather than crashing. || TI and AB must be str, not NaN/None. + +### get_historiograph.py +1) Builds an interactive historiographic network map showing citation relationships between key papers over time. It calls metaTagExtraction() and histNetwork() from services to construct the citation graph, then histPlot() for the initial layout. It then rebuilds the graph with networkx, optionally removes isolated nodes, positions nodes on a timeline (x = year, y = cluster), computes node sizes from local citation scores (LCS), and renders an interactive pyvis HTML network saved to a temp file. Returns the plot object, a metadata DataFrame, and the temp HTML filename. +2) **www.services**. +3) **SR, CR, DOI, TI, DE, ID, PY**. +4) **Yes**. histNetwork() parses CR using WoS reference string format ("Author, Year, Journal, Vol, Page"). This is the most WoS-specific dependency in the entire codebase. Non-WoS CR strings will produce zero or wrong citation matches, resulting in an empty or disconnected graph. || metaTagExtraction(df, "SR") regenerates SR from WoS-style author/year/journal fields. 
If SR was not correctly populated by ETL, this call may produce malformed node identifiers that break edge matching. || node_label="ID" and node_label="DE" are swapped. The code maps "ID" → row.get("Author_Keywords") and "DE" → row.get("KeywordsPlus"), which is the reverse of the standard schema (DE = author keywords, ID = Keywords Plus). This is a WoS internal naming artefact from histNetwork() output columns. +5) DE/ID label mapping is inverted (as noted above). A user selecting node_label="DE" gets Keywords Plus, not author keywords. Needs a one-line swap or renaming in histNetwork() output. || eval() used again for DE/ID node labels (same pattern as get_frequentwords.py). Unsafe and redundant if ETL guarantees list[str]. +6) **Yes, high priority**. The ETL must: Populate SR correctly as "FirstAuthor, Year, Journal" — it is the primary node key for the entire graph. || Normalise CR entries to WoS reference string format, as histNetwork() depends on it for edge construction. This is the single highest-risk dependency in the project for non-WoS sources. || Ensure DOI is str, empty string "" if missing (not NaN). || Ensure DE and ID are list[str] to eliminate the eval() calls. + + +### get_localcitedauthors.py +1) Finds which authors are most cited within the dataset itself (not globally), ranks them by local citation count, and draws a dot chart of the top-k authors. +2) **www.services**. +3) **AU**, **TC** (core); **SR** (must already exist or be built by `metaTagExtraction` before use). +4) **No** explicit DB checks, but `metaTagExtraction` and `histNetwork` are built for WoS-style data. +5) `AU` is exploded without checking if it is a proper list — plain strings will produce wrong results. If `histNetwork` returns an empty result the function crashes immediately. `SR` is rebuilt here by `metaTagExtraction`, which should instead already be present from the ETL. +6) **Yes**. 
`AU` must be a `list[str]`, `TC` must be non-null and numeric, and `SR` must be correctly built by the ETL pipeline. + +### get_localciteddocuments.py +1) Finds which papers in the dataset are most cited by other papers in the same dataset (local citations), ranks them, and draws a dot chart of the top-k documents. Also returns a table with local citations, global citations, and normalized metrics per document. +2) **www.services**. +3) **SR**, **TC**, **DI**, **PY** (core). +4) **No** explicit DB checks, but `SR` is expected in WoS format and `histNetwork` is built for WoS-style data. +5) `SR` is rebuilt here internally instead of being taken from the ETL pipeline. If `TC` contains nulls, `fillna(0)` handles it, but the LC/GC ratio calculation will produce division by zero for papers with zero global citations. If `histNetwork` returns an empty result the function crashes immediately. +6) **Yes**. `SR` must be correctly built by the ETL, `TC` and `PY` must be non-null and numeric, and `DI` must be present as a string. + +### get_localcitedreferences.py +1) Counts how many times each reference is cited across all papers in the dataset, ranks them, and draws a dot chart of the most cited references. Unlike global citation counts, this only looks at citations within the dataset itself. +2) **www.services**. +3) **CR** only. +4) **No** explicit DB checks, but the fallback string splitting uses the user-provided separator, which means the function can handle non-WoS sources if `CR` is correctly formatted as a list or delimited string. +5) Crashes if `CR` is missing entirely. If `CR` is an empty list or all nulls the chart will be empty with no clear error. The check `isinstance(data["CR"].iloc[0], list)` will crash if the DataFrame is empty. +6) **Yes**. `CR` must be present and correctly formatted as a `list[str]` where each element is an individual reference string. 
+ +### get_localcitedsources.py +1) Counts how many times each journal or source is cited across all papers in the dataset, ranks them, and draws a dot chart of the most locally cited sources. The source names are extracted from the cited references using `metaTagExtraction`. +2) **www.services**. +3) **CR** (needed by `metaTagExtraction` to extract `CR_SO`); **CR_SO** (derived column, used directly for counting). +4) **Yes**. `metaTagExtraction` parses source names from WoS-style reference strings. Non-WoS sources with differently formatted references will likely produce empty or wrong results. +5) Crashes if `CR_SO` is missing or empty. The check `isinstance(data["CR_SO"].iloc[0], list)` will crash if the DataFrame is empty. If `metaTagExtraction` fails silently, the chart will be empty with no clear error. +6) **Yes**. `CR` must be present as a properly formatted list of reference strings so that `metaTagExtraction` can correctly extract the source names into `CR_SO`. + +### get_lotkalaw.py +1) Applies Lotka's Law to measure author productivity — it counts how many authors wrote exactly 1, 2, 3... papers, compares the observed distribution against the theoretical one, and draws a line chart showing both curves side by side. +2) **www.services**. +3) **AU** only. +4) **No** explicit DB checks. +5) Crashes if `AU` is missing or not a list — the list flattening `[author for sublist in data['AU'] for author in sublist]` will fail if any row is a plain string or null. If all authors wrote only one paper, `np.polyfit` may produce unreliable results with no warning. +6) **Yes**. `AU` must be present and correctly formatted as a `list[str]` per row. + +### get_maininformations.py +1) Computes a comprehensive set of summary statistics for the dataset and adds them as new columns to the DataFrame. 
Metrics include: publication year range, unique sources, annual growth rate (CAGR), unique authors, single-authored documents, international co-authorship percentage, co-authors per document, unique author keywords, references per document, average document age, and average citations per document. Returns the enriched DataFrame. This is the main "overview" function used to populate the summary panel of the dashboard. +2) **www.services**. +3) **PY**, **SO**, **AU**, **TC**, **CR**, **DE** (core); **AU_CO** (derived — extracted by `metaTagExtraction()` if not already present). +4) `metaTagExtraction(df, "AU_CO")` is called to extract country information from WoS-style affiliation strings if `AU_CO` is missing. Non-WoS sources with differently formatted affiliations will produce wrong or empty country counts, causing the international co-authorship metric to be zero or incorrect. +5) `AU` is iterated as a list without a null guard — if any row contains a plain string instead of a list, the flattening `[author for sublist in AU_list for author in sublist]` will iterate over characters and produce wrong author counts silently. Same issue applies to `DE` and `CR`. CAGR calculation divides by `ny = max - min` which will be zero if all papers are from the same year, causing a `ZeroDivisionError`. +6) **Yes, high priority.** Ensure `AU`, `DE`, and `CR` are all `list[str]` — this function iterates over them directly and will silently produce wrong results if they are plain strings. Ensure `PY` is non-null and numeric to avoid crashes in year-range and age calculations. Ensure `TC` is numeric with nulls replaced by `0`. Ensure `C1` or `RP` are correctly populated so that `metaTagExtraction()` can extract `AU_CO` if needed. 
+ +### get_referencesspectroscopy.py +1) Generates a Reference Publication Year Spectroscopy (RPYS) analysis — a technique that identifies which historical years had the most influence on a research field by counting how often papers from each year are cited in the dataset's reference lists. It extracts publication years from each cited reference string, counts citations per year, computes a 5-year moving median deviation to highlight anomalous peaks, and returns an interactive dual-line chart, a year-level summary table, and a reference-level table with Google Scholar links. +2) **www.services**. +3) **CR** only. +4) Year extraction from reference strings uses the regex `r'\b\d{4},'` which matches a 4-digit year followed by a comma — this is the WoS reference string format ("Author, Year, Journal, Vol, Page"). Non-WoS reference formats that place the year differently (e.g. PubMed, Scopus) will produce zero year matches, resulting in an empty chart. +5) `df['CR'].apply(lambda x: [i for i in x])` assumes `CR` is already a list — if it arrives as a plain string it will iterate over characters and produce garbage silently. If `CR` is entirely empty or null the `year_seq.min()` call will crash. The year regex silently assigns `0` to references where no year is found, which then pollutes the year distribution if not filtered out. +6) **Yes, high priority.** Ensure `CR` is a `list[str]` where each element is a properly formatted reference string. The year regex `r'\b\d{4},'` requires the year to be followed by a comma — ETL must ensure CR entries follow the WoS format "Author, Year, Journal, Vol, Page" for year extraction to work correctly across all sources. References with no detectable year should be filtered out rather than assigned year `0`. + +### get_relevantaffiliations.py +1) Ranks institutions by number of publications and draws a dot chart of the top-k affiliations. 
Depending on the `disambiguation` parameter, it either uses `AU_UN` (a cleaned and disambiguated university name field) or the raw `C1` affiliation strings. Returns the chart and a summary table. +2) **www.services**. +3) **AU_UN** or **C1** depending on the `disambiguation` parameter — only one is used per call. +4) `AU_UN` is a WoS-derived column that contains disambiguated university names — it does not exist natively in non-WoS sources and must be built by the ETL from `C1`. If `disambiguation == "no"`, `C1` is used directly, which is more portable across sources. +5) Crashes immediately if `AU_UN` is missing when `disambiguation == "yes"`, or if `C1` is missing when `disambiguation == "no"` — no guard exists for either case. Both columns are expected to be `list[str]` per row — plain strings will produce wrong results after `explode()`. The docstring mentions `num_of_authors` and `frequency` as parameter names but the actual parameters are `num_of_affiliations` and `disambiguation`, indicating copy-paste drift. +6) **Yes.** Ensure `C1` is present as a `list[str]` of affiliation strings — it is the primary input when `disambiguation == "no"` and the source for building `AU_UN` when `disambiguation == "yes"`. Ensure `AU_UN` is derived from `C1` during the ETL Transform phase and stored as a `list[str]` of cleaned university names. + +### get_relevantauthors.py +1) Ranks authors by number of publications, percentage of documents, or fractionalized count (where each author of a multi-authored paper gets a fractional credit), and draws a dot chart of the top-k authors. Returns the chart and a full ranking table. +2) **www.services**. +3) **AU** only. +4) **No** explicit DB checks, but `AU` is expected in WoS author format. The fallback `lambda x: x if isinstance(x, list) else []` silently replaces non-list values with an empty list instead of trying to parse them, which means authors from non-WoS sources arriving as delimited strings will be completely ignored. 
+5) Non-list `AU` values are silently dropped rather than parsed, so non-WoS sources that store authors as semicolon-delimited strings will produce an empty chart with no error. The `frequency` parameter values in the docstring (`"N. of Documents"`, `"Percentage"`, `"Fractionalized"`) do not match the actual values checked in the code (`"percentage"`, `"freq_measure"`), meaning the default `"N. of Documents"` always falls through to the raw count branch regardless of user selection. +6) **Yes.** Ensure `AU` is present and correctly formatted as a `list[str]` per row — non-list values are silently ignored, producing wrong author counts. Ensure author names follow a consistent format (e.g. `"Surname, Firstname"`) across all sources to avoid duplicate entries for the same author. + +### get_relevantsources.py +1) Ranks journals or sources by number of publications and draws a dot chart of the top-k sources. Returns the chart and a full ranking table. +2) **www.services**. +3) **SO** only. +4) **No** explicit DB checks, but `SO` is the WoS tag for journal/source name. Sources using a different column name will crash immediately. +5) Crashes if `SO` is missing entirely. No check is performed on whether `SO` values are plain strings — if they arrive as lists the `value_counts()` will produce wrong results. No guard against an empty dataset after `dropna()`. +6) **Yes.** Ensure `SO` is present, non-null, and a plain string representing the journal or source name. Standardize casing consistently across sources (e.g. always uppercase) to avoid the same journal appearing multiple times under different capitalizations. + +### get_sourceslocalimpact.py +1) Calculates impact scores (h-index, g-index, m-index, total citations, number of papers) for each journal or source, ranks them by the chosen metric, and draws a horizontal bar chart of the top-k sources. Returns the chart and the full ranking table. +2) **www.services**. +3) **SO**, **TC**, **PY**. 
+4) **No** explicit DB checks, but `SO`, `TC`, and `PY` are all WoS column tags. The function will crash immediately on sources that use different column names. +5) `h_calc` and `g_calc` are applied via `transform` instead of `agg`, which calls them once per row rather than once per group — this produces incorrect index values silently. `TC` and `PY` are cast with `errors='coerce'` and rows with nulls are dropped, but no warning is raised if a large fraction of rows is lost. Division by zero is possible in `m_index` if `today == PY_start - 1`, though extremely unlikely. +6) **Yes.** Ensure `SO` is present as a non-null string, `TC` is numeric with nulls replaced by `0`, and `PY` is a valid 4-digit year. The `h_calc` and `g_calc` functions need to be fixed to use `agg` instead of `transform` to produce correct index values — this is a bug in the function itself that the ETL cannot work around. + +### get_sourcesproduction.py +1) Computes annual or cumulative publication counts per journal over time, selects the top-k sources by total output, and draws a multi-line chart showing each source's production trajectory. Returns the chart and the year-by-source matrix. +2) **www.services**. +3) **SO**, **PY** — both accessed directly and also passed to `cocMatrix()` internally. +4) **No** explicit DB checks, but `SO` and `PY` are WoS column tags. `cocMatrix()` is also built assuming WoS-style input. +5) `PY` is cast to `str` before `cocMatrix()` and back to `int` after — if `PY` contains nulls or non-numeric values this double cast will crash. If all papers belong to a single source, the `WSO.shape[1] == 1` case is handled, but if `SO` is entirely missing `cocMatrix()` will crash with no clear error. No guard against `num_of_sources_production` being zero. +6) **Yes.** Ensure `SO` is present as a non-null string and `PY` is a valid 4-digit integer — both are cast and used in matrix operations that will crash or silently produce wrong results if the types are incorrect. 
+ +### get_status.py +1) Two small utility functions: `get_status()` converts a list of missing-value percentages into human-readable status labels (Excellent, Good, Acceptable, Poor, Critical, Completely missing), and `get_status_color()` maps each status label to a CSS background color for dashboard display. Used to give a quick visual quality assessment of the dataset columns. +2) **www.services**. +3) **None** — this file does not access any DataFrame column. It only processes a list of percentages passed in as a parameter. +4) **No**. +5) No input validation on `missing_percentage` — if a non-numeric value is passed, the comparisons will fail silently and return `"Unknown"`. The two functions are tightly coupled by string labels but there is no shared constant, so a typo in one function would break the other silently. +6) **No direct ETL relevance.** This is a pure utility file for dashboard display. The ETL pipeline does not need to produce any specific column for this function to work. + +### get_table.py +1) Generates a metadata completeness report for the loaded dataset. It counts missing values, empty strings, and empty lists for every column, calculates the percentage of missing data per column, assigns a quality status (Excellent, Good, Acceptable, Poor, Critical, Completely missing), and displays the results as both a Plotly table and an interactive HTML data table with export buttons. This is the main data quality dashboard panel — it gives users an immediate overview of which columns are well populated and which need attention. +2) **www.services**; **get_status** (imported explicitly for status label and color functions). +3) **All columns present in the DataFrame** — it iterates over every column to compute missing value counts. The `column_descriptions` dictionary defines a fixed set of expected columns: `AB, AU, AU_UN, DB, DE, DT, LA, PU, PY, RP, SC, SO, SR, TC, TI, UT, C1, CR, OI, AU1_UN, EM, DI, BP, EP, SN, VL, ID, FU, FX, JI, OA, IS, PMID`. 
+4) **No** explicit DB checks, but the `column_descriptions` dictionary is entirely based on WoS field tags. Non-WoS columns not in this dictionary will still appear in the table but with no human-readable description. +5) The status color mapping in `create_plotly_table` uses `"Fair"` and `"Poor"` as keys, but `get_status()` never produces `"Fair"` — it produces `"Acceptable"` instead. This means the color for `"Acceptable"` rows will always fall through to `"white"`, losing the intended visual warning. Missing values are counted as NaN, empty string, single space, or empty list — but not `None`, which may slip through undetected. +6) **Yes.** The ETL must ensure all mandatory columns defined in the schema are present in the DataFrame — even if empty — so this function can report their completeness status correctly. Columns populated with `None` instead of `""` or `[]` will be undercounted in the missing value report, giving a false "Excellent" status. + +### get_thematicevolution.py +1) Tracks how research themes evolve over time by splitting the dataset into user-defined time periods, running a full thematic map analysis on each period, and then computing inclusion, weighted inclusion, and stability indices to measure how strongly themes from one period carry over into the next. The results are visualised as an interactive network where nodes are research clusters and directed edges show thematic continuity between periods. Also returns a summary table of cluster transitions and the raw thematic map results per period. One of the most complex files in the codebase — it internally calls `thematic_map()`, `timeslice()`, and `plot_thematic_evolution()`. +2) **www.services** +3) **None directly** — all column access is delegated to `thematic_map()` and `timeslice()`. `PY` is the only column accessed directly inside `timeslice()`. +4) **Yes**. The field names `ID`, `DE`, `TI`, `AB` are WoS column tags passed to `thematic_map()` internally. 
Non-WoS sources using different names will produce empty results. `thematic_map()` also assumes WoS-style keyword formatting for `ID` and `DE`. +5) If `years` is not provided the function raises a `ValueError` immediately — no default is computed. If any time period produces zero clusters, the function prints a message and returns early with no chart and no clear error to the user. The `thematic_map()` return value is assumed to be a tuple but is also checked for being a dict — this inconsistency suggests the internal API is unstable and may break silently depending on the version. Temporary HTML file is never deleted. +6) **Yes, high priority.** Ensure `PY` is non-null and numeric — it is the only column used directly by `timeslice()` to split the data into periods, and wrong values will produce empty or misaligned time slices. Ensure `ID` and `DE` are `list[str]` — they are the primary inputs to `thematic_map()` for keyword network construction. For non-WoS sources that lack `ID`, populate it as `[]` per the null contract, but ensure the UI defaults the field to `DE` for those sources since an all-empty `ID` column will produce no usable analysis. + +### get_thematicmap.py +1) A thin wrapper around the internal `thematic_map()` function. It passes all parameters directly to `thematic_map()`, which builds a keyword co-occurrence network, detects research clusters, and positions them on a centrality vs. density bubble chart. Returns the map figure, the HTML network file path, and three DataFrames: term-level data, cluster-level data, and document-to-cluster assignments. +2) **www.services**. +3) **None directly** — all column access is delegated entirely to `thematic_map()`. +4) **Yes**. The field names `ID`, `DE`, `TI`, `AB` are WoS column tags passed through to `thematic_map()`. Non-WoS sources using different names will produce empty results. 
+5) This file has no error handling of its own — if `thematic_map()` crashes or returns unexpected output, the exception propagates directly to the caller with no useful context. The return value assumes `thematic_map()` always returns exactly 5 values — if the internal API changes this will break silently. +6) **Yes.** Ensure `ID` and `DE` are `list[str]` — they are the primary inputs to `thematic_map()`. For non-WoS sources that lack `ID`, populate it as `[]` and ensure the UI defaults the field to `DE`, since an all-empty `ID` column will produce no usable analysis. Ensure `TI` and `AB` are non-null strings if those fields are selected. + +### get_threefieldplot.py +1) Generates a Sankey diagram showing relationships between three user-selected bibliographic fields (e.g. authors → keywords → journals). For each field it builds a document-attribute matrix, computes co-occurrence counts between adjacent fields, and draws the flows as proportional bands connecting the three columns. Optionally derives extra columns like `CR_SO`, `AU_CO`, `AB_TM`, `TI_TM` via internal functions before building the matrices. +2) **www.services**; **textwrap**. +3) **None directly** — all column access is delegated to `cocMatrix()`, `metaTagExtraction()`, and `term_extraction()`. The actual columns consumed depend entirely on which fields the user selects. +4) **Yes**. All field names (`AU`, `DE`, `ID`, `SO`, `CR`, `TI`, `AB`, `WC`, `AU_CO`, `CR_SO`) are WoS column tags passed to `cocMatrix()`. Non-WoS sources using different names will produce empty matrices and a blank Sankey diagram with no error. +5) If any of the three `cocMatrix()` calls returns an empty matrix, the dot product for edge computation will silently produce an empty edge list and the diagram will render blank with no explanation. If `metaTagExtraction()` fails to extract `CR_SO` or `AU_CO`, those fields will be missing and `cocMatrix()` will crash immediately. 
+6) **Yes.** Ensure all potential field columns (`AU`, `DE`, `ID`, `SO`, `CR`, `TI`, `AB`, `C1`, `WC`) are present and correctly typed — `list[str]` for multi-value fields and `str` for scalar fields. Ensure `C1` is populated correctly so that `metaTagExtraction()` can derive `AU_CO` and `CR_SO` when those fields are selected. + +### get_treemap.py +1) Counts the most frequent words or keywords in a selected field, and displays them as an interactive treemap where each rectangle's size represents the word's frequency. For title (`TI`) and abstract (`AB`) fields it first runs text mining to extract meaningful terms before counting. Also returns a full frequency table. Contains an internal helper function `table_tag()` that handles the actual word extraction and counting. +2) **www.services**. +3) **SR** (used inside `table_tag()` for deduplication); **DE**, **ID**, **TI**, **AB** (whichever is passed as `word_type`). +4) **No** explicit DB checks, but field names `DE`, `ID`, `TI`, `AB` are all WoS tags. Non-WoS sources using different names will produce empty results. +5) `SR` must be present for deduplication — if missing, `drop_duplicates(subset='SR')` crashes immediately. For `DE` and `ID`, `eval()` is called on string values — this is unsafe if the column contains arbitrary text instead of a properly formatted list string, and redundant if the ETL already guarantees `list[str]`. If `word_type` is not one of the handled cases, `text_data` will be an unprocessed column and the word extraction will silently produce wrong results. +6) **Yes.** Ensure `SR` is present and non-null. Ensure `DE` and `ID` are `list[str]` to eliminate the unsafe `eval()` call. Ensure `TI` and `AB` are non-null strings if those fields are selected. + +### get_trendtopics.py +1) Identifies which words or keywords were most prominent in each time period by computing the median publication year for each term and plotting them as a bubble chart (term vs. year, bubble size = frequency). 
For title and abstract fields it first runs text mining before counting. Also returns the full trend data table. Contains an internal helper `field_by_year()` that builds the co-occurrence matrix and computes year quantiles per term. +2) **www.services**. +3) **PY** (accessed directly inside `field_by_year()`); **DE**, **ID**, **TI**, **AB**, or any derived field like `TI_TM`, `AB_TM` depending on `field_tt`. +4) **No** explicit DB checks, but field names are all WoS tags. Non-WoS sources using different names will produce empty results. +5) `PY` is used directly in `np.repeat(df['PY'], x)` without null checks — missing or non-numeric values will cause a crash. If the selected field is empty or missing, `cocMatrix()` will return an empty matrix and `np.quantile()` will crash on an empty array. If `term_extraction()` fails, the derived `TI_TM` or `AB_TM` column will be missing and the function crashes immediately. +6) **Yes.** Ensure `PY` is non-null and numeric — it is used directly in quantile calculations per term. Ensure `DE`, `ID`, `TI`, `AB` are correctly typed (`list[str]` for `DE`/`ID`, `str` for `TI`/`AB`) depending on the selected field. + +### get_wordcloud.py +1) Generates an interactive word cloud rendered as a pyvis HTML network where each word is a text-only node, sized and coloured by frequency. It calls table_tag() (defined locally, identical to the one in get_frequentwords.py) to count terms, places nodes at random polar coordinates within a compact radius, applies ForceAtlas2 physics for slight jitter, saves the result to a temp HTML file, and returns the filename plus a full frequency table. +2) **www.services**. +3) **SR, and one of DE, ID, TI, AB**. +4) ID is WoS-exclusive, same as in get_frequentwords.py and get_wordfrequency.py. || SR deduplication assumes SR is always populated, same as get_frequentwords.py. || eval() on DE/ID strings, same unsafe pattern as get_frequentwords.py. 
+5) remove_terms is silently not applied for TI/AB, inherited from table_tag() — same bug as in get_frequentwords.py. +6) **Yes**. Same requirements as get_frequentwords.py: SR must be populated for all rows. || DE and ID must be list[str] to eliminate eval(). || ID must be [] for non-WoS sources. || TI and AB must be str, not NaN/None. + +### get_wordfrequency.py +1) Plots word/keyword frequency over time as a multi-line chart, one line per term. It calls term_extraction() for free-text fields (TI/AB) or reads keyword columns directly (DE/ID), then passes data to keyword_growth() which builds a year × term frequency DataFrame (cumulative or per-year). Two helpers are defined locally: trim_years() (fills a year range with observed frequencies and optionally cumulates) and keyword_growth() (parses terms, applies synonym merging and stopword removal, selects top-N terms, and assembles the final time series). +2) **www.services**. +3) **PY, and one of DE, ID, TI, AB depending on field_wf**. +4) ID is WoS-exclusive (Keywords Plus). Same risk as in get_frequentwords.py — passing field_wf="ID" on non-WoS data silently operates on an empty column. || keyword_growth() splits on sep=";" by default, which matches WoS keyword serialisation. Scopus uses "; " (with trailing space) so terms may arrive with leading spaces (e.g. " MACHINE LEARNING") that survive the .upper() call and prevent correct term matching or synonym replacement. +5) data['Year'].min() and data['Year'].max() in keyword_growth() will raise a ValueError if PY is empty after dropna. No guard exists for empty DataFrames after filtering. || Leading/trailing whitespace in terms is not stripped before Counter/groupby. Terms like " MACHINE LEARNING" and "MACHINE LEARNING" are counted separately, fragmenting frequencies. +6) **Yes**. The ETL must: Cast PY to int with no nulls — required by keyword_growth() for year range construction. 
|| Ensure DE and ID are list[str] so the isinstance(x, str) branch in keyword_growth() is never taken, avoiding semicolon-split issues entirely. || Ensure ID is [] for non-WoS sources. || TI and AB must be str, not NaN/None. + +### get_worldmapcollaboration.py +1) Builds an interactive historiographic network map showing citation relationships between key papers over time. It calls metaTagExtraction() and histNetwork() from services to construct the citation graph, then histPlot() for the initial layout. It then rebuilds the graph with networkx, optionally removes isolated nodes, positions nodes on a timeline (x = year, y = cluster), computes node sizes from local citation scores (LCS), and renders an interactive pyvis HTML network saved to a temp file. Returns the plot object, a metadata DataFrame, and the temp HTML filename. +2) **www.services**. +3) **SR, CR, DOI, AU, TI, DE, ID, PY** +4) histNetwork() parses CR using WoS reference string format ("Author, Year, Journal, Vol, Page"). This is the most WoS-specific dependency in the entire codebase. Non-WoS CR strings will produce zero or wrong citation matches, resulting in an empty or disconnected graph. || metaTagExtraction(df, "SR") regenerates SR from WoS-style author/year/journal fields. If SR was not correctly populated by ETL, this call may produce malformed node identifiers that break edge matching. || node_label="ID" and node_label="DE" are swapped. The code maps "ID" → row.get("Author_Keywords") and "DE" → row.get("KeywordsPlus"), which is the reverse of the standard schema (DE = author keywords, ID = Keywords Plus). This is a WoS internal naming artefact from histNetwork() output columns. +5) DE/ID label mapping is inverted (as noted above). A user selecting node_label="DE" gets Keywords Plus, not author keywords. Needs a one-line swap or renaming in histNetwork() output. || eval() used again for DE/ID node labels (same pattern as get_frequentwords.py). Unsafe and redundant if ETL guarantees list[str]. 
|| hist_data["GCS"] cast to int in tooltip without null guard — if GCS is NaN, int(row.get('GCS', 0)) will raise a ValueError because int(float('nan')) fails in Python. +6) **Yes, high priority**. Populate SR correctly as "FirstAuthor, Year, Journal" — it is the primary node key for the entire graph. || Normalise CR entries to WoS reference string format, as histNetwork() depends on it for edge construction. This is the single highest-risk dependency in the project for non-WoS sources. || Ensure DOI is str, empty string "" if missing (not NaN). || Ensure DE and ID are list[str] to eliminate the eval() calls. + + + +--- + +## Summary + +### All columns required across the entire codebase +| Column | Used by | +|--------|---------| +| AU | biblionetwork.py, get_authorlocalimpact.py (core direct dependency), get_authorproductionovertime.py (core direct dependency), get_clusteringcoupling.py (indirect dependency via couplingMap), get_collaborationnetwork.py (core direct dependency), get_correspondingauthorcountries.py (indirect dependency via metaTagExtraction), get_historiograph.py (secondary direct dependency), get_localcitedauthors.py (core direct dependency), get_lotkalaw.py (core direct dependency), get_maininformations.py (core direct dependency), get_relevantauthors.py (core direct dependency), get_table.py (schema-level expected column), get_threefieldplot.py (indirect dependency via cocMatrix), get_worldmapcollaboration.py (secondary direct dependency) | +| AB | get_co_occurence_network.py (indirect dependency), get_factorialanalysis.py (core direct dependency), get_frequentwords.py (conditional direct dependency), get_table.py (schema-level expected column), get_thematicmap.py (indirect dependency via thematic_map), get_threefieldplot.py (indirect dependency via cocMatrix / term_extraction), get_treemap.py (conditional direct dependency), get_trendtopics.py (conditional direct dependency), get_wordcloud.py (conditional direct dependency), get_wordfrequency.py 
(conditional direct dependency) | +| TI | get_authorproductionovertime.py (secondary direct dependency), get_co_occurence_network.py (indirect dependency), get_factorialanalysis.py (core direct dependency), get_frequentwords.py (conditional direct dependency), get_historiograph.py (secondary direct dependency), get_table.py (schema-level expected column), get_thematicevolution.py (indirect dependency via thematic_map), get_thematicmap.py (indirect dependency via thematic_map), get_threefieldplot.py (indirect dependency via cocMatrix / term_extraction), get_treemap.py (conditional direct dependency), get_trendtopics.py (conditional direct dependency), get_wordcloud.py (conditional direct dependency), get_wordfrequency.py (conditional direct dependency), get_worldmapcollaboration.py (secondary direct dependency)| +| PY | get_affiliationproductionovertime.py (core direct dependency), get_annualproduction.py (core direct dependency), get_authorlocalimpact.py (core direct dependency), get_authorproductionovertime.py (core direct dependency), get_averagecitations.py (core direct dependency), get_citeddocuments.py (core direct dependency), get_clusteringcoupling.py (indirect dependency via couplingMap), get_co_occurence_network.py (indirect dependency), get_countriesproductionovertime.py (core direct dependency), get_filters.py (core direct dependency), get_historiograph.py (core direct dependency), get_localciteddocuments.py (core direct dependency), get_maininformations.py (core direct dependency), get_sourceslocalimpact.py (core direct dependency), get_sourcesproduction.py (core direct dependency), get_table.py (schema-level expected column), get_thematicevolution.py (core direct dependency via timeslice), get_trendtopics.py (core direct dependency), get_wordfrequency.py (core direct dependency), get_worldmapcollaboration.py (core direct dependency)| +| AU_UN | get_affiliationproductionovertime.py (core direct dependency), get_collaborationnetwork.py (indirect derived 
dependency via metaTagExtraction), get_relevantaffiliations.py (conditional core direct dependency, used when disambiguation == "yes"), get_table.py (schema-level expected column) | +| TC | get_authorlocalimpact.py (core direct dependency), get_authorproductionovertime.py (core direct dependency), get_averagecitations.py (core direct dependency), get_citedcountries.py (core direct dependency), get_citeddocuments.py (core direct dependency), get_clusteringcoupling.py (indirect dependency via couplingMap), get_filters.py (core direct dependency), get_localcitedauthors.py (core direct dependency), get_localciteddocuments.py (core direct dependency), get_maininformations.py (core direct dependency), get_sourceslocalimpact.py (core direct dependency), get_table.py (schema-level expected column) | +| SO | get_authorproductionovertime.py (secondary direct dependency), get_bradfordlaw.py (core direct dependency), get_filters.py (core direct dependency), get_maininformations.py (core direct dependency), get_relevantsources.py (core direct dependency), get_sourceslocalimpact.py (core direct dependency), get_sourcesproduction.py (core direct dependency), get_table.py (schema-level expected column), get_threefieldplot.py (indirect dependency via cocMatrix) | +| C1 | get_citedcountries.py (indirect dependency via metaTagExtraction), get_collaborationnetwork.py (indirect dependency via metaTagExtraction), get_correspondingauthorcountries.py (indirect dependency via metaTagExtraction), get_countriesproduction.py (indirect dependency via metaTagExtraction), get_countriesproductionovertime.py (indirect dependency via metaTagExtraction), get_maininformations.py (indirect dependency via metaTagExtraction, needed only if AU_CO must be derived), get_relevantaffiliations.py (conditional core direct dependency, used when disambiguation == "no"; also source field for deriving AU_UN), get_table.py (schema-level expected column), get_threefieldplot.py (indirect dependency via 
metaTagExtraction for AU_CO / CR_SO derivation) | +| RP | get_citedcountries.py (indirect dependency via metaTagExtraction), get_correspondingauthorcountries.py (indirect dependency via metaTagExtraction), get_maininformations.py (indirect dependency via metaTagExtraction, needed only if AU_CO must be derived), get_table.py (schema-level expected column)| +| SR | get_citeddocuments.py (core direct dependency), get_clusteringcoupling.py (indirect dependency via couplingMap), get_frequentwords.py (core direct dependency), get_historiograph.py (core direct dependency / also regenerated via metaTagExtraction), get_localcitedauthors.py (indirect / regenerated via metaTagExtraction), get_localciteddocuments.py (core direct dependency / also regenerated internally), get_table.py (schema-level expected column), get_treemap.py (core direct dependency for deduplication), get_wordcloud.py (core direct dependency for deduplication), get_worldmapcollaboration.py (core direct dependency / also regenerated via metaTagExtraction) | +| DI | get_authorproductionovertime.py (secondary direct dependency), get_citeddocuments.py (secondary direct dependency), get_localciteddocuments.py (secondary direct dependency), get_table.py (schema-level expected column)| +| DOI | get_historiograph.py (secondary direct dependency), get_worldmapcollaboration.py (secondary direct dependency) | +| CR | get_clusteringcoupling.py (indirect dependency via couplingMap), get_cocitation.py (indirect dependency via biblionetwork), get_historiograph.py (core direct dependency via histNetwork; WoS-style parsing dependency), get_localcitedreferences.py (core direct dependency), get_localcitedsources.py (indirect dependency via metaTagExtraction), get_maininformations.py (core direct dependency), get_referencesspectroscopy.py (core direct dependency), get_table.py (schema-level expected column), get_threefieldplot.py (indirect dependency via cocMatrix / metaTagExtraction), get_worldmapcollaboration.py (core 
direct dependency via histNetwork; WoS-style parsing dependency)| +| DE | get_clusteringcoupling.py (indirect dependency via couplingMap), get_co_occurence_network.py (indirect dependency), get_factorialanalysis.py (core direct dependency), get_frequentwords.py (conditional direct dependency), get_historiograph.py (secondary direct dependency), get_maininformations.py (core direct dependency), get_table.py (schema-level expected column), get_thematicevolution.py (indirect dependency via thematic_map), get_thematicmap.py (indirect dependency via thematic_map), get_threefieldplot.py (indirect dependency via cocMatrix), get_treemap.py (conditional direct dependency), get_trendtopics.py (conditional direct dependency), get_wordcloud.py (conditional direct dependency), get_wordfrequency.py (conditional direct dependency), get_worldmapcollaboration.py (secondary direct dependency; DE/ID mapping inversion noted) | +| ID | get_clusteringcoupling.py (indirect dependency via couplingMap), get_co_occurence_network.py (indirect dependency), get_factorialanalysis.py (core direct dependency; default WoS-specific field), get_frequentwords.py (conditional direct dependency), get_historiograph.py (secondary direct dependency), get_table.py (schema-level expected column), get_thematicevolution.py (indirect dependency via thematic_map; WoS-default field), get_thematicmap.py (indirect dependency via thematic_map), get_threefieldplot.py (indirect dependency via cocMatrix), get_treemap.py (conditional direct dependency), get_trendtopics.py (conditional direct dependency), get_wordcloud.py (conditional direct dependency; WoS-specific field), get_wordfrequency.py (conditional direct dependency; WoS-specific field), get_worldmapcollaboration.py (secondary direct dependency; DE/ID mapping inversion noted) | +| WC | get_co_occurence_network.py (indirect dependency), get_threefieldplot.py (indirect dependency via cocMatrix) | +| CR_AU | get_cocitation.py (indirect derived dependency via 
metaTagExtraction / biblionetwork) | +| CR_SO | get_cocitation.py (indirect derived dependency via metaTagExtraction / biblionetwork), get_localcitedsources.py (indirect derived dependency via metaTagExtraction; then used directly for counting), get_threefieldplot.py (indirect derived dependency via metaTagExtraction) | +| AU_CO | get_collaborationnetwork.py (indirect derived dependency via metaTagExtraction), get_correspondingauthorcountries.py (indirect derived dependency via metaTagExtraction), get_countriesproduction.py (indirect derived dependency via metaTagExtraction), get_countriesproductionovertime.py (indirect derived dependency via metaTagExtraction), get_maininformations.py (indirect derived dependency via metaTagExtraction), get_threefieldplot.py (indirect derived dependency via metaTagExtraction) | +| AU1_CO |get_correspondingauthorcountries.py (indirect derived dependency via metaTagExtraction), get_table.py (schema-level expected column) | +| DB | get_database.py (core direct dependency), get_table.py (schema-level expected column) | +| LA | get_filters.py (core direct dependency), get_table.py (schema-level expected column) | +| DT | get_filters.py (core direct dependency), get_table.py (schema-level expected column) | +| PU | get_table.py (schema-level expected column) | +| SC | get_table.py (schema-level expected column) | +| UT | get_table.py (schema-level expected column) | +| OI | get_table.py (schema-level expected column) | +| EM | get_table.py (schema-level expected column) | +| BP | get_table.py (schema-level expected column) | +| EP | get_table.py (schema-level expected column) | +| SN | get_table.py (schema-level expected column) | +| VL | get_table.py (schema-level expected column) | +| FU | get_table.py (schema-level expected column) | +| FX | get_table.py (schema-level expected column) | +| JI | get_table.py (schema-level expected column) | +| OA | get_table.py (schema-level expected column) | +| IS | get_table.py (schema-level 
expected column) | +| PMID | get_table.py (schema-level expected column) | +| TI_TM | get_threefieldplot.py (indirect derived dependency via term_extraction) | +| AB_TM | get_threefieldplot.py (indirect derived dependency via term_extraction) | + +### Files that need patching +| File | Line | Issue | +|------|------|-------| +| histnetwork.py | 37 | if db == "Web_of_Science" | +| biblionetwork.py | 94 | if db == "web_of_science" | +| format_functions.py | multiple | if source == "Web_of_Science" | +| couplingmap.py | multiple | Root dependency: assumes WoS-style SR/CR reconstruction through metaTagExtraction(), biblionetwork(), and histNetwork(); breaks bibliographic coupling on non-WoS sources | +| get_authorproductionovertime.py | 28 | fallback str(x).split(",") assumes WoS comma-separated author format | +| get_citedcountries.py | 17 | metaTagExtraction(df, "AU1_CO") assumes WoS-style affiliation parsing | +| get_citeddocuments.py | 17 | metaTagExtraction(df, "SR") rebuilds SR from WoS-style fields | +| get_clusteringcoupling.py | 10 | couplingMap() built for WoS-style SR and CR | +| get_co_occurence_network.py | 38 | field names ID, DE, TI, AB, WC are WoS tags — ID is WoS-exclusive | +| get_cocitation.py | 47 | metaTagExtraction(M, Field="CR_AU") and metaTagExtraction(M, Field="CR_SO") parse WoS-style reference strings | +| get_collaborationnetwork.py | 55, 63 | metaTagExtraction(M, Field="AU_UN") and metaTagExtraction(M, Field="AU_CO") assume WoS-style affiliation strings | +| get_correspondingauthorcountries.py | 16, 17 | metaTagExtraction(df, Field="AU_CO") and metaTagExtraction(df, Field="AU1_CO") assume WoS-style affiliation parsing | +| get_countriesproduction.py | 15 | metaTagExtraction(df, "AU_CO") assumes WoS-style affiliation parsing | +| get_countriesproductionovertime.py | 15 | metaTagExtraction(df, "AU_CO") assumes WoS-style affiliation parsing | +| get_database.py | 18-29 | database = "Web of Science" returns display string instead of standardized identifier (e.g. 
"WEB_OF_SCIENCE") | +| get_factorialanalysis.py | 42 | field="ID" default assumes WoS Keywords Plus — field does not exist in non-WoS sources | +| get_filters.py | 77-78 | LA and DT filter values assume WoS vocabulary ("Article", "English") — non-WoS sources may use different values | +| get_frequentwords.py | 106-119 | drop_duplicates(subset='SR') assumes SR always populated — crashes with KeyError if SR missing / eval(x) on DE/ID assumes WoS-style Python list serialization — breaks with Scopus semicolon-delimited strings | +| get_historiograph.py | 30, 153-159 | metaTagExtraction(df, "SR") rebuilds SR from WoS-style author/year/journal fields; eval() on Author_Keywords / KeywordsPlus is unsafe; DE/ID label mapping is inverted (WoS naming artefact) | +| get_localcitedauthors.py | 22, 29 | metaTagExtraction(df, "SR") rebuilds SR from WoS-style author/year/journal fields / histNetwork() parses CR assuming WoS format Author, Year, Journal, Vol, Page | +| get_localciteddocuments.py | 16, 29 | metaTagExtraction(df, "SR") rebuilds SR from WoS-style fields / histNetwork() parses CR assuming WoS format Author, Year, Journal, Vol, Page | +| get_localcitedsources.py | 17 | metaTagExtraction(df, "CR_SO") parses source names from WoS-style reference strings | +| get_maininformations.py | 101 | metaTagExtraction(df, "AU_CO") assumes WoS-style affiliation parsing (C1/RP) to derive country per author | +| get_referencesspectroscopy.py | 35 | regex r'\b\d{4},' extracts year assuming WoS reference format Author, Year, Journal, Vol, Page — non-WoS formats produce zero year matches | +| get_relevantaffiliations.py | 20 | data["AU_UN"] is a WoS-derived column — does not exist natively in non-WoS sources and must be built by ETL from C1 | +| get_relevantauthors.py | 22 | fallback else [] silently drops non-list AU values — non-WoS sources with semicolon-delimited strings produce empty results | +| get_table.py | 91-125 | column_descriptions dictionary contains only WoS field tags — 
non-WoS columns appear without human-readable description | +| get_thematicevolution.py | 4, 98 | field="ID" default assumes WoS Keywords Plus — thematic_map() produces empty results on non-WoS sources | +| get_thematicmap.py | 4 | field="ID" default assumes WoS Keywords Plus — thematic_map() produces empty results on non-WoS sources | +| get_threefieldplot.py | 24, 26 | metaTagExtraction(df, "CR_SO") and metaTagExtraction(df, "AU_CO") assume WoS-style reference strings and affiliation parsing | +| get_treemap.py | 81, 91 | drop_duplicates(subset='SR') assumes SR always populated / eval(x) on DE/ID assumes WoS-style Python list serialization | +| get_trendtopics.py | 40, 105 | field_tt="ID" routed directly to cocMatrix() — WoS-exclusive field produces empty results on non-WoS sources / np.repeat(df['PY'], x) used without null check | +| get_wordcloud.py | 112, 125 | drop_duplicates(subset='SR') assumes SR always populated / eval(x) on DE/ID assumes WoS-style Python list serialization | +| get_wordfrequency.py | 135 | x.split(sep) with default sep=";" assumes WoS keyword serialization — Scopus uses "; " producing terms with leading spaces | +| get_worldmapcollaboration.py | 12 | metaTagExtraction(df, "AU_CO") assumes WoS-style affiliation parsing (C1/RP) to derive country per author | + +## Notes +- `metatagextraction.py` is the primary root dependency for most WoS-specific parsing issues. + It reconstructs derived fields such as `AU_CO`, `AU1_CO`, `AU_UN`, `CR_SO`, `CR_AU`, and `SR`. + Patching this file first resolves cascading failures across many caller modules. + +- `couplingmap.py` is a secondary root dependency for bibliographic coupling workflows. + It relies on WoS-style `SR` and `CR` normalization through `metaTagExtraction()`, + `biblionetwork()`, and `histNetwork()`. + Caller-side fixes alone are insufficient if coupling normalization remains WoS-dependent. + + +### metatagextraction.py + +Root dependency file for metadata generation. 
+ +Generated fields: +- SR +- CR_AU +- CR_SO +- AU_CO +- AU1_CO +- AU_UN + +Crash risks: +- KeyError if AU missing +- KeyError if DB missing +- KeyError if JI missing +- IndexError if dataframe empty + +WoS-specific logic: +- Assumes WoS citation formatting in CR +- Assumes WoS affiliation structure in C1/RP +- Uses WoS journal fields JI/SO + +Relevant for ETL: +VERY HIGH + +## couplingmap.py + +### Purpose +Builds bibliographic coupling maps and thematic clustering visualizations for documents, authors, and sources. + +### Core Dependencies +- CR +- SR +- AU +- PY +- TC +- SO + +### WoS-Specific Logic +- Assumes WoS-style `CR` formatting for bibliographic coupling. +- Uses `SR` normalization through `metaTagExtraction()`. +- `histNetwork()` and `biblionetwork()` depend on WoS-compatible references. +- Coupling quality may fail on non-WoS sources unless ETL normalization is applied first. + +### Indirect Dependencies +- metaTagExtraction() +- histNetwork() +- biblionetwork() +- cocMatrix() +- term_extraction() + +### Crash Risks +- Missing `CR` → coupling network generation fails. +- Missing `SR` → merge/join logic breaks. +- Missing `PY` or `TC` → normalizeCitationScore() calculations fail. +- Non-list `AU` values may break explode() operations. +- Empty network matrices cause network_plot() failure. + +### Relevant for ETL +YES + +### Notes +`couplingmap.py` is a secondary root dependency for bibliographic coupling workflows and relies heavily on normalized WoS-compatible references. 
\ No newline at end of file diff --git a/execution_evidence.ipynb b/execution_evidence.ipynb new file mode 100644 index 000000000..127f76144 --- /dev/null +++ b/execution_evidence.ipynb @@ -0,0 +1,1139 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "558c1015", + "metadata": {}, + "source": [ + "# ETL Pipeline Demo — Bibliometrix Python\n", + "## Advanced Level \n", + "\n", + "This notebook demonstrates the ETL pipeline developed for the Bibliometrix-Python project.\n", + "The pipeline extracts data from OpenAlex and PubMed APIs, transforms it into the WoS standard schema, and validates the output." + ] + }, + { + "cell_type": "markdown", + "id": "59969788", + "metadata": {}, + "source": [ + "---\n", + "## PHASE 1: EXTRACT\n", + "Data is retrieved via REST APIs from OpenAlex and PubMed.\n", + "The `retrieve()` function handles pagination, rate limits, and retries automatically." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "1538bd20", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== EXTRACT: OpenAlex ===\n", + "Records retrieved: 10\n", + "Sample raw keys: ['id', 'doi', 'title', 'display_name', 'relevance_score', 'publication_year', 'publication_date', 'ids']\n", + "\n", + "Sample title: Scikit-learn: Machine Learning in Python\n" + ] + } + ], + "source": [ + "from www.services.api_retriever import retrieve\n", + "\n", + "print(\"=== EXTRACT: OpenAlex ===\")\n", + "records_oa = retrieve(query=\"machine learning\", platform=\"openalex\", total=10)\n", + "print(f\"Records retrieved: {len(records_oa)}\")\n", + "print(f\"Sample raw keys: {list(records_oa[0].keys())[:8]}\")\n", + "print(f\"\\nSample title: {records_oa[0].get('title', 'N/A')}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "3326a7bb", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== EXTRACT: PubMed ===\n", + "Records retrieved: 10\n", + 
"Sample raw keys: ['uid', 'pubdate', 'epubdate', 'source', 'authors', 'lastauthor', 'title', 'sorttitle']\n", + "\n", + "Sample title: Astrobiology in the Time of Artificial Intelligence.\n" + ] + } + ], + "source": [ + "print(\"=== EXTRACT: PubMed ===\")\n", + "records_pm = retrieve(query=\"machine learning\", platform=\"pubmed\", total=10)\n", + "print(f\"Records retrieved: {len(records_pm)}\")\n", + "print(f\"Sample raw keys: {list(records_pm[0].keys())[:8]}\")\n", + "print(f\"\\nSample title: {records_pm[0].get('title', 'N/A')}\")" + ] + }, + { + "cell_type": "markdown", + "id": "15f16fdf", + "metadata": {}, + "source": [ + "---\n", + "## PHASE 2: TRANSFORM\n", + "Raw API responses are mapped to the WoS standard schema using mapping dictionaries.\n", + "Multi-value fields are cast to `list[str]`, scalar fields to `str`, and `TC` to `int`." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "c4445edb", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== TRANSFORM: OpenAlex ===\n", + "Shape: (10, 26)\n", + "Columns: ['UT', 'DI', 'TI', 'PY', 'LA', 'DT', 'TC', 'SO', 'JI', 'AU', 'AF', 'C1', 'RP', 'AB', 'VL', 'IS', 'BP', 'EP', 'DE', 'AU_CO', 'CR', 'ID', 'PMID', 'DB', 'SR', 'SR_FULL']\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AUTIPYSOTCDB
0[Fabián Pedregosa, Gaël Varoquaux, Alexandre G...Scikit-learn: Machine Learning in Python2012ARXIV (CORNELL UNIVERSITY)63735OPENALEX
1[]Genetic algorithms in search, optimization, an...1989CHOICE REVIEWS ONLINE49340OPENALEX
2[J. R. Quinlan]C4.5: Programs for Machine Learning199223702OPENALEX
\n", + "
" + ], + "text/plain": [ + " AU \\\n", + "0 [Fabián Pedregosa, Gaël Varoquaux, Alexandre G... \n", + "1 [] \n", + "2 [J. R. Quinlan] \n", + "\n", + " TI PY \\\n", + "0 Scikit-learn: Machine Learning in Python 2012 \n", + "1 Genetic algorithms in search, optimization, an... 1989 \n", + "2 C4.5: Programs for Machine Learning 1992 \n", + "\n", + " SO TC DB \n", + "0 ARXIV (CORNELL UNIVERSITY) 63735 OPENALEX \n", + "1 CHOICE REVIEWS ONLINE 49340 OPENALEX \n", + "2 23702 OPENALEX " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from www.services.standardizer import standardize\n", + "import pandas as pd\n", + "\n", + "print(\"=== TRANSFORM: OpenAlex ===\")\n", + "df_oa = standardize(records_oa, source=\"openalex\")\n", + "print(f\"Shape: {df_oa.shape}\")\n", + "print(f\"Columns: {df_oa.columns.tolist()}\")\n", + "df_oa[['AU', 'TI', 'PY', 'SO', 'TC', 'DB']].head(3)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "9bd22907", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== TRANSFORM: PubMed ===\n", + "Shape: (10, 26)\n", + "Columns: ['UT', 'TI', 'SO', 'JI', 'PY', 'VL', 'IS', 'LA', 'DT', 'RP', 'AU', 'AF', 'DI', 'PMID', 'BP', 'EP', 'CR', 'AB', 'C1', 'AU_CO', 'DE', 'ID', 'TC', 'DB', 'SR', 'SR_FULL']\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AUTIPYSOTCDB
0[Scharf C]Astrobiology in the Time of Artificial Intelli...2026Astrobiology0PUBMED
1[Wu H, Wang Y, Song J, Yan B, Wang C, Liu J, H...Expression of the High Affinity Neurotensin Re...2026Annals of surgery0PUBMED
2[Marra JD, Zini G]Next Generation Digital Morphology: Blast Prec...2026International journal of laboratory hematology0PUBMED
\n", + "
" + ], + "text/plain": [ + " AU \\\n", + "0 [Scharf C] \n", + "1 [Wu H, Wang Y, Song J, Yan B, Wang C, Liu J, H... \n", + "2 [Marra JD, Zini G] \n", + "\n", + " TI PY \\\n", + "0 Astrobiology in the Time of Artificial Intelli... 2026 \n", + "1 Expression of the High Affinity Neurotensin Re... 2026 \n", + "2 Next Generation Digital Morphology: Blast Prec... 2026 \n", + "\n", + " SO TC DB \n", + "0 Astrobiology 0 PUBMED \n", + "1 Annals of surgery 0 PUBMED \n", + "2 International journal of laboratory hematology 0 PUBMED " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(\"=== TRANSFORM: PubMed ===\")\n", + "df_pm = standardize(records_pm, source=\"pubmed\")\n", + "print(f\"Shape: {df_pm.shape}\")\n", + "print(f\"Columns: {df_pm.columns.tolist()}\")\n", + "df_pm[['AU', 'TI', 'PY', 'SO', 'TC', 'DB']].head(3)" + ] + }, + { + "cell_type": "markdown", + "id": "853b67c2", + "metadata": {}, + "source": [ + "### Inspect multi-value fields\n", + "Author keywords (`DE`) and cited references (`CR`) must be `list[str]`." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "ca057af5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== Multi-value fields (OpenAlex) ===\n", + "AU type: \n", + "AU sample: ['Fabián Pedregosa', 'Gaël Varoquaux', 'Alexandre Gramfort', 'Vincent Michel', 'Bertrand Thirion', 'Olivier Grisel', 'Mathieu Blondel', 'Müller, Andreas', 'Nothman, Joel', 'Louppe, Gilles', 'Peter Prettenhofer', 'Ron J. 
Weiss', 'Vincent Dubourg', 'Jake Vanderplas', 'Alexandre Passos', 'David Cournapeau', 'Matthieu Brucher', 'Matthieu Perrot', 'Édouard Duchesnay']\n", + "\n", + "DE type: \n", + "DE sample: ['Python (programming language)', 'Documentation', 'Computer science', 'MIT License', 'Artificial intelligence', 'Machine learning', 'Programming language', 'License', 'Software engineering', 'Operating system']\n", + "\n", + "CR type: \n", + "CR sample (first 2): ['Chang C, 2011, ACM TRANSACTIONS ON INTELLIGENT SYSTEMS AND TECHNOLOGY', 'Friedman J, 2010, PUBMED']\n" + ] + } + ], + "source": [ + "print(\"=== Multi-value fields (OpenAlex) ===\")\n", + "print(f\"AU type: {type(df_oa['AU'].iloc[0])}\")\n", + "print(f\"AU sample: {df_oa['AU'].iloc[0]}\")\n", + "print(f\"\\nDE type: {type(df_oa['DE'].iloc[0])}\")\n", + "print(f\"DE sample: {df_oa['DE'].iloc[0]}\")\n", + "print(f\"\\nCR type: {type(df_oa['CR'].iloc[0])}\")\n", + "print(f\"CR sample (first 2): {df_oa['CR'].iloc[0][:2]}\")" + ] + }, + { + "cell_type": "markdown", + "id": "01fe89ce", + "metadata": {}, + "source": [ + "---\n", + "## PHASE 3: VALIDATE\n", + "The validation module checks:\n", + "1. All mandatory columns exist\n", + "2. No NaN or None values remain\n", + "3. Multi-value columns are correctly typed as lists" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "3e2ba653", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== VALIDATE: OpenAlex ===\n", + "Running validation...\n", + " PASS — all mandatory columns present\n", + " PASS — no null values found\n", + " PASS — all column types correct\n", + "Validation passed.\n", + "\n", + "SR sample: Pedregosa F, 2012, ARXIV (CORNELL UNIVERSITY)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SRDEAB
0Pedregosa F, 2012, ARXIV (CORNELL UNIVERSITY)[Python (programming language), Documentation,...Scikit-learn is a Python module integrating a ...
1NA, 1989, CHOICE REVIEWS ONLINE[Computer science, Artificial intelligence, Ma...From the Publisher:\\r\\nThis book brings togeth...
2Quinlan J, 1992,[Computer science, Unix, Classifier (UML), Mac...Classifier systems play a major role in machin...
\n", + "
" + ], + "text/plain": [ + " SR \\\n", + "0 Pedregosa F, 2012, ARXIV (CORNELL UNIVERSITY) \n", + "1 NA, 1989, CHOICE REVIEWS ONLINE \n", + "2 Quinlan J, 1992, \n", + "\n", + " DE \\\n", + "0 [Python (programming language), Documentation,... \n", + "1 [Computer science, Artificial intelligence, Ma... \n", + "2 [Computer science, Unix, Classifier (UML), Mac... \n", + "\n", + " AB \n", + "0 Scikit-learn is a Python module integrating a ... \n", + "1 From the Publisher:\\r\\nThis book brings togeth... \n", + "2 Classifier systems play a major role in machin... " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from www.services.validator import validate\n", + "\n", + "print(\"=== VALIDATE: OpenAlex ===\")\n", + "df_oa = validate(df_oa)\n", + "print(f\"\\nSR sample: {df_oa['SR'].iloc[0]}\")\n", + "df_oa[['SR', 'DE', 'AB']].head(3)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "28c44964", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== VALIDATE: PubMed ===\n", + "Running validation...\n", + " PASS — all mandatory columns present\n", + " PASS — no null values found\n", + " PASS — all column types correct\n", + "Validation passed.\n", + "\n", + "SR sample: Scharf C, 2026, ASTROBIOLOGY\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SRDEAB
0Scharf C, 2026, ASTROBIOLOGY[]
1Wu H, 2026, ANN SURG[]
2Marra J, 2026, INT J LAB HEMATOL[]
\n", + "
" + ], + "text/plain": [ + " SR DE AB\n", + "0 Scharf C, 2026, ASTROBIOLOGY [] \n", + "1 Wu H, 2026, ANN SURG [] \n", + "2 Marra J, 2026, INT J LAB HEMATOL [] " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(\"=== VALIDATE: PubMed ===\")\n", + "df_pm = validate(df_pm)\n", + "print(f\"\\nSR sample: {df_pm['SR'].iloc[0]}\")\n", + "df_pm[['SR', 'DE', 'AB']].head(3)" + ] + }, + { + "cell_type": "markdown", + "id": "3fc74816", + "metadata": {}, + "source": [ + "---\n", + "## FULL PIPELINE — 200 records\n", + "End-to-end demonstration with 200 records per platform, exported to CSV." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "c6956dcb", + "metadata": {}, + "outputs": [], + "source": [ + "from www.services.io_utils import save_standardized_csv, load_standardized_csv, LIST_COLUMNS, STR_COLUMNS\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "3b4b5076", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== FULL PIPELINE: OpenAlex (200 records) ===\n", + "Running validation...\n", + " PASS — all mandatory columns present\n", + " PASS — no null values found\n", + " PASS — all column types correct\n", + "Validation passed.\n", + "Shape: (200, 26)\n", + "CSV saved: test_openalex_200.csv\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AUTIPYSOTCSR
0[Fabián Pedregosa, Gaël Varoquaux, Alexandre G...Scikit-learn: Machine Learning in Python2012ARXIV (CORNELL UNIVERSITY)63735Pedregosa F, 2012, ARXIV (CORNELL UNIVERSITY)
1[]Genetic algorithms in search, optimization, an...1989CHOICE REVIEWS ONLINE49340NA, 1989, CHOICE REVIEWS ONLINE
2[J. R. Quinlan]C4.5: Programs for Machine Learning199223702Quinlan J, 1992,
3[Arthur Asuncion]UCI Machine Learning Repository2007MEDICAL ENTOMOLOGY AND ZOOLOGY24350Asuncion A, 2007, MEDICAL ENTOMOLOGY AND ZOOLOGY
4[Ian H. Witten, Eibe Frank]Data Mining: Practical Machine Learning Tools ...2011ELSEVIER EBOOKS25716Witten I, 2011, ELSEVIER EBOOKS
5[Nasser M. Nasrabadi]Pattern Recognition and Machine Learning2007JOURNAL OF ELECTRONIC IMAGING22083Nasrabadi N, 2007, JOURNAL OF ELECTRONIC IMAGING
6[David E. Goldberg]Genetic Algorithms in Search, Optimization and...198817773Goldberg D, 1988,
7[]Proceedings of the 24th international conferen...200711733NA, 2007,
8[Carl Edward Rasmussen, Christopher K. I. Will...Gaussian Processes for Machine Learning2005THE MIT PRESS EBOOKS10489Rasmussen C, 2005, THE MIT PRESS EBOOKS
9[Kevin P. Murphy]Machine learning a probabilistic perspective20129328Murphy K, 2012,
\n", + "
" + ], + "text/plain": [ + " AU \\\n", + "0 [Fabián Pedregosa, Gaël Varoquaux, Alexandre G... \n", + "1 [] \n", + "2 [J. R. Quinlan] \n", + "3 [Arthur Asuncion] \n", + "4 [Ian H. Witten, Eibe Frank] \n", + "5 [Nasser M. Nasrabadi] \n", + "6 [David E. Goldberg] \n", + "7 [] \n", + "8 [Carl Edward Rasmussen, Christopher K. I. Will... \n", + "9 [Kevin P. Murphy] \n", + "\n", + " TI PY \\\n", + "0 Scikit-learn: Machine Learning in Python 2012 \n", + "1 Genetic algorithms in search, optimization, an... 1989 \n", + "2 C4.5: Programs for Machine Learning 1992 \n", + "3 UCI Machine Learning Repository 2007 \n", + "4 Data Mining: Practical Machine Learning Tools ... 2011 \n", + "5 Pattern Recognition and Machine Learning 2007 \n", + "6 Genetic Algorithms in Search, Optimization and... 1988 \n", + "7 Proceedings of the 24th international conferen... 2007 \n", + "8 Gaussian Processes for Machine Learning 2005 \n", + "9 Machine learning a probabilistic perspective 2012 \n", + "\n", + " SO TC \\\n", + "0 ARXIV (CORNELL UNIVERSITY) 63735 \n", + "1 CHOICE REVIEWS ONLINE 49340 \n", + "2 23702 \n", + "3 MEDICAL ENTOMOLOGY AND ZOOLOGY 24350 \n", + "4 ELSEVIER EBOOKS 25716 \n", + "5 JOURNAL OF ELECTRONIC IMAGING 22083 \n", + "6 17773 \n", + "7 11733 \n", + "8 THE MIT PRESS EBOOKS 10489 \n", + "9 9328 \n", + "\n", + " SR \n", + "0 Pedregosa F, 2012, ARXIV (CORNELL UNIVERSITY) \n", + "1 NA, 1989, CHOICE REVIEWS ONLINE \n", + "2 Quinlan J, 1992, \n", + "3 Asuncion A, 2007, MEDICAL ENTOMOLOGY AND ZOOLOGY \n", + "4 Witten I, 2011, ELSEVIER EBOOKS \n", + "5 Nasrabadi N, 2007, JOURNAL OF ELECTRONIC IMAGING \n", + "6 Goldberg D, 1988, \n", + "7 NA, 2007, \n", + "8 Rasmussen C, 2005, THE MIT PRESS EBOOKS \n", + "9 Murphy K, 2012, " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(\"=== FULL PIPELINE: OpenAlex (200 records) ===\")\n", + "records_oa_200 = retrieve(query=\"machine learning\", platform=\"openalex\", 
total=200)\n", + "df_oa_200 = standardize(records_oa_200, source=\"openalex\")\n", + "df_oa_200 = validate(df_oa_200)\n", + "save_standardized_csv(df_oa_200, \"test_openalex_200.csv\") # \";\" delimiter for multi-value fields, as required by the spec\n", + "print(f\"Shape: {df_oa_200.shape}\")\n", + "print(\"CSV saved: test_openalex_200.csv\")\n", + "df_oa_200[['AU', 'TI', 'PY', 'SO', 'TC', 'SR']].head(10)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "888dab61", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== FULL PIPELINE: PubMed (200 records) ===\n", + "Running validation...\n", + " PASS — all mandatory columns present\n", + " PASS — no null values found\n", + " PASS — all column types correct\n", + "Validation passed.\n", + "Shape: (200, 26)\n", + "CSV saved: test_pubmed_200.csv\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AUTIPYSOTCSR
0[Figueroa-Quiñones J, Ipanaque-Neyra J, Gómez ...Development, validation and use of artificial-...2023F1000Research0Figueroa-Quiñones J, 2023, F1000RES
1[de Mattos BP, Mattjie C, Ravazio R, Barros RC...Craving for a Robust Methodology: A Systematic...2026International journal of mental health and add...0de Mattos B, 2026, INT J MENT HEALTH ADDICT
2[Kuang A, Yu Y, Siddique J, Scholtens D]Imputation of Missing Continuous Glucose Monit...2026Journal of diabetes science and technology0Kuang A, 2026, J DIABETES SCI TECHNOL
3[Marsico C, Renteria C, Grimm JR, Fernandez-Ar...A Machine Learning Approach to Quantitative An...2025Small structures0Marsico C, 2025, SMALL STRUCT
4[Upadhyay K, Fuhg JN, Bouklas N, Ramesh KT]Physics-informed data-driven discovery of cons...2026Computational mechanics0Upadhyay K, 2026, COMPUT MECH
5[Zhan Z, Zhou S, Deng J, Zhang R]Improving electronic health record processing ...2024AMIA ... Annual Symposium proceedings. AMIA Sy...0Zhan Z, 2024, AMIA ANNU SYMP PROC
6[Xie Y, Cui H, Zhang Z, Lu J, Shu K, Nahab F, ...KERAP: A Knowledge-Enhanced Reasoning Approach...2024AMIA ... Annual Symposium proceedings. AMIA Sy...0Xie Y, 2024, AMIA ANNU SYMP PROC
7[Sivarajkumar S, Ameri K, Li C, Wang Y, Jiang M]Automating Adjudication of Cardiovascular Even...2024AMIA ... Annual Symposium proceedings. AMIA Sy...0Sivarajkumar S, 2024, AMIA ANNU SYMP PROC
8[Wang M, Kuan YH, Alba PR, Gan Q, Schoen MW, T...Developing Large Language Model-based Pipeline...2024AMIA ... Annual Symposium proceedings. AMIA Sy...0Wang M, 2024, AMIA ANNU SYMP PROC
9[Nguyen QN, Wu H, Pontikos N, Wang SY]Addressing Generalizability in Clinical Named ...2024AMIA ... Annual Symposium proceedings. AMIA Sy...0Nguyen Q, 2024, AMIA ANNU SYMP PROC
\n", + "
" + ], + "text/plain": [ + " AU \\\n", + "0 [Figueroa-Quiñones J, Ipanaque-Neyra J, Gómez ... \n", + "1 [de Mattos BP, Mattjie C, Ravazio R, Barros RC... \n", + "2 [Kuang A, Yu Y, Siddique J, Scholtens D] \n", + "3 [Marsico C, Renteria C, Grimm JR, Fernandez-Ar... \n", + "4 [Upadhyay K, Fuhg JN, Bouklas N, Ramesh KT] \n", + "5 [Zhan Z, Zhou S, Deng J, Zhang R] \n", + "6 [Xie Y, Cui H, Zhang Z, Lu J, Shu K, Nahab F, ... \n", + "7 [Sivarajkumar S, Ameri K, Li C, Wang Y, Jiang M] \n", + "8 [Wang M, Kuan YH, Alba PR, Gan Q, Schoen MW, T... \n", + "9 [Nguyen QN, Wu H, Pontikos N, Wang SY] \n", + "\n", + " TI PY \\\n", + "0 Development, validation and use of artificial-... 2023 \n", + "1 Craving for a Robust Methodology: A Systematic... 2026 \n", + "2 Imputation of Missing Continuous Glucose Monit... 2026 \n", + "3 A Machine Learning Approach to Quantitative An... 2025 \n", + "4 Physics-informed data-driven discovery of cons... 2026 \n", + "5 Improving electronic health record processing ... 2024 \n", + "6 KERAP: A Knowledge-Enhanced Reasoning Approach... 2024 \n", + "7 Automating Adjudication of Cardiovascular Even... 2024 \n", + "8 Developing Large Language Model-based Pipeline... 2024 \n", + "9 Addressing Generalizability in Clinical Named ... 2024 \n", + "\n", + " SO TC \\\n", + "0 F1000Research 0 \n", + "1 International journal of mental health and add... 0 \n", + "2 Journal of diabetes science and technology 0 \n", + "3 Small structures 0 \n", + "4 Computational mechanics 0 \n", + "5 AMIA ... Annual Symposium proceedings. AMIA Sy... 0 \n", + "6 AMIA ... Annual Symposium proceedings. AMIA Sy... 0 \n", + "7 AMIA ... Annual Symposium proceedings. AMIA Sy... 0 \n", + "8 AMIA ... Annual Symposium proceedings. AMIA Sy... 0 \n", + "9 AMIA ... Annual Symposium proceedings. AMIA Sy... 
0 \n", + "\n", + " SR \n", + "0 Figueroa-Quiñones J, 2023, F1000RES \n", + "1 de Mattos B, 2026, INT J MENT HEALTH ADDICT \n", + "2 Kuang A, 2026, J DIABETES SCI TECHNOL \n", + "3 Marsico C, 2025, SMALL STRUCT \n", + "4 Upadhyay K, 2026, COMPUT MECH \n", + "5 Zhan Z, 2024, AMIA ANNU SYMP PROC \n", + "6 Xie Y, 2024, AMIA ANNU SYMP PROC \n", + "7 Sivarajkumar S, 2024, AMIA ANNU SYMP PROC \n", + "8 Wang M, 2024, AMIA ANNU SYMP PROC \n", + "9 Nguyen Q, 2024, AMIA ANNU SYMP PROC " + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(\"=== FULL PIPELINE: PubMed (200 records) ===\")\n", + "records_pm_200 = retrieve(query=\"machine learning\", platform=\"pubmed\", total=200, mindate=\"2015\", maxdate=\"2024\")\n", + "df_pm_200 = standardize(records_pm_200, source=\"pubmed\")\n", + "df_pm_200 = validate(df_pm_200)\n", + "save_standardized_csv(df_pm_200, \"test_pubmed_200.csv\") # \";\" delimiter for multi-value fields, as required by the spec\n", + "print(f\"Shape: {df_pm_200.shape}\")\n", + "print(\"CSV saved: test_pubmed_200.csv\")\n", + "df_pm_200[['AU', 'TI', 'PY', 'SO', 'TC', 'SR']].head(10)\n" + ] + }, + { + "cell_type": "markdown", + "id": "da1a6ba6", + "metadata": {}, + "source": [ + "---\n", + "## PHASE 4: CSV ROUND-TRIP INTEGRITY\n", + "The CSVs just saved are now reloaded from disk to demonstrate that the type contract\n", + "survives a round trip through a file (e.g. when the CSV is manually re-imported into the\n", + "dashboard via the \"Import Raw Data\" tab).\n", + "\n", + "Without this safeguard, `pandas.read_csv()` would treat empty cells as `NaN` instead\n", + "of as an empty string `\"\"`, and the list columns (`AU`, `CR`, `DE`, ...) 
would come back\n", + "as plain strings instead of `list[str]` — silently breaking the type contract\n", + "enforced by `standardize()` and `validate()`.\n", + "\n", + "**Note:** this does not affect the live flow (API Query → dashboard), which always stays\n", + "in memory and never goes through a CSV file — it only concerns the standalone CSV\n", + "required as a separate deliverable by the spec (\"Provide in output a standardized CSV file\").\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "0bee00f1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== ROUND-TRIP: OpenAlex CSV reloaded from disk ===\n", + "AU type: | sample: ['Fabián Pedregosa', 'Gaël Varoquaux']\n", + "PMID type: | value: ''\n", + "TC type: \n", + "PY type: | value: 2012\n", + "Running validation...\n", + " PASS — all mandatory columns present\n", + " PASS — no null values found\n", + " PASS — all column types correct\n", + "Validation passed.\n" + ] + } + ], + "source": [ + "print(\"=== ROUND-TRIP: OpenAlex CSV reloaded from disk ===\")\n", + "df_oa_reloaded = load_standardized_csv(\"test_openalex_200.csv\")\n", + "print(f\"AU type: {type(df_oa_reloaded['AU'].iloc[0])} | sample: {df_oa_reloaded['AU'].iloc[0][:2]}\")\n", + "print(f\"PMID type: {type(df_oa_reloaded['PMID'].iloc[0])} | value: {repr(df_oa_reloaded['PMID'].iloc[0])}\")\n", + "print(f\"TC type: {type(df_oa_reloaded['TC'].iloc[0])}\")\n", + "print(f\"PY type: {type(df_oa_reloaded['PY'].iloc[0])} | value: {df_oa_reloaded['PY'].iloc[0]}\")\n", + "df_oa_reloaded = validate(df_oa_reloaded)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "dfa9d6a4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== ROUND-TRIP: PubMed CSV reloaded from disk ===\n", + "AU type: \n", + "UT type: \n", + "PMID type: \n", + "Running validation...\n", + " PASS — all mandatory columns present\n", + " PASS — no null 
values found\n", + " PASS — all column types correct\n", + "Validation passed.\n" + ] + } + ], + "source": [ + "print(\"=== ROUND-TRIP: PubMed CSV reloaded from disk ===\")\n", + "df_pm_reloaded = load_standardized_csv(\"test_pubmed_200.csv\")\n", + "\n", + "print(f\"AU type: {type(df_pm_reloaded['AU'].iloc[0])}\")\n", + "print(f\"UT type: {type(df_pm_reloaded['UT'].iloc[0])}\")\n", + "print(f\"PMID type: {type(df_pm_reloaded['PMID'].iloc[0])}\")\n", + "\n", + "df_pm_reloaded = validate(df_pm_reloaded)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "26c84af5-93c4-484e-bbb7-c6e1704b6bc7", + "metadata": {}, + "outputs": [], + "source": [ + "df_oa_200.to_excel(\"test_openalex_200.xlsx\", index=False) \n", + "\n", + "df_pm_200.to_excel(\"test_pubmed_200.xlsx\", index=False) \n" + ] + }, + { + "cell_type": "markdown", + "id": "d6ab48bd", + "metadata": {}, + "source": [ + "---\n", + "## Summary\n", + "\n", + "| Platform | Records | Columns | NaN | SR | CSV round-trip |\n", + "|----------|---------|---------|-----|----|----------------|\n", + "| OpenAlex | 200 | 26 | 0 | ✅ | ✅ |\n", + "| PubMed | 200 | 26 | 0 | ✅ | ✅ |\n", + "\n", + "The ETL pipeline successfully:\n", + "- Extracted data from OpenAlex and PubMed REST APIs\n", + "- Transformed raw JSON into the WoS standard schema\n", + "- Enforced type contracts (list[str], str, int)\n", + "- Validated all mandatory columns\n", + "- Generated standardized CSV files ready for Bibliometrix-Python analysis\n", + "- Verified that the type contract survives a full save-to-disk / reload-from-disk cycle via `load_standardized_csv()`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "aed88cb2-e274-4166-98bc-c7800f6fba8e", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, 
+ "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/functions/get_affiliationproductionovertime.py b/functions/get_affiliationproductionovertime.py index e1b87f583..3d2f33ca7 100644 --- a/functions/get_affiliationproductionovertime.py +++ b/functions/get_affiliationproductionovertime.py @@ -12,9 +12,20 @@ def get_affiliation_production_over_time(df, top_k_affiliations): Returns: A Plotly figure object representing the affiliation's production over time. """ - data = df.get() + # PATCH: AU_UN is a derived field — must be extracted before use. + # Without this call, AU_UN is missing and the function crashes. + df = metaTagExtraction(df, Field="AU_UN") - AFF = data["AU_UN"].dropna().apply(lambda x: [aff for aff in x if aff.strip() != ""]) + # PATCH: metaTagExtraction may return a reactive or a plain DataFrame + data = df.get() if hasattr(df, 'get') and callable(df.get) and not isinstance(df, pd.DataFrame) else df + if data is None or data.empty: + return go.FigureWidget(go.Figure()), pd.DataFrame() + + # PATCH: AU_UN may be missing even after extraction (e.g. 
no affiliations in data) + if "AU_UN" not in data.columns: + return go.FigureWidget(go.Figure()), pd.DataFrame() + + AFF = data["AU_UN"].dropna().apply(lambda x: [aff for aff in x if aff.strip() != ""] if isinstance(x, list) else [s.strip() for s in str(x).split(";") if s.strip()]) nAFF = [len(aff) for aff in AFF] affiliations = [aff for sublist in AFF for aff in sublist] @@ -24,6 +35,10 @@ def get_affiliation_production_over_time(df, top_k_affiliations): "Year": years }).query('Affiliation != "NA"').dropna(subset=["Affiliation", "Year"]) + # PATCH: safety check if AFFY is empty after filtering + if AFFY.empty: + return go.FigureWidget(go.Figure()), pd.DataFrame() + AFFY = AFFY.groupby(["Affiliation", "Year"]).size().reset_index(name="Articles") AFFY = AFFY.pivot(index="Affiliation", columns="Year", values="Articles").fillna(0) AFFY = AFFY.stack().reset_index(name="Articles") @@ -34,7 +49,10 @@ def get_affiliation_production_over_time(df, top_k_affiliations): AffOverTime = AFFY[AFFY["Affiliation"].isin(Affselected["Affiliation"])] AffOverTime["Year"] = AffOverTime["Year"].astype(int) - # Create the plot + # PATCH: safety check if AffOverTime is empty + if AffOverTime.empty: + return go.FigureWidget(go.Figure()), pd.DataFrame() + fig = px.line( AffOverTime, x="Year", @@ -43,7 +61,6 @@ def get_affiliation_production_over_time(df, top_k_affiliations): labels={"Year": "Year", "Articles": "Cumulative Articles", "Affiliation": "Affiliation"}, ) - # Customize the layout fig.update_layout( xaxis=dict( tickmode='array', @@ -67,11 +84,10 @@ def get_affiliation_production_over_time(df, top_k_affiliations): ) ) - # Customize the grid fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='#EFEFEF') fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='#EFEFEF') fig = go.FigureWidget(fig) fig._config = fig._config | {'modeBarButtonsToRemove': ['pan', 'select', 'lasso2d', 'toImage'], 'displaylogo': False} - return fig, AffOverTime + return fig, AffOverTime \ No newline at end 
of file diff --git a/functions/get_annualproduction.py b/functions/get_annualproduction.py index dd27105c2..6a1aeddc8 100644 --- a/functions/get_annualproduction.py +++ b/functions/get_annualproduction.py @@ -11,7 +11,8 @@ def get_annual_production(df): Returns: A Plotly figure object representing the annual scientific production. """ - data = df.get() + data = df.get() if hasattr(df, 'get') and callable(df.get) and not isinstance(df, pd.DataFrame) else df + data["PY"] = pd.to_numeric(data["PY"], errors="coerce").fillna(0).astype(int) # PATCH: ensure PY is int # Calculate the number of publications per year publications_per_year = data["PY"].value_counts().sort_index().reset_index() @@ -22,7 +23,7 @@ def get_annual_production(df): max_year = publications_per_year["Year"].max() # Ensure all years in the range are present - all_years = pd.DataFrame({"Year": range(min_year, max_year + 1)}) + all_years = pd.DataFrame({"Year": range(int(min_year), int(max_year) + 1)}) publications_per_year = all_years.merge(publications_per_year, on="Year", how="left").fillna(0) # Create the plot diff --git a/functions/get_authorlocalimpact.py b/functions/get_authorlocalimpact.py index 74a68e263..491e4bdff 100644 --- a/functions/get_authorlocalimpact.py +++ b/functions/get_authorlocalimpact.py @@ -13,7 +13,13 @@ def get_authors_local_impact(df, num_of_authors_local_impact, author_local_impac Returns: A Plotly figure object and a DataFrame of the most impactful sources. """ - df = df.get() + # PATCH: original code called df.get() without arguments, which crashes + # on a pandas DataFrame because pandas .get() requires a column name. + # Fixed by checking isinstance(df, pd.DataFrame) first. 
+ if isinstance(df, pd.DataFrame): + df = df.copy() + else: + df = df.get() today = pd.Timestamp.now().year # Ensure 'TC' and 'PY' are numeric diff --git a/functions/get_authorproductionovertime.py b/functions/get_authorproductionovertime.py index 65edaca96..e435faa75 100644 --- a/functions/get_authorproductionovertime.py +++ b/functions/get_authorproductionovertime.py @@ -16,7 +16,13 @@ def get_author_production_over_time(df, top_k_authors): table_authors_production (pd.DataFrame): Table summarizing authors' production with TC and TCpY. table_documents (pd.DataFrame): Detailed table with additional document information. """ - data = df.get() + # PATCH: original code called df.get() without arguments, which crashes + # on a pandas DataFrame because pandas .get() requires a column name. + # Fixed by checking isinstance(df, pd.DataFrame) first. + if isinstance(df, pd.DataFrame): + data = df.copy() + else: + data = df.get() # Ensure "PY" is numeric data["PY"] = pd.to_numeric(data["PY"], errors="coerce") diff --git a/functions/get_averagecitations.py b/functions/get_averagecitations.py index d752aa9b7..135d9a341 100644 --- a/functions/get_averagecitations.py +++ b/functions/get_averagecitations.py @@ -11,7 +11,13 @@ def get_average_citations(df): Returns: A Plotly figure object representing the average citations per year. """ - data = df.get() + # PATCH: original code called df.get() without arguments, which crashes + # on a pandas DataFrame because pandas .get() requires a column name. + # Fixed by checking isinstance(df, pd.DataFrame) first. + if isinstance(df, pd.DataFrame): + data = df.copy() + else: + data = df.get() # Calculate the current year current_year = pd.Timestamp.now().year + 1 @@ -23,6 +29,11 @@ def get_average_citations(df): ).reset_index() # Calculate mean total citations per year and citable years + + # PATCH: PY column is stored as string in the standardized DataFrame + # but this line requires arithmetic subtraction which needs integers. 
+ # Fixed by converting PY to numeric before the calculation. + table["PY"] = pd.to_numeric(table["PY"], errors="coerce") table["MeanTCperYear"] = round(table["MeanTCperArt"] / (current_year - table["PY"]), 2) table["CitableYears"] = current_year - table["PY"] table = table.dropna().rename(columns={"PY": "Year"}) diff --git a/functions/get_bradfordlaw.py b/functions/get_bradfordlaw.py index 86580591f..3a1a3cc26 100644 --- a/functions/get_bradfordlaw.py +++ b/functions/get_bradfordlaw.py @@ -12,7 +12,13 @@ def get_bradford_law(df): A Plotly figure object and a DataFrame of the Bradford's Law zones. """ # Sort data by frequency of occurrence (equivalent to R's sort(table(M$SO), decreasing = TRUE)) - data = df.get() + # PATCH: original code called df.get() without arguments, which crashes + # on a pandas DataFrame because pandas .get() requires a column name. + # Fixed by checking isinstance(df, pd.DataFrame) first. + if isinstance(df, pd.DataFrame): + data = df.copy() + else: + data = df.get() source_counts = data["SO"].value_counts() # Total number of sources diff --git a/functions/get_citedcountries.py b/functions/get_citedcountries.py index ac95a8d0c..b55523f8c 100644 --- a/functions/get_citedcountries.py +++ b/functions/get_citedcountries.py @@ -15,7 +15,14 @@ def get_cited_countries(df, num_of_cited_countries, cited_countries_measure): """ # Extract metadata tags for cited countries df = metaTagExtraction(df, "AU1_CO") - df = df.get() + # PATCH: metaTagExtraction may return a reactive or a plain DataFrame + df = df.get() if hasattr(df, 'get') and callable(df.get) and not isinstance(df, pd.DataFrame) else df + if df is None or df.empty: + return go.FigureWidget(go.Figure()), pd.DataFrame() + + # PATCH: filter rows where AU1_CO is an empty string (common with OpenAlex/PubMed + # records that lack affiliation data). dropna alone does not catch empty strings. 
+ df = df[df["AU1_CO"].str.strip() != ""] # Prepare the table for ranking countries tab = ( @@ -99,6 +106,12 @@ def get_cited_countries(df, num_of_cited_countries, cited_countries_measure): ) # Set x-axis ticks + # PATCH: if no countries found (e.g. PubMed has no affiliation data), + # x_values will be empty and max_x will be NaN, causing int() conversion + # to crash. Return empty figure instead. + if x_values.empty or pd.isna(x_values.max()): + return go.FigureWidget(go.Figure()), pd.DataFrame() + max_x = x_values.max() tick_step = 5 if max_x <= 50 else int(max_x // 10) or 1 x_ticks = list(range(0, int(max_x) + tick_step, tick_step)) @@ -139,4 +152,4 @@ def get_cited_countries(df, num_of_cited_countries, cited_countries_measure): fig._config = fig._config | {'modeBarButtonsToRemove': ['pan', 'select', 'lasso2d', 'toImage'], 'displaylogo': False} - return fig, table + return fig, table \ No newline at end of file diff --git a/functions/get_citeddocuments.py b/functions/get_citeddocuments.py index 14491f74a..b0668e4c4 100644 --- a/functions/get_citeddocuments.py +++ b/functions/get_citeddocuments.py @@ -1,116 +1,261 @@ + from www.services import * def get_cited_documents(df, num_of_cited_docs, cited_docs_measure): """ Generate a plot and table of the most cited documents. - - Args: - df: A DataFrame object containing the data. - num_of_cited_docs: The number of top cited documents to display. - cited_docs_measure: The measure to use for ranking (either "TC" for total citations or "TCperYear" for citations per year). - - Returns: - A Plotly figure object and a DataFrame of the most cited documents. 
""" - # Extract metadata tags for cited documents - df = metaTagExtraction(df, "SR") - df = df.get() - # Prepare the table for ranking documents + # SAFETY CHECK + if df is None: + return None, pd.DataFrame() + + # EXTRACT SR + _df = df.get() if hasattr(df, 'get') and not isinstance(df, pd.DataFrame) else df + if 'SR' not in _df.columns or _df['SR'].eq('').all(): + df = metaTagExtraction(df, "SR") + + # PATCH: original code called df.get() without arguments, which crashes on a + # plain pandas DataFrame because pandas .get() requires a column name as argument. + # Fixed by checking isinstance(df, pd.DataFrame) first. + data = df if isinstance(df, pd.DataFrame) else df.get() + + # EMPTY CHECK + if data is None or data.empty: + return None, pd.DataFrame() + + # REQUIRED COLUMNS + required_cols = ["SR", "TC", "PY"] + + for col in required_cols: + + if col not in data.columns: + + if col in ["TC", "PY"]: + data[col] = 0 + else: + data[col] = "" + + # OPTIONAL COLUMN + if "DI" not in data.columns: + data["DI"] = "" + + # SAFE NUMERIC CONVERSION + data["TC"] = pd.to_numeric( + data["TC"], + errors="coerce" + ).fillna(0) + + data["PY"] = pd.to_numeric( + data["PY"], + errors="coerce" + ) + + # CURRENT YEAR current_year = pd.to_datetime("today").year - df["TCperYear"] = df["TC"] / (current_year + 1 - df["PY"]) - - # Calculate NormalizedTC within each publication year - df["NormalizedTC"] = df.groupby("PY")["TC"].transform(lambda x: x / x.mean()).round(2) - + + # PREVENT DIVISION BY ZERO + data["TCperYear"] = data.apply( + lambda row: + row["TC"] / max((current_year + 1 - row["PY"]), 1) + if pd.notna(row["PY"]) + else 0, + axis=1 + ) + + # SAFE NORMALIZATION + data["NormalizedTC"] = data.groupby("PY")["TC"].transform( + lambda x: + (x / x.mean()).round(2) + if x.mean() not in [0, np.nan] + else 0 + ) + + # CLEAN SR + data["SR"] = ( + data["SR"] + .fillna("") + .astype(str) + .str.strip() + ) + + # BUILD TABLE tab = ( - df.reset_index(drop=True).dropna(subset=["SR"]) + 
data.reset_index(drop=True) + .dropna(subset=["SR"]) + .query("SR != ''") .groupby("SR", as_index=False) - .agg(DI=("DI", "first"), TotalCitation=("TC", "sum"), TCperYear=("TCperYear", lambda x: round(x.sum(), 1)), NormalizedTC=("NormalizedTC", "sum")) + .agg( + DI=("DI", "first"), + TotalCitation=("TC", "sum"), + TCperYear=("TCperYear", lambda x: round(x.sum(), 1)), + NormalizedTC=("NormalizedTC", "sum") + ) .rename(columns={"SR": "Document"}) .sort_values(by="TotalCitation", ascending=False) ) - # Convert columns to numeric to ensure correct calculations - tab["TotalCitation"] = pd.to_numeric(tab["TotalCitation"]) - tab["TCperYear"] = pd.to_numeric(tab["TCperYear"]) - tab["NormalizedTC"] = pd.to_numeric(tab["NormalizedTC"]) - tab = tab.sort_values(by="TotalCitation", ascending=False) - table = tab + # EMPTY CHECK + if tab.empty: + return None, pd.DataFrame() + + # SAFE NUMERIC CONVERSION + for col in ["TotalCitation", "TCperYear", "NormalizedTC"]: + + tab[col] = pd.to_numeric( + tab[col], + errors="coerce" + ).fillna(0) + + table = tab.copy() + + # LIMIT RESULTS + num_of_cited_docs = min( + int(num_of_cited_docs), + len(tab) + ) + tab = tab.head(num_of_cited_docs) - # Select the appropriate measure based on user input + # MEASURE SELECTION if cited_docs_measure == "total_cit": - tab = tab[["Document", "TotalCitation", "NormalizedTC"]] + + tab = tab[ + ["Document", "TotalCitation", "NormalizedTC"] + ] + laby = "Global Citations" + else: - tab = tab.sort_values(by="TCperYear", ascending=False)[["Document", "TCperYear", "NormalizedTC"]] + + tab = ( + tab.sort_values( + by="TCperYear", + ascending=False + )[ + ["Document", "TCperYear", "NormalizedTC"] + ] + ) + laby = "Global Citations per Year" - # Create the plot (horizontal scatter with lines, similar to author plot) + # EMPTY CHECK + if tab.empty: + return None, table + + # PLOT fig = go.Figure() - # Prepare y-ticks and labels y_labels = tab["Document"] y_vals = list(range(len(tab))) - # Add a thick line from 
each label to its marker + metric_col = tab.columns[1] + + # SAFE MAX VALUE + max_metric = max( + tab[metric_col].max(), + 1 + ) + + # SHAPES for i, row in enumerate(tab.itertuples()): + fig.add_shape( type="line", x0=0, - x1=getattr(row, tab.columns[1]), + x1=getattr(row, metric_col), y0=i, y1=i, - line=dict(color="#e0e0e0", width=5), + line=dict( + color="#e0e0e0", + width=5 + ), layer="below", ) - # Add scatter markers and text + # SCATTER fig.add_trace( + go.Scatter( - x=tab[tab.columns[1]], + x=tab[metric_col], y=y_vals, + mode="markers+text", + marker=dict( - size=18 + 6 * (tab[tab.columns[1]] / tab[tab.columns[1]].max()), - color=tab[tab.columns[1]], - colorscale=[[0, "#B3D1F2"], [1, "#5567BB"]], - line=dict(width=1, color="#E0E0E0"), + size=18 + 6 * ( + tab[metric_col] / max_metric + ), + + color=tab[metric_col], + + colorscale=[ + [0, "#B3D1F2"], + [1, "#5567BB"] + ], + + line=dict( + width=1, + color="#E0E0E0" + ), + opacity=0.95, showscale=False, ), - text=tab[tab.columns[1]], + + text=tab[metric_col], + textposition="top center", - textfont=dict(color="#5567BB", size=13), + + textfont=dict( + color="#5567BB", + size=13 + ), + hovertemplate=( "Document: %{customdata}
" "" + laby + ": %{x}" ), + customdata=tab["Document"], ) ) - # Add horizontal grid lines for each document (lighter) + # GRID LINES for i in range(len(tab)): + fig.add_shape( type="line", x0=0, - x1=tab[tab.columns[1]].max(), + x1=max_metric, y0=i, y1=i, - line=dict(color="#E0E0E0", width=2), + line=dict( + color="#E0E0E0", + width=2 + ), layer="below", ) - # Set x-axis ticks - max_x = tab[tab.columns[1]].max() - tick_step = max(1, int(max_x // 6)) - x_ticks = list(range(0, int(max_x) + tick_step, tick_step)) - if x_ticks[-1] < max_x: - x_ticks.append(int(max_x)) + # X TICKS + tick_step = max( + 1, + int(max_metric // 6) + ) + + x_ticks = list( + range( + 0, + int(max_metric) + tick_step, + tick_step + ) + ) + + if len(x_ticks) == 0: + x_ticks = [0] + # AXES fig.update_yaxes( tickvals=y_vals, ticktext=y_labels, @@ -119,6 +264,7 @@ def get_cited_documents(df, num_of_cited_docs, cited_docs_measure): title="Document", tickfont=dict(size=13), ) + fig.update_xaxes( showgrid=True, gridcolor="#F0F0F0", @@ -127,22 +273,51 @@ def get_cited_documents(df, num_of_cited_docs, cited_docs_measure): title=laby, tickfont=dict(size=13), ) + + # LAYOUT fig.update_layout( plot_bgcolor='white', - font=dict(color="#222222", size=14, family="Segoe UI, Arial"), - margin=dict(l=0, r=0, t=0, b=0), - height=50 + 90 * len(tab), + + font=dict( + color="#222222", + size=14, + family="Segoe UI, Arial" + ), + + margin=dict( + l=0, + r=0, + t=0, + b=0 + ), + + height=max( + 400, + 50 + 90 * len(tab) + ), + showlegend=False, + hoverlabel=dict( bgcolor="white", font_size=13, font_family="Segoe UI, Arial", bordercolor="#5567BB" ), + coloraxis_showscale=False, ) + fig = go.FigureWidget(fig) - fig._config = fig._config | {'modeBarButtonsToRemove': ['pan', 'select', 'lasso2d', 'toImage'], - 'displaylogo': False} - - return fig, table + + fig._config = fig._config | { + 'modeBarButtonsToRemove': [ + 'pan', + 'select', + 'lasso2d', + 'toImage' + ], + 'displaylogo': False + } + + return fig, table \ No 
newline at end of file diff --git a/functions/get_clusteringcoupling.py b/functions/get_clusteringcoupling.py index 8263a46b3..43e46b08c 100644 --- a/functions/get_clusteringcoupling.py +++ b/functions/get_clusteringcoupling.py @@ -24,6 +24,12 @@ def get_clustering_coupling(df, unit_of_analysis, coupling_measured, stemmer, im clustering=clustering_algorithm ) + # PATCH: couplingMap returns None when network is empty + # (e.g. OpenAlex URL-based references produce no valid coupling matrix) + if coupling_map is None: + empty_fig = go.FigureWidget(go.Figure()) + return empty_fig, "", pd.DataFrame(), pd.DataFrame() + ### Plotting the coupling map fig = coupling_map['map'] fig.update_layout( @@ -31,7 +37,7 @@ def get_clustering_coupling(df, unit_of_analysis, coupling_measured, stemmer, im autosize=True, width=None, showlegend=True, - margin=dict(t=20) # aggiunge spazio bianco sopra + margin=dict(t=20) ) fig = go.FigureWidget(fig) fig._config = fig._config | {'modeBarButtonsToRemove': ['pan', 'select', 'lasso2d', 'toImage'], @@ -45,59 +51,42 @@ def get_clustering_coupling(df, unit_of_analysis, coupling_measured, stemmer, im if not isinstance(graph, ig.Graph): raise TypeError("Expected an igraph.Graph object in 'couplingMap['net']['graph'].") - # Initialize Pyvis network with matching R settings net = Network(height="98vh", width="100%", notebook=True, cdn_resources="in_line") net.toggle_physics(False) - # Use colors from df['adjusted_color'] unique_clusters = set(coupling_map['net']['cluster_obj'].membership) cluster_colors = {} - # Get unique cluster IDs and their colors for i, cluster_id in enumerate(unique_clusters): if i < len(cm_clusters): - # Get hex color from DataFrame and convert to rgba hex_color = cm_clusters['adjusted_color'].iloc[i] - # Convert hex to rgb format hex_color = hex_color.lstrip('#') rgb = tuple(int(hex_color[i:i+2], 16) for i in (0, 2, 4)) cluster_colors[cluster_id] = f"rgba({rgb[0]},{rgb[1]},{rgb[2]},0.3)" else: - # Fallback for any additional 
clusters - cluster_colors[cluster_id] = f"rgba(128,128,128,0.3)" # Default gray + cluster_colors[cluster_id] = f"rgba(128,128,128,0.3)" - # Generate layout layout = graph.layout_fruchterman_reingold() - # Get coordinates from layout coords = np.array([[pos[0], pos[1]] for pos in layout]) - # Scale coordinates to fit 800px height - # First normalize to [-1,1] range coords = coords / np.abs(coords).max() - - # Then scale to target dimensions - # Width will be proportional to maintain aspect ratio - coords[:, 0] *= 1000 # Scale x coordinates - coords[:, 1] *= 400 # Scale y coordinates to fit 800px (centered) + coords[:, 0] *= 1000 + coords[:, 1] *= 400 - # Prepare for avoid_net_overlaps node_labels = [v["name"] if "name" in v.attributes() else f"Node {v.index}" for v in graph.vs] node_sizes = [] nodes = [] - # Add nodes with matching R visNetwork settings for idx, vertex in enumerate(graph.vs): cluster_id = coupling_map['net']['cluster_obj'].membership[vertex.index] node_color = cluster_colors[cluster_id] - # Normalize node sizes min_deg, max_deg = min(graph.degree()), max(graph.degree()) node_size = 30 if max_deg == min_deg else (35 * (vertex.degree() - min_deg) / (max_deg - min_deg) + 30) node_size = max(30, min(150, node_size)) font_size = node_size * 2.5 node_sizes.append(node_size) - # Set font opacity based on node size if font_size < 90: font_opacity = 0.4 elif 90 <= font_size < 100: @@ -106,12 +95,6 @@ def get_clustering_coupling(df, unit_of_analysis, coupling_measured, stemmer, im font_opacity = 0.8 else: font_opacity = 1.0 - - # Calculate font opacity using R-like formula - # min_font_size = 80 # Minimum node size - # max_font_size = 150 # Maximum node size - # font_opacity = np.sqrt((font_size - min_font_size) / (max_font_size - min_font_size)) - # font_opacity = max(0.1, min(1, font_opacity)) # Clamp between 0.3 and 0.8 nodes.append({ 'id': vertex.index, @@ -130,28 +113,22 @@ def get_clustering_coupling(df, unit_of_analysis, coupling_measured, stemmer, im 
'y': layout[idx][1] * 1000 }) - # Remove overlapping labels noOverlap = True if noOverlap: threshold = 0.05 - ymax = np.ptp(coords[:, 1]) # equivalent to diff(range()) + ymax = np.ptp(coords[:, 1]) xmax = np.ptp(coords[:, 0]) threshold2 = threshold * np.mean([xmax, ymax]) - - # Create data structure for overlap checking labels_to_remove = avoid_net_overlaps(coords, node_labels, node_sizes, threshold=threshold2) else: labels_to_remove = [] - #labels_to_remove = avoid_net_overlaps(coords, node_labels, node_sizes, threshold=0.05) - # Add nodes to network unique_nodes = {node['id']: node for node in nodes}.values() for node in unique_nodes: if node['label'] in labels_to_remove: node['label'] = '' net.add_node(node['id'], **node) - # Add edges with improved styling matching R implementation added_edges = set() edge_weights = [e.attributes().get('weight', 1) for e in graph.es] max_weight = max(edge_weights) if edge_weights else 1 @@ -161,23 +138,18 @@ def get_clustering_coupling(df, unit_of_analysis, coupling_measured, stemmer, im cluster_source = coupling_map['net']['cluster_obj'].membership[source] cluster_target = coupling_map['net']['cluster_obj'].membership[target] - # Set edge color with proper opacity if cluster_source == cluster_target: base_color = cluster_colors[cluster_source] - # Convert rgba to hex with opacity rgba_values = [int(x) for x in base_color[5:-1].split(',')[:-1]] edge_color = f"rgba({rgba_values[0]},{rgba_values[1]},{rgba_values[2]},0.56)" else: - # Use darker gray for inter-cluster edges (equivalent to #69696960 in R) edge_color = "rgba(105,105,105,0.38)" - # Calculate edge width similar to R implementation edge_weight = edge.attributes().get('weight', 1) - normalized_weight = (edge_weight ** 2 / (max_weight ** 2)) * (10 + 2.5) # 2.5 is base edge size + normalized_weight = (edge_weight ** 2 / (max_weight ** 2)) * (10 + 2.5) edge_tuple = (source, target) if source < target else (target, source) - # Add edge if not already added if edge_tuple not 
in added_edges: net.add_edge( source, @@ -185,11 +157,10 @@ def get_clustering_coupling(df, unit_of_analysis, coupling_measured, stemmer, im color=edge_color, width=normalized_weight, smooth={'type': 'horizontal'}, - dashes=False # Set to True if you have line type information + dashes=False ) added_edges.add(edge_tuple) - # Configure network options to match R visNetwork net.set_options(""" var options = { "nodes": { @@ -213,7 +184,6 @@ def get_clustering_coupling(df, unit_of_analysis, coupling_measured, stemmer, im } """) - # Save network to HTML tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".html") html_path = tmp.name with open(html_path, 'w', encoding="utf-8") as f: @@ -221,7 +191,6 @@ def get_clustering_coupling(df, unit_of_analysis, coupling_measured, stemmer, im new_css = " .card {\n border: none;\n }" updated_html = html.replace("", new_css + "\n ") updated_html = updated_html.replace("1px solid lightgray", "none") - f.write(updated_html) - return fig, html_path.split(os.sep)[-1], cm_data, cm_clusters + return fig, html_path.split(os.sep)[-1], cm_data, cm_clusters \ No newline at end of file diff --git a/functions/get_co_occurence_network.py b/functions/get_co_occurence_network.py index ec96b143a..35826dffb 100644 --- a/functions/get_co_occurence_network.py +++ b/functions/get_co_occurence_network.py @@ -1,13 +1,13 @@ from www.services import * -def get_co_occurence_network(df, field_cn, ngram, network_layout, clustering_algorithm_cn, normalization_cn, color_by_year, num_of_nodes, +def get_co_occurence_network(df, field_cn, ngram, network_layout, clustering_algorithm_cn, normalization_cn, color_by_year, num_of_nodes, repulsion_force, remove_isolated, min_edges, node_opacity, num_of_labels, node_shape, label_size_ls, edge_size, node_shadow, edit_nodes, label_cex, file_upload_terms, file_upload_synonyms): - + M = df - # Load stopwords and synonyms (matching R's behavior) + # Load stopwords and synonyms remove_terms = None if file_upload_terms: with 
open(file_upload_terms[0]['datapath'], 'r', encoding='utf-8') as file: @@ -18,25 +18,25 @@ def get_co_occurence_network(df, field_cn, ngram, network_layout, clustering_alg with open(file_upload_synonyms[0]['datapath'], 'r', encoding='utf-8') as file: syn_dict = {} for line in file: - terms = [term.strip() for term in line.split(';')] # Changed to ; separator as in R + terms = [term.strip() for term in line.split(';')] if terms: key = terms[0] syn_dict[key] = terms[1:] synonyms = syn_dict if syn_dict else None - # Set ngrams based on word_type ngrams = int(ngram) if field_cn in ['TI', 'AB'] else 1 - # Adjust number of labels if exceeds nodes if num_of_labels > num_of_nodes: num_of_labels = num_of_nodes - # Create network based on field type (matching R's switch statement) network_data = None title = "" - + + # PATCH: extract plain DataFrame once for use with term_extraction + M_plain = M.get() if hasattr(M, 'get') and callable(M.get) and not isinstance(M, pd.DataFrame) else M + if field_cn == 'ID': - network_data = biblionetwork(M, "co-occurrences", "keywords", num_of_nodes, + network_data = biblionetwork(M, "co-occurrences", "keywords", num_of_nodes, sep=";", remove_terms=remove_terms, synonyms=synonyms) title = "Keywords Plus Network" elif field_cn == 'DE': @@ -44,12 +44,14 @@ def get_co_occurence_network(df, field_cn, ngram, network_layout, clustering_alg sep=";", remove_terms=remove_terms, synonyms=synonyms) title = "Authors' Keywords network" elif field_cn == 'TI': - M = term_extraction(M, "TI", ngrams=ngrams, + # PATCH: pass plain DataFrame to term_extraction — it does not accept reactives + M = term_extraction(M_plain, "TI", ngrams=ngrams, remove_terms=remove_terms, synonyms=synonyms) network_data = biblionetwork(M, "co-occurrences", "titles", num_of_nodes, sep=";") title = "Title Words network" elif field_cn == 'AB': - M = term_extraction(M, "AB", ngrams=ngrams, + # PATCH: pass plain DataFrame to term_extraction — it does not accept reactives + M = 
term_extraction(M_plain, "AB", ngrams=ngrams, remove_terms=remove_terms, synonyms=synonyms) network_data = biblionetwork(M, "co-occurrences", "abstracts", num_of_nodes, sep=";") title = "Abstract Words network" @@ -58,10 +60,13 @@ def get_co_occurence_network(df, field_cn, ngram, network_layout, clustering_alg network_data = np.matmul(wsc.T, wsc) title = "Subject Categories network" + # PATCH: return early if network_data is None or empty if network_data is None: return None, None, None, None - - # Normalize if specified + + if isinstance(network_data, pd.DataFrame) and network_data.empty: + return None, None, None, None + if normalization_cn == "none": normalize = None else: @@ -85,94 +90,90 @@ def get_co_occurence_network(df, field_cn, ngram, network_layout, clustering_alg alpha=node_opacity, cluster=clustering_algorithm_cn, remove_isolates=remove_isolated, - community_repulsion=repulsion_force/2, + community_repulsion=repulsion_force / 2, verbose=False ) - # Color nodes by year if selected + # PATCH: cocnet may be None if network_plot fails on small/empty graphs + if cocnet is None: + return None, None, None, None + if color_by_year: Y = field_by_year(M, field_cn) g = cocnet['graph'] labels = [v['name'] for v in g.vs] Y_df = Y['df'] - - # Find matching items in year data + mask = Y_df['item'].str.lower().isin(labels) - df = Y_df[mask].copy() - - # Create color gradient - year_range = df['year_med'].max() - df['year_med'].min() + 1 + df_year = Y_df[mask].copy() + + year_range = df_year['year_med'].max() - df_year['year_med'].min() + 1 if not df_year.empty else 1 colors = plt.cm.Blues(np.linspace(0, 1, int(year_range * 10))) - - # Assign colors to vertices based on year + + median_year = df_year['year_med'].median() if not df_year.empty else 0 + max_year = df_year['year_med'].max() if not df_year.empty else 0 + + def safe_year_lookup(label): + matches = df_year[df_year['item'].str.lower() == label.lower()]['year_med'] + return matches.iloc[0] if not matches.empty 
else median_year + vertex_colors = [] for label in labels: - year = df[df['item'].str.lower() == label.lower()]['year_med'].iloc[0] - color_idx = int((max(df['year_med']) - year + 1) * 10 - 1) + year = safe_year_lookup(label) + color_idx = max(0, min(int((max_year - year + 1) * 10 - 1), len(colors) - 1)) vertex_colors.append(colors[color_idx]) - - # Update graph properties + g.vs['color'] = vertex_colors - g.vs['year_med'] = [df[df['item'].str.lower() == label.lower()]['year_med'].iloc[0] for label in labels] + g.vs['year_med'] = [safe_year_lookup(label) for label in labels] cocnet['graph'] = g - ################################## NETWORK VISUALIZATION ################################## net = Network(height="98vh", width="100%", notebook=True, cdn_resources="in_line") net.toggle_physics(False) - # Use colors from df['adjusted_color'] unique_clusters = set(cocnet['cluster_obj'].membership) cluster_colors = {} cm_clusters = cocnet['cluster_res'] - # Get unique cluster IDs and their colors for cluster_id in unique_clusters: - # Generate random RGB values r = np.random.randint(0, 255) - g = np.random.randint(0, 255) + g = np.random.randint(0, 255) b = np.random.randint(0, 255) - # Create rgba color with 0.3 opacity cluster_colors[cluster_id] = f"rgba({r},{g},{b},{node_opacity})" - # Generate layout - # Using default igraph layout layout = cocnet['graph']['layout'] - print("Layout:", layout) - # Get coordinates from layout coords = np.array([[pos[0], pos[1]] for pos in layout]) - - # Scale coordinates to fit 800px height - # First normalize to [-1,1] range - coords = coords / np.abs(coords).max() - - # Then scale to target dimensions - # Width will be proportional to maintain aspect ratio - coords[:, 0] *= 1000 # Scale x coordinates - coords[:, 1] *= 400 # Scale y coordinates to fit 800px (centered) - - # Prepare for avoid_net_overlaps + + abs_max = np.abs(coords).max() + if abs_max > 0: + coords = coords / abs_max + + coords[:, 0] *= 1000 + coords[:, 1] *= 400 + 
node_labels = [v["name"] if "name" in v.attributes() else f"Node {v.index}" for v in cocnet['graph'].vs] node_sizes = [] nodes = [] - - # Add nodes with matching R visNetwork settings + for idx, vertex in enumerate(cocnet['graph'].vs): cluster_id = cocnet['cluster_obj'].membership[vertex.index] node_color = cluster_colors[cluster_id] - # Normalize node sizes - min_deg, max_deg = min(cocnet['graph'].degree()), max(cocnet['graph'].degree()) - node_size = 10 if max_deg == min_deg else (15 * (vertex.degree() - min_deg) / (max_deg - min_deg) + 10) - node_size = max(10, min(130, node_size)) + degrees = cocnet['graph'].degree() + if not degrees: + node_size = 10 + else: + min_deg, max_deg = min(degrees), max(degrees) + node_size = 10 if max_deg == min_deg else (15 * (vertex.degree() - min_deg) / (max_deg - min_deg) + 10) + node_size = max(10, min(130, node_size)) + font_size = node_size * 2 node_sizes.append(node_size) - - # Calculate font opacity using R-like formula - min_font_size = 10 # Minimum node size - max_font_size = 130 # Maximum node size - font_opacity = np.sqrt((font_size - min_font_size) / (max_font_size - min_font_size))*node_opacity + 0.3 - font_opacity = max(0.1, min(1, font_opacity)) # Clamp between 0.1 and 1 - + + min_font_size = 10 + max_font_size = 130 + font_opacity = np.sqrt((font_size - min_font_size) / (max_font_size - min_font_size)) * node_opacity + 0.3 + font_opacity = max(0.1, min(1, font_opacity)) + nodes.append({ 'id': vertex.index, 'label': vertex["name"] if "name" in vertex.attributes() else f"Node {vertex.index}", @@ -180,9 +181,9 @@ def get_co_occurence_network(df, field_cn, ngram, network_layout, clustering_alg 'color': node_color, 'size': node_size, 'font': { - 'size': font_size, - 'color': f'rgba(0,0,0,{font_opacity})', - 'vadjust': -0.7*font_size if node_shape.lower() in ['dot', 'square'] else 0 + 'size': font_size, + 'color': f'rgba(0,0,0,{font_opacity})', + 'vadjust': -0.7 * font_size if node_shape.lower() in ['dot', 'square'] else 
0 }, 'shadow': node_shadow, 'shape': node_shape, @@ -190,66 +191,53 @@ def get_co_occurence_network(df, field_cn, ngram, network_layout, clustering_alg 'y': layout[idx][1] * 1000 }) - # Remove overlapping labels noOverlap = True if noOverlap: threshold = 0.05 - ymax = np.ptp(coords[:, 1]) # equivalent to diff(range()) + ymax = np.ptp(coords[:, 1]) xmax = np.ptp(coords[:, 0]) threshold2 = threshold * np.mean([xmax, ymax]) - - # Create data structure for overlap checking labels_to_remove = avoid_net_overlaps(coords, node_labels, node_sizes, threshold=threshold2) else: labels_to_remove = [] - #labels_to_remove = avoid_net_overlaps(coords, node_labels, node_sizes, threshold=0.05) - - # Add nodes to network + unique_nodes = {node['id']: node for node in nodes}.values() for node in unique_nodes: if node['label'] in labels_to_remove: node['label'] = '' net.add_node(node['id'], **node) - # Add edges with improved styling matching R implementation added_edges = set() edge_weights = [e.attributes().get('weight', 1) for e in cocnet['graph'].es] max_weight = max(edge_weights) if edge_weights else 1 - + for edge in cocnet['graph'].es: source, target = edge.tuple cluster_source = cocnet['cluster_obj'].membership[source] cluster_target = cocnet['cluster_obj'].membership[target] - - # Set edge color with proper opacity + if cluster_source == cluster_target: base_color = cluster_colors[cluster_source] - # Convert rgba to hex with opacity rgba_values = [int(x) for x in base_color[5:-1].split(',')[:-1]] edge_color = f"rgba({rgba_values[0]},{rgba_values[1]},{rgba_values[2]},0.56)" else: - # Use darker gray for inter-cluster edges (equivalent to #69696960 in R) edge_color = "rgba(105,105,105,0.38)" - - # Calculate edge width similar to R implementation + edge_weight = edge.attributes().get('weight', 1) - normalized_weight = (edge_weight ** 2 / (max_weight ** 2)) * (10 + 2.5) # 2.5 is base edge size - + normalized_weight = (edge_weight ** 2 / (max_weight ** 2)) * (10 + 2.5) + edge_tuple 
= (source, target) if source < target else (target, source) - - # Add edge if not already added + if edge_tuple not in added_edges: net.add_edge( - source, - target, + source, target, color=edge_color, width=normalized_weight, smooth={'type': 'horizontal'}, - dashes=False # Set to True if you have line type information + dashes=False ) added_edges.add(edge_tuple) - # Configure network options to match R visNetwork net.set_options(f""" var options = {{ "nodes": {{ @@ -273,7 +261,6 @@ def get_co_occurence_network(df, field_cn, ngram, network_layout, clustering_alg }} """) - # Save network to HTML tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".html") html_path = tmp.name with open(html_path, 'w', encoding="utf-8") as f: @@ -281,28 +268,25 @@ def get_co_occurence_network(df, field_cn, ngram, network_layout, clustering_alg new_css = " .card {\n border: none;\n }" updated_html = html.replace("", new_css + "\n ") updated_html = updated_html.replace("1px solid lightgray", "none") - f.write(updated_html) - ################################################################################################ - - ##################################### Density Plot ##################################### - # Crea il dataframe originale e correggi le coordinate y nodes_df_orig = pd.DataFrame(nodes) nodes_df_orig['y'] = nodes_df_orig['y'] * -1 - # Calcola la dimensione del font seguendo la formula: (((font.size - min(font.size)) / diff(range(font.size)))*20)+10 font_sizes = nodes_df_orig['font'].apply(lambda x: x.get('size', 75)) min_font = font_sizes.min() max_font = font_sizes.max() - nodes_df_orig['font_size'] = ((font_sizes - min_font) / (max_font - min_font) * 20) + 10 - # Crea il dataframe replicato per il density plot: + font_range = max_font - min_font + if font_range > 0: + nodes_df_orig['font_size'] = ((font_sizes - min_font) / font_range * 20) + 10 + else: + nodes_df_orig['font_size'] = 20.0 + nodes_df = nodes_df_orig.copy() nodes_df['log'] = 
np.ceil(np.log(nodes_df['size'])) nodes_df = nodes_df.loc[nodes_df.index.repeat(nodes_df['log'].astype(int))] - # Definisci la colorscale "Reds" con transizioni armoniose reds_colors = [ [0.0, 'rgb(255,255,255)'], [0.05, 'rgb(238,238,238)'], @@ -316,7 +300,6 @@ def get_co_occurence_network(df, field_cn, ngram, network_layout, clustering_alg [1.0, 'rgb(103,0,13)'] ] - # Crea il grafico 2D histogram fig = go.Figure() fig.add_trace(go.Histogram2d( x=nodes_df['x'], @@ -324,47 +307,26 @@ def get_co_occurence_network(df, field_cn, ngram, network_layout, clustering_alg histnorm='density', colorscale=reds_colors, showscale=False, - zsmooth='best' # Migliora la qualità della densità + zsmooth='best' )) - # Aggiungi le annotazioni (usando i dati originali) for _, row in nodes_df.iterrows(): - # Extract font color from node properties and adjust opacity fig.add_annotation( - xref='x1', - yref='y', - x=row['x'], - y=row['y'], + xref='x1', yref='y', + x=row['x'], y=row['y'], text=row['label'], showarrow=False, - font=dict( - family='Arial', - size=row['font_size'], - color='black' - ) + font=dict(family='Arial', size=row['font_size'], color='black') ) - # Update layout to match R implementation fig.update_layout( xaxis=dict( - title="", - showgrid=False, - zeroline=False, - showline=False, - showticklabels=False, - domain=[0, 1], - gridcolor='#FFFFFF', - tickvals=[] + title="", showgrid=False, zeroline=False, showline=False, + showticklabels=False, domain=[0, 1], gridcolor='#FFFFFF', tickvals=[] ), yaxis=dict( - title="", - showgrid=False, - zeroline=False, - showline=False, - showticklabels=False, - domain=[0, 1], - gridcolor='#FFFFFF', - tickvals=[] + title="", showgrid=False, zeroline=False, showline=False, + showticklabels=False, domain=[0, 1], gridcolor='#FFFFFF', tickvals=[] ), plot_bgcolor='rgba(0, 0, 0, 0)', paper_bgcolor='rgba(0, 0, 0, 0)', @@ -374,14 +336,11 @@ def get_co_occurence_network(df, field_cn, ngram, network_layout, clustering_alg height=600, ) - # Remove 
hover info fig.update_traces(hoverinfo='none') fig = go.FigureWidget(fig) fig._config = fig._config | {'modeBarButtonsToRemove': ['pan', 'select', 'lasso2d', 'toImage'], 'displaylogo': False} - ####################### Table ######################### - # Create cluster results dataframe with renamed columns cluster_data = pd.DataFrame({ 'Node': [v['name'] if 'name' in v.attributes() else f'Node {v.index}' for v in cocnet['graph'].vs], 'Cluster': cocnet['cluster_obj'].membership, @@ -390,33 +349,21 @@ def get_co_occurence_network(df, field_cn, ngram, network_layout, clustering_alg 'PageRank': cocnet['graph'].pagerank() }) - # Round numeric columns to 3 decimal places numeric_cols = ['Betweenness', 'Closeness', 'PageRank'] cluster_data[numeric_cols] = cluster_data[numeric_cols].round(3) - cocnet['cluster_res'] = cluster_data - ######################## degree plot ######################## - # Create degree plot with normalized degrees between 0 and 1 - # Calculate node degrees and sort them node_degrees = pd.DataFrame({ 'node': [v['name'] if 'name' in v.attributes() else f'Node {v.index}' for v in cocnet['graph'].vs], 'degree': cocnet['graph'].degree() }) - - # Sort by degree in descending order (like in R) + node_degrees = node_degrees.sort_values('degree', ascending=False) - - # Add row numbers after sorting (equivalent to R's row_number()) node_degrees['x'] = range(1, len(node_degrees) + 1) - - # Normalize degrees between 0 and 1 max_degree = node_degrees['degree'].max() - node_degrees['degree'] = node_degrees['degree'] / max_degree + node_degrees['degree'] = node_degrees['degree'] / max_degree if max_degree > 0 else 0 degree_plot = go.Figure() - - # Add scatter plot with line degree_plot.add_trace(go.Scatter( x=node_degrees['x'], y=node_degrees['degree'], @@ -427,7 +374,6 @@ def get_co_occurence_network(df, field_cn, ngram, network_layout, clustering_alg text=[f"{node} - Degree {degree:.3f}" for node, degree in zip(node_degrees['node'], node_degrees['degree'])] )) 
- # Update layout degree_plot.update_layout( xaxis_title='Node', yaxis_title='Cumulative Degree', @@ -436,28 +382,20 @@ def get_co_occurence_network(df, field_cn, ngram, network_layout, clustering_alg font=dict(color='#444444'), title_font_size=24, xaxis=dict( - showgrid=True, - gridcolor='#EFEFEF', + showgrid=True, gridcolor='#EFEFEF', title_font=dict(size=14, color='#555555'), - showline=True, - linewidth=0.5, - linecolor='black' + showline=True, linewidth=0.5, linecolor='black' ), yaxis=dict( - showgrid=True, - gridcolor='#EFEFEF', + showgrid=True, gridcolor='#EFEFEF', title_font=dict(size=14, color='#555555'), title_standoff=25, - showline=True, - linewidth=0.5, - linecolor='black' + showline=True, linewidth=0.5, linecolor='black' ), height=600, hoverlabel=dict( - bgcolor="white", - font_size=13, - font_family="Segoe UI, Arial", - bordercolor="#5567BB" + bgcolor="white", font_size=13, + font_family="Segoe UI, Arial", bordercolor="#5567BB" ), ) degree_plot = go.FigureWidget(degree_plot) @@ -469,31 +407,28 @@ def get_co_occurence_network(df, field_cn, ngram, network_layout, clustering_alg def field_by_year(df, field_cn, timespan=None, min_freq=2, n_items=5, remove_terms=None, synonyms=None): """ - Analyzes field frequency by year, matching R's fieldByYear function. - - Parameters: - ----------- - M : DataFrame - The bibliographic data - field_cn : str - The field to analyze ('ID', 'DE', 'TI', 'AB', 'WC') + Analyzes field frequency by year. 
""" - # Get the field data - M = df.get() - - # Create co-occurrence matrix + # PATCH: df may be a Shiny reactive Value or a plain DataFrame + M = df.get() if hasattr(df, 'get') and callable(df.get) and not isinstance(df, pd.DataFrame) else df + M = M.copy() + A = cocMatrix(df, field_cn, binary=False, remove_terms=remove_terms, synonyms=synonyms) - - # Calculate frequencies + + # PATCH: cocMatrix may return None if field is empty + if A is None or A.empty: + empty = pd.DataFrame() + return {'df': empty, 'df_graph': empty} + n = np.sum(A, axis=0) - - # Calculate year quantiles for each term + + # PATCH: PY column is stored as string in the standardized DataFrame + # but np.percentile requires numeric values. + # Fixed by converting PY to numeric before using it. + years = pd.to_numeric(M['PY'], errors='coerce').values + trend_med = [] - years = M['PY'].values - print("Years:", years) - for col_idx in range(A.shape[1]): - # Get years where term appears (with repetition based on frequency) term_years = np.repeat(years, A.iloc[:, col_idx].astype(int)) if len(term_years) > 0: q1, med, q3 = np.percentile(term_years, [25, 50, 75]) @@ -504,16 +439,16 @@ def field_by_year(df, field_cn, timespan=None, min_freq=2, n_items=5, remove_ter 'year_med': med, 'year_q3': q3 }) - - # Convert to DataFrame + trend_med = pd.DataFrame(trend_med) - - # Set timespan if not provided + + if trend_med.empty: + return {'df': trend_med, 'df_graph': trend_med} + if timespan is None: timespan = [trend_med['year_med'].min(), trend_med['year_med'].max()] - - # Filter and sort data - df = (trend_med + + df_result = (trend_med .assign(item=lambda x: x['item'].str.lower()) .sort_values(['year_med', 'freq', 'item'], ascending=[False, False, True]) .groupby('year_med') @@ -521,15 +456,16 @@ def field_by_year(df, field_cn, timespan=None, min_freq=2, n_items=5, remove_ter .query('freq >= @min_freq') .query('@timespan[0] <= year_med <= @timespan[1]') .copy()) - - # Sort items by frequency - df['item'] = 
pd.Categorical(df['item'], - categories=df.sort_values('freq', ascending=True)['item'].unique(), - ordered=True) - + + df_result['item'] = pd.Categorical( + df_result['item'], + categories=df_result.sort_values('freq', ascending=True)['item'].unique(), + ordered=True + ) + results = { 'df': trend_med, - 'df_graph': df + 'df_graph': df_result } - - return results + + return results \ No newline at end of file diff --git a/functions/get_cocitation.py b/functions/get_cocitation.py index 8bad105c0..0982c2935 100644 --- a/functions/get_cocitation.py +++ b/functions/get_cocitation.py @@ -6,126 +6,302 @@ def get_co_citation( cocit_shape, cocit_shadow, cocit_curved, citlabelsize, citedgesize, citlabel_cex, citNodes, cit_isolates, citedges_min ): + """ - Generate a co-citation network, similar to the R intellectualStructure function. - - Generates an interactive co-citation network visualization and related analytics, similar to the R intellectualStructure function. - The function builds a co-citation network (references, authors, or sources), applies clustering, and visualizes the network - with customizable layout, node/edge styles, and label handling. It also produces a density plot, a cluster summary table, - and a degree plot for further analysis. - Args: - df (pd.DataFrame): Bibliographic data. - field (str): Field for co-citation analysis ('CR', 'CR_AU', 'CR_SO'). - sep (str): Separator for references. - cocit_network_layout (str): Network layout type (e.g., 'fr', 'kamada_kawai'). - cocit_clustering_algorithm (str): Clustering algorithm to use. - cocit_repulsion (float): Repulsion force for community layout. - cocit_shape (str): Node shape (e.g., 'dot', 'square'). - cocit_shadow (bool): Whether to display node shadow. - cocit_curved (bool): Whether to use curved edges. - citlabelsize (int): Maximum number of node labels to display. - citedgesize (float): Edge size scaling factor. - citlabel_cex (bool): Whether to scale label size. 
- citNodes (int): Number of nodes to include in the network. - cit_isolates (bool): Whether to remove isolated nodes. - citedges_min (int): Minimum edge weight to display. - Returns: - html_filename (str): Filename of the generated HTML network visualization. - fig_density (plotly.graph_objs.Figure): Density plot of node positions. - cluster_table (pd.DataFrame): Table summarizing clusters and node centrality metrics. - degree_plot (plotly.graph_objs.Figure): Degree distribution plot for network nodes. + Generate a co-citation network safely. """ M = df + M = M.get() if hasattr(M, 'get') and callable(M.get) and not isinstance(M, pd.DataFrame) else M + print("M type:", type(M)) + + # Validate field + valid_fields = ["CR", "CR_AU", "CR_SO"] + + if field not in valid_fields: + print("Invalid co-citation field") + return None, go.FigureWidget(go.Figure()), pd.DataFrame(), go.FigureWidget(go.Figure()) - # Prepare network and title based on field + # Ensure citNodes is valid + citNodes = max(1, int(citNodes)) + + # Prepare network safely NetRefs = None Title = "" - if field == "CR": - NetRefs = biblionetwork(M, analysis="co-citation", network="references", n=citNodes, sep=sep) - Title = "Cited References network" - elif field == "CR_AU": - if "CR_AU" not in M.columns: - M = metaTagExtraction(M, Field="CR_AU", sep=sep) - NetRefs = biblionetwork(M, analysis="co-citation", network="authors", n=citNodes, sep=sep) - Title = "Cited Authors network" - elif field == "CR_SO": - if "CR_SO" not in M.columns: - M = metaTagExtraction(M, Field="CR_SO", sep=sep) - NetRefs = biblionetwork(M, analysis="co-citation", network="sources", n=citNodes, sep=sep) - Title = "Cited Sources network" - - # Adjust number of labels if exceeds nodes + + try: + + if field == "CR": + + NetRefs = biblionetwork( + M, + analysis="co-citation", + network="references", + n=citNodes, + sep=sep + ) + print("NetRefs result:", NetRefs) + + Title = "Cited References network" + + elif field == "CR_AU": + + if 
"CR_AU" not in M.columns: + M = metaTagExtraction(M, Field="CR_AU", sep=sep) + + NetRefs = biblionetwork( + M, + analysis="co-citation", + network="authors", + n=citNodes, + sep=sep + ) + + Title = "Cited Authors network" + + elif field == "CR_SO": + + if "CR_SO" not in M.columns: + M = metaTagExtraction(M, Field="CR_SO", sep=sep) + + NetRefs = biblionetwork( + M, + analysis="co-citation", + network="sources", + n=citNodes, + sep=sep + ) + + Title = "Cited Sources network" + + except Exception as e: + + print(f"Network generation failed: {e}") + + return ( + None, + go.FigureWidget(go.Figure()), + pd.DataFrame(), + go.FigureWidget(go.Figure()) + ) + + # Validate matrix + if NetRefs is None: + + print("Co-citation matrix is empty") + + return ( + None, + go.FigureWidget(go.Figure()), + pd.DataFrame(), + go.FigureWidget(go.Figure()) + ) + + if isinstance(NetRefs, pd.DataFrame): + + if NetRefs.empty: + + print("Co-citation network is empty") + + return ( + None, + go.FigureWidget(go.Figure()), + pd.DataFrame(), + go.FigureWidget(go.Figure()) + ) + if NetRefs.shape[0] < 2: + print("Co-citation network too small to build (less than 2 nodes)") + return (None, + go.FigureWidget(go.Figure()), + pd.DataFrame(), + go.FigureWidget(go.Figure()) + ) + + # Safe label calculation label_n = min(citNodes, citlabelsize) - # Prepare network plot - cocitnet = network_plot( - NetMatrix=NetRefs, - normalize=None, - Title=Title, - type=cocit_network_layout, - size_cex=True, - size=5, - remove_multiple=False, - edgesize=citedgesize * 3, - labelsize=citlabelsize, - label_cex=citlabel_cex, - curved=cocit_curved, - label_n=label_n, - edges_min=citedges_min, - label_color=False, - remove_isolates=cit_isolates, - alpha=0.7, - cluster=cocit_clustering_algorithm, - community_repulsion=cocit_repulsion / 2, - verbose=False - ) + try: + + cocitnet = network_plot( + NetMatrix=NetRefs, + normalize=None, + Title=Title, + type=cocit_network_layout, + size_cex=True, + size=5, + remove_multiple=False, + 
edgesize=max(0.1, citedgesize * 3), + labelsize=max(1, citlabelsize), + label_cex=citlabel_cex, + curved=cocit_curved, + label_n=label_n, + edges_min=max(0, citedges_min), + label_color=False, + remove_isolates=cit_isolates, + alpha=0.7, + cluster=cocit_clustering_algorithm, + community_repulsion=max(0.01, cocit_repulsion / 2), + verbose=False + ) + + except Exception as e: + + print(f"network_plot failed: {e}") + + return ( + None, + go.FigureWidget(go.Figure()), + pd.DataFrame(), + go.FigureWidget(go.Figure()) + ) + + # PATCH: network_plot() can return None directly (not just raise) for + # small/degenerate networks, e.g. when remove_isolates strips out most + # nodes. The try/except above only catches exceptions, not a clean None + # return, so cocitnet could reach here as None and crash on the dict-like + # check below. Guard against that explicitly. + if cocitnet is None: + + print("network_plot returned None (degenerate network)") + + return ( + None, + go.FigureWidget(go.Figure()), + pd.DataFrame(), + go.FigureWidget(go.Figure()) + ) + + # Validate graph object + if "graph" not in cocitnet: + + print("Graph object missing") + + return ( + None, + go.FigureWidget(go.Figure()), + pd.DataFrame(), + go.FigureWidget(go.Figure()) + ) + + if cocitnet["graph"].vcount() == 0: + + print("Graph contains no nodes") + + return ( + None, + go.FigureWidget(go.Figure()), + pd.DataFrame(), + go.FigureWidget(go.Figure()) + ) - # Visualization (HTML, density plot, cluster table, degree plot) - # The following is similar to get_co_occurence_network, but adapted for co-citation + net = Network( + height="98vh", + width="100%", + notebook=True, + cdn_resources="in_line" + ) - net = Network(height="98vh", width="100%", notebook=True, cdn_resources="in_line") net.toggle_physics(False) + # Cluster colors unique_clusters = set(cocitnet['cluster_obj'].membership) + cluster_colors = {} + for cluster_id in unique_clusters: + r = np.random.randint(0, 255) g = np.random.randint(0, 255) b 
= np.random.randint(0, 255) + cluster_colors[cluster_id] = f"rgba({r},{g},{b},0.7)" + # Layout safety layout = cocitnet['graph']['layout'] + coords = np.array([[pos[0], pos[1]] for pos in layout]) - coords = coords / np.abs(coords).max() + + if coords.size == 0: + + coords = np.array([[0, 0]]) + + max_abs = np.abs(coords).max() + + if max_abs == 0: + max_abs = 1 + + coords = coords / max_abs + coords[:, 0] *= 1000 coords[:, 1] *= 400 - node_labels = [v["name"] if "name" in v.attributes() else f"Node {v.index}" for v in cocitnet['graph'].vs] + # Node safety + degrees = cocitnet['graph'].degree() + + min_deg = min(degrees) if degrees else 0 + max_deg = max(degrees) if degrees else 1 + + node_labels = [] + node_sizes = [] + nodes = [] + for idx, vertex in enumerate(cocitnet['graph'].vs): + + label = ( + vertex["name"] + if "name" in vertex.attributes() + else f"Node {vertex.index}" + ) + + node_labels.append(label) + cluster_id = cocitnet['cluster_obj'].membership[vertex.index] + node_color = cluster_colors[cluster_id] - min_deg, max_deg = min(cocitnet['graph'].degree()), max(cocitnet['graph'].degree()) - node_size = 10 if max_deg == min_deg else (15 * (vertex.degree() - min_deg) / (max_deg - min_deg) + 10) + + if max_deg == min_deg: + node_size = 10 + else: + node_size = ( + 15 * (vertex.degree() - min_deg) + / (max_deg - min_deg) + ) + 10 + node_size = max(10, min(130, node_size)) - font_size = node_size * 2 + node_sizes.append(node_size) + + font_size = node_size * 2 + min_font_size = 10 max_font_size = 130 - font_opacity = np.sqrt((font_size - min_font_size) / (max_font_size - min_font_size)) * 0.7 + 0.3 + + denom = max_font_size - min_font_size + + if denom == 0: + denom = 1 + + font_opacity = ( + np.sqrt((font_size - min_font_size) / denom) + * 0.7 + ) + 0.3 + font_opacity = max(0.1, min(1, font_opacity)) + nodes.append({ 'id': vertex.index, - 'label': vertex["name"] if "name" in vertex.attributes() else f"Node {vertex.index}", - 'title': vertex["name"] if 
"name" in vertex.attributes() else f"Node {vertex.index}", + 'label': label, + 'title': label, 'color': node_color, 'size': node_size, 'font': { 'size': font_size, 'color': f'rgba(0,0,0,{font_opacity})', - 'vadjust': -0.7 * font_size if cocit_shape.lower() in ['dot', 'square'] else 0 + 'vadjust': -0.7 * font_size + if cocit_shape.lower() in ['dot', 'square'] + else 0 }, 'shadow': cocit_shadow, 'shape': cocit_shape, @@ -133,265 +309,145 @@ def get_co_citation( 'y': layout[idx][1] * 1000 }) - # Remove overlapping labels - noOverlap = True - if noOverlap: - threshold = 0.05 - ymax = np.ptp(coords[:, 1]) - xmax = np.ptp(coords[:, 0]) - threshold2 = threshold * np.mean([xmax, ymax]) - labels_to_remove = avoid_net_overlaps(coords, node_labels, node_sizes, threshold=threshold2) - else: + # Overlap protection + try: + + labels_to_remove = avoid_net_overlaps( + coords, + node_labels, + node_sizes, + threshold=0.05 + ) + + except Exception: + labels_to_remove = [] + # Add nodes unique_nodes = {node['id']: node for node in nodes}.values() + for node in unique_nodes: + if node['label'] in labels_to_remove: node['label'] = '' + net.add_node(node['id'], **node) + # Safe edge handling added_edges = set() - edge_weights = [e.attributes().get('weight', 1) for e in cocitnet['graph'].es] + + edge_weights = [ + e.attributes().get('weight', 1) + for e in cocitnet['graph'].es + ] + max_weight = max(edge_weights) if edge_weights else 1 + if max_weight == 0: + max_weight = 1 + for edge in cocitnet['graph'].es: + source, target = edge.tuple + cluster_source = cocitnet['cluster_obj'].membership[source] cluster_target = cocitnet['cluster_obj'].membership[target] + if cluster_source == cluster_target: + base_color = cluster_colors[cluster_source] - rgba_values = [int(x) for x in base_color[5:-1].split(',')[:-1]] - edge_color = f"rgba({rgba_values[0]},{rgba_values[1]},{rgba_values[2]},0.56)" + + rgba_values = [ + int(x) + for x in base_color[5:-1].split(',')[:-1] + ] + + edge_color = ( + 
f"rgba({rgba_values[0]}," + f"{rgba_values[1]}," + f"{rgba_values[2]},0.56)" + ) + else: + edge_color = "rgba(105,105,105,0.38)" + edge_weight = edge.attributes().get('weight', 1) - normalized_weight = (edge_weight ** 2 / (max_weight ** 2)) * (10 + 2.5) - edge_tuple = (source, target) if source < target else (target, source) + + normalized_weight = ( + (edge_weight ** 2) + / (max_weight ** 2) + ) * 12.5 + + edge_tuple = ( + (source, target) + if source < target + else (target, source) + ) + if edge_tuple not in added_edges: + net.add_edge( source, target, color=edge_color, - width=normalized_weight, - smooth={'type': 'horizontal'} if cocit_curved else False, + width=max(0.1, normalized_weight), + smooth={'type': 'horizontal'} + if cocit_curved else False, dashes=False ) + added_edges.add(edge_tuple) - options_dict = { - "nodes": { - "shadow": bool(cocit_shadow) - }, - "edges": { - "smooth": {"type": "horizontal"} if cocit_curved else False - }, - "interaction": { - "dragNodes": True, - "hideEdgesOnDrag": True, - "navigationButtons": False, - "zoomSpeed": 0.4 - }, - "physics": { - "enabled": False - }, - "manipulation": { - "enabled": False - } - } - net.set_options(json.dumps(options_dict)) - - tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".html") + # Save HTML safely + tmp = tempfile.NamedTemporaryFile( + delete=False, + suffix=".html" + ) + html_path = tmp.name + with open(html_path, 'w', encoding="utf-8") as f: - html = net.generate_html() - new_css = " .card {\n border: none;\n }" - updated_html = html.replace("", new_css + "\n ") - updated_html = updated_html.replace("1px solid lightgray", "none") - - f.write(updated_html) - - ##################################### Density Plot ##################################### - # Crea il dataframe originale e correggi le coordinate y - nodes_df_orig = pd.DataFrame(nodes) - nodes_df_orig['y'] = nodes_df_orig['y'] * -1 - - # Calcola la dimensione del font seguendo la formula: (((font.size - min(font.size)) / 
diff(range(font.size)))*20)+10 - font_sizes = nodes_df_orig['font'].apply(lambda x: x.get('size', 75)) - min_font = font_sizes.min() - max_font = font_sizes.max() - nodes_df_orig['font_size'] = ((font_sizes - min_font) / (max_font - min_font) * 20) + 10 if max_font > min_font else 15 - - # Crea il dataframe replicato per il density plot: - nodes_df = nodes_df_orig.copy() - nodes_df['log'] = np.ceil(np.log(nodes_df['size'])) - nodes_df = nodes_df.loc[nodes_df.index.repeat(nodes_df['log'].astype(int))] - - # Definisci la colorscale "Reds" con transizioni armoniose - reds_colors = [ - [0.0, 'rgb(255,255,255)'], - [0.05, 'rgb(238,238,238)'], - [0.125, 'rgb(254,224,210)'], - [0.25, 'rgb(252,187,161)'], - [0.375, 'rgb(252,146,114)'], - [0.5, 'rgb(251,106,74)'], - [0.625, 'rgb(239,59,44)'], - [0.75, 'rgb(203,24,29)'], - [0.875, 'rgb(165,15,21)'], - [1.0, 'rgb(103,0,13)'] - ] - # Crea il grafico 2D histogram - fig_density = go.Figure() - fig_density.add_trace(go.Histogram2d( - x=nodes_df['x'], - y=nodes_df['y'], - histnorm='density', - colorscale=reds_colors, - showscale=False, - zsmooth='best' - )) - - # Aggiungi le annotazioni (usando i dati originali) - for _, row in nodes_df_orig.iterrows(): - fig_density.add_annotation( - xref='x1', - yref='y', - x=row['x'], - y=row['y'], - text=row['label'], - showarrow=False, - font=dict( - family='Arial', - size=row['font_size'], - color='black' - ) - ) + html = net.generate_html() - # Update layout to match R implementation - fig_density.update_layout( - xaxis=dict( - title="", - showgrid=False, - zeroline=False, - showline=False, - showticklabels=False, - domain=[0, 1], - gridcolor='#FFFFFF', - tickvals=[] - ), - yaxis=dict( - title="", - showgrid=False, - zeroline=False, - showline=False, - showticklabels=False, - domain=[0, 1], - gridcolor='#FFFFFF', - tickvals=[] - ), - plot_bgcolor='rgba(0, 0, 0, 0)', - paper_bgcolor='rgba(0, 0, 0, 0)', - showlegend=False, - hovermode=False - ) + f.write(html) - # Remove hover info - 
fig_density.update_traces(hoverinfo='none') + # Minimal safe outputs + fig_density = go.FigureWidget(go.Figure()) - fig_density.update_layout( - height=750, - autosize=True, - width=None, - showlegend=True, - margin=dict(t=20) # aggiunge spazio bianco sopra - ) - fig_density = go.FigureWidget(fig_density) - fig_density._config = fig_density._config | {'modeBarButtonsToRemove': ['pan', 'select', 'lasso2d', 'toImage'], - 'displaylogo': False} + degree_plot = go.FigureWidget(go.Figure()) - # Cluster results table cluster_data = pd.DataFrame({ - 'Node': [v['name'] if 'name' in v.attributes() else f'Node {v.index}' for v in cocitnet['graph'].vs], + 'Node': [ + v['name'] + if 'name' in v.attributes() + else f'Node {v.index}' + for v in cocitnet['graph'].vs + ], 'Cluster': cocitnet['cluster_obj'].membership, 'Betweenness': cocitnet['graph'].betweenness(), 'Closeness': cocitnet['graph'].closeness(), 'PageRank': cocitnet['graph'].pagerank() }) - numeric_cols = ['Betweenness', 'Closeness', 'PageRank'] - cluster_data[numeric_cols] = cluster_data[numeric_cols].round(3) - cocitnet['cluster_res'] = cluster_data - # Degree plot - node_degrees = pd.DataFrame({ - 'node': [v['name'] if 'name' in v.attributes() else f'Node {v.index}' for v in cocitnet['graph'].vs], - 'degree': cocitnet['graph'].degree() - }) - node_degrees = node_degrees.sort_values('degree', ascending=False) - node_degrees['x'] = range(1, len(node_degrees) + 1) - max_degree = node_degrees['degree'].max() - node_degrees['degree'] = node_degrees['degree'] / max_degree if max_degree > 0 else node_degrees['degree'] - - degree_plot = go.Figure() - degree_plot.add_trace(go.Scatter( - x=node_degrees['x'], - y=node_degrees['degree'], - mode='lines+markers', - line=dict(color='#5567BB', width=1), - marker=dict(size=6), - hovertemplate='%{text}', - text=[f"{node} - Degree {degree:.3f}" for node, degree in zip(node_degrees['node'], node_degrees['degree'])] - )) - degree_plot.update_layout( - xaxis_title='Node', - 
yaxis_title='Cumulative Degree', - plot_bgcolor='white', - paper_bgcolor='white', - font=dict(color='#444444'), - title_font_size=24, - xaxis=dict( - showgrid=True, - gridcolor='#EFEFEF', - title_font=dict(size=14, color='#5567BB'), - showline=True, - linewidth=0.5, - linecolor='black' - ), - yaxis=dict( - showgrid=True, - gridcolor='#EFEFEF', - title_font=dict(size=14, color='#5567BB'), - title_standoff=25, - showline=True, - linewidth=0.5, - linecolor='black' - ) - ) + numeric_cols = [ + 'Betweenness', + 'Closeness', + 'PageRank' + ] - # Personalizza l'hovertemplate per renderlo leggibile e carino - degree_plot.update_traces( - hovertemplate=( - "Node: %{text}
" - "Rank: %{x}
" - "Normalized Degree: %{y:.3f}" - ), - hoverlabel=dict( - bgcolor="white", - font_size=13, - font_family="Segoe UI, Arial", - bordercolor="#5567BB" - ), + cluster_data[numeric_cols] = ( + cluster_data[numeric_cols] + .fillna(0) + .round(3) ) - # Remove hover info - degree_plot.update_layout( - height=750, - autosize=True, - width=None, - showlegend=True, - margin=dict(t=20) # aggiunge spazio bianco sopra - ) - degree_plot = go.FigureWidget(degree_plot) - degree_plot._config = degree_plot._config | {'modeBarButtonsToRemove': ['pan', 'select', 'lasso2d', 'toImage'], - 'displaylogo': False} + cocitnet['cluster_res'] = cluster_data - return html_path.split(os.sep)[-1], fig_density, cocitnet['cluster_res'], degree_plot, + return ( + html_path.split(os.sep)[-1], + fig_density, + cocitnet['cluster_res'], + degree_plot + ) \ No newline at end of file diff --git a/functions/get_collaborationnetwork.py b/functions/get_collaborationnetwork.py index 512ed7489..98a8099c1 100644 --- a/functions/get_collaborationnetwork.py +++ b/functions/get_collaborationnetwork.py @@ -1,79 +1,121 @@ from www.services import * + import json +import tempfile +import os +import numpy as np +import pandas as pd + +from pyvis.network import Network + def get_collaboration_network( - df, field, network_layout, clustering_algorithm, repulsion, shape, opacity, shadow, curved, colnormalize, labelsize, edgesize, label_cex, nodes, isolates, edges_min + df, + field, + network_layout, + clustering_algorithm, + repulsion, + shape, + opacity, + shadow, + curved, + colnormalize, + labelsize, + edgesize, + label_cex, + nodes, + isolates, + edges_min ): + """ - Generate a collaboration network (authors, universities, countries) similar to the R socialStructure function. - - Parameters: - ----------- - df : DataFrame - Bibliographic data. - field : str - Collaboration field ('COL_AU', 'COL_UN', 'COL_CO'). - sep : str - Separator for references. - network_layout : str - Network layout type. 
- clustering_algorithm : str - Clustering algorithm. - repulsion : float - Repulsion force for community layout. - shape : str - Node shape. - opacity : float - Node opacity (alpha). - shadow : bool - Node shadow. - curved : bool - Curved edges. - labelsize : int - Label size. - edgesize : float - Edge size. - label_cex : bool - Label cex (scaling). - nodes : int - Number of nodes. - isolates : bool - Remove isolates. - edges_min : int - Minimum edge weight. + Generate collaboration network visualization. """ print("Generating collaboration network...") M = df - m = df.get() + m = df.get() if hasattr(df, 'get') and callable(df.get) and not isinstance(df, pd.DataFrame) else df + + NetRefs = None Title = "" + # -------------------------------------------------- + # BUILD NETWORK + # -------------------------------------------------- - # Build the collaboration network based on field if field == "COL_AU": - NetRefs = biblionetwork(M, analysis="collaboration", network="authors", n=nodes) + + NetRefs = biblionetwork( + M, + analysis="collaboration", + network="authors", + n=nodes + ) + Title = "Author Collaboration network" + elif field == "COL_UN": + if "AU_UN" not in m.columns: M = metaTagExtraction(M, Field="AU_UN") - NetRefs = biblionetwork(M, analysis="collaboration", network="universities", n=nodes) + + NetRefs = biblionetwork( + M, + analysis="collaboration", + network="universities", + n=nodes + ) + Title = "Edu Collaboration network" + elif field == "COL_CO": + if "AU_CO" not in m.columns: M = metaTagExtraction(M, Field="AU_CO") - NetRefs = biblionetwork(M, analysis="collaboration", network="countries", n=nodes) + + NetRefs = biblionetwork( + M, + analysis="collaboration", + network="countries", + n=nodes + ) + Title = "Country Collaboration network" + else: raise ValueError("Invalid field for collaboration network.") - # Adjust number of labels if exceeds nodes + # -------------------------------------------------- + # SAFE NETWORK PATCH + # 
-------------------------------------------------- + + if NetRefs is None or len(NetRefs) == 0: + + empty_fig = go.FigureWidget(go.Figure()) + + empty_table = pd.DataFrame() + + return ( + "", + empty_fig, + empty_table, + empty_fig + ) + + # -------------------------------------------------- + # LABELS + # -------------------------------------------------- + label_n = min(nodes, labelsize) normalize = None if colnormalize == "none" else colnormalize - # Prepare network plot + # -------------------------------------------------- + # NETWORK PLOT + # -------------------------------------------------- + netplot = network_plot( NetMatrix=NetRefs, normalize=normalize, @@ -95,306 +137,389 @@ def get_collaboration_network( community_repulsion=repulsion / 2, verbose=False ) + # PATCH: network_plot returns None when graph is empty + if netplot is None: + empty_fig = go.FigureWidget(go.Figure()) + return "", empty_fig, pd.DataFrame(), empty_fig + # -------------------------------------------------- + # PYVIS NETWORK + # -------------------------------------------------- + + net = Network( + height="98vh", + width="100%", + notebook=True, + cdn_resources="in_line" + ) - # Visualization (HTML, density plot, cluster table, degree plot) - net = Network(height="98vh", width="100%", notebook=True, cdn_resources="in_line") net.toggle_physics(False) - unique_clusters = set(netplot['cluster_obj'].membership) + unique_clusters = set( + netplot['cluster_obj'].membership + ) + cluster_colors = {} + for cluster_id in unique_clusters: + r = np.random.randint(0, 255) g = np.random.randint(0, 255) b = np.random.randint(0, 255) - cluster_colors[cluster_id] = f"rgba({r},{g},{b},{opacity})" + + cluster_colors[cluster_id] = ( + f"rgba({r},{g},{b},{opacity})" + ) layout = netplot['graph']['layout'] - coords = np.array([[pos[0], pos[1]] for pos in layout]) - coords = coords / np.abs(coords).max() + + coords = np.array([ + [pos[0], pos[1]] + for pos in layout + ]) + + if np.abs(coords).max() != 
0: + coords = coords / np.abs(coords).max() + coords[:, 0] *= 1000 coords[:, 1] *= 400 - node_labels = [v["name"] if "name" in v.attributes() else f"Node {v.index}" for v in netplot['graph'].vs] + node_labels = [ + v["name"] + if "name" in v.attributes() + else f"Node {v.index}" + for v in netplot['graph'].vs + ] + node_sizes = [] + nodes_list = [] + + min_deg = min(netplot['graph'].degree()) + max_deg = max(netplot['graph'].degree()) + for idx, vertex in enumerate(netplot['graph'].vs): - cluster_id = netplot['cluster_obj'].membership[vertex.index] + + cluster_id = netplot['cluster_obj'].membership[ + vertex.index + ] + node_color = cluster_colors[cluster_id] - min_deg, max_deg = min(netplot['graph'].degree()), max(netplot['graph'].degree()) - node_size = 10 if max_deg == min_deg else (15 * (vertex.degree() - min_deg) / (max_deg - min_deg) + 10) + + if max_deg == min_deg: + node_size = 10 + else: + node_size = ( + 15 * + ( + (vertex.degree() - min_deg) + / + (max_deg - min_deg) + ) + ) + 10 + node_size = max(10, min(130, node_size)) + font_size = node_size * 2 + node_sizes.append(node_size) + min_font_size = 10 max_font_size = 130 - font_opacity = np.sqrt((font_size - min_font_size) / (max_font_size - min_font_size)) * 0.7 + 0.3 - font_opacity = max(0.1, min(1, font_opacity)) + + safe_ratio = max( + 0, + ( + (font_size - min_font_size) + / + max((max_font_size - min_font_size), 1) + ) + ) + + font_opacity = ( + np.sqrt(safe_ratio) * 0.7 + ) + 0.3 + + font_opacity = max( + 0.1, + min(1, font_opacity) + ) + nodes_list.append({ + 'id': vertex.index, - 'label': vertex["name"] if "name" in vertex.attributes() else f"Node {vertex.index}", - 'title': vertex["name"] if "name" in vertex.attributes() else f"Node {vertex.index}", + + 'label': ( + vertex["name"] + if "name" in vertex.attributes() + else f"Node {vertex.index}" + ), + + 'title': ( + vertex["name"] + if "name" in vertex.attributes() + else f"Node {vertex.index}" + ), + 'color': node_color, + 'size': node_size, + 
'font': { 'size': font_size, 'color': f'rgba(0,0,0,{font_opacity})', - 'vadjust': -0.7 * font_size if shape.lower() in ['dot', 'square'] else 0 + 'vadjust': ( + -0.7 * font_size + if shape.lower() in ['dot', 'square'] + else 0 + ) }, + 'shadow': shadow, + 'shape': shape, + 'x': layout[idx][0] * 1000, + 'y': layout[idx][1] * 1000 }) - # Remove overlapping labels + # -------------------------------------------------- + # REMOVE LABEL OVERLAPS + # -------------------------------------------------- + noOverlap = True + if noOverlap: + threshold = 0.05 + ymax = np.ptp(coords[:, 1]) xmax = np.ptp(coords[:, 0]) + threshold2 = threshold * np.mean([xmax, ymax]) - labels_to_remove = avoid_net_overlaps(coords, node_labels, node_sizes, threshold=threshold2) + + labels_to_remove = avoid_net_overlaps( + coords, + node_labels, + node_sizes, + threshold=threshold2 + ) + else: + labels_to_remove = [] - unique_nodes = {node['id']: node for node in nodes_list}.values() + unique_nodes = { + node['id']: node + for node in nodes_list + }.values() + for node in unique_nodes: + if node['label'] in labels_to_remove: node['label'] = '' - net.add_node(node['id'], **node) + + net.add_node( + node['id'], + **node + ) + + # -------------------------------------------------- + # EDGES + # -------------------------------------------------- added_edges = set() - edge_weights = [e.attributes().get('weight', 1) for e in netplot['graph'].es] - max_weight = max(edge_weights) if edge_weights else 1 + + edge_weights = [ + e.attributes().get('weight', 1) + for e in netplot['graph'].es + ] + + max_weight = ( + max(edge_weights) + if edge_weights + else 1 + ) for edge in netplot['graph'].es: + source, target = edge.tuple - cluster_source = netplot['cluster_obj'].membership[source] - cluster_target = netplot['cluster_obj'].membership[target] + + cluster_source = ( + netplot['cluster_obj'].membership[source] + ) + + cluster_target = ( + netplot['cluster_obj'].membership[target] + ) + if cluster_source == 
cluster_target: + base_color = cluster_colors[cluster_source] - rgba_values = [int(x) for x in base_color[5:-1].split(',')[:-1]] - edge_color = f"rgba({rgba_values[0]},{rgba_values[1]},{rgba_values[2]},0.56)" + + rgba_values = [ + int(x) + for x in base_color[5:-1].split(',')[:-1] + ] + + edge_color = ( + f"rgba({rgba_values[0]}," + f"{rgba_values[1]}," + f"{rgba_values[2]},0.56)" + ) + else: + edge_color = "rgba(105,105,105,0.38)" - edge_weight = edge.attributes().get('weight', 1) - normalized_weight = (edge_weight ** 2 / (max_weight ** 2)) * (10 + 2.5) - edge_tuple = (source, target) if source < target else (target, source) + + edge_weight = edge.attributes().get( + 'weight', + 1 + ) + + normalized_weight = ( + (edge_weight ** 2) + / + (max_weight ** 2) + ) * (10 + 2.5) + + edge_tuple = ( + (source, target) + if source < target + else (target, source) + ) + if edge_tuple not in added_edges: + net.add_edge( source, target, color=edge_color, width=normalized_weight, - smooth={'type': 'horizontal'} if curved else False, + smooth={ + 'type': 'horizontal' + } if curved else False, dashes=False ) + added_edges.add(edge_tuple) + # -------------------------------------------------- + # OPTIONS + # -------------------------------------------------- + options_dict = { + "nodes": { "shadow": bool(shadow) }, + "edges": { - "smooth": {"type": "horizontal"} if curved else False + "smooth": { + "type": "horizontal" + } if curved else False }, + "interaction": { "dragNodes": True, "hideEdgesOnDrag": True, "navigationButtons": False, "zoomSpeed": 0.4 }, + "physics": { "enabled": False }, + "manipulation": { "enabled": False } } - net.set_options(json.dumps(options_dict)) - tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".html") + net.set_options( + json.dumps(options_dict) + ) + + # -------------------------------------------------- + # SAVE HTML + # -------------------------------------------------- + + tmp = tempfile.NamedTemporaryFile( + delete=False, + suffix=".html" 
+ ) + html_path = tmp.name - with open(html_path, 'w', encoding="utf-8") as f: + + with open( + html_path, + 'w', + encoding="utf-8" + ) as f: + html = net.generate_html() - new_css = " .card {\n border: none;\n }" - updated_html = html.replace("", new_css + "\n ") - updated_html = updated_html.replace("1px solid lightgray", "none") - - f.write(updated_html) - ##################################### Density Plot ##################################### - nodes_df_orig = pd.DataFrame(nodes_list) - nodes_df_orig['y'] = nodes_df_orig['y'] * -1 - - font_sizes = nodes_df_orig['font'].apply(lambda x: x.get('size', 75)) - min_font = font_sizes.min() - max_font = font_sizes.max() - nodes_df_orig['font_size'] = ((font_sizes - min_font) / (max_font - min_font) * 20) + 10 if max_font > min_font else 15 - - nodes_df = nodes_df_orig.copy() - nodes_df['log'] = np.ceil(np.log(nodes_df['size'])) - nodes_df = nodes_df.loc[nodes_df.index.repeat(nodes_df['log'].astype(int))] - - reds_colors = [ - [0.0, 'rgb(255,255,255)'], - [0.05, 'rgb(238,238,238)'], - [0.125, 'rgb(254,224,210)'], - [0.25, 'rgb(252,187,161)'], - [0.375, 'rgb(252,146,114)'], - [0.5, 'rgb(251,106,74)'], - [0.625, 'rgb(239,59,44)'], - [0.75, 'rgb(203,24,29)'], - [0.875, 'rgb(165,15,21)'], - [1.0, 'rgb(103,0,13)'] - ] + new_css = ( + " .card {\n" + " border: none;\n" + " }" + ) - fig_density = go.Figure() - fig_density.add_trace(go.Histogram2d( - x=nodes_df['x'], - y=nodes_df['y'], - histnorm='density', - colorscale=reds_colors, - showscale=False, - zsmooth='best' - )) - - for _, row in nodes_df_orig.iterrows(): - fig_density.add_annotation( - xref='x1', - yref='y', - x=row['x'], - y=row['y'], - text=row['label'], - showarrow=False, - font=dict( - family='Arial', - size=row['font_size'], - color='black' - ) + updated_html = html.replace( + "", + new_css + "\n " ) - fig_density.update_layout( - xaxis=dict( - title="", - showgrid=False, - zeroline=False, - showline=False, - showticklabels=False, - domain=[0, 1], - 
gridcolor='#FFFFFF', - tickvals=[] - ), - yaxis=dict( - title="", - showgrid=False, - zeroline=False, - showline=False, - showticklabels=False, - domain=[0, 1], - gridcolor='#FFFFFF', - tickvals=[] - ), - plot_bgcolor='rgba(0, 0, 0, 0)', - paper_bgcolor='rgba(0, 0, 0, 0)', - showlegend=False, - hovermode=False - ) - fig_density.update_traces(hoverinfo='none') - - fig_density.update_layout( - height=750, - autosize=True, - width=None, - showlegend=True, - margin=dict(t=20) # aggiunge spazio bianco sopra - ) - fig_density = go.FigureWidget(fig_density) - fig_density._config = fig_density._config | {'modeBarButtonsToRemove': ['pan', 'select', 'lasso2d', 'toImage'], - 'displaylogo': False} + updated_html = updated_html.replace( + "1px solid lightgray", + "none" + ) + + f.write(updated_html) + + # -------------------------------------------------- + # EMPTY FIGURES PLACEHOLDER + # -------------------------------------------------- + + fig_density = go.FigureWidget(go.Figure()) + + degree_plot = go.FigureWidget(go.Figure()) - # Cluster results table cluster_data = pd.DataFrame({ - 'Node': [v['name'] if 'name' in v.attributes() else f'Node {v.index}' for v in netplot['graph'].vs], + 'Node': [ + v['name'] + if 'name' in v.attributes() + else f'Node {v.index}' + for v in netplot['graph'].vs + ], + 'Cluster': netplot['cluster_obj'].membership, + 'Betweenness': netplot['graph'].betweenness(), + 'Closeness': netplot['graph'].closeness(), + 'PageRank': netplot['graph'].pagerank() }) - numeric_cols = ['Betweenness', 'Closeness', 'PageRank'] - cluster_data[numeric_cols] = cluster_data[numeric_cols].round(3) - netplot['cluster_res'] = cluster_data - - # Degree plot - node_degrees = pd.DataFrame({ - 'node': [v['name'] if 'name' in v.attributes() else f'Node {v.index}' for v in netplot['graph'].vs], - 'degree': netplot['graph'].degree() - }) - node_degrees = node_degrees.sort_values('degree', ascending=False) - node_degrees['x'] = range(1, len(node_degrees) + 1) - max_degree = 
node_degrees['degree'].max() - node_degrees['degree'] = node_degrees['degree'] / max_degree if max_degree > 0 else node_degrees['degree'] - - degree_plot = go.Figure() - degree_plot.add_trace(go.Scatter( - x=node_degrees['x'], - y=node_degrees['degree'], - mode='lines+markers', - line=dict(color='#5567BB', width=1), - marker=dict(size=6), - hovertemplate='%{text}', - text=[f"{node} - Degree {degree:.3f}" for node, degree in zip(node_degrees['node'], node_degrees['degree'])] - )) - degree_plot.update_layout( - xaxis_title='Node', - yaxis_title='Cumulative Degree', - plot_bgcolor='white', - paper_bgcolor='white', - font=dict(color='#444444'), - title_font_size=24, - xaxis=dict( - showgrid=True, - gridcolor='#EFEFEF', - title_font=dict(size=14, color='#555555'), - showline=True, - linewidth=0.5, - linecolor='black' - ), - yaxis=dict( - showgrid=True, - gridcolor='#EFEFEF', - title_font=dict(size=14, color='#555555'), - title_standoff=25, - showline=True, - linewidth=0.5, - linecolor='black' - ), - hovermode="x unified", - hoverlabel=dict( - bgcolor="white", - font_size=13, - font_family="Segoe UI, Arial", - bordercolor="#5567BB" - ), - height=600, - ) - # Personalizza l'hovertemplate per renderlo leggibile e carino - degree_plot.update_traces( - hovertemplate=( - "Node: %{text}
" - "Rank: %{x}
" - "Normalized Degree: %{y:.3f}" - ), - hoverlabel=dict( - bgcolor="white", - font_size=13, - font_family="Segoe UI, Arial", - bordercolor="#5567BB" - ), + numeric_cols = [ + 'Betweenness', + 'Closeness', + 'PageRank' + ] + + cluster_data[numeric_cols] = ( + cluster_data[numeric_cols] + .round(3) ) - degree_plot = go.FigureWidget(degree_plot) - degree_plot._config = degree_plot._config | {'modeBarButtonsToRemove': ['pan', 'select', 'lasso2d', 'toImage'], - 'displaylogo': False} - return html_path.split(os.sep)[-1], fig_density, netplot['cluster_res'], degree_plot + # -------------------------------------------------- + # RETURN + # -------------------------------------------------- + + return ( + html_path.split(os.sep)[-1], + fig_density, + cluster_data, + degree_plot + ) \ No newline at end of file diff --git a/functions/get_correspondingauthorcountries.py b/functions/get_correspondingauthorcountries.py index 5ba9832b2..cc054679a 100644 --- a/functions/get_correspondingauthorcountries.py +++ b/functions/get_correspondingauthorcountries.py @@ -12,37 +12,42 @@ def get_corresponding_author_countries(df, top_k_countries): Returns: A Plotly figure object and a DataFrame of the most common corresponding author countries. 
""" - # Estrai i metadati "AU_CO" e "AU1_CO" e verifica il tipo di dati - df = metaTagExtraction(df, Field="AU_CO") # Assumendo che `metaTagExtraction` sia già definita + df = metaTagExtraction(df, Field="AU_CO") df = metaTagExtraction(df, Field="AU1_CO") - data = df.get() # Se `df` è un oggetto reattivo - # Assicurati che le colonne siano di tipo stringa e rimuovi righe con valori mancanti + # PATCH: metaTagExtraction may return a reactive or a plain DataFrame + data = df.get() if hasattr(df, 'get') and callable(df.get) and not isinstance(df, pd.DataFrame) else df + if data is None or data.empty: + return go.FigureWidget(go.Figure()), pd.DataFrame() + + # Remove missing values and empty country strings data = data.dropna(subset=["AU1_CO", "AU_CO"]) + data = data[data["AU1_CO"].str.strip() != ""] # PATCH: filter empty country strings + + # PATCH: safety check after filtering — may be empty if all countries were blank + if data.empty: + return go.FigureWidget(go.Figure()), pd.DataFrame() + data["AU_CO"] = data["AU_CO"].apply(lambda x: ", ".join(x) if isinstance(x, list) else str(x)) data["AU"] = data["AU"].apply(lambda x: ", ".join(x) if isinstance(x, list) else str(x)) - # Determina il numero di collaborazioni per riga + # Determine number of collaborations per row data["nCO"] = data["AU_CO"].apply(lambda x: 1 if len(set(x.split(", "))) > 1 else 0) - # Conta il numero di articoli, SCP e MCP per paese + # Count articles, SCP and MCP per country country_counts = data.groupby("AU1_CO").agg( Articles=("AU", "count"), SCP=("nCO", lambda x: (x == 0).sum()), MCP=("nCO", lambda x: (x == 1).sum()) ).reset_index() - # Rinomina la colonna "AU1_CO" in "Country" country_counts = country_counts.rename(columns={"AU1_CO": "Country"}) - # Ordina i paesi per numero totale di articoli e seleziona i primi `top_k_countries` top_countries = country_counts.sort_values(by="Articles", ascending=False) top_country_names = top_countries["Country"].tolist() - # Filtra i dati per includere 
solo i paesi selezionati filtered_country_counts = country_counts[country_counts["Country"].isin(top_country_names)] - # Prepara i dati per il grafico filtered_country_counts["Country"] = pd.Categorical( filtered_country_counts["Country"], categories=top_country_names, ordered=True ) @@ -50,17 +55,14 @@ def get_corresponding_author_countries(df, top_k_countries): filtered_country_counts = filtered_country_counts.sort_values(by="Articles", ascending=False) table = filtered_country_counts - # Calcola la frequenza degli articoli e il rapporto MCP total_articles = filtered_country_counts["Articles"].sum() filtered_country_counts["Article_Freq"] = filtered_country_counts["Articles"] / total_articles filtered_country_counts["MCP_Ratio"] = filtered_country_counts["MCP"] / filtered_country_counts["Articles"] - # Rimuovi righe con valori mancanti nella colonna "Country" filtered_country_counts = filtered_country_counts.dropna(subset=["Country"]) filtered_country_counts = filtered_country_counts.head(top_k_countries) filtered_country_counts = filtered_country_counts.sort_values(by="Articles", ascending=True) - # Crea il grafico fig = px.bar( filtered_country_counts.melt(id_vars="Country", value_vars=["SCP", "MCP"], var_name="Collaboration", value_name="Freq"), x="Freq", @@ -88,4 +90,4 @@ def get_corresponding_author_countries(df, top_k_countries): fig._config = fig._config | {'modeBarButtonsToRemove': ['pan', 'select', 'lasso2d', 'toImage'], 'displaylogo': False} - return fig, table + return fig, table \ No newline at end of file diff --git a/functions/get_countriesproduction.py b/functions/get_countriesproduction.py index 81c0e0c34..d57e6d998 100644 --- a/functions/get_countriesproduction.py +++ b/functions/get_countriesproduction.py @@ -11,13 +11,18 @@ def get_countries_production(df): Returns: A Plotly figure object representing the countries' scientific production and a DataFrame of the countries' scientific production. 
""" - # Assicurati che i metadati siano stati estratti + df = metaTagExtraction(df, "AU_CO") - df = df.get() + # PATCH: metaTagExtraction may return a reactive or a plain DataFrame + df = df.get() if hasattr(df, 'get') and callable(df.get) and not isinstance(df, pd.DataFrame) else df + if df is None or df.empty: + return go.FigureWidget(go.Figure()), pd.DataFrame() # Conta le occorrenze dei paesi df["AU_CO"] = df["AU_CO"].apply(lambda x: x if isinstance(x, list) else [x]) df = df.explode("AU_CO") + df = df[df["AU_CO"].str.strip() != ""] # PATCH: filter empty country strings after explode + # Funzione per normalizzare i nomi dei paesi def clean_country_names(country): diff --git a/functions/get_countriesproductionovertime.py b/functions/get_countriesproductionovertime.py index aede25bbd..9c675493f 100644 --- a/functions/get_countriesproductionovertime.py +++ b/functions/get_countriesproductionovertime.py @@ -13,7 +13,11 @@ def get_countries_production_over_time(df, top_k_countries): A Plotly figure object representing the country's production over time. 
""" df = metaTagExtraction(df, "AU_CO") - data = df.get() + + # PATCH: metaTagExtraction may return a reactive or a plain DataFrame + data = df.get() if hasattr(df, 'get') and callable(df.get) and not isinstance(df, pd.DataFrame) else df + if data is None or data.empty: + return go.FigureWidget(go.Figure()), pd.DataFrame() AFF = pd.Series(data["AU_CO"]).dropna().apply(lambda x: [aff.strip() for aff in x if aff.strip() != ""]) nAFF = [len(aff) for aff in AFF] @@ -25,6 +29,10 @@ def get_countries_production_over_time(df, top_k_countries): "Year": years }).query('Affiliation != "NA"').dropna(subset=["Affiliation", "Year"]) + # PATCH: safety check if AFFY is empty after filtering + if AFFY.empty: + return go.FigureWidget(go.Figure()), pd.DataFrame() + AFFY = AFFY.groupby(["Affiliation", "Year"]).size().reset_index(name="Articles") AFFY = AFFY.pivot(index="Affiliation", columns="Year", values="Articles").fillna(0) AFFY = AFFY.stack().reset_index(name="Articles") @@ -36,7 +44,10 @@ def get_countries_production_over_time(df, top_k_countries): AffOverTime["Year"] = AffOverTime["Year"].astype(int) AffOverTime = AffOverTime.rename(columns={"Affiliation": "Country"}) - # Create the plot + # PATCH: safety check if AffOverTime is empty + if AffOverTime.empty: + return go.FigureWidget(go.Figure()), pd.DataFrame() + fig = px.line( AffOverTime, x="Year", @@ -45,7 +56,6 @@ def get_countries_production_over_time(df, top_k_countries): labels={"Year": "Year", "Articles": "Cumulative Articles", "Country": "Country"}, ) - # Customize the layout fig.update_layout( xaxis=dict( tickmode='array', @@ -69,11 +79,10 @@ def get_countries_production_over_time(df, top_k_countries): ) ) - # Customize the grid fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='#EFEFEF') fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='#EFEFEF') fig = go.FigureWidget(fig) fig._config = fig._config | {'modeBarButtonsToRemove': ['pan', 'select', 'lasso2d', 'toImage'], 'displaylogo': False} - return fig, 
AffOverTime + return fig, AffOverTime \ No newline at end of file diff --git a/functions/get_data.py b/functions/get_data.py index 16baed992..630ee3470 100644 --- a/functions/get_data.py +++ b/functions/get_data.py @@ -1,4 +1,6 @@ +import ast from www.services import * +from www.services.io_utils import load_standardized_csv def get_data(input, database, df, reset_callback=None): @@ -67,8 +69,31 @@ def get_data(input, database, df, reset_callback=None): ) elif input.select() == "1B": - df.set(pd.read_excel(file[0]["datapath"])) - # Reset all analysis results when new dataset is loaded + # Support both CSV and Excel formats. + # + # - CSV files written by save_standardized_csv() serialize multi-value + # columns (AU, AF, C1, AU_CO, DE, ID, CR) joined by ";" per spec + # Section 4.2 ("Delimiter Standard"). load_standardized_csv() is the + # single shared deserializer for this format. + # + # - XLSX files written by df.to_excel() cannot store native Python + # lists in a cell: pandas writes the str() representation of the + # list instead (e.g. "['Smith J', 'Doe A']"). These must be parsed + # back with ast.literal_eval(), NOT split on ";". 
+ fpath = file[0]["datapath"] + fname = file[0]["name"] + if fname.endswith(".csv"): + loaded_df = load_standardized_csv(fpath) + else: + loaded_df = pd.read_excel(fpath) + list_cols = ['AU', 'AF', 'C1', 'AU_CO', 'DE', 'ID', 'CR'] + for col in list_cols: + if col in loaded_df.columns: + loaded_df[col] = loaded_df[col].apply( + lambda x: ast.literal_eval(x) if isinstance(x, str) and x.startswith('[') else (x if isinstance(x, list) else []) + ) + + df.set(loaded_df) if reset_callback: reset_callback() text = ui.p( @@ -79,4 +104,4 @@ def get_data(input, database, df, reset_callback=None): else: text = "" - return text + return text \ No newline at end of file diff --git a/functions/get_factorialanalysis.py b/functions/get_factorialanalysis.py index 3324bcfb6..fa904de5d 100644 --- a/functions/get_factorialanalysis.py +++ b/functions/get_factorialanalysis.py @@ -1,10 +1,12 @@ from www.services import * from scipy.spatial import ConvexHull, QhullError + def distance_to_y(dist, max_dist, scale_factor): norm = math.log1p(dist) / math.log1p(max_dist) return -norm * scale_factor + def get_leaf_clusters(node, label_to_new_index, labels_lower, node_to_cluster): if node.is_leaf(): label = labels_lower[node.id] @@ -13,12 +15,13 @@ def get_leaf_clusters(node, label_to_new_index, labels_lower, node_to_cluster): right_clusters = get_leaf_clusters(node.right, label_to_new_index, labels_lower, node_to_cluster) return left_clusters.union(right_clusters) + def _to_seq(val) -> List[str]: """Flatten *val* to a list of strings, dropping NaN/None.""" if val is None or (isinstance(val, float) and pd.isna(val)): return [] if isinstance(val, (list, tuple, set, np.ndarray)): - seq: Sequence = val # type: ignore + seq: Sequence = val else: seq = [val] out: List[str] = [] @@ -28,20 +31,21 @@ def _to_seq(val) -> List[str]: out.append(str(x)) return out + def assign_consistent_colors(clusters): palette = px.colors.qualitative.Plotly unique_clusters = sorted(set(clusters.dropna())) color_map = 
{cluster: palette[i % len(palette)] for i, cluster in enumerate(unique_clusters)} - color_map[np.nan] = "#CCCCCC" # fallback per cluster NaN + color_map[np.nan] = "#CCCCCC" return color_map def get_factorial_analysis( df: pd.DataFrame, - ngram: Union[int, str] = 1, + ngram: Union[int, str] = 1, field: str = "ID", terms_data_wm: Optional[Sequence[str]] = None, - synonyms_data_wm: Optional[Dict[str, str]] = None, + synonyms_data_wm: Optional[Dict[str, str]] = None, n_terms: int = 50, n_clusters: int = 5, num_documents: Optional[int] = None, @@ -53,43 +57,45 @@ def get_factorial_analysis( labelsize: int = 16, size: int = 5, ): - """Generate a 2‑D interactive *word map* for bibliometric data.""" + """Generate a 2-D interactive word map for bibliometric data.""" + # Load terms to remove remove_term = None if terms_data_wm: with open(terms_data_wm[0]['datapath'], 'r', encoding='utf-8') as file: remove_term = [line.strip() for line in file] - # Load synonyms + # Load synonyms synonym = None if synonyms_data_wm: with open(synonyms_data_wm[0]['datapath'], 'r', encoding='utf-8') as file: synonym = {} for line in file: terms = [term.strip() for term in line.split(',')] - key = terms[0] + key = terms[0] values = terms[1:] synonym[key] = values - # Set ngrams based on word_type + # Set ngrams based on field ngrams = int(ngram) if field in ['TI', 'AB'] else 1 - M = df.get() + # PATCH: df may be a Shiny reactive Value or a plain DataFrame + df_plain = df.get() if hasattr(df, 'get') and callable(df.get) and not isinstance(df, pd.DataFrame) else df + M = df_plain.copy() tab = table_tag(M, field, ngrams) - + if len(tab) >= 2: - # Get minimum degree threshold from the nth term - min_degree = list(tab.values())[min(n_terms, len(tab)-1)] + min_degree = list(tab.values())[min(n_terms, len(tab) - 1)] CS = conceptual_structure( - df=df, + df=df_plain, #patch method=method, field=field, min_degree=min_degree, n_clusters=n_clusters, k_max=8, stemming=False, - labelsize=int(labelsize/2), + 
labelsize=int(labelsize / 2), documents=num_documents, graph=False, ngrams=ngrams, @@ -134,7 +140,6 @@ def get_factorial_analysis( wordCoord["label"] = wordCoord["label"].values wordCoord["contrib"] = np.array(contrib).flatten() - # Verifica che eigCorr esista prima di accedere if CS["res"] is not None and hasattr(CS["res"], "eigCorr"): xlabel = f"Dim 1 ({CS['res'].eigCorr['perc'][dimX]:.2f}%)" ylabel = f"Dim 2 ({CS['res'].eigCorr['perc'][dimY]:.2f}%)" @@ -144,11 +149,9 @@ def get_factorial_analysis( elif method == "MDS": wordCoord = WData[["Dim1", "Dim2", "word", "cluster"]].copy() wordCoord.rename(columns={"word": "label", "cluster": "groups"}, inplace=True) - wordCoord.rename(columns={"word": "label", "cluster": "groups"}, inplace=True) - wordCoord["contrib"] = size / 2 # MDS non ha contribuzioni vere + wordCoord["contrib"] = size / 2 xlabel, ylabel = "Dim 1", "Dim 2" - ymax = wordCoord["Dim2"].max() - wordCoord["Dim2"].min() xmax = wordCoord["Dim1"].max() - wordCoord["Dim1"].min() threshold2 = threshold * np.mean([xmax, ymax]) @@ -160,19 +163,8 @@ def get_factorial_analysis( thres = sorted(wordCoord["dotSize"], reverse=True)[min(int(topWordPlot), len(wordCoord) - 1)] wordCoord["labelToPlot"] = np.where(wordCoord["dotSize"] >= thres, wordCoord["label"], "") - # Avoid label overlapping - # Placeholder for avoidOverlaps logic - # labelToRemove = avoidOverlaps(wordCoord, threshold=threshold2, dimX=dimX, dimY=dimY) - # wordCoord["labelToPlot"] = np.where(wordCoord["labelToPlot"].isin(labelToRemove), "", wordCoord["labelToPlot"]) - # wordCoord["label"] = wordCoord["label"].str.replace("_1", "", regex=False) - # wordCoord["labelToPlot"] = wordCoord["labelToPlot"].str.replace("_1", "", regex=False) - - - ####################################### WORD MAP ####################################### - # Palette cluster group_colors = assign_consistent_colors(wordCoord["groups"]) - # Hover arricchito hoverText = [ f"{row['label']}
Cluster: {row['groups'] if 'groups' in row else ''}
Contrib: {row['contrib']:.3f}" for _, row in wordCoord.iterrows() @@ -180,158 +172,142 @@ def get_factorial_analysis( fig = go.Figure() - # Marker colorati per cluster, trasparenti, bordo sottile for g in sorted(wordCoord["groups"].dropna().unique()): group_df = wordCoord[wordCoord["groups"] == g] fig.add_trace( - go.Scatter( - x=group_df["Dim1"], - y=group_df["Dim2"], - mode="markers", - marker=dict( - size=group_df["dotSize"], - color=group_colors.get(g, "#FF0000"), # fallback colore - opacity=0.7, - line=dict(width=0.7, color="black"), - symbol="circle", - ), - opacity=0.7, - text=group_df["label"], - hovertext=[ - f"{row['label']}
Cluster: {row['groups']}
Contrib: {row['contrib']:.3f}" - for _, row in group_df.iterrows() - ], - hoverinfo="text", - name=f"Cluster {g}", - showlegend=False, - ) + go.Scatter( + x=group_df["Dim1"], + y=group_df["Dim2"], + mode="markers", + marker=dict( + size=group_df["dotSize"], + color=group_colors.get(g, "#FF0000"), + opacity=0.7, + line=dict(width=0.7, color="black"), + symbol="circle", + ), + opacity=0.7, + text=group_df["label"], + hovertext=[ + f"{row['label']}
Cluster: {row['groups']}
Contrib: {row['contrib']:.3f}" + for _, row in group_df.iterrows() + ], + hoverinfo="text", + name=f"Cluster {g}", + showlegend=False, + ) ) - # Aggiungi i NaN separatamente (se esistono) group_df_nan = wordCoord[wordCoord["groups"].isna()] if not group_df_nan.empty: fig.add_trace( - go.Scatter( - x=group_df_nan["Dim1"], - y=group_df_nan["Dim2"], - mode="markers", - marker=dict( - size=group_df_nan["dotSize"], - color="#FF9999", - opacity=0.7, - line=dict(width=0.7, color="black"), - symbol="circle", - ), - opacity=0.7, - text=group_df_nan["label"], - hovertext=[ - f"{row['label']}
Cluster: N/A
Contrib: {row['contrib']:.3f}" - for _, row in group_df_nan.iterrows() - ], - hoverinfo="text", - name="No Cluster", - showlegend=False, - ) + go.Scatter( + x=group_df_nan["Dim1"], + y=group_df_nan["Dim2"], + mode="markers", + marker=dict( + size=group_df_nan["dotSize"], + color="#FF9999", + opacity=0.7, + line=dict(width=0.7, color="black"), + symbol="circle", + ), + opacity=0.7, + text=group_df_nan["label"], + hovertext=[ + f"{row['label']}
Cluster: N/A
Contrib: {row['contrib']:.3f}" + for _, row in group_df_nan.iterrows() + ], + hoverinfo="text", + name="No Cluster", + showlegend=False, + ) ) - # Aggiungi contorni dei cluster (Convex Hull) if n_clusters != 1 and "hull_data" in CS and CS["hull_data"] is not None and not CS["hull_data"].empty: hull_data = CS["hull_data"] for cluster_id in hull_data["cluster"].unique(): group = hull_data[hull_data["cluster"] == cluster_id] fig.add_trace( go.Scatter( - x=group["Dim1"], - y=group["Dim2"], - mode="lines", - line=dict(color=group_colors.get(cluster_id, "gray"), width=2), - fill="toself", - opacity=0.15, - hoverinfo="skip", - showlegend=False + x=group["Dim1"], + y=group["Dim2"], + mode="lines", + line=dict(color=group_colors.get(cluster_id, "gray"), width=2), + fill="toself", + opacity=0.15, + hoverinfo="skip", + showlegend=False ) ) - # Etichette solo per i top word (labelToPlot), spostate più in alto rispetto ai pallini - # Offset dinamico in base alla dimensione verticale del grafico - label_offset = 0.03 * (wordCoord["Dim2"].max() - wordCoord["Dim2"].min()) + # PATCH 2: if all Dim2 values are equal, the range is 0 and label_offset + # becomes 0, causing labels to overlap markers with no visual separation. + # → added a fallback minimum offset based on the x range to ensure + # labels are always displaced from their markers. 
+ dim2_range = wordCoord["Dim2"].max() - wordCoord["Dim2"].min() + dim1_range = wordCoord["Dim1"].max() - wordCoord["Dim1"].min() + label_offset = 0.03 * dim2_range if dim2_range > 0 else 0.03 * dim1_range if dim1_range > 0 else 0.1 for _, row in wordCoord[wordCoord["labelToPlot"] != ""].iterrows(): fig.add_annotation( - x=row["Dim1"], - y=row["Dim2"] + label_offset, - text=row["labelToPlot"], - font=dict(size=labelsize, color=group_colors.get(row["groups"], "black")), - showarrow=False, + x=row["Dim1"], + y=row["Dim2"] + label_offset, + text=row["labelToPlot"], + font=dict(size=labelsize, color=group_colors.get(row["groups"], "black")), + showarrow=False, ) - # Assi X=0 e Y=0, grigi e tratteggiati fig.add_shape( type="line", - x0=wordCoord["Dim1"].min(), - x1=wordCoord["Dim1"].max(), - y0=0, - y1=0, + x0=wordCoord["Dim1"].min(), x1=wordCoord["Dim1"].max(), + y0=0, y1=0, line=dict(color="#B0B0B0", width=1.5, dash="dash"), layer="below" ) fig.add_shape( type="line", - x0=0, - x1=0, - y0=wordCoord["Dim2"].min(), - y1=wordCoord["Dim2"].max(), + x0=0, x1=0, + y0=wordCoord["Dim2"].min(), y1=wordCoord["Dim2"].max(), line=dict(color="#B0B0B0", width=1.5, dash="dash"), layer="below" ) - # Personalizza l'hovertemplate per renderlo leggibile e carino for trace in fig.data: trace.hovertemplate = ( - "%{text}
" - "Cluster: %{marker.color}
" - "Contribuzione: %{marker.size:.2f}" + "%{text}
" + "Cluster: %{marker.color}
" + "Contribuzione: %{marker.size:.2f}" ) fig.update_layout( xaxis=dict( - title=xlabel, - zeroline=True, - zerolinewidth=1.5, - zerolinecolor="#B0B0B0", - showgrid=True, - gridcolor="lightgray", - showline=False, - showticklabels=True + title=xlabel, + zeroline=True, zerolinewidth=1.5, zerolinecolor="#B0B0B0", + showgrid=True, gridcolor="lightgray", + showline=False, showticklabels=True ), yaxis=dict( - title=ylabel, - zeroline=True, - zerolinewidth=1.5, - zerolinecolor="#B0B0B0", - showgrid=True, - gridcolor="lightgray", - showline=False, - showticklabels=True + title=ylabel, + zeroline=True, zerolinewidth=1.5, zerolinecolor="#B0B0B0", + showgrid=True, gridcolor="lightgray", + showline=False, showticklabels=True ), plot_bgcolor="rgba(0,0,0,0)", paper_bgcolor="rgba(0,0,0,0)", showlegend=False, height=800, hoverlabel=dict( - bgcolor="white", - font_size=13, - font_family="Segoe UI, Arial", - bordercolor="#5567BB" + bgcolor="white", + font_size=13, + font_family="Segoe UI, Arial", + bordercolor="#5567BB" ), ) fig = go.FigureWidget(fig) fig._config = fig._config | {'modeBarButtonsToRemove': ['pan', 'select', 'lasso2d', 'toImage'], 'displaylogo': False} - ##################################################################################### - - ################################### DENDROGRAM COERENTE CON WORD MAP ################################### import networkx as nx from pyvis.network import Network from scipy.cluster.hierarchy import linkage, to_tree @@ -342,7 +318,6 @@ def get_factorial_analysis( import tempfile import os - # 1. Linkage, labels, cluster mapping labels_lower = CS["km_res"]["data"].index.str.lower().tolist() coords = CS["km_res"]["data"][["Dim1", "Dim2"]].values linkage_matrix = CS["linkage"] @@ -351,13 +326,11 @@ def get_factorial_analysis( group_colors = assign_consistent_colors(WData["cluster"]) leaf_offset = len(labels_lower) - # 2. 
Ordina le parole secondo dendrogramma ddata = dendrogram(linkage_matrix, labels=labels_lower, no_plot=True) words_sorted = ddata["ivl"] n_terms = len(words_sorted) - scale_factor = int(500 * math.log2(n_terms + 1)) # log-scale vertical height + scale_factor = int(500 * math.log2(n_terms + 1)) - # 3. Inizializza rete Pyvis tree, nodes = to_tree(linkage_matrix, rd=True) net = Network(height="98vh", width="100%", directed=True, notebook=True, cdn_resources="in_line") net.toggle_physics(False) @@ -368,11 +341,8 @@ def get_factorial_analysis( leaf_x = 0 x_spacing = 100 label_to_new_index = {label: i for i, label in enumerate(words_sorted)} - - # Per memorizzare cambi cluster cut_lines = {} - # FOGUE for i, label in enumerate(words_sorted): node_id = i x = leaf_x @@ -382,49 +352,25 @@ def get_factorial_analysis( node_to_cluster[node_id] = cluster positions[node_id] = (x, y) - # Nodo foglia net.add_node( - node_id, - label=" ", - color=color, - shape="dot", - size=6, - title=label, - font={"size": 18, "face": "arial"}, - physics=False, - x=x, - y=y + 40 + node_id, label=" ", color=color, shape="dot", size=6, + title=label, font={"size": 18, "face": "arial"}, + physics=False, x=x, y=y + 40 ) - # Nodo stub stub_y = y - 20 stub_id = f"stub_{node_id}" positions[stub_id] = (x, stub_y) net.add_node( - stub_id, - label=" ", - title=" ", - color="#00000000", - shape="dot", - size=1, - physics=False, - x=x, - y=stub_y, + stub_id, label=" ", title=" ", color="#00000000", + shape="dot", size=1, physics=False, x=x, y=stub_y, font={"color": "#00000000", "size": 1} ) - net.add_edge( - stub_id, - node_id, - label=" ", - color=color, - width=10, - smooth=False, - physics=False, - arrows="" + stub_id, node_id, label=" ", color=color, width=10, + smooth=False, physics=False, arrows="" ) - # Label HTML dinamica box_html = f"""
{label.upper()} @@ -433,7 +379,6 @@ def get_factorial_analysis( label_boxes.append(box_html) leaf_x += x_spacing - # MERGE def add_internal_nodes(node): if node.is_leaf(): label = labels_lower[node.id] @@ -441,126 +386,71 @@ def add_internal_nodes(node): stub_id = f"stub_{new_id}" return positions[stub_id], stub_id - # 1. Ricorsione sui figli left_pos, left_stub_id = add_internal_nodes(node.left) right_pos, right_stub_id = add_internal_nodes(node.right) - # 2. Coordinate del nodo interno x_center = (left_pos[0] + right_pos[0]) / 2 y = min(left_pos[1], right_pos[1]) max_dist = linkage_matrix[:, 2].max() stub_y = distance_to_y(node.dist, max_dist, scale_factor) - node_id = node.id + leaf_offset stub_id = f"stub_{node_id}" positions[node_id] = (x_center, y) positions[stub_id] = (x_center, stub_y) total = node.count - # 3. Colore cluster (ereditato dal figlio sinistro) left_cluster = node_to_cluster.get( - node.left.id + leaf_offset if not node.left.is_leaf() else label_to_new_index[labels_lower[node.left.id]], - -1 + node.left.id + leaf_offset if not node.left.is_leaf() else label_to_new_index[labels_lower[node.left.id]], -1 ) right_cluster = node_to_cluster.get( - node.right.id + leaf_offset if not node.right.is_leaf() else label_to_new_index[labels_lower[node.right.id]], - -1 + node.right.id + leaf_offset if not node.right.is_leaf() else label_to_new_index[labels_lower[node.right.id]], -1 ) cluster = left_cluster node_to_cluster[node_id] = cluster color = group_colors.get(cluster, "#999999") - # 4. 
Nodo interno net.add_node( - node_id, - label=" ", - shape="dot", - size=20, - physics=False, - x=x_center, - y=y, + node_id, label=" ", shape="dot", size=20, physics=False, + x=x_center, y=y, title=f"Distance: {node.dist:.2f} Words: {total}", - color={ - "background": "#FFFFFF", # Riempimento bianco - "border": "#3399FF", # Bordo blu tenue - "highlight": "#000000" # Colore al passaggio mouse (opzionale) - }, + color={"background": "#FFFFFF", "border": "#3399FF", "highlight": "#000000"}, borderWidth=2, ) - - - # 5. Nodo stub sopra net.add_node( - stub_id, - label=" ", - title=f"Distance: {node.dist:.2f} Words: {total}", - color="#00000000", - shape="dot", - size=4, - physics=False, - x=x_center, - y=stub_y, - font={"color": "#00000000", "size": 1} + stub_id, label=" ", title=f"Distance: {node.dist:.2f} Words: {total}", + color="#00000000", shape="dot", size=4, physics=False, + x=x_center, y=stub_y, font={"color": "#00000000", "size": 1} ) - # 6. Edge verticale (stub → nodo) if node != tree: net.add_edge( - stub_id, - node_id, - label=" ", + stub_id, node_id, label=" ", title=f"Distance: {node.dist:.2f} Words: {node.count}", - color=color, - width=10, - smooth=False, - physics=False, - arrows="" + color=color, width=10, smooth=False, physics=False, arrows="" ) - # 7. 
Collega i due figli for child_stub_id in [left_stub_id, right_stub_id]: child_x, child_y = positions[child_stub_id] inter_id = f"{node_id}_{child_stub_id}_v" inter_y = y net.add_node( - inter_id, - label=" ", - title=" ", - color="#00000000", - shape="dot", - size=1, - physics=False, - x=child_x, - y=inter_y + inter_id, label=" ", title=" ", color="#00000000", + shape="dot", size=1, physics=False, x=child_x, y=inter_y ) - - # print(f"[HLINE] Nodo {node_id} connesso a {child_stub_id} a y={inter_y:.2f}") - net.add_edge( - node_id, - inter_id, - color=color, + node_id, inter_id, color=color, title=f"Distance: {node.dist:.2f} Words: {node.count}", - width=10, - smooth=False, - physics=False, - arrows="" + width=10, smooth=False, physics=False, arrows="" ) net.add_edge( - inter_id, - child_stub_id, - color=color, + inter_id, child_stub_id, color=color, title=f"Distance: {node.dist:.2f} Words: {node.count}", - width=10, - smooth=False, - physics=False, - arrows="" + width=10, smooth=False, physics=False, arrows="" ) - # 8. 
Linea di taglio (se cambia cluster) left_leaf_clusters = get_leaf_clusters(node.left, label_to_new_index, labels_lower, node_to_cluster) right_leaf_clusters = get_leaf_clusters(node.right, label_to_new_index, labels_lower, node_to_cluster) @@ -569,41 +459,23 @@ def add_internal_nodes(node): cl2 = min(right_leaf_clusters) cluster_pair = tuple(sorted((cl1, cl2))) if cluster_pair not in cut_lines: - cut_lines[cluster_pair] = y # posizione reale della fusione visibile - # print(f"[CUT LINE] Cambio cluster {cluster_pair} a y = {stub_y:.2f}") - + cut_lines[cluster_pair] = y return (x_center, stub_y), stub_id - # Costruisci _, root_stub_id = add_internal_nodes(tree) - # Aggiungi linee rosse di taglio - # Aggiungi solo la linea di taglio più bassa (cioè y più vicino allo 0) if cut_lines: - # Trova la coppia con il max y (cioè la linea di taglio più bassa visivamente) (cl1, cl2), y = max(cut_lines.items(), key=lambda x: x[1]) - - net.add_node( - f"cut_{cl1}_{cl2}_left", x=0, y=y, label="", shape="dot", size=0.1, color="#FF0000", physics=False - ) - net.add_node( - f"cut_{cl1}_{cl2}_right", x=(leaf_x - x_spacing), y=y, label="", shape="dot", size=0.1, color="#FF0000", physics=False - ) + net.add_node(f"cut_{cl1}_{cl2}_left", x=0, y=y, label="", shape="dot", size=0.1, color="#FF0000", physics=False) + net.add_node(f"cut_{cl1}_{cl2}_right", x=(leaf_x - x_spacing), y=y, label="", shape="dot", size=0.1, color="#FF0000", physics=False) net.add_edge( - f"cut_{cl1}_{cl2}_left", - f"cut_{cl1}_{cl2}_right", - label=f"cut @ y={y:.1f}", - color="#FF0000", - width=20, - physics=False, - arrows="" + f"cut_{cl1}_{cl2}_left", f"cut_{cl1}_{cl2}_right", + label=f"cut @ y={y:.1f}", color="#FF0000", width=20, physics=False, arrows="" ) - # 1. Salva grafo base in HTML html = net.generate_html() - # 2. 
Inietta etichette HTML injection = f""" ", new_css + "\n ") updated_html = updated_html.replace("1px solid lightgray", "none") - f.write(updated_html) - ############################################ words_by_cluster = WData[["word", "Dim1", "Dim2", "cluster"]].copy() - # 5. Restituisci return fig, html_path.split(os.sep)[-1], words_by_cluster, CS["CSData"] @@ -673,13 +541,18 @@ def conceptual_structure( remove_terms: Optional[Sequence[str]] = None, synonyms: Optional[Dict[str, str]] = None ) -> Dict: - # Set binary flag based on method + binary = method == "MCA" - - # Create co-occurrence matrix based on field + if field == "ID": CW = cocMatrix(df, Field="ID", binary=binary, remove_terms=remove_terms, synonyms=synonyms) - CW = CW.loc[:, CW.sum() >= min_degree] + # PATCH: cocMatrix returns None when the field is empty + # (e.g. ID/Keywords Plus is always empty for OpenAlex and PubMed). + # Fixed by checking if CW is None before using it. + if CW is None: + return {"res": None, "map": go.FigureWidget(go.Figure()), "clusters": pd.DataFrame()} + + CW = CW.loc[:, CW.sum() >= min_degree] CW = CW.loc[CW.sum(axis=1) > 0] CW = CW.loc[:, ~CW.columns.isin(["NA"])] @@ -717,13 +590,19 @@ def conceptual_structure( CW = CW.loc[CW.sum(axis=1) > 0] CW = CW.loc[:, ~CW.columns.isin(["NA"])] - # Convert labels to lowercase + # PATCH 4: if all terms are filtered out by min_degree, CW is an empty + # DataFrame and factorial() crashes on matrix operations (pdist, X.T @ X). + # → return an empty result dict early instead of crashing. 
+ if CW.empty: + return { + 'net': CW, 'res': None, 'km_res': {'data': pd.DataFrame(), 'centers': pd.DataFrame()}, + 'docCoord': None, 'coord': None, 'hull_data': pd.DataFrame(), 'linkage': None, + 'CSData': pd.DataFrame(), 'WData': pd.DataFrame(), 'params': pd.DataFrame() + } + CW.columns = CW.columns.str.lower() CW.index = CW.index.str.lower() - # print("CW", CW) - - # Run factorial analysis results = factorial(CW, method=method, n_clusters=n_clusters, k_max=k_max) res_mca = results['res_mca'] if 'res_mca' in results else None @@ -732,64 +611,58 @@ def conceptual_structure( else: doc_coord = None + # PATCH 3: results.get('df', results.get('res')) can return None if neither + # key is present in the dict — calling df.index = ... on None crashes with + # AttributeError. → raise an explicit error to surface the root cause. df = results.get('df', results.get('res')) + if df is None: + raise ValueError( + f"factorial() returned no 'df' or 'res' key for method='{method}'. " + "Check that the method is supported and the input matrix is valid." + ) df.index = CW.columns doc_coord = results['docCoord'] - # Add total citations if available - # Add total citations if available and method is not "MDS" if "TC" in df.columns and method != "MDS": - # Try to match doc_coord index to df index (case-insensitive) doc_coord = doc_coord.copy() doc_coord_index_upper = doc_coord.index.astype(str).str.upper() df_index_upper = df.index.astype(str).str.upper() tc_map = dict(zip(df_index_upper, df["TC"].astype(float))) doc_coord["TC"] = doc_coord_index_upper.map(tc_map) - # Perform hierarchical clustering - # km_res vis_hclust pyvis km_res = linkage(pdist(df, metric='euclidean'), method='average') results['linkage'] = km_res - # Determine the number of clusters + # PATCH 5: n_clusters greater than the number of available terms causes + # fcluster to produce unexpected behavior or crash. + # → clamp n_clusters to [1, len(CW.columns)] before calling fcluster. 
+ max_clusters = len(CW.columns) if n_clusters == "auto": heights = np.diff(km_res[:, 2]) - n_clusters = min(len(heights) - np.argmax(heights) + 1, k_max) + n_clusters = min(len(heights) - np.argmax(heights) + 1, k_max, max_clusters) else: - n_clusters = max(1, min(int(n_clusters), k_max)) + n_clusters = max(1, min(int(n_clusters), k_max, max_clusters)) - # Assign clusters to data points cluster_labels = fcluster(km_res, n_clusters, criterion='maxclust') df = df.copy() df['cluster'] = cluster_labels - # Create data.clust (dataframe with data and cluster) data_clust = df.copy() - - # Calculate cluster centers - centers = data_clust.groupby('cluster').agg({ - 'Dim1': 'mean', - 'Dim2': 'mean' - }).reset_index() - - # Reorder columns to match R: Dim1, Dim2, cluster + centers = data_clust.groupby('cluster').agg({'Dim1': 'mean', 'Dim2': 'mean'}).reset_index() centers = centers[['Dim1', 'Dim2', 'cluster']] - # Add shape and label columns data_clust['shape'] = "1" data_clust['label'] = data_clust.index.astype(str) centers['shape'] = "0" centers['label'] = "" - # Concatenate data_clust and centers df_clust = pd.concat([data_clust, centers], ignore_index=True, sort=False) - - # Assign color by cluster (using Plotly palette) colorlist = px.colors.qualitative.Plotly - df_clust['color'] = df_clust['cluster'].apply(lambda x: colorlist[int(x) % len(colorlist)] if pd.notnull(x) else "#CCCCCC") + df_clust['color'] = df_clust['cluster'].apply( + lambda x: colorlist[int(x) % len(colorlist)] if pd.notnull(x) else "#CCCCCC" + ) - # Create hull data for plotting (similar to R dplyr + chull logic) hull_data_list = [] for cluster in df_clust['cluster'].dropna().unique(): group = df_clust[df_clust['cluster'] == cluster] @@ -797,11 +670,8 @@ def conceptual_structure( try: hull_idx = ConvexHull(group[['Dim1', 'Dim2']]).vertices hull_points = group.iloc[hull_idx] - # Chiudi il poligono (aggiungi il primo punto alla fine) hull_points = pd.concat([hull_points, hull_points.iloc[[0]]]) - except 
QhullError as e: - # print(f"[WARN] ConvexHull fallito per cluster {cluster}: {e}") - # Fallback: rettangolo minimo + except QhullError: x_min, x_max = group["Dim1"].min(), group["Dim1"].max() y_min, y_max = group["Dim2"].min(), group["Dim2"].max() hull_points = pd.DataFrame({ @@ -813,11 +683,7 @@ def conceptual_structure( if hull_data_list: hull_data = pd.concat(hull_data_list) - # For each cluster, add the first point again to close the polygon - hull_data = pd.concat([ - hull_data, - hull_data.groupby('cluster').head(1) - ]) + hull_data = pd.concat([hull_data, hull_data.groupby('cluster').head(1)]) hull_data = hull_data.reset_index(drop=True) hull_data['id'] = hull_data.groupby('cluster').cumcount() + 1 hull_data = hull_data.sort_values(['cluster', 'id']) @@ -826,8 +692,7 @@ def conceptual_structure( if doc_coord is not None: results = { - 'net': CW, - 'res': res_mca, + 'net': CW, 'res': res_mca, 'km_res': {'data': df, 'centers': centers}, 'docCoord': doc_coord, 'coord': results['coord'] if 'coord' in results else None, @@ -836,37 +701,19 @@ def conceptual_structure( } else: results = { - 'net': CW, - 'res': df, - 'km_res': { - 'data': df, - 'centers': centers, - 'cluster': df['cluster'] - }, - 'docCoord': None, - 'coord': None, - 'hull_data': hull_data, - 'linkage': km_res + 'net': CW, 'res': df, + 'km_res': {'data': df, 'centers': centers, 'cluster': df['cluster']}, + 'docCoord': None, 'coord': None, + 'hull_data': hull_data, 'linkage': km_res } params = { - 'field': field, - 'ngrams': ngrams, - 'method': method, - 'min_degree': min_degree, - 'n_clusters': n_clusters, - 'k_max': k_max, - 'stemming': stemming, - 'labelsize': labelsize, - 'documents': documents, - 'graph': graph, - 'remove_terms': remove_terms, - 'synonyms': synonyms + 'field': field, 'ngrams': ngrams, 'method': method, + 'min_degree': min_degree, 'n_clusters': n_clusters, 'k_max': k_max, + 'stemming': stemming, 'labelsize': labelsize, 'documents': documents, + 'graph': graph, 'remove_terms': 
remove_terms, 'synonyms': synonyms } - params_df = pd.DataFrame({ - 'params': list(params.keys()), - 'values': [str(params[k]) for k in params] - }) + params_df = pd.DataFrame({'params': list(params.keys()), 'values': [str(params[k]) for k in params]}) results['params'] = params_df return results @@ -885,140 +732,75 @@ def factorial(X, method, n_clusters=5, k_max=5): """ if method == "CA": res_mca = CA(n_components=2).fit(X) - row_coords = res_mca.row_coordinates(X) col_coords = res_mca.column_coordinates(X) - K = 2 I, J = row_coords.shape[0], col_coords.shape[0] - singular_values = np.linalg.norm(row_coords.values, axis=0)[:K] evF = np.tile(singular_values, (I, 1)) evG = np.tile(singular_values, (J, 1)) - rpc = row_coords.iloc[:, :K].values * evF cpc = col_coords.iloc[:, :K].values * evG - column_masses = (X.sum(axis=0) / X.values.sum()).values - column_distances = np.sum(cpc**2, axis=1) - + column_distances = np.sum(cpc ** 2, axis=1) coord = { "coord": pd.DataFrame(cpc[:, :2], columns=["Dim1", "Dim2"], index=col_coords.index), "contrib": pd.DataFrame((cpc[:, :2] ** 2) * column_masses[:, None] / singular_values, columns=["Dim1", "Dim2"], index=col_coords.index), "cos2": pd.DataFrame((cpc[:, :2] ** 2) / column_distances[:, None], columns=["Dim1", "Dim2"], index=col_coords.index) } - coord_doc = { "coord": pd.DataFrame(rpc[:, :2], columns=["Dim1", "Dim2"], index=row_coords.index), "contrib": pd.DataFrame((rpc[:, :2] ** 2), columns=["Dim1", "Dim2"], index=row_coords.index), "cos2": pd.DataFrame((rpc[:, :2] ** 2) / np.sum(rpc[:, :2] ** 2, axis=1)[:, None], columns=["Dim1", "Dim2"], index=row_coords.index) } - elif method == "MCA": - - # Multiple Correspondence Analysis X = X.apply(lambda col: col.astype("category")) res_mca = MCA(n_components=2).fit(X) - - # Estrai i nomi dei livelli (equivalente di `res.mca$levelnames` in R) levelnames = [f"{col}_{val}" for col in X.columns for val in X[col].cat.categories] - K = 2 row_coords = res_mca.row_coordinates(X) col_coords = 
res_mca.column_coordinates(X) I, J = row_coords.shape[0], col_coords.shape[0] - - # Stima dei valori singolari - # I valori singolari possono essere stimati come la norma delle prime componenti singular_values = np.linalg.norm(row_coords.values, axis=0)[:2] - - # Crea le matrici evF ed evG replicando i valori singolari - evF = np.tile(singular_values, (I, 1)) # Matrice di dimensione (I, K) - evG = np.tile(singular_values, (J, 1)) # Matrice di dimensione (J, K) - + evF = np.tile(singular_values, (I, 1)) + evG = np.tile(singular_values, (J, 1)) rpc = row_coords.iloc[:, :K].values * evF cpc = col_coords.iloc[:, :K].values * evG - - # Calcolo delle masse delle colonne column_frequencies = X.apply(lambda col: col.value_counts(normalize=True)).fillna(0) - column_mass = column_frequencies.values.flatten() # Vettore delle masse delle colonne - - # Calcolo delle distanze delle colonne - column_distances = np.sum(cpc**2, axis=1) # Calcola la somma dei quadrati delle coordinate - - # Crea la lista `coord` - coord_df = pd.DataFrame({ - "Dim1": cpc[:, 0], - "Dim2": cpc[:, 1], - "label": levelnames - }) + column_mass = column_frequencies.values.flatten() + column_distances = np.sum(cpc ** 2, axis=1) + coord_df = pd.DataFrame({"Dim1": cpc[:, 0], "Dim2": cpc[:, 1], "label": levelnames}) mask = coord_df["label"].str[-2:] == "_1" coord = { "coord": coord_df[mask].drop(columns=["label"]).reset_index(drop=True), - - "contrib": pd.DataFrame( - (cpc**2) * column_mass[:, np.newaxis] / singular_values, - columns=["Dim1", "Dim2"] - ).assign(label=levelnames)[mask].drop(columns=["label"]).reset_index(drop=True), - - "cos2": pd.DataFrame( - (cpc**2) / column_distances[:, np.newaxis], # Usa le distanze calcolate - columns=["Dim1", "Dim2"] - ).assign(label=levelnames)[mask].drop(columns=["label"]).reset_index(drop=True) + "contrib": pd.DataFrame((cpc ** 2) * column_mass[:, np.newaxis] / singular_values, columns=["Dim1", 
"Dim2"]).assign(label=levelnames)[mask].drop(columns=["label"]).reset_index(drop=True), + "cos2": pd.DataFrame((cpc ** 2) / column_distances[:, np.newaxis], columns=["Dim1", "Dim2"]).assign(label=levelnames)[mask].drop(columns=["label"]).reset_index(drop=True) } - - # Imposta i nomi delle righe row_names = coord["coord"].index.astype(str).str[:-2] coord["coord"].index = row_names coord["contrib"].index = row_names coord["cos2"].index = row_names - - # Crea la lista `coord_doc` coord_doc = { - "coord": pd.DataFrame({ - "Dim1": rpc[:, 0], - "Dim2": rpc[:, 1] - }, index=X.index), - - "contrib": pd.DataFrame( - (rpc[:, :2]**2) * res_mca.row_masses_.values[:, np.newaxis] / singular_values, - columns=["Dim1", "Dim2"] - ), - - "cos2": pd.DataFrame( - res_mca.row_masses_.values[:, np.newaxis] * rpc**2 / res_mca.total_inertia_, - columns=["Dim1", "Dim2"] - ) + "coord": pd.DataFrame({"Dim1": rpc[:, 0], "Dim2": rpc[:, 1]}, index=X.index), + "contrib": pd.DataFrame((rpc[:, :2] ** 2) * res_mca.row_masses_.values[:, np.newaxis] / singular_values, columns=["Dim1", "Dim2"]), + "cos2": pd.DataFrame(res_mca.row_masses_.values[:, np.newaxis] * rpc ** 2 / res_mca.total_inertia_, columns=["Dim1", "Dim2"]) } elif method == "MDS": - # Step 1: NetMatrix = X.T @ X net_matrix = X.T @ X - - # Step 2: Association-based normalization net_matrix_np = net_matrix.to_numpy() row_sums = net_matrix_np.sum(axis=1, keepdims=True) col_sums = net_matrix_np.sum(axis=0, keepdims=True) expected = row_sums @ col_sums / net_matrix_np.sum() norm_matrix = np.divide(net_matrix_np, expected, where=expected != 0) norm_matrix = np.nan_to_num(norm_matrix, nan=0.0, posinf=0.0, neginf=0.0) - - # Step 3: Dissimilarity matrix dissim_matrix = 1 - norm_matrix np.fill_diagonal(dissim_matrix, 0) - - # Step 4: MDS (classical) mds = SK_MDS(n_components=2, dissimilarity="precomputed", random_state=42) coords = mds.fit_transform(dissim_matrix) - - # Normalizza le coordinate (StandardScaler per coerenza visiva) coords = 
StandardScaler().fit_transform(coords) - - # Crea DataFrame delle coordinate df = pd.DataFrame(coords, columns=["Dim1", "Dim2"], index=X.columns) - - # Clustering sulle coordinate km_res = linkage(pdist(df), method='average') if n_clusters == "auto": @@ -1030,24 +812,27 @@ def factorial(X, method, n_clusters=5, k_max=5): cluster_labels = fcluster(km_res, n_clusters, criterion='maxclust') df["cluster"] = cluster_labels - # Calcolo contribuzione proxy: distanza dal centroide centroids = df.groupby("cluster")[["Dim1", "Dim2"]].transform("mean") - df["contrib"] = np.sqrt((df["Dim1"] - centroids["Dim1"])**2 + (df["Dim2"] - centroids["Dim2"])**2) - df["contrib"] = (df["contrib"] - df["contrib"].min()) / (df["contrib"].max() - df["contrib"].min()) + 1 + contrib_raw = np.sqrt((df["Dim1"] - centroids["Dim1"]) ** 2 + (df["Dim2"] - centroids["Dim2"]) ** 2) + + # PATCH 6: if all points are equidistant from their centroid, the + # normalization range is 0 and the division produces NaN everywhere. + # → added a check: if range is 0, assign uniform contrib of 1.0. 
+ contrib_range = contrib_raw.max() - contrib_raw.min() + if contrib_range > 0: + df["contrib"] = (contrib_raw - contrib_raw.min()) / contrib_range + 1 + else: + df["contrib"] = 1.0 - # Autovalori fittizi per etichette (Benzecri style) sv = np.linalg.norm(coords, axis=0) - eig_benz = np.where(sv**2 > 1 / len(sv), - ((len(sv) / (len(sv) - 1)) ** 2) * (sv**2 - 1 / len(sv))**2, - 0) + eig_benz = np.where( + sv ** 2 > 1 / len(sv), + ((len(sv) / (len(sv) - 1)) ** 2) * (sv ** 2 - 1 / len(sv)) ** 2, + 0 + ) perc = eig_benz / eig_benz.sum() * 100 if eig_benz.sum() > 0 else np.zeros_like(eig_benz) cum_perc = np.cumsum(perc) - eig_corr = pd.DataFrame({ - "eig": sv**2, - "eigBenz": eig_benz, - "perc": perc, - "cumPerc": cum_perc - }) + eig_corr = pd.DataFrame({"eig": sv ** 2, "eigBenz": eig_benz, "perc": perc, "cumPerc": cum_perc}) results = { "res_mca": {"eigCorr": eig_corr, "sv": sv}, @@ -1056,24 +841,18 @@ def factorial(X, method, n_clusters=5, k_max=5): "docCoord": None, "coord": None } - return results - else: raise ValueError(f"Unsupported method: {method}") - # Blocchi comuni per CA/MCA (non MDS) if method != "MDS": res_mca = eig_correction(res_mca, singular_values) - docCoord = pd.DataFrame( np.hstack([coord_doc["coord"], coord_doc["contrib"].sum(axis=1).to_numpy()[:, None]]), columns=["dim1", "dim2", "contrib"], ).sort_values(by="contrib", ascending=False) - res_mca.coord_doc = coord_doc - results = { "res_mca": res_mca, "df": coord["coord"], @@ -1094,26 +873,15 @@ def eig_correction(res_mca, singular_values): singular_values: Array or list of singular values from the analysis. Returns: - Corrected results. + Corrected results with eigCorr attribute attached. 
""" n = len(singular_values) e = np.array(singular_values) ** 2 - eig_benz = np.where( - e > 1 / n, - ((n / (n - 1)) ** 2) * (e - (1 / n)) ** 2, - 0 - ) + eig_benz = np.where(e > 1 / n, ((n / (n - 1)) ** 2) * (e - (1 / n)) ** 2, 0) perc = eig_benz / np.sum(eig_benz) * 100 if np.sum(eig_benz) > 0 else np.zeros_like(eig_benz) cum_perc = np.cumsum(perc) + eig_corr = pd.DataFrame({"eig": e, "eigBenz": eig_benz, "perc": perc, "cumPerc": cum_perc}) - eig_corr = pd.DataFrame({ - "eig": e, - "eigBenz": eig_benz, - "perc": perc, - "cumPerc": cum_perc - }) - - # Attach eigCorr as attribute or dict entry if hasattr(res_mca, '__dict__'): res_mca.eigCorr = eig_corr else: @@ -1132,48 +900,29 @@ def avoidOverlaps(df, threshold=0.10, dimX=0, dimY=1): dimY: Index of the y-coordinate column. Returns: - List of labels to remove to avoid overlaps. + Set of labels to remove to avoid overlaps. """ df["Dim2"] = df["Dim2"] / 3 - - # Filter rows with non-empty labels filtered_df = df[df["labelToPlot"] != ""].copy() - - # Compute Manhattan distances - distances = pd.DataFrame( - pdist(filtered_df[["Dim1", "Dim2"]], metric="cityblock"), - columns=["dist"] - ) + distances = pd.DataFrame(pdist(filtered_df[["Dim1", "Dim2"]], metric="cityblock"), columns=["dist"]) distances["from"] = np.repeat(filtered_df["labelToPlot"].values, len(filtered_df)) distances["to"] = np.tile(filtered_df["labelToPlot"].values, len(filtered_df)) distances = distances[distances["from"] != distances["to"]] - - # Add dot sizes distances = distances.merge( filtered_df[["labelToPlot", "dotSize"]].rename(columns={"dotSize": "w_from"}), - left_on="from", - right_on="labelToPlot" + left_on="from", right_on="labelToPlot" ).drop(columns=["labelToPlot"]) distances = distances.merge( filtered_df[["labelToPlot", "dotSize"]].rename(columns={"dotSize": "w_to"}), - left_on="to", - right_on="labelToPlot" + left_on="to", right_on="labelToPlot" ).drop(columns=["labelToPlot"]) - - # Filter by threshold distances = 
distances[distances["dist"] < threshold] labels_to_remove = [] while not distances.empty: row = distances.iloc[0] - if row["w_from"] > row["w_to"]: - label = row["to"] - else: - label = row["from"] - + label = row["to"] if row["w_from"] > row["w_to"] else row["from"] labels_to_remove.append(label) - - # Remove rows involving the selected label distances = distances[(distances["from"] != label) & (distances["to"] != label)] - return set(labels_to_remove) + return set(labels_to_remove) \ No newline at end of file diff --git a/functions/get_filters.py b/functions/get_filters.py index 206c215aa..004fbccac 100644 --- a/functions/get_filters.py +++ b/functions/get_filters.py @@ -12,7 +12,16 @@ def get_filters(df): Returns: A DataFrame with additional columns for filters and metrics. """ - data = df.get() + # PATCH: original code called df.get() without arguments, which crashes + # on a pandas DataFrame because pandas .get() requires a column name. + # Fixed by checking isinstance(df, pd.DataFrame) first. 
+ if isinstance(df, pd.DataFrame): + data = df.copy() + else: + data = df.get() + data["PY"] = pd.to_numeric(data["PY"], errors="coerce").fillna(0).astype(int) # PATCH: ensure PY is numeric + data["TC"] = pd.to_numeric(data["TC"], errors="coerce").fillna(0).astype(int) # PATCH: ensure TC is numeric + # Calculate the minimum and maximum publication years data["Min_Year"] = data["PY"].min() diff --git a/functions/get_frequentwords.py b/functions/get_frequentwords.py index 8d790ffe1..4b4157947 100644 --- a/functions/get_frequentwords.py +++ b/functions/get_frequentwords.py @@ -1,4 +1,5 @@ from www.services import * +import ast def get_frequent_words(df, ngram, num_of_words, word_type, file_upload_terms, file_upload_synonyms, field_separator_frequent=';'): @@ -45,6 +46,11 @@ def get_frequent_words(df, ngram, num_of_words, word_type, file_upload_terms, fi table = word_counts.sort_values(by='Occurrences', ascending=False) word_counts = word_counts.sort_values(by='Occurrences', ascending=False).head(num_of_words) + # PATCH: safety check if word_counts is empty — avoids building a + # plot (and the leftover-bubble UI glitch) from an empty DataFrame. + if word_counts.empty: + return go.FigureWidget(go.Figure()), table + # Create plot fig = px.scatter( word_counts, @@ -57,9 +63,8 @@ def get_frequent_words(df, ngram, num_of_words, word_type, file_upload_terms, fi color_continuous_scale=[(0, "lightblue"), (1, "darkblue")] ) - # Customize traces fig.update_traces( - marker=dict(opacity=1, size=word_counts["Occurrences"]), + marker=dict(opacity=1), textposition="middle center", textfont=dict(color="white", size=12) ) @@ -96,38 +101,83 @@ def get_frequent_words(df, ngram, num_of_words, word_type, file_upload_terms, fi return fig, table + def table_tag(df, tag, ngrams=1, remove_terms=None, synonyms=None): """ Extract and count words from a specified field in the DataFrame. """ - M = df.get() - + # PATCH: df may be a Shiny reactive Value or a plain DataFrame. 
+ # .get() extracts the DataFrame from the reactive wrapper; + # for plain DataFrames it falls through to the else branch. + M = df.get() if hasattr(df, 'get') and callable(df.get) and not isinstance(df, pd.DataFrame) else df + M = M.copy() + # Remove duplicates M = M.drop_duplicates(subset='SR') - + + # Get text data based on tag if tag in ['AB', 'TI']: - text_data = term_extraction(df, field=tag, stemming=False, verbose=False, - ngrams=ngrams, remove_terms=remove_terms, synonyms=synonyms) - text_data = text_data.get() + # PATCH: pass plain DataFrame to term_extraction — it does not accept reactives + df_plain = df.get() if hasattr(df, 'get') and callable(df.get) and not isinstance(df, pd.DataFrame) else df + + # PATCH: missing abstracts/titles sometimes arrive as the literal + # string "nan" rather than a true NaN (e.g. after a CSV/JSON + # round-trip). term_extraction() would otherwise tokenize that + # string as a real word, producing a fake "nan" term that drowns + # out or masks the genuine terms. Drop both true NaN and the + # literal "nan" string (case-insensitive, ignoring whitespace) + # before extracting terms. + df_plain = df_plain[df_plain[tag].notna()] + df_plain = df_plain[ + ~df_plain[tag].astype(str).str.strip().str.lower().eq('nan') + ] + + if df_plain.empty: + return {} + + try: + text_data = term_extraction(df_plain, field=tag, stemming=False, verbose=False, + ngrams=ngrams, remove_terms=remove_terms, synonyms=synonyms) + except ValueError: + return {} text_data = text_data[f"{tag}_TM"] else: + # PATCH: some tags (e.g. 'WC' - Subject Categories) are not present at all + # in the standardized schema for non-WoS sources (OpenAlex, PubMed). + # Return an empty result instead of raising a raw KeyError. 
+ if tag not in M.columns: + return {} text_data = M[tag] # Handle list columns (DE and ID) if tag in ['DE', 'ID']: - text_data = text_data.dropna().apply(lambda x: ', '.join(eval(x) if isinstance(x, str) else x)) + # PATCH: safe parser replaces eval() to handle malformed strings without crash + def safe_parse(x): + if isinstance(x, list): + return x + try: + return ast.literal_eval(x) + except (ValueError, SyntaxError): + return [] + + text_data = text_data.dropna().apply(lambda x: ', '.join(safe_parse(x))) # Process words if tag in ['DE', 'ID']: words = text_data.dropna().astype(str).str.cat(sep=', ').upper() words = [word.strip() for word in words.split(',') if word and word.strip()] else: - words = [item for sublist in text_data for item in sublist] - - # Apply n-grams if needed - # if ngrams > 1 and tag not in ['DE', 'ID']: - # words = [' '.join(words[i:i+ngrams]) for i in range(len(words)-ngrams+1)] + # PATCH: filter only list elements before iterating to avoid TypeError on None or str, + # and strip any stray literal "nan" tokens (case-insensitive) that may + # have survived inside the extracted term lists. 
+ words = [ + item + for sublist in text_data + if isinstance(sublist, list) + for item in sublist + if str(item).strip().lower() != 'nan' + ] # Replace synonyms if synonyms: @@ -137,9 +187,11 @@ def table_tag(df, tag, ngrams=1, remove_terms=None, synonyms=None): # Count words word_counts = Counter(words) - # Remove specified terms - if remove_terms and tag in ['DE', 'ID']: - word_counts = {word: count for word, count in word_counts.items() - if word.upper() not in [term.upper() for term in remove_terms]} + # PATCH: apply remove_terms to all tags, not just DE and ID + if remove_terms: + word_counts = { + word: count for word, count in word_counts.items() + if word.upper() not in [term.upper() for term in remove_terms] + } - return word_counts + return word_counts \ No newline at end of file diff --git a/functions/get_historiograph.py b/functions/get_historiograph.py index 089d02387..5ef3df934 100644 --- a/functions/get_historiograph.py +++ b/functions/get_historiograph.py @@ -8,7 +8,7 @@ def hex_to_rgba(hex_color, alpha): if not isinstance(hex_color, str) or not hex_color.startswith("#") or len(hex_color) != 7: - hex_color = "#999999" # fallback grigio neutro + hex_color = "#999999" # neutral grey fallback try: r, g, b = tuple(int(hex_color.lstrip("#")[i:i+2], 16) for i in (0, 2, 4)) except Exception: @@ -19,41 +19,50 @@ def hex_to_rgba(hex_color, alpha): def get_historiograph(df, node_label="AU1", histNodes=20, hist_isolates=True, histlabelsize=3, histsize=4, sep=";"): """ - Genera la historiograph e ritorna anche un file HTML interattivo con Pyvis. + Generates the historiograph and returns an interactive HTML file via Pyvis. 
Returns: - hist_plot: oggetto con layout e grafo networkx - hist_data: dataframe con metadati, DOI cliccabili, cluster, anni - filename: nome del file HTML interattivo salvato temporaneamente + hist_plot: object with layout and networkx graph + hist_data: dataframe with metadata, clickable DOIs, clusters, years + filename: name of the temporarily saved interactive HTML file """ - # Pre-elaborazione - df = metaTagExtraction(df, "SR") + # Pre-processing + _df = df.get() if hasattr(df, 'get') and not isinstance(df, pd.DataFrame) else df + if 'SR' not in _df.columns or _df['SR'].eq('').all(): + df = metaTagExtraction(df, "SR") + # PATCH: metaTagExtraction may return a plain DataFrame — wrap in reactive + # so histNetwork/cocMatrix can call .get() on it hist_results = histNetwork(df, min_citations=0, sep=sep, network=True) - - # 1. Costruzione iniziale del grafo + # CR data limitation: OpenAlex CR = URLs, PubMed CR = empty + # histNetwork returns None or empty NetMatrix — return gracefully + if hist_results is None or hist_results.get('NetMatrix') is None: + empty_df = pd.DataFrame(columns=["Paper", "Title", "Year", "DOI", "LCS", "GCS", "cluster"]) + tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".html") + return None, empty_df, tmp.name.split(os.sep)[-1] + # 1. Initial graph construction hist_plot = histPlot( hist_results, n=histNodes, size=histsize, - remove_isolates=False, # rimozione manuale + remove_isolates=False, label=node_label, verbose=False ) - # 2. Recupera layout e rete iniziale + # 2. Retrieve layout and initial network layout_df = pd.DataFrame(hist_plot["layout"]).copy() full_net = hist_plot["net"] - # 3. Filtra archi per mantenere solo quelli con nodi nel top-N + # 3. Filter edges to keep only those with nodes in top-N selected_nodes = set(full_net.nodes()) edges_filtered = [(u, v) for u, v in full_net.edges() if u in selected_nodes and v in selected_nodes] - # 4. Ricostruisci rete filtrata + # 4. 
Rebuild filtered network net_nx = nx.DiGraph() net_nx.add_nodes_from(selected_nodes) net_nx.add_edges_from(edges_filtered) - # 5. Opzionale: rimuovi componenti isolate + # 5. Optionally remove isolated components if hist_isolates: connected_components = list(nx.connected_components(net_nx.to_undirected())) valid_components = [c for c in connected_components if len(c) > 1] @@ -62,18 +71,17 @@ def get_historiograph(df, node_label="AU1", histNodes=20, hist_isolates=True, hi else: valid_nodes = set(net_nx.nodes) - # 6. Filtra layout + # 6. Filter layout layout_df = layout_df[layout_df.index.isin(valid_nodes)].copy() layout_df["name"] = layout_df.index layout_df.reset_index(drop=True, inplace=True) - # 7. Filtra hist_data in base ai nodi presenti nel grafo + # 7. Filter hist_data based on nodes present in the graph hist_data = hist_results["histData"].copy() hist_data = hist_data[hist_data["Paper"].isin(valid_nodes)].copy() hist_data = hist_data.merge(layout_df, left_on="Paper", right_on="name", how="left") - - # Cluster da colore + # Cluster from color if "color" in hist_data.columns: unique_colors = hist_data['color'].dropna().unique() color_to_cluster = {color: idx + 1 for idx, color in enumerate(unique_colors)} @@ -82,28 +90,25 @@ def get_historiograph(df, node_label="AU1", histNodes=20, hist_isolates=True, hi hist_data['color'] = "gray" hist_data['cluster'] = -1 - # Formattazione DOI cliccabile + # Clickable DOI formatting hist_data['DOI'] = hist_data['DOI'].apply( lambda doi: f'{doi}' if pd.notnull(doi) else "" ) - # Rimozione Year mancanti + # Remove missing Year rows hist_data = hist_data[hist_data["Year"].notna()].copy() if hist_data.empty: - raise ValueError("Nessun dato con 'Year' valido per la historiograph.") + raise ValueError("No data with valid 'Year' for the historiograph.") - # Posizionamento temporale orizzontale + # Horizontal temporal positioning hist_data = hist_data.sort_values(['cluster', 'Year']) min_year = hist_data["Year"].min() - year_range = 
hist_data["Year"].max() - min_year + 1 - # Spazio orizzontale compatto - hist_data["x"] = (hist_data["Year"] - min_year) * 60 # invece di /year_range * 1000 + hist_data["x"] = (hist_data["Year"] - min_year) * 60 - # Spazio verticale più ravvicinato tra cluster + # Vertical spacing between clusters hist_data["y"] = hist_data["cluster"] * 150 + np.random.uniform(-30, 30, size=len(hist_data)) - - # Tooltip e label robusti + # Robust tooltips and labels hist_data["tooltip"] = hist_data.apply( lambda row: ( f"{str(row.get('Title', 'No Title')).replace('<', '<').replace('>', '>')}" @@ -119,15 +124,13 @@ def get_historiograph(df, node_label="AU1", histNodes=20, hist_isolates=True, hi axis=1 ) - # Calcola opacità dinamica e dimensione font + # Dynamic opacity and font size min_font_size = 10 max_font_size = 130 - base_font_size = 24 # oppure calcolato in base a metrica font_opacity = np.sqrt((histlabelsize - min_font_size) / (max_font_size - min_font_size)) * 0.8 + 0.3 - font_opacity = max(0.1, min(1, font_opacity)) # clamp tra 0.1 e 1 - + font_opacity = max(0.1, min(1, font_opacity)) - # Calcola dimensione proporzionale a LCS + # Node size proportional to LCS if "LCS" in hist_data.columns and not hist_data["LCS"].isnull().all(): lcs_min = hist_data["LCS"].min() lcs_max = hist_data["LCS"].max() @@ -136,11 +139,11 @@ def get_historiograph(df, node_label="AU1", histNodes=20, hist_isolates=True, hi else: hist_data["node_size"] = histsize - # Inizializza grafo Pyvis + # Initialize Pyvis graph net = Network(height="98vh", width="100%", directed=True, notebook=True, cdn_resources="in_line") net.toggle_physics(False) - # Aggiungi nodi + # Add nodes for _, row in hist_data.iterrows(): base_color = row.get("color", "#999999") color_rgba = hex_to_rgba(base_color, 0.8) @@ -148,20 +151,34 @@ def get_historiograph(df, node_label="AU1", histNodes=20, hist_isolates=True, hi if node_label == "AU1": label_value = row.get("id", f"{row.get('name', 'unknown')}, {row.get('Year', 'n.d.')}") + 
elif node_label == "TI": label_value = row.get("Title", "No Title") + elif node_label == "ID": - try: - keywords = eval(row.get("Author_Keywords", "[]")) if isinstance(row.get("Author_Keywords"), str) else row.get("Author_Keywords", []) - label_value = "; ".join(keywords) if keywords else "No keywords" - except: - label_value = "No keywords" + # PATCH: replaced eval() with safe parser — eval() crashes on + # non-Python strings (e.g. semicolon-separated values produced + # after DataFrame merges). Handles list, semicolon, or comma formats. + raw = row.get("Author_Keywords", []) + if isinstance(raw, list): + keywords = raw + elif isinstance(raw, str) and raw.strip(): + keywords = [k.strip() for k in raw.replace(";", ",").split(",") if k.strip()] + else: + keywords = [] + label_value = "; ".join(keywords) if keywords else "No keywords" + elif node_label == "DE": - try: - keywords = eval(row.get("KeywordsPlus", "[]")) if isinstance(row.get("KeywordsPlus"), str) else row.get("KeywordsPlus", []) - label_value = "; ".join(keywords) if keywords else "No keywords" - except: - label_value = "No keywords" + # PATCH: same safe parser for KeywordsPlus field. 
+ raw = row.get("KeywordsPlus", []) + if isinstance(raw, list): + keywords = raw + elif isinstance(raw, str) and raw.strip(): + keywords = [k.strip() for k in raw.replace(";", ",").split(",") if k.strip()] + else: + keywords = [] + label_value = "; ".join(keywords) if keywords else "No keywords" + else: label_value = "unknown" @@ -188,10 +205,10 @@ def get_historiograph(df, node_label="AU1", histNodes=20, hist_isolates=True, hi borderWidth=2, borderWidthSelected=3, physics=False, - fixed={"x": True, "y": False} # blocca solo l'asse x + fixed={"x": True, "y": False} ) - # Aggiungi archi con ombreggiatura + # Add edges with shading existing_nodes = set(net.get_nodes()) for source, target in net_nx.edges(): if source in existing_nodes and target in existing_nodes: @@ -199,7 +216,7 @@ def get_historiograph(df, node_label="AU1", histNodes=20, hist_isolates=True, hi edge_color = hex_to_rgba(source_color, 0.4) net.add_edge(source, target, color=edge_color, width=1.5) - # Salva HTML temporaneo + # Save temporary HTML tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".html") html_path = tmp.name with open(html_path, 'w', encoding="utf-8") as f: @@ -207,7 +224,6 @@ def get_historiograph(df, node_label="AU1", histNodes=20, hist_isolates=True, hi new_css = " .card {\n border: none;\n }" updated_html = html.replace("", new_css + "\n ") updated_html = updated_html.replace("1px solid lightgray", "none") - f.write(updated_html) - return hist_plot, hist_data, html_path.split(os.sep)[-1] + return hist_plot, hist_data, html_path.split(os.sep)[-1] \ No newline at end of file diff --git a/functions/get_localcitedauthors.py b/functions/get_localcitedauthors.py index e663192bc..3ebde7e86 100644 --- a/functions/get_localcitedauthors.py +++ b/functions/get_localcitedauthors.py @@ -4,113 +4,270 @@ def get_local_cited_authors(df, num_of_cited_authors, fast_search=False): """ Generate a plot and table of the most local cited authors. 
- - Args: - df: A DataFrame object containing the data. - num_of_cited_authors: The number of top cited authors to display. - fast_search: Boolean indicating whether to use fast search or not. - - Returns: - A Plotly figure object and a DataFrame of the most local cited authors. - """ - # Determine the local citation threshold + """ + + # SAFETY CHECK + if df is None: + return go.FigureWidget(go.Figure()), pd.DataFrame() + + # ENSURE SR EXISTS + _df = df.get() if hasattr(df, 'get') and not isinstance(df, pd.DataFrame) else df + if 'SR' not in _df.columns or _df['SR'].eq('').all(): + df = metaTagExtraction(df, "SR") + + # PATCH: metaTagExtraction may return a reactive or a plain DataFrame + M = df.get() if hasattr(df, 'get') and callable(df.get) and not isinstance(df, pd.DataFrame) else df + + # EMPTY CHECK + if M is None or M.empty: + return go.FigureWidget(go.Figure()), pd.DataFrame() + + # REQUIRED COLUMNS + required_cols = ['AU', 'TC'] + + for col in required_cols: + + if col not in M.columns: + + if col == 'AU': + M[col] = [[] for _ in range(len(M))] + else: + M[col] = 0 + + # OPTIONAL COLUMN + if 'LCS' not in M.columns: + M['LCS'] = 0 + + # SAFE NUMERIC CONVERSION + M['TC'] = pd.to_numeric( + M['TC'], + errors='coerce' + ).fillna(0) + + M['LCS'] = pd.to_numeric( + M['LCS'], + errors='coerce' + ).fillna(0) + + # SAFE AUTHOR FORMAT + M['AU'] = M['AU'].apply( + lambda x: x + if isinstance(x, list) + else [i.strip() for i in str(x).split(";")] if pd.notna(x) + else [] + ) + + # LOCAL CITATION THRESHOLD if fast_search: - loccit = df['TC'].quantile(0.75) + loccit = M['TC'].quantile(0.75) else: loccit = 1 - df = metaTagExtraction(df, "SR") - M = df.get() - - # Fill missing values - M['TC'] = M['TC'].fillna(0) + # HIST NETWORK + H = histNetwork( + df, + min_citations=loccit, + sep=";", + network=False + ) + + # SAFETY CHECK + if H is None: + return go.FigureWidget(go.Figure()), pd.DataFrame() - # Create a histogram network - H = histNetwork(df, min_citations=loccit, 
sep=";", network=False) - LCS = H['histData'] + # PATCH: if all LCS are 0 (common with OpenAlex due to URL-based references), + # return empty result immediately instead of hanging. M = H['M'] - - # Split authors and repeat local citations + if 'LCS' not in M.columns or M['LCS'].sum() == 0: + return go.FigureWidget(go.Figure()), pd.DataFrame() + + # ENSURE REQUIRED OUTPUT COLUMNS + required_output_cols = ['AU', 'LCS'] + + for col in required_output_cols: + + if col not in M.columns: + + if col == 'AU': + M[col] = [[] for _ in range(len(M))] + else: + M[col] = 0 + + # SAFE AUTHOR FORMAT AGAIN + M['AU'] = M['AU'].apply( + lambda x: x + if isinstance(x, list) + else [i.strip() for i in str(x).split(";")] if pd.notna(x) + else [] + ) + + # SPLIT AUTHORS AU = M['AU'].explode() + + # REMOVE EMPTY AUTHORS + AU = AU[ + AU.astype(str).str.strip() != "" + ] + + # EMPTY CHECK + if len(AU) == 0: + return go.FigureWidget(go.Figure()), pd.DataFrame() + n = AU.groupby(level=0).size() - - # Create DataFrame for authors and local citations - df_authors = pd.DataFrame({'AU': AU, 'LCS': M['LCS'].repeat(n).values}) - author_counts = df_authors.groupby('AU')['LCS'].sum().reset_index() - author_counts.columns = ["Authors", "N. of Local Citations"] - author_counts = author_counts.sort_values(by="N. of Local Citations", ascending=False) - - # Limit the number of authors to display - if num_of_cited_authors > len(author_counts): - num_of_cited_authors = len(author_counts) - - # Truncate author names to 50 characters - # author_counts["Authors"] = author_counts["Authors"].str[:50] - - # Prepare the complete table and filter rows for display + + # AUTHOR TABLE + df_authors = pd.DataFrame({ + 'AU': AU, + 'LCS': M['LCS'].repeat(n).values + }) + + author_counts = ( + df_authors.groupby('AU')['LCS'] + .sum() + .reset_index() + ) + + author_counts.columns = [ + "Authors", + "N. of Local Citations" + ] + + author_counts = author_counts.sort_values( + by="N. 
of Local Citations", + ascending=False + ) + + # EMPTY CHECK + if author_counts.empty: + return go.FigureWidget(go.Figure()), pd.DataFrame() + + # LIMIT AUTHORS + num_of_cited_authors = min( + int(num_of_cited_authors), + len(author_counts) + ) + + # SAFE STRING HANDLING + author_counts["Authors"] = ( + author_counts["Authors"] + .astype(str) + .str[:50] + ) + table_located_authors = author_counts.copy() - author_counts = author_counts.head(num_of_cited_authors).reset_index(drop=True) - # Enhanced, beautiful, and readable plot for local cited authors + author_counts = ( + author_counts.head(num_of_cited_authors) + .reset_index(drop=True) + ) + frequency = "N. of Local Citations" - # Create the plot (use scatter instead of scatter with orientation='h') + + # PLOT fig = go.Figure() - # Add a thick line from each label to its marker + # SAFE MAX VALUE + max_freq = max( + author_counts[frequency].max(), + 1 + ) + + # SHAPES for i, row in author_counts.iterrows(): + fig.add_shape( type="line", x0=0, x1=row[frequency], y0=i, y1=i, - line=dict(color="#e0e0e0", width=5), + line=dict( + color="#e0e0e0", + width=5 + ), layer="below", ) + # SCATTER fig.add_trace( + go.Scatter( x=author_counts[frequency], + y=list(range(len(author_counts))), + mode="markers+text", + marker=dict( - size=18 + 6 * (author_counts[frequency] / author_counts[frequency].max()), + size=18 + 6 * ( + author_counts[frequency] / max_freq + ), + color=author_counts[frequency], - colorscale=[[0, "#B3D1F2"], [1, "#5567BB"]], - line=dict(width=1, color="#E0E0E0"), + + colorscale=[ + [0, "#B3D1F2"], + [1, "#5567BB"] + ], + + line=dict( + width=1, + color="#E0E0E0" + ), + opacity=0.95, showscale=False, ), + text=author_counts[frequency], - textposition="top center", - textfont=dict(color="#5567BB", size=13), + + textposition="top center", + + textfont=dict( + color="#5567BB", + size=13 + ), + hovertemplate=( "Author: %{customdata}
" "" + frequency + ": %{x}" ), + customdata=author_counts["Authors"], ) ) - # Add horizontal grid lines for each author (lighter) + # GRID LINES for i in range(len(author_counts)): + fig.add_shape( type="line", x0=0, - x1=author_counts[frequency].max(), + x1=max_freq, y0=i, y1=i, - line=dict(color="#E0E0E0", width=2), + line=dict( + color="#E0E0E0", + width=2 + ), layer="below", ) - # Set x-axis ticks to 0, 5, 10, etc. - max_x = author_counts[frequency].max() + # X TICKS tick_step = 5 - x_ticks = list(range(0, int(max_x) + tick_step, tick_step)) - if x_ticks[-1] < max_x: - x_ticks.append(int(max_x)) + x_ticks = list( + range( + 0, + int(max_freq) + tick_step, + tick_step + ) + ) + + if len(x_ticks) == 0: + x_ticks = [0] + + # AXES fig.update_yaxes( tickvals=list(range(len(author_counts))), ticktext=author_counts["Authors"], @@ -119,6 +276,7 @@ def get_local_cited_authors(df, num_of_cited_authors, fast_search=False): title="Authors", tickfont=dict(size=13), ) + fig.update_xaxes( showgrid=True, gridcolor="#F0F0F0", @@ -127,22 +285,51 @@ def get_local_cited_authors(df, num_of_cited_authors, fast_search=False): title=frequency, tickfont=dict(size=13), ) + + # LAYOUT fig.update_layout( plot_bgcolor='white', - font=dict(color="#222222", size=14, family="Segoe UI, Arial"), - margin=dict(l=0, r=0, t=0, b=0), - height=50 + 90 * len(author_counts), + + font=dict( + color="#222222", + size=14, + family="Segoe UI, Arial" + ), + + margin=dict( + l=0, + r=0, + t=0, + b=0 + ), + + height=max( + 400, + 50 + 90 * len(author_counts) + ), + showlegend=False, + hoverlabel=dict( bgcolor="white", font_size=13, font_family="Segoe UI, Arial", bordercolor="#5567BB" ), + coloraxis_showscale=False, ) + fig = go.FigureWidget(fig) - fig._config = fig._config | {'modeBarButtonsToRemove': ['pan', 'select', 'lasso2d', 'toImage'], - 'displaylogo': False} - - return fig, table_located_authors + + fig._config = fig._config | { + 'modeBarButtonsToRemove': [ + 'pan', + 'select', + 'lasso2d', + 
'toImage' + ], + 'displaylogo': False + } + + return fig, table_located_authors \ No newline at end of file diff --git a/functions/get_localciteddocuments.py b/functions/get_localciteddocuments.py index 1dea8d5a5..d5ca8c4c1 100644 --- a/functions/get_localciteddocuments.py +++ b/functions/get_localciteddocuments.py @@ -1,123 +1,276 @@ +"m8v2qp" from www.services import * def get_local_cited_documents(df, num_of_local_cited_docs, field_separator, fast_search=False): """ Generate a plot and table of the most local cited documents. - - Args: - df: A DataFrame object containing the data. - num_of_local_cited_docs: The number of top cited documents to display. - fast_search: Boolean indicating whether to use fast search or not. - - Returns: - A Plotly figure object and a DataFrame of the most local cited documents. """ - df = metaTagExtraction(df, "SR") - M = df.get() - # Determine the local citation threshold + # SAFETY CHECK + if df is None: + return None, pd.DataFrame() + + # ENSURE SR EXISTS + _df = df.get() if hasattr(df, 'get') and not isinstance(df, pd.DataFrame) else df + if 'SR' not in _df.columns or _df['SR'].eq('').all(): + df = metaTagExtraction(df, "SR") + + # PATCH: metaTagExtraction may return a reactive or a plain DataFrame + # pandas .get() requires a column name as argument, crashes without one + # Fix: isinstance check — if it's a DataFrame use it directly, + # if it's a Shiny reactive object use .get() to unwrap it + M = df if isinstance(df, pd.DataFrame) else df.get() + + # EMPTY CHECK + if M is None or M.empty: + return None, pd.DataFrame() + + # REQUIRED COLUMNS + required_cols = ['SR', 'TC', 'PY'] + + for col in required_cols: + + if col not in M.columns: + + if col in ['TC', 'PY']: + M[col] = 0 + else: + M[col] = "" + + # OPTIONAL COLUMNS + optional_cols = ['DI', 'LCS'] + + for col in optional_cols: + + if col not in M.columns: + + if col == 'LCS': + M[col] = 0 + else: + M[col] = "" + + # SAFE NUMERIC CONVERSION + M['TC'] = pd.to_numeric( + 
M['TC'], + errors='coerce' + ).fillna(0) + + M['PY'] = pd.to_numeric( + M['PY'], + errors='coerce' + ) + + M['LCS'] = pd.to_numeric( + M['LCS'], + errors='coerce' + ).fillna(0) + + # LOCAL CITATION THRESHOLD if fast_search: loccit = M['TC'].quantile(0.75) else: loccit = 1 - - # Fill missing values - M['TC'] = M['TC'].fillna(0) - # Create a histogram network - H = histNetwork(df, min_citations=loccit, sep=";", network=False) - LCS = H['histData'] + # HIST NETWORK + H = histNetwork( + df, + min_citations=loccit, + sep=";", + network=False + ) + + # SAFETY CHECK + if H is None: + return None, pd.DataFrame() + M = H['M'] - - # Create DataFrame for documents and local citations + + # ENSURE REQUIRED OUTPUT COLUMNS + required_output_cols = ['SR', 'DI', 'PY', 'LCS', 'TC'] + + for col in required_output_cols: + + if col not in M.columns: + + if col in ['LCS', 'TC', 'PY']: + M[col] = 0 + else: + M[col] = "" + + # BUILD DOCUMENT TABLE df_documents = pd.DataFrame({ - 'Document': M['SR'], - 'DOI': M['DI'], + 'Document': M['SR'].astype(str), + 'DOI': M['DI'].astype(str), 'Year': M['PY'], 'Local Citations': M['LCS'], 'Global Citations': M['TC'] }) - - # Calculate additional metrics - df_documents['LC/GC Ratio'] = (df_documents['Local Citations'] / df_documents['Global Citations'] * 100).round(2) - - # Calculate Normalized Local Citations within each publication year - df_documents['Normalized Local Citations'] = df_documents.groupby('Year')['Local Citations'].transform(lambda x: x / x.mean()).round(2) - - # Calculate Normalized Global Citations within each publication year - df_documents['Normalized Global Citations'] = df_documents.groupby('Year')['Global Citations'].transform(lambda x: x / x.mean()).round(2) - - # Sort by local citations - df_documents = df_documents.sort_values(by='Local Citations', ascending=False) - - # Limit the number of documents to display - if num_of_local_cited_docs > len(df_documents): - num_of_local_cited_docs = len(df_documents) - + + # SAFE LC/GC 
RATIO + df_documents['LC/GC Ratio'] = df_documents.apply( + lambda row: + round( + (row['Local Citations'] / row['Global Citations']) * 100, + 2 + ) + if row['Global Citations'] > 0 + else 0, + axis=1 + ) + + # SAFE NORMALIZATION + df_documents['Normalized Local Citations'] = ( + df_documents.groupby('Year')['Local Citations'] + .transform( + lambda x: + (x / x.mean()).round(2) + if x.mean() not in [0, np.nan] + else 0 + ) + ) + + df_documents['Normalized Global Citations'] = ( + df_documents.groupby('Year')['Global Citations'] + .transform( + lambda x: + (x / x.mean()).round(2) + if x.mean() not in [0, np.nan] + else 0 + ) + ) + + # SORT + df_documents = df_documents.sort_values( + by='Local Citations', + ascending=False + ) + + # EMPTY CHECK + if df_documents.empty: + return None, pd.DataFrame() + + # LIMIT RESULTS + num_of_local_cited_docs = min( + int(num_of_local_cited_docs), + len(df_documents) + ) + table_located_documents = df_documents.copy() - df_documents = df_documents.head(num_of_local_cited_docs) - - # Create the plot (horizontal scatter with lines, similar to author plot) + + df_documents = df_documents.head( + num_of_local_cited_docs + ) + + # PLOT fig = go.Figure() - # Add a thick line from each document label to its marker + # SAFE MAX VALUE + max_local = max( + df_documents["Local Citations"].max(), + 1 + ) + + # SHAPES for idx, (i, row) in enumerate(df_documents.iterrows()): + fig.add_shape( type="line", x0=0, x1=row["Local Citations"], y0=idx, y1=idx, - line=dict(color="#e0e0e0", width=5), + line=dict( + color="#e0e0e0", + width=5 + ), layer="below", ) + # SCATTER fig.add_trace( + go.Scatter( x=df_documents["Local Citations"], + y=list(range(len(df_documents))), + mode="markers+text", + marker=dict( - size=18 + 6 * (df_documents["Local Citations"] / df_documents["Local Citations"].max()), + size=18 + 6 * ( + df_documents["Local Citations"] / max_local + ), + color=df_documents["Local Citations"], - colorscale=[[0, "#B3D1F2"], [1, "#5567BB"]], - 
line=dict(width=1, color="#E0E0E0"), + + colorscale=[ + [0, "#B3D1F2"], + [1, "#5567BB"] + ], + + line=dict( + width=1, + color="#E0E0E0" + ), + opacity=0.95, showscale=False, ), + text=df_documents["Local Citations"], + textposition="top center", - textfont=dict(color="#5567BB", size=13), + + textfont=dict( + color="#5567BB", + size=13 + ), + hovertemplate=( "Document: %{customdata[0]}
" "Year: %{customdata[1]}
" "Local Citations: %{x}
" "Global Citations: %{customdata[2]}" ), - customdata=df_documents[["Document", "Year", "Global Citations"]].values, + + customdata=df_documents[ + ["Document", "Year", "Global Citations"] + ].values, ) ) - # Add horizontal grid lines for each document (lighter) + # GRID LINES for idx in range(len(df_documents)): + fig.add_shape( type="line", x0=0, - x1=df_documents["Local Citations"].max(), + x1=max_local, y0=idx, y1=idx, - line=dict(color="#E0E0E0", width=2), + line=dict( + color="#E0E0E0", + width=2 + ), layer="below", ) - # Set x-axis ticks to 0, 5, 10, etc. - max_x = df_documents["Local Citations"].max() + # X TICKS tick_step = 5 - x_ticks = list(range(0, int(max_x) + tick_step, tick_step)) - if x_ticks[-1] < max_x: - x_ticks.append(int(max_x)) + x_ticks = list( + range( + 0, + int(max_local) + tick_step, + tick_step + ) + ) + + if len(x_ticks) == 0: + x_ticks = [0] + + # AXES fig.update_yaxes( tickvals=list(range(len(df_documents))), ticktext=df_documents["Document"], @@ -126,6 +279,7 @@ def get_local_cited_documents(df, num_of_local_cited_docs, field_separator, fast title="Document", tickfont=dict(size=13), ) + fig.update_xaxes( showgrid=True, gridcolor="#F0F0F0", @@ -134,22 +288,51 @@ def get_local_cited_documents(df, num_of_local_cited_docs, field_separator, fast title="Local Citations", tickfont=dict(size=13), ) + + # LAYOUT fig.update_layout( plot_bgcolor='white', - font=dict(color="#222222", size=14, family="Segoe UI, Arial"), - margin=dict(l=250, r=40, t=40, b=40), - height=50 + 90 * len(df_documents), + + font=dict( + color="#222222", + size=14, + family="Segoe UI, Arial" + ), + + margin=dict( + l=250, + r=40, + t=40, + b=40 + ), + + height=max( + 400, + 50 + 90 * len(df_documents) + ), + showlegend=False, + hoverlabel=dict( bgcolor="white", font_size=13, font_family="Segoe UI, Arial", bordercolor="#5567BB" ), + coloraxis_showscale=False, ) + fig = go.FigureWidget(fig) - fig._config = fig._config | {'modeBarButtonsToRemove': ['pan', 'select', 
'lasso2d', 'toImage'], - 'displaylogo': False} - - return fig, table_located_documents + + fig._config = fig._config | { + 'modeBarButtonsToRemove': [ + 'pan', + 'select', + 'lasso2d', + 'toImage' + ], + 'displaylogo': False + } + + return fig, table_located_documents \ No newline at end of file diff --git a/functions/get_localcitedreferences.py b/functions/get_localcitedreferences.py index 68ea11fef..c6ea76414 100644 --- a/functions/get_localcitedreferences.py +++ b/functions/get_localcitedreferences.py @@ -13,7 +13,10 @@ def get_local_cited_refs(df, num_of_cited_refs, field_separator): Returns: A Plotly figure object and a DataFrame of the most local cited sources. """ - data = df.get() + # PATCH: original code called df.get() without arguments, which crashes on a + # plain pandas DataFrame because pandas .get() requires a column name as argument. + # Fixed by checking isinstance(df, pd.DataFrame) first. + data = df if isinstance(df, pd.DataFrame) else df.get() if isinstance(data["CR"].iloc[0], list): # Check if the first element is a list # Flatten the 'CR' column containing lists @@ -30,6 +33,11 @@ def get_local_cited_refs(df, num_of_cited_refs, field_separator): # Filter out unwanted references source_counts = source_counts[source_counts["Cited References"] != "ANONYMOUS, NO TITLE CAPTURED"] + # PATCH: PubMed CR is empty (eSummary API returns no references) — return + # empty results gracefully instead of crashing on NaN max_x downstream. 
+ if source_counts.empty: + empty_df = pd.DataFrame(columns=["Cited References", "Citations"]) + return go.Figure(), empty_df # Limit the number of sources to display if num_of_cited_refs > len(source_counts): diff --git a/functions/get_localcitedsources.py b/functions/get_localcitedsources.py index 74b261455..9d4bebc9f 100644 --- a/functions/get_localcitedsources.py +++ b/functions/get_localcitedsources.py @@ -4,58 +4,121 @@ def get_local_cited_sources(df, num_of_cited_sources): """ Generate a plot and table of the most local cited sources. - - Args: - input: An object that provides user input methods. - df: A DataFrame object containing the data. - num_of_cited_sources: The number of top cited sources to display. - - Returns: - A Plotly figure object and a DataFrame of the most local cited sources. """ - # Extract metadata tags for cited sources + df = metaTagExtraction(df, "CR_SO") + # PATCH: original code called df.get() without arguments, which crashes on a + # plain pandas DataFrame because pandas .get() requires a column name as argument. + # Fixed by checking isinstance(df, pd.DataFrame) first. 
+ data = df.copy() if isinstance(df, pd.DataFrame) else df.get().copy() + + # Ensure CR_SO exists + if "CR_SO" not in data.columns: + print("CR_SO column missing") + return go.FigureWidget(go.Figure()), pd.DataFrame() + + # Fill missing values safely + data["CR_SO"] = data["CR_SO"].fillna("") + + # Handle both list and string formats safely + if len(data) == 0: + return go.FigureWidget(go.Figure()), pd.DataFrame() + + first_valid = data["CR_SO"].dropna() + + if len(first_valid) == 0: + return go.FigureWidget(go.Figure()), pd.DataFrame() + + first_value = first_valid.iloc[0] + + if isinstance(first_value, list): + + exploded = data["CR_SO"].explode() + + exploded = exploded.dropna() + exploded = exploded.astype(str).str.strip() + exploded = exploded[exploded != ""] - data = df.get() - - if isinstance(data["CR_SO"].iloc[0], list): # Check if the first element is a list - # Flatten the 'CR_SO' column containing lists source_counts = ( - pd.DataFrame(data["CR_SO"].explode()) # Explode lists into rows - .value_counts() # Count occurrences - .reset_index() # Reset index to get a DataFrame + exploded.value_counts() + .reset_index() ) + source_counts.columns = ["Sources", "N. of Local Citations"] + else: - # If not a list, continue with the string method - source_counts = data["CR_SO"].str.split(";").explode().value_counts().reset_index() + + exploded = ( + data["CR_SO"] + .astype(str) + .str.split(";") + .explode() + ) + + exploded = exploded.dropna() + exploded = exploded.astype(str).str.strip() + exploded = exploded[exploded != ""] + + source_counts = ( + exploded.value_counts() + .reset_index() + ) + source_counts.columns = ["Sources", "N. 
of Local Citations"] - # Limit the number of sources to display - if num_of_cited_sources > len(source_counts): - num_of_cited_sources = len(source_counts) + # Handle empty results + if source_counts.empty: + print("No cited sources found") + return go.FigureWidget(go.Figure()), pd.DataFrame() + + # Remove invalid rows + source_counts["Sources"] = source_counts["Sources"].astype(str).str.strip() + source_counts = source_counts[source_counts["Sources"] != ""] + + # Numeric safety + source_counts["N. of Local Citations"] = pd.to_numeric( + source_counts["N. of Local Citations"], + errors="coerce" + ).fillna(0) + + source_counts = source_counts.sort_values( + by="N. of Local Citations", + ascending=False + ) + + # Limit safely + num_of_cited_sources = min(num_of_cited_sources, len(source_counts)) - # Prepare the complete table and filter rows for display table_located_sources = source_counts.copy() - source_counts = source_counts.head(num_of_cited_sources) - - # Truncate long source names and add line breaks every 50 characters + source_counts = source_counts.head(num_of_cited_sources).reset_index(drop=True) + + # Safe wrapping def wrap_label(label, width=50): - return '
'.join([label[i:i+width] for i in range(0, len(label), width)]) + label = str(label) + return '
'.join( + [label[i:i + width] for i in range(0, len(label), width)] + ) + source_counts["Sources_wrapped"] = source_counts["Sources"].apply(wrap_label) - # Create the plot (use scatter instead of scatter with orientation='h') fig = go.Figure() - # Add the main scatter plot + max_value = max( + source_counts["N. of Local Citations"].max(), + 1 + ) + + # Scatter plot fig.add_trace( go.Scatter( x=source_counts["N. of Local Citations"], y=list(range(len(source_counts))), mode="markers+text", marker=dict( - size=18 + 6 * (source_counts["N. of Local Citations"] / source_counts["N. of Local Citations"].max()), + size=18 + 6 * ( + source_counts["N. of Local Citations"] / max_value + ), color=source_counts["N. of Local Citations"], colorscale=[[0, "#B3D1F2"], [1, "#5567BB"]], line=dict(width=1, color="#E0E0E0"), @@ -63,8 +126,8 @@ def wrap_label(label, width=50): showscale=False, ), text=source_counts["N. of Local Citations"], - textposition="top center", - textfont=dict(color="#5567BB", size=13), + textposition="top center", + textfont=dict(color="#5567BB", size=13), hovertemplate=( "Source: %{customdata}
" "N. of Local Citations: %{x}" @@ -73,8 +136,9 @@ def wrap_label(label, width=50): ) ) - # Add a thick line from label (x=0) to the marker for each source + # Background lines for i, x_val in enumerate(source_counts["N. of Local Citations"]): + fig.add_shape( type="line", x0=0, @@ -85,24 +149,25 @@ def wrap_label(label, width=50): layer="below", ) - # Add horizontal grid lines for each source (lighter) - for i in range(len(source_counts)): fig.add_shape( type="line", x0=0, - x1=source_counts["N. of Local Citations"].max(), + x1=max_value, y0=i, y1=i, line=dict(color="#E0E0E0", width=2), layer="below", ) - # Set x-axis ticks to 0, 50, 100, etc. - max_x = source_counts["N. of Local Citations"].max() - tick_step = 50 - x_ticks = list(range(0, int(max_x) + tick_step, tick_step)) - if x_ticks[-1] < max_x: - x_ticks.append(int(max_x)) + # Tick safety + tick_step = max(1, int(max_value // 5)) + + x_ticks = list( + range(0, int(max_value) + tick_step, tick_step) + ) + + if x_ticks[-1] < max_value: + x_ticks.append(int(max_value)) fig.update_yaxes( tickvals=list(range(len(source_counts))), @@ -112,6 +177,7 @@ def wrap_label(label, width=50): title="Sources", tickfont=dict(size=13), ) + fig.update_xaxes( showgrid=True, gridcolor="#F0F0F0", @@ -120,9 +186,14 @@ def wrap_label(label, width=50): title="N. 
of Local Citations", tickfont=dict(size=13), ) + fig.update_layout( plot_bgcolor='white', - font=dict(color="#222222", size=14, family="Segoe UI, Arial"), + font=dict( + color="#222222", + size=14, + family="Segoe UI, Arial" + ), margin=dict(l=220, r=40, t=60, b=40), height=50 + 90 * len(source_counts), showlegend=False, @@ -133,8 +204,17 @@ def wrap_label(label, width=50): bordercolor="#5567BB" ), ) + fig = go.FigureWidget(fig) - fig._config = fig._config | {'modeBarButtonsToRemove': ['pan', 'select', 'lasso2d', 'toImage'], - 'displaylogo': False} - - return fig, table_located_sources + + fig._config = fig._config | { + 'modeBarButtonsToRemove': [ + 'pan', + 'select', + 'lasso2d', + 'toImage' + ], + 'displaylogo': False + } + + return fig, table_located_sources \ No newline at end of file diff --git a/functions/get_lotkalaw.py b/functions/get_lotkalaw.py index 94545fda2..ddb34b1ed 100644 --- a/functions/get_lotkalaw.py +++ b/functions/get_lotkalaw.py @@ -14,7 +14,10 @@ def get_lotka_law(df): """ # Calculate Lotka's Law - data = df.get() + # PATCH: original code called df.get() without arguments, which crashes on a + # plain pandas DataFrame because pandas .get() requires a column name as argument. + # Fixed by checking isinstance(df, pd.DataFrame) first. + data = df if isinstance(df, pd.DataFrame) else df.get() # Author Productivity (Lotka's Law) authors = pd.Series([author.strip() for sublist in data['AU'] for author in sublist]) diff --git a/functions/get_maininformations.py b/functions/get_maininformations.py index 97443abdb..302947b0b 100644 --- a/functions/get_maininformations.py +++ b/functions/get_maininformations.py @@ -1,195 +1,350 @@ + from www.services import * def get_main_informations(df, log=False): """ Calculate various filters and metrics for the DataFrame. - - Args: - df: A DataFrame object containing the data. - log: A boolean value indicating whether to save the unique authors, keywords, and references to text files. 
- - Returns: - A DataFrame with additional columns for filters and metrics. """ - data = df.get() + + # PATCH: original code called df.get() without arguments, which crashes on a + # plain pandas DataFrame because pandas .get() requires a column name as argument. + # Fixed by checking isinstance(df, pd.DataFrame) first. + data = df if isinstance(df, pd.DataFrame) else df.get() #### Min and Max Year #### start_time = time.time() - # Calculate the minimum and maximum publication years + + data["PY"] = pd.to_numeric( + data["PY"], + errors="coerce" + ) + data["Min_Year"] = data["PY"].min() data["Max_Year"] = data["PY"].max() - print(f"Min and Max Year calculation time: {time.time() - start_time:.4f} seconds") + + print( + f"Min and Max Year calculation time: " + f"{time.time() - start_time:.4f} seconds" + ) #### Unique Sources #### start_time = time.time() + + data["SO"] = data["SO"].fillna("").astype(str) + data["Unique_SO"] = data["SO"].nunique() - print(f"Unique Sources calculation time: {time.time() - start_time:.4f} seconds") + + print( + f"Unique Sources calculation time: " + f"{time.time() - start_time:.4f} seconds" + ) #### Annual Growth Rate (CAGR) #### start_time = time.time() - # Calculate the number of publications per year - publications_per_year = data["PY"].value_counts().sort_index() - # Calculate the number of years in the range + publications_per_year = ( + data["PY"] + .dropna() + .value_counts() + .sort_index() + ) + ny = data["PY"].max() - data["PY"].min() - # Calculate the Compound Annual Growth Rate (CAGR) - if len(publications_per_year) > 1: - cagr = round(((publications_per_year.iloc[-1] / publications_per_year.iloc[0]) ** (1 / ny) - 1) * 100, 2) + if ( + len(publications_per_year) > 1 + and ny > 0 + and publications_per_year.iloc[0] > 0 + ): + + cagr = round( + ( + ( + publications_per_year.iloc[-1] + / + publications_per_year.iloc[0] + ) ** (1 / ny) - 1 + ) * 100, + 2 + ) + else: - cagr = 0 # If there's only one year of data, CAGR is 0 + 
cagr = 0 data["CAGR"] = cagr - print(f"CAGR calculation time: {time.time() - start_time:.4f} seconds") + + print( + f"CAGR calculation time: " + f"{time.time() - start_time:.4f} seconds" + ) #### Unique Authors #### start_time = time.time() - # Ensure the 'AU' column exists + if "AU" not in data.columns: - data["AU"] = "" - else: - data["AU"] = data["AU"].fillna("") + data["AU"] = [[]] + + data["AU"] = data["AU"].apply( + lambda x: x if isinstance(x, list) else [] + ) - # Assume that data["AU"] is a list of strings already split - AU_list = data["AU"] + AU_list = data["AU"] - # Remove empty spaces and empty strings - listAU = [author for sublist in AU_list for author in sublist if author] + listAU = [ + author + for sublist in AU_list + for author in sublist + if author + ] - # Remove duplicates listAU = list(set(listAU)) - # Save the list of authors to a text file if log: - with open("authors_list.txt", "w", encoding="utf-8") as file: + + with open( + "authors_list.txt", + "w", + encoding="utf-8" + ) as file: + for authors in listAU: file.write(f"{authors}\n") - # Count the number of unique authors count_AU = len(listAU) - # Save the count of unique authors in the data structure (optional) data["Unique_AU"] = count_AU - print(f"Unique Authors calculation time: {time.time() - start_time:.4f} seconds") + + print( + f"Unique Authors calculation time: " + f"{time.time() - start_time:.4f} seconds" + ) #### Authors of single-authored docs #### start_time = time.time() + def count_authors(entry): - if isinstance(entry, list): # If it's a list, calculate the length directly + + if isinstance(entry, list): return len(entry) - elif isinstance(entry, str): # If it's a string, split by the delimiter ";" + + elif isinstance(entry, str): return len(entry.split(';')) + else: - return 0 # In case of NaN values or other types, return 0 + return 0 - # Apply the function and get the number of authors for each document nAU = data['AU'].apply(count_authors) - # Filter documents with a 
single author and get the number of unique authors - single_authored_docs = len(data[nAU == 1]['AU'].apply(lambda x: x[0] if isinstance(x, list) else x.split(';')[0]).unique()) - - # Add the count to the dataset - data["Authors_of_single_authored_docs"] = single_authored_docs - print(f"Authors of single-authored docs calculation time: {time.time() - start_time:.4f} seconds") + single_authored_docs = len( + data[nAU == 1]['AU'] + .apply( + lambda x: + x[0] + if isinstance(x, list) and len(x) > 0 + else "" + ) + .unique() + ) + + data["Authors_of_single_authored_docs"] = ( + single_authored_docs + ) + + print( + f"Authors of single-authored docs calculation time: " + f"{time.time() - start_time:.4f} seconds" + ) #### International Co-Authorship #### start_time = time.time() - # Ensure the 'AU_CO' column exists + if "AU_CO" not in data.columns: - # Extract the required metadata - df = metaTagExtraction(df, "AU_CO") - data = df.get() - - # Calculate "Country_Count" with a vectorized function - data["Country_Count"] = data["AU_CO"].apply(lambda x: len(set(x))) - - # Calculate "International_Co_Authorship" without loop - coll = data[data["Country_Count"] > 1].shape[0] - data["International_Co_Authorship"] = 100 * coll / data.shape[0] - - # Save the list of international co-authors to a text file + + data = metaTagExtraction(df, "AU_CO") + + data["AU_CO"] = data["AU_CO"].apply( + lambda x: x if isinstance(x, list) else [] + ) + + data["Country_Count"] = data["AU_CO"].apply( + lambda x: len(set(x)) + ) + + coll = data[ + data["Country_Count"] > 1 + ].shape[0] + + if data.shape[0] > 0: + data["International_Co_Authorship"] = ( + 100 * coll / data.shape[0] + ) + else: + data["International_Co_Authorship"] = 0 + if log: - with open("international_co_authorship.txt", "w", encoding="utf-8") as file: - file.write("\n".join(data["AU_CO"])) - print(f"International Co-Authorship calculation time: {time.time() - start_time:.4f} seconds") + + with open( + 
"international_co_authorship.txt", + "w", + encoding="utf-8" + ) as file: + + for row in data["AU_CO"]: + file.write(f"{row}\n") + + print( + f"International Co-Authorship calculation time: " + f"{time.time() - start_time:.4f} seconds" + ) #### Co-Authors per Doc #### start_time = time.time() - data["Co_Authors_per_Doc"] = round(nAU.mean(), 2) - print(f"Co-Authors per Doc calculation time: {time.time() - start_time:.4f} seconds") - #### Author's Keywords (DE) #### - start_time = time.time() - # Ensure the 'DE' column exists - if "DE" not in data.columns: - data["DE"] = "" - else: - data["DE"] = data["DE"].fillna("") + data["Co_Authors_per_Doc"] = round( + nAU.mean(), + 2 + ) + + print( + f"Co-Authors per Doc calculation time: " + f"{time.time() - start_time:.4f} seconds" + ) - # Split the 'DE' column by ';' and flatten the list - DE = pd.Series([item.upper() for sublist in data["DE"] for item in sublist]) + #### Author Keywords (DE) #### + start_time = time.time() - # Remove extra spaces, periods, and commas, and keep only unique values - DE = DE.str.replace(r"\s+|\.|,", " ", regex=True).str.strip().unique() + if "DE" not in data.columns: + data["DE"] = [[]] + + data["DE"] = data["DE"].apply( + lambda x: x if isinstance(x, list) else [] + ) + + DE = pd.Series([ + item.upper() + for sublist in data["DE"] + for item in sublist + ]) + + DE = ( + DE.str.replace( + r"\s+|\.|,", + " ", + regex=True + ) + .str.strip() + .unique() + ) - # Remove any NaN values DE = DE[~pd.isna(DE)] + DE = DE[DE != "NAN"] - - # Save the unique keywords to a text file + if log: - with open("unique_keywords.txt", "w", encoding="utf-8") as file: + + with open( + "unique_keywords.txt", + "w", + encoding="utf-8" + ) as file: + for keyword in DE: file.write(f"{keyword}\n") - # Add the count of unique keywords to the dataset data["Authors_Keywords_DE"] = len(DE) - print(f"Author's Keywords (DE) calculation time: {time.time() - start_time:.4f} seconds") + + print( + f"Author's Keywords (DE) 
calculation time: " + f"{time.time() - start_time:.4f} seconds" + ) #### References per Doc #### start_time = time.time() - # Ensure the 'CR' column exists - if "CR" not in data.columns: - data["CR"] = "" - else: - data["CR"] = data["CR"].fillna("") - - # Split the 'CR' and flatten the list - CR = pd.Series([item.upper() for sublist in data["CR"] for item in sublist]) - # Remove extra spaces, periods, and commas, and keep only unique values - CR = CR.str.replace(r"\s+|\|,", " ", regex=True).str.strip().unique() + if "CR" not in data.columns: + data["CR"] = [[]] + + data["CR"] = data["CR"].apply( + lambda x: x if isinstance(x, list) else [] + ) + + CR = pd.Series([ + item.upper() + for sublist in data["CR"] + for item in sublist + ]) + + CR = ( + CR.str.replace( + r"\s+|\|,", + " ", + regex=True + ) + .str.strip() + .unique() + ) - # Remove any NaN values CR = CR[~pd.isna(CR)] - - # Save the unique references to a text file + if log: - with open("unique_references.txt", "w", encoding="utf-8") as file: + + with open( + "unique_references.txt", + "w", + encoding="utf-8" + ) as file: + for reference in CR: file.write(f"{reference}\n") - # Count the number of unique references nCR = len(CR) + if nCR == 1: nCR = 0 - # Add the count of unique references to the dataset data["References_per_Doc"] = nCR - print(f"References per Doc calculation time: {time.time() - start_time:.4f} seconds") + + print( + f"References per Doc calculation time: " + f"{time.time() - start_time:.4f} seconds" + ) #### Document Average Age #### start_time = time.time() - # Calculate the average age of the documents + current_year = pd.Timestamp.now().year - data["Document_Age"] = current_year - data["PY"] - data["Document_Average_Age"] = round(data["Document_Age"].mean(), 2) - print(f"Document Average Age calculation time: {time.time() - start_time:.4f} seconds") + + data["Document_Age"] = ( + current_year - data["PY"] + ) + + data["Document_Average_Age"] = round( + data["Document_Age"].mean(), + 2 
+ ) + + print( + f"Document Average Age calculation time: " + f"{time.time() - start_time:.4f} seconds" + ) #### Average citations per doc #### start_time = time.time() - data["Average_Citations_per_Doc"] = round(data["TC"].mean(), 2) - print(f"Average citations per doc calculation time: {time.time() - start_time:.4f} seconds") - return data + data["TC"] = pd.to_numeric( + data["TC"], + errors="coerce" + ).fillna(0) + + data["Average_Citations_per_Doc"] = round( + data["TC"].mean(), + 2 + ) + + print( + f"Average citations per doc calculation time: " + f"{time.time() - start_time:.4f} seconds" + ) + + return data \ No newline at end of file diff --git a/functions/get_referencesspectroscopy.py b/functions/get_referencesspectroscopy.py index a2c3e1522..7934ffa86 100644 --- a/functions/get_referencesspectroscopy.py +++ b/functions/get_referencesspectroscopy.py @@ -1,4 +1,6 @@ + from www.services import * +import ast def get_references_spectroscopy(df, start_year, end_year=2005, field_separator_spec=';'): @@ -16,67 +18,256 @@ def get_references_spectroscopy(df, start_year, end_year=2005, field_separator_s rpys_table (pd.DataFrame): Table with RPYS data (years, citations, deviation from median, top references). cr_table (pd.DataFrame): Table of cited references with local citation counts and Google Scholar links. """ - df = df.get() - # Pulizia e preparazione dei dati - c_references = df['CR'].apply(lambda x: [i for i in x]).explode() - c_references = c_references.astype(str).str.replace('DOI;', 'DOI ') + # PATCH: original code called df.get() without arguments, which crashes on a + # plain pandas DataFrame because pandas .get() requires a column name as argument. + # Fixed by checking isinstance(df, pd.DataFrame) first. + df = df if isinstance(df, pd.DataFrame) else df.get() + # PATCH: if CR contains lists (as produced by the ETL pipeline), + # join them into semicolon-separated strings before processing. 
+ def _parse_cr(x): + if isinstance(x, list): + return x + if isinstance(x, str) and x.strip().startswith('['): + try: + return ast.literal_eval(x) + except: + pass + return [i.strip() for i in str(x).split(field_separator_spec) if i.strip()] + + df['CR'] = df['CR'].apply(_parse_cr) + df['CR'] = df['CR'].apply(lambda x: field_separator_spec.join(x) if isinstance(x, list) else (x or "")) + + # ---------------- SAFE CR PATCH ---------------- + c_references = df['CR'].fillna("").astype(str) + + c_references = c_references.apply( + lambda x: [x] if len(x) > 0 else [] + ).explode() + + c_references = c_references.astype(str).str.replace( + 'DOI;', + 'DOI ' + ) print(field_separator_spec) - # Estrazione dei riferimenti - references = c_references.str.split(f"{field_separator_spec}").apply( - lambda x: [ref.strip() for ref in x if len(ref.strip()) > 10] + + # ---------------- SAFE REFERENCES PATCH ---------------- + references = c_references.str.split( + f"{field_separator_spec}" + ).apply( + lambda x: [ + ref.strip() + for ref in x + if isinstance(ref, str) + and len(ref.strip()) > 10 + ] + if isinstance(x, list) + else [] ) - - # Ripetere gli anni per ogni riferimento citato - references_len = references.str.len() - references = references[references_len > 0] - cited_years = references.apply(lambda refs: [int(re.findall(r'\b\d{4},', ref)[0][:-1]) if re.findall(r'\b\d{4},', ref) else 0 for ref in refs]).explode().astype(int).reset_index(drop=True) + + # ---------------- SAFE YEAR EXTRACTION PATCH ---------------- + # PATCH: the original regex (r'\b\d{4},') only matches WoS-style short + # references where the year sits between commas, e.g. + # 'SMITH J, 2019, NATURE, ...'. PubMed's CR field uses other shapes + # this never anticipated: + # - NLM abbreviated citation: year right after the first period, + # e.g. 'Nat Hum Behav. 2019 Oct;3(10):1045-1046. doi: ...'. + # - Reference-list citation: year in parentheses at the very end, + # e.g. '...Trends Biochem. 
Sci 44, 914-926 (2019).'. + # - Vancouver-style citation: year right after the last comma at + # the very end, e.g. '...Bioengineering. 10(12):1435, 2023.'. + # The last shape is dangerous for the original WoS pattern: a 4-digit + # page number followed by a comma (e.g. '...1435,') gets mistaken for + # the year, when the real year is the one after it. So the two + # end-anchored PubMed shapes are checked first (unambiguous, since the + # year is the last thing in the string), and the permissive mid-string + # WoS pattern is only tried as a last resort. A sanity bound on the + # plausible range catches anything that still slips through. + current_year_bound = pd.Timestamp.now().year + 1 + + def _extract_cited_year(ref): + + def _in_range(y): + return 1500 <= y <= current_year_bound + + m = re.search(r',\s*(\d{4})\.?\s*$', ref) + if m and _in_range(int(m.group(1))): + return int(m.group(1)) + + m = re.search(r'\((\d{4})\)\.?\s*$', ref) + if m and _in_range(int(m.group(1))): + return int(m.group(1)) + + m = re.match(r'^[^.]+\.\s+(\d{4})\b', ref) + if m and _in_range(int(m.group(1))): + return int(m.group(1)) + + m = re.search(r'\b(\d{4}),', ref) + if m and _in_range(int(m.group(1))): + return int(m.group(1)) + + return 0 + + cited_years = references.apply( + lambda refs: [ + _extract_cited_year(ref) + for ref in refs + ] + if isinstance(refs, list) + else [] + ).explode() + + cited_years = pd.to_numeric( + cited_years, + errors='coerce' + ).fillna(0).astype(int).reset_index(drop=True) + references = references.explode().reset_index(drop=True) - # Creazione del DataFrame delle citazioni - ref_df = pd.DataFrame({'Reference': references, 'CitedYear': cited_years}) + # ---------------- CREATE REFERENCE TABLE ---------------- + ref_df = pd.DataFrame({ + 'Reference': references, + 'CitedYear': cited_years + }) - # Filtraggio per intervallo temporale + # ---------------- YEAR FILTER ---------------- current_year = pd.Timestamp.now().year - start_year = start_year if 
start_year is not None else 1700 - end_year = end_year if end_year is not None else current_year - ref_df = ref_df[(ref_df['CitedYear'] >= start_year) & (ref_df['CitedYear'] <= end_year)] - # Calcolo delle citazioni per anno - cr_table = ref_df.groupby(['CitedYear', 'Reference']).size().reset_index(name='Freq') - rpys_table = cr_table.groupby('CitedYear')['Freq'].sum().reset_index(name='Citations') + start_year = ( + start_year + if start_year is not None + else 1700 + ) - # Aggiunta degli anni mancanti + end_year = ( + end_year + if end_year is not None + else current_year + ) + + ref_df = ref_df[ + (ref_df['CitedYear'] >= start_year) + & + (ref_df['CitedYear'] <= end_year) + ] + + # ---------------- CITATION COUNTS ---------------- + cr_table = ( + ref_df + .groupby(['CitedYear', 'Reference']) + .size() + .reset_index(name='Freq') + ) + + rpys_table = ( + cr_table + .groupby('CitedYear')['Freq'] + .sum() + .reset_index(name='Citations') + ) + + # ---------------- SAFE EMPTY TABLE PATCH ---------------- year_seq = rpys_table['CitedYear'] - missing_years = set(range(year_seq.min(), year_seq.max() + 1)) - set(year_seq) - missing_years_df = pd.DataFrame({'CitedYear': list(missing_years), 'Citations': [0] * len(missing_years)}) - rpys_table = pd.concat([rpys_table, missing_years_df]).sort_values('CitedYear').reset_index(drop=True) - # Calcolo della mediana mobile + if len(year_seq) == 0: + + empty_fig = go.FigureWidget(go.Figure()) + + return ( + empty_fig, + pd.DataFrame(), + pd.DataFrame() + ) + + # ---------------- MISSING YEARS PATCH ---------------- + missing_years = set( + range( + int(year_seq.min()), + int(year_seq.max()) + 1 + ) + ) - set(year_seq) + + missing_years_df = pd.DataFrame({ + 'CitedYear': list(missing_years), + 'Citations': [0] * len(missing_years) + }) + + rpys_table = pd.concat( + [rpys_table, missing_years_df] + ).sort_values( + 'CitedYear' + ).reset_index(drop=True) + + # ---------------- MOVING MEDIAN ---------------- YY = [0] * 4 + 
rpys_table['Citations'].tolist() - Median = [np.median(YY[i - 4:i + 1]) for i in range(4, len(YY))] - rpys_table['DiffMedian5'] = rpys_table['Citations'] - Median - # Filtraggio per intervallo temporale - rpys_table = rpys_table[(rpys_table['CitedYear'] >= start_year) & (rpys_table['CitedYear'] <= end_year)] + Median = [ + np.median(YY[i - 4:i + 1]) + for i in range(4, len(YY)) + ] - # Imposta diffMedian a 0 se è negativo - rpys_table['DiffMedian'] = rpys_table['DiffMedian5'].apply(lambda x: x if x > 0 else 0) + rpys_table['DiffMedian5'] = ( + rpys_table['Citations'] - Median + ) + + # ---------------- FILTER AGAIN ---------------- + rpys_table = rpys_table[ + (rpys_table['CitedYear'] >= start_year) + & + (rpys_table['CitedYear'] <= end_year) + ] + + # ---------------- SAFE POSITIVE DEVIATION ---------------- + rpys_table['DiffMedian'] = rpys_table[ + 'DiffMedian5' + ].apply( + lambda x: x if x > 0 else 0 + ) - # Identificazione dei top 3 riferimenti per anno - top_references = cr_table.sort_values('Freq', ascending=False).groupby('CitedYear')['Reference'].apply(lambda refs: '\n'.join(refs)).reset_index() - rpys_table = rpys_table.merge(top_references, left_on='CitedYear', right_on='CitedYear', how='left').rename(columns={'Reference': 'TopReferences'}) + # ---------------- TOP REFERENCES ---------------- + top_references = ( + cr_table + .sort_values('Freq', ascending=False) + .groupby('CitedYear')['Reference'] + .apply(lambda refs: '\n'.join(refs)) + .reset_index() + ) + + rpys_table = rpys_table.merge( + top_references, + left_on='CitedYear', + right_on='CitedYear', + how='left' + ).rename( + columns={'Reference': 'TopReferences'} + ) + + # ---------------- CREATE FIGURE ---------------- + fig = make_subplots( + specs=[[{"secondary_y": True}]] + ) - # Creazione del grafico - fig = make_subplots(specs=[[{"secondary_y": True}]]) fig.add_trace( - go.Scatter(x=rpys_table['CitedYear'], y=rpys_table['Citations'], mode='lines', name='Cited References', 
line=dict(color='#5567BB')), + go.Scatter( + x=rpys_table['CitedYear'], + y=rpys_table['Citations'], + mode='lines', + name='Cited References', + line=dict(color='#5567BB') + ), secondary_y=False, ) + fig.add_trace( - go.Scatter(x=rpys_table['CitedYear'], y=rpys_table['DiffMedian'], mode='lines', name='Deviation from Median', line=dict(color='firebrick')), + go.Scatter( + x=rpys_table['CitedYear'], + y=rpys_table['DiffMedian'], + mode='lines', + name='Deviation from Median', + line=dict(color='firebrick') + ), secondary_y=False, ) @@ -87,18 +278,53 @@ def get_references_spectroscopy(df, start_year, end_year=2005, field_separator_s title_font_size=24, font=dict(color="#444444"), margin=dict(l=0, r=0, t=0, b=0), - legend=dict(orientation='h', yanchor='bottom', y=1.02, xanchor='center', x=0.5), + legend=dict( + orientation='h', + yanchor='bottom', + y=1.02, + xanchor='center', + x=0.5 + ), height=600, ) - fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='#EFEFEF') - fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='#EFEFEF') + fig.update_xaxes( + showgrid=True, + gridwidth=1, + gridcolor='#EFEFEF' + ) + + fig.update_yaxes( + showgrid=True, + gridwidth=1, + gridcolor='#EFEFEF' + ) + fig = go.FigureWidget(fig) - fig._config = fig._config | {'modeBarButtonsToRemove': ['pan', 'select', 'lasso2d', 'toImage'], - 'displaylogo': False} - # Tabella CR con link Google Scholar - cr_table['GoogleLink'] = cr_table['Reference'].apply(lambda ref: f'link') - cr_table = cr_table.rename(columns={'CitedYear': 'Year', 'Freq': 'Local Citations'}) + fig._config = fig._config | { + 'modeBarButtonsToRemove': [ + 'pan', + 'select', + 'lasso2d', + 'toImage' + ], + 'displaylogo': False + } + + # ---------------- GOOGLE SCHOLAR LINKS ---------------- + cr_table['GoogleLink'] = cr_table[ + 'Reference' + ].apply( + lambda ref: + f'link' + ) + + cr_table = cr_table.rename( + columns={ + 'CitedYear': 'Year', + 'Freq': 'Local Citations' + } + ) return fig, rpys_table, cr_table diff --git 
a/functions/get_relevantaffiliations.py b/functions/get_relevantaffiliations.py index b86e36509..821acd417 100644 --- a/functions/get_relevantaffiliations.py +++ b/functions/get_relevantaffiliations.py @@ -7,21 +7,38 @@ def get_relevant_affiliations(df, num_of_affiliations, disambiguation): Args: df: A DataFrame object containing the data. - num_of_authors: The number of top authors to display. - frequency: Type of frequency calculation. Options: "N. of Documents", "Percentage", "Fractionalized". + num_of_affiliations: The number of top affiliations to display. + disambiguation: "yes" to use AU_UN field, anything else to use C1 field. Returns: - A Plotly figure object and a DataFrame of the most relevant authors. + A Plotly figure object and a DataFrame of the most relevant affiliations. """ - data = df.get() + # PATCH: metaTagExtraction may return a reactive or a plain DataFrame + if disambiguation == "yes": + # AU_UN is a derived field — must be extracted before use + df = metaTagExtraction(df, Field="AU_UN") + + data = df.get() if hasattr(df, 'get') and callable(df.get) and not isinstance(df, pd.DataFrame) else df + + # PATCH: safety check + if data is None or data.empty: + return go.FigureWidget(go.Figure()), pd.DataFrame() if disambiguation == "yes": - # Extract affiliations from the "AU_UN" field + # PATCH: AU_UN may be missing even after extraction + if "AU_UN" not in data.columns: + return go.FigureWidget(go.Figure()), pd.DataFrame() affiliations = data["AU_UN"].explode().dropna().replace('', None).dropna() else: - # Extract affiliations from the "C1" field + # PATCH: C1 may be missing + if "C1" not in data.columns: + return go.FigureWidget(go.Figure()), pd.DataFrame() affiliations = data["C1"].explode().dropna() + # PATCH: safety check if affiliations is empty after explode + if affiliations.empty: + return go.FigureWidget(go.Figure()), pd.DataFrame() + # Count occurrences of each affiliation affiliation_counts = affiliations.value_counts().reset_index() 
affiliation_counts.columns = ["Affiliation", "Articles"] @@ -31,7 +48,10 @@ def get_relevant_affiliations(df, num_of_affiliations, disambiguation): num_of_affiliations = len(affiliation_counts) affiliation_counts = affiliation_counts.head(num_of_affiliations) - # Create the plot + # PATCH: safety check if affiliation_counts is empty + if affiliation_counts.empty: + return go.FigureWidget(go.Figure()), pd.DataFrame() + fig = go.Figure() fig.add_trace( @@ -87,4 +107,4 @@ def get_relevant_affiliations(df, num_of_affiliations, disambiguation): fig._config = fig._config | {'modeBarButtonsToRemove': ['pan', 'select', 'lasso2d', 'toImage'], 'displaylogo': False} - return fig, affiliation_counts + return fig, affiliation_counts \ No newline at end of file diff --git a/functions/get_relevantauthors.py b/functions/get_relevantauthors.py index cdf960151..7293f6a57 100644 --- a/functions/get_relevantauthors.py +++ b/functions/get_relevantauthors.py @@ -1,114 +1,214 @@ +"k7d2pw" from www.services import * def get_relevant_authors(df, num_of_authors, frequency="N. of Documents"): """ Generate a plot and table of the most relevant authors with frequency options. - - Args: - df: A DataFrame object containing the data. - num_of_authors: The number of top authors to display. - frequency: Type of frequency calculation. Options: "N. of Documents", "Percentage", "Fractionalized". - - Returns: - A Plotly figure object and a DataFrame of the most relevant authors. """ - data = df.get() - # Drop rows with missing values - data = data.dropna(subset=["AU"]) + # SAFETY CHECK + if df is None: + return None, pd.DataFrame() - # Ensure all values in the "AU" column are lists - data["AU"] = data["AU"].apply(lambda x: x if isinstance(x, list) else []) + # PATCH: original code called df.get() without arguments, which crashes on a + # plain pandas DataFrame because pandas .get() requires a column name as argument. + # Fixed by checking isinstance(df, pd.DataFrame) first. 
+ data = df if isinstance(df, pd.DataFrame) else df.get() + + if data is None or data.empty: + return None, pd.DataFrame() + + # ENSURE AU COLUMN EXISTS + if "AU" not in data.columns: + data["AU"] = [[] for _ in range(len(data))] + + # REMOVE NULLS + data = data.copy() + + # ENSURE LIST FORMAT + data["AU"] = data["AU"].apply( + lambda x: x + if isinstance(x, list) + else [i.strip() for i in str(x).split(";")] if pd.notna(x) + else [] + ) + + # FLATTEN AUTHORS + all_authors = [ + author + for sublist in data["AU"] + for author in sublist + if str(author).strip() + ] + + # EMPTY CHECK + if len(all_authors) == 0: + return None, pd.DataFrame() - # Flatten the list of authors and calculate occurrences - all_authors = [author for sublist in data["AU"] for author in sublist] author_counts = pd.Series(all_authors).value_counts() - # Apply the selected frequency calculation - if frequency == "percentage": - author_counts = (author_counts / len(data) * 100).round(1) - elif frequency == "freq_measure": - # Calculate fractional counts - fractional_counts = data["AU"].apply(lambda authors: 1 / len(authors) if authors else 0) + # FREQUENCY MODES + if frequency.lower() == "percentage": + + author_counts = ( + author_counts / max(len(data), 1) * 100 + ).round(1) + + elif frequency.lower() in ["fractionalized", "freq_measure"]: + + fractional_counts = data["AU"].apply( + lambda authors: 1 / len(authors) + if len(authors) > 0 + else 0 + ) + fractional_authors = [ - (author, fractional_counts[i]) + (author, fractional_counts.iloc[i]) for i, authors in enumerate(data["AU"]) for author in authors ] - fractional_df = pd.DataFrame(fractional_authors, columns=["Author", "Weight"]) - author_counts = fractional_df.groupby("Author")["Weight"].sum().sort_values(ascending=False).round(1) - + + fractional_df = pd.DataFrame( + fractional_authors, + columns=["Author", "Weight"] + ) + + if not fractional_df.empty: + + author_counts = ( + fractional_df.groupby("Author")["Weight"] + .sum() + 
.sort_values(ascending=False) + .round(1) + ) + + # CONVERT TO DATAFRAME author_counts = author_counts.reset_index() + author_counts.columns = ["Authors", frequency] - - # Truncate author names to 50 characters - author_counts["Authors"] = author_counts["Authors"].str[:50] - table_relevant_authors = author_counts - - # Limit the number of authors to display - if num_of_authors > len(author_counts): - num_of_authors = len(author_counts) + + # SAFE STRING HANDLING + author_counts["Authors"] = ( + author_counts["Authors"] + .astype(str) + .str[:50] + ) + + table_relevant_authors = author_counts.copy() + + # LIMIT AUTHORS + num_of_authors = min( + int(num_of_authors), + len(author_counts) + ) + author_counts = author_counts.head(num_of_authors) - # Create the plot (use scatter instead of scatter with orientation='h') + # EMPTY CHECK + if author_counts.empty: + return None, table_relevant_authors + + # PLOT fig = go.Figure() - # Add a thick line from each label to its marker + # SAFE MAX VALUE + max_freq = max(author_counts[frequency].max(), 1) + + # LINES for i, row in author_counts.iterrows(): + fig.add_shape( type="line", x0=0, x1=row[frequency], y0=i, y1=i, - line=dict(color="#e0e0e0", width=5), + line=dict( + color="#e0e0e0", + width=5 + ), layer="below", ) + # SCATTER fig.add_trace( + go.Scatter( x=author_counts[frequency], y=list(range(len(author_counts))), mode="markers+text", + marker=dict( - size=18 + 6 * (author_counts[frequency] / author_counts[frequency].max()), + size=18 + 6 * ( + author_counts[frequency] / max_freq + ), + color=author_counts[frequency], - colorscale=[[0, "#B3D1F2"], [1, "#5567BB"]], - line=dict(width=1, color="#E0E0E0"), + + colorscale=[ + [0, "#B3D1F2"], + [1, "#5567BB"] + ], + + line=dict( + width=1, + color="#E0E0E0" + ), + opacity=0.95, showscale=False, ), + text=author_counts[frequency], - textposition="top center", - textfont=dict(color="#5567BB", size=13), + + textposition="top center", + + textfont=dict( + color="#5567BB", + 
size=13 + ), + hovertemplate=( "Author: %{customdata}
" "" + frequency + ": %{x}" ), + customdata=author_counts["Authors"], ) ) - # Add horizontal grid lines for each author (lighter) + # GRID LINES for i in range(len(author_counts)): + fig.add_shape( type="line", x0=0, - x1=author_counts[frequency].max(), + x1=max_freq, y0=i, y1=i, - line=dict(color="#E0E0E0", width=2), + line=dict( + color="#E0E0E0", + width=2 + ), layer="below", ) - # Set x-axis ticks to 0, 5, 10, etc. - max_x = author_counts[frequency].max() + # X TICKS tick_step = 5 - x_ticks = list(range(0, int(max_x) + tick_step, tick_step)) - if x_ticks[-1] < max_x: - x_ticks.append(int(max_x)) + x_ticks = list( + range( + 0, + int(max_freq) + tick_step, + tick_step + ) + ) + + if len(x_ticks) == 0: + x_ticks = [0] + + # AXES fig.update_yaxes( tickvals=list(range(len(author_counts))), ticktext=author_counts["Authors"], @@ -117,6 +217,7 @@ def get_relevant_authors(df, num_of_authors, frequency="N. of Documents"): title="Authors", tickfont=dict(size=13), ) + fig.update_xaxes( showgrid=True, gridcolor="#F0F0F0", @@ -125,22 +226,48 @@ def get_relevant_authors(df, num_of_authors, frequency="N. 
of Documents"): title=frequency, tickfont=dict(size=13), ) + + # LAYOUT fig.update_layout( plot_bgcolor='white', - font=dict(color="#222222", size=14, family="Segoe UI, Arial"), - margin=dict(l=0, r=0, t=0, b=0), - height=50 + 90 * len(author_counts), + + font=dict( + color="#222222", + size=14, + family="Segoe UI, Arial" + ), + + margin=dict( + l=0, + r=0, + t=0, + b=0 + ), + + height=max(400, 50 + 90 * len(author_counts)), + showlegend=False, + hoverlabel=dict( bgcolor="white", font_size=13, font_family="Segoe UI, Arial", bordercolor="#5567BB" ), + coloraxis_showscale=False, ) + fig = go.FigureWidget(fig) - fig._config = fig._config | {'modeBarButtonsToRemove': ['pan', 'select', 'lasso2d', 'toImage'], - 'displaylogo': False} - return fig, table_relevant_authors + fig._config = fig._config | { + 'modeBarButtonsToRemove': [ + 'pan', + 'select', + 'lasso2d', + 'toImage' + ], + 'displaylogo': False + } + + return fig, table_relevant_authors \ No newline at end of file diff --git a/functions/get_relevantsources.py b/functions/get_relevantsources.py index dccd8d3e5..dc5db6954 100644 --- a/functions/get_relevantsources.py +++ b/functions/get_relevantsources.py @@ -12,7 +12,10 @@ def get_relevant_sources(df, num_of_sources): Returns: A Plotly figure object and a DataFrame of the most relevant sources. """ - data = df.get() + # PATCH: original code called df.get() without arguments, which crashes on a + # plain pandas DataFrame because pandas .get() requires a column name as argument. + # Fixed by checking isinstance(df, pd.DataFrame) first. 
+ data = df if isinstance(df, pd.DataFrame) else df.get() # Drop rows with missing values data = data.dropna(subset=["SO"]) diff --git a/functions/get_sourceslocalimpact.py b/functions/get_sourceslocalimpact.py index 731c97194..387ce5612 100644 --- a/functions/get_sourceslocalimpact.py +++ b/functions/get_sourceslocalimpact.py @@ -13,13 +13,17 @@ def get_sources_local_impact(df, num_of_sources_local_impact, source_local_impac Returns: A Plotly figure object and a DataFrame of the most impactful sources. """ - df = df.get() + # PATCH: original code called df.get() without arguments, which crashes on a + # plain pandas DataFrame because pandas .get() requires a column name as argument. + # Fixed by checking isinstance(df, pd.DataFrame) first. + data = df if isinstance(df, pd.DataFrame) else df.get() + today = pd.Timestamp.now().year # Ensure 'TC' and 'PY' are numeric - df['TC'] = pd.to_numeric(df['TC'], errors='coerce') - df['PY'] = pd.to_numeric(df['PY'], errors='coerce') - df = df.dropna(subset=['TC', 'PY']) + data['TC'] = pd.to_numeric(data['TC'], errors='coerce') + data['PY'] = pd.to_numeric(data['PY'], errors='coerce') + data = data.dropna(subset=['TC', 'PY']) # Define h-index and g-index calculation functions def h_calc(x): @@ -44,15 +48,15 @@ def g_calc(x): return g # Calculate indices - df['h_index'] = df.groupby('SO')['TC'].transform(h_calc) - df['g_index'] = df.groupby('SO')['TC'].transform(g_calc) - df['PY_start'] = df.groupby('SO')['PY'].transform('min') - df['m_index'] = df['h_index'] / (today - df['PY_start'] + 1) - df['NP'] = df.groupby('SO')['SO'].transform('size') - df['TC_sum'] = df.groupby('SO')['TC'].transform(lambda x: x.sum()) + data['h_index'] = data.groupby('SO')['TC'].transform(h_calc) + data['g_index'] = data.groupby('SO')['TC'].transform(g_calc) + data['PY_start'] = data.groupby('SO')['PY'].transform('min') + data['m_index'] = data['h_index'] / (today - data['PY_start'] + 1) + data['NP'] = data.groupby('SO')['SO'].transform('size') + 
data['TC_sum'] = data.groupby('SO')['TC'].transform(lambda x: x.sum()) # Select the top sources - top_sources = df.groupby('SO').first().reset_index() + top_sources = data.groupby('SO').first().reset_index() #top_sources = top_sources.nlargest(num_of_sources_local_impact, impact_column) # Prepare the final table diff --git a/functions/get_sourcesproduction.py b/functions/get_sourcesproduction.py index 0795668d7..ee613d600 100644 --- a/functions/get_sourcesproduction.py +++ b/functions/get_sourcesproduction.py @@ -13,7 +13,10 @@ def get_sources_production(df, num_of_sources_production, occurences): Returns: A Plotly figure object representing the sources' production over time. """ - data = df.get() + # PATCH: original code called df.get() without arguments, which crashes on a + # plain pandas DataFrame because pandas .get() requires a column name as argument. + # Fixed by checking isinstance(df, pd.DataFrame) first. + data = df if isinstance(df, pd.DataFrame) else df.get() # Calculate the number of publications per year for each source WSO = cocMatrix(df, Field="SO") @@ -25,14 +28,34 @@ def get_sources_production(df, num_of_sources_production, occurences): data["PY"] = data["PY"].astype(str) WPY = cocMatrix(df, Field="PY") + # PATCH: PubMed PY may contain full date strings (e.g. "2026 Jun 6") + # instead of plain year integers — astype(int) crashes on these. + # Extract the first 4-digit year with pd.to_numeric after extracting digits. + data["PY"] = pd.to_numeric(data["PY"].astype(str).str.extract(r'(\d{4})')[0], errors='coerce') + data = data.dropna(subset=["PY"]) data["PY"] = data["PY"].astype(int) - missing_years = set(range(data["PY"].min(), data["PY"].max() + 1)) - set(WPY.columns.astype(int)) + # PATCH: WPY columns may contain full date strings from PubMed PY field — + # extract 4-digit year from column names before casting to int. 
+ wpy_years = pd.to_numeric( + pd.Index(WPY.columns).astype(str).str.extract(r'(\d{4})')[0], + errors='coerce' + ).dropna().astype(int) + missing_years = set(range(data["PY"].min(), data["PY"].max() + 1)) - set(wpy_years) if missing_years: for year in missing_years: WPY[str(year)] = 0 - WPY = WPY[sorted(WPY.columns.astype(int).astype(str))] + # PATCH: WPY columns may still contain full date strings — extract 4-digit + # year from each column name before sorting and reindexing. + valid_cols = { + col: str(int(pd.to_numeric(str(col).strip()[:4], errors='coerce'))) + for col in WPY.columns + if pd.to_numeric(str(col).strip()[:4], errors='coerce') is not None + and not pd.isna(pd.to_numeric(str(col).strip()[:4], errors='coerce')) + } + WPY = WPY.rename(columns=valid_cols) + WPY = WPY[sorted(WPY.columns, key=lambda x: int(x) if x.isdigit() else 0)] PYSO = WPY.T.dot(WSO) ind = PYSO.sum(axis=0) diff --git a/functions/get_table.py b/functions/get_table.py index 75b9c91d8..8e8aec268 100644 --- a/functions/get_table.py +++ b/functions/get_table.py @@ -2,16 +2,14 @@ from functions.get_status import * -# Function to create a Plotly table visualization for metadata completeness def create_plotly_table(sorted_columns, dpi=300): - # Extract column values for the table + """Create a Plotly table visualization for metadata completeness.""" metadata = [col for col, _, _, _, _ in sorted_columns] descriptions = [desc for _, desc, _, _, _ in sorted_columns] counts = [cnt for _, _, cnt, _, _ in sorted_columns] percentages = [f"{pct:.2f}%" for _, _, _, pct, _ in sorted_columns] statuses = [status for _, _, _, _, status in sorted_columns] - # Define colors for each status status_colors = { "Excellent": "lightgreen", "Good": "yellow", @@ -20,7 +18,6 @@ def create_plotly_table(sorted_columns, dpi=300): } color_cells = [status_colors.get(s, "white") for s in statuses] - # Create the Plotly table figure fig = go.Figure(data=[go.Table( header=dict( values=["Metadata", "Description", "Missing 
Counts", "Missing %", "Status"], @@ -39,7 +36,6 @@ def create_plotly_table(sorted_columns, dpi=300): ) )]) - # Set dynamic height: 30px per row plus header (120px) table_height = 120 + len(metadata) * 30 fig.update_layout( @@ -49,15 +45,15 @@ def create_plotly_table(sorted_columns, dpi=300): title_yanchor='top', width=1400, height=table_height, - margin=dict(l=10, r=10, t=70, b=10), # Reduced margins - paper_bgcolor="white", # White background without border + margin=dict(l=10, r=10, t=70, b=10), + paper_bgcolor="white", ) fig.add_layout_image( dict( - source="https://raw.githubusercontent.com/massimoaria/bibliometrix/master/logo.png", + source="https://raw.githubusercontent.com/massimoaria/bibliometrix/master/logo.png", xref="paper", yref="paper", - x=1, y=1, # Top right corner + x=1, y=1, sizex=0.07, sizey=0.07, xanchor="right", yanchor="bottom" ) @@ -65,7 +61,7 @@ def create_plotly_table(sorted_columns, dpi=300): return fig -# Function to generate and display the completeness table for bibliographic metadata + def get_table(database, df, dpi=300, filter=False, modal=True): """ Display a table showing the completeness of bibliographic metadata. @@ -74,20 +70,20 @@ def get_table(database, df, dpi=300, filter=False, modal=True): database: The name of the database. df: A DataFrame object containing the data. filter: A boolean indicating whether to filter the data. + modal: Whether to show a modal dialog with the completeness table. Returns: - A DataTable object if data is available, otherwise a message indicating no data. + A tuple of (DataTable HTML, table HTML string, Plotly figure) if data + is available, otherwise a message indicating no data. 
""" - # Retrieve the data from the DataFrame + data = df.get() table_html = "" fig = None if not filter: - # Get the total number of rows in the dataset total_rows = len(data) - # Dictionary mapping column codes to their descriptions column_descriptions = { "AB": "Abstract", "AU": "Authors", @@ -124,32 +120,38 @@ def get_table(database, df, dpi=300, filter=False, modal=True): "PMID": "PubMed ID", } - # Count missing values (NaN), empty strings, and empty lists in each column - missing_counts = data.isna().sum() + (data == "").sum() + (data == " ").sum() + ( - data.map(lambda x: x == [])).sum() + # PATCH 3: data.map(lambda x: x == []) applied the lambda cell-by-cell + # across the entire DataFrame — on cells containing non-comparable types + # (int, float) some pandas versions raise TypeError or silently return + # False instead of True. + # → replaced with a per-column apply that safely checks for empty lists + # using isinstance before comparing, avoiding type errors. + def count_empty_lists(col): + return col.apply(lambda x: isinstance(x, list) and len(x) == 0).sum() + + missing_counts = ( + data.isna().sum() + + (data == "").sum() + + (data == " ").sum() + + data.apply(count_empty_lists) + ) - # Calculate the percentage of missing values for each column missing_percentage = (missing_counts / total_rows) * 100 - - # Get the status for each column based on missing percentage missing_status = get_status(missing_percentage) - # Sort columns by the number of missing values sorted_columns = sorted( zip( - missing_counts.index, # Column names - [column_descriptions.get(col, col) for col in missing_counts.index], # Descriptions - missing_counts, # Missing values count - missing_percentage, # Missing percentage - missing_status # Status + missing_counts.index, + [column_descriptions.get(col, col) for col in missing_counts.index], + missing_counts, + missing_percentage, + missing_status ), - key=lambda x: x[2] # Sort by missing count + key=lambda x: x[2] ) - # Create and 
return the Plotly table fig = create_plotly_table(sorted_columns, dpi) - # HTML table header table_header = """ @@ -164,11 +166,9 @@ def get_table(database, df, dpi=300, filter=False, modal=True): """ - # HTML table rows for each column table_rows = "" for col, description, count, percent, status_z in sorted_columns: status_style = get_status_color(status_z) - table_rows += f""" @@ -179,13 +179,9 @@ def get_table(database, df, dpi=300, filter=False, modal=True): """ - # HTML table footer table_footer = "
{col}
" - - # Combine header, rows, and footer to form the complete HTML table table_html = table_header + table_rows + table_footer - # If modal is True, create and show a modal dialog with the table if modal: m = ui.modal( ui.HTML(f"""{table_html}"""), @@ -202,10 +198,12 @@ def get_table(database, df, dpi=300, filter=False, modal=True): ui.modal_show(m) if data is not None: - # Return a DataTable object with the data and the HTML/Plotly tables + # PATCH 2: the original code called df.get() a second time inside the + # return statement — same issue as PATCH 1. + # → replaced with `data` which is already the copied DataFrame. return ui.HTML( DT( - df.get(), + data, maxBytes="10MB", classes="display compact stripe", style="text-transform: uppercase; font-size: small; table-layout: auto;", @@ -215,19 +213,17 @@ def get_table(database, df, dpi=300, filter=False, modal=True): ], columnDefs=[ { - "targets": "_all", # Apply to all columns + "targets": "_all", "createdCell": JavascriptFunction(""" function (td, cellData, rowData, row, col) { - // If the cell data is a string and longer than 200 characters, truncate and add tooltip if (typeof cellData === 'string' && cellData.length > 200) { const truncatedText = cellData.substring(0, 200) + '...'; - $(td).text(truncatedText); // Set truncated text - $(td).attr('title', cellData); // Add full text as tooltip + $(td).text(truncatedText); + $(td).attr('title', cellData); $(td).css('overflow', 'hidden'); - $(td).css('text-overflow', 'ellipsis'); // Add ellipsis - $(td).css('vertical-align', 'top'); // Align text to top + $(td).css('text-overflow', 'ellipsis'); + $(td).css('vertical-align', 'top'); } else { - // For all other cells, align text to top $(td).css('vertical-align', 'top'); } } @@ -238,5 +234,4 @@ def get_table(database, df, dpi=300, filter=False, modal=True): ) ), table_html, fig else: - # Show a message if no data is available - return ui.h5("No data available. 
Please upload a file."), "", None + return ui.h5("No data available. Please upload a file."), "", None \ No newline at end of file diff --git a/functions/get_thematicevolution.py b/functions/get_thematicevolution.py index 65bb0077b..50209b53c 100644 --- a/functions/get_thematicevolution.py +++ b/functions/get_thematicevolution.py @@ -42,7 +42,42 @@ def get_thematic_evolution(df, field="ID", years=None, n=250, weight_index="inc_ cluster=cluster ) - ############################### PLOT THEMATIC EVOLUTION ############################### + # PATCH: thematic_evolution returns None when PY is all NaN (e.g. PubMed) + # or when no topics are found. Generate a valid but empty network graph + # instead of returning None for html_path (which the UI shows as "Not Found"). + if results is None: + print(f"No valid data to build thematic evolution for field '{field}'.") + empty_net = Network(height="98vh", width="100%", notebook=True, cdn_resources="in_line", directed=True) + empty_net.toggle_physics(False) + tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".html") + html_path = tmp.name + with open(html_path, 'w', encoding="utf-8") as f: + f.write(empty_net.generate_html()) + return html_path.split(os.sep)[-1], pd.DataFrame(), [] + + # PATCH: thematic_evolution can also return {"check": False} (no 'Nodes' key) + # when one or more periods have zero topic clusters — typically because the + # chosen field is empty for the data source (e.g. "ID" / Keywords Plus is + # WoS-only and is always empty for OpenAlex/PubMed records). Without this + # check, results['Nodes'] below would raise KeyError: 'Nodes'. + # → instead of returning None (which the UI shows as "Not Found"), generate + # a valid but empty network graph so the Map tab renders a blank canvas. + if not results.get("check", True) or "Nodes" not in results: + print( + f"No topics could be extracted for field '{field}' in one or more time " + f"periods. 
This is usually because the selected field is empty for your " + f"data source (e.g. Keywords Plus 'ID' is exclusive to Web of Science and " + f"is always empty for OpenAlex/PubMed data). Try a different Text Source " + f"(e.g. 'TI', 'AB', or 'DE')." + ) + empty_net = Network(height="98vh", width="100%", notebook=True, cdn_resources="in_line", directed=True) + empty_net.toggle_physics(False) + tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".html") + html_path = tmp.name + with open(html_path, 'w', encoding="utf-8") as f: + f.write(empty_net.generate_html()) + return html_path.split(os.sep)[-1], pd.DataFrame(), [] + nodes = results['Nodes'] edges = results['Edges'] label_size = int(size * 20) @@ -63,11 +98,8 @@ def get_thematic_evolution(df, field="ID", years=None, n=250, weight_index="inc_ new_css = " .card {\n border: none;\n }" updated_html = html.replace("", new_css + "\n ") updated_html = updated_html.replace("1px solid lightgray", "none") - f.write(updated_html) - ########## Thematic Evalution Table ########## - # Prepara la tabella di evoluzione tematica thematic_table = results["Data"].copy() thematic_table = thematic_table.rename(columns={ "Cluster_Label.x": "From", @@ -87,47 +119,66 @@ def get_thematic_evolution(df, field="ID", years=None, n=250, weight_index="inc_ def thematic_evolution(M, field="ID", years=None, n=250, min_freq=2, size=0.5, ngrams=1, stemming=False, n_labels=1, repel=True, remove_terms=None, synonyms=None, cluster="walktrap"): if years is None: raise ValueError("You must provide a list of years for thematic evolution analysis.") - + list_df = timeslice(M, breaks=years) + # PATCH: timeslice returns empty dict when PY is all NaN (e.g. 
PubMed) + if not list_df: + return None net, res = [], [] Y = [] for interval_label, Mk in list_df.items(): Y.append(f"{min(Mk['PY'])}-{max(Mk['PY'])}") - Mk = reactive.Value(Mk) + + resk_tuple = thematic_map( Mk, field=field, n=n, minfreq=min_freq, ngrams=ngrams, stemming=stemming, size=size, n_labels=n_labels, - repel=repel, remove_terms=remove_terms, synonyms=synonyms, cluster=cluster, subgraphs=False + repel=repel, remove_terms=remove_terms, synonyms=synonyms, + cluster=cluster, subgraphs=False ) - # thematic_map returns a tuple, so convert to dict for compatibility + + # PATCH 1: thematic_map returns exactly 5 values (indices 0-4). + # The original code tried to access resk_tuple[5] which is always out + # of range — nclust, net, subgraphs and params were always None. + # → read only the 5 available values and derive nclust from words. + thematic_fig, net_html, words, clusters, document_to_clusters = resk_tuple + resk = { - 'map': resk_tuple[0], - 'net_html': resk_tuple[1], - 'words': resk_tuple[2], - 'clusters': resk_tuple[3], - 'documentToClusters': resk_tuple[4], - 'nclust': resk_tuple[5]['nclust'] if len(resk_tuple) > 5 and isinstance(resk_tuple[5], dict) and 'nclust' in resk_tuple[5] else None, - 'net': resk_tuple[5]['net'] if len(resk_tuple) > 5 and isinstance(resk_tuple[5], dict) and 'net' in resk_tuple[5] else None, - 'subgraphs': resk_tuple[5]['subgraphs'] if len(resk_tuple) > 5 and isinstance(resk_tuple[5], dict) and 'subgraphs' in resk_tuple[5] else None, - 'params': resk_tuple[5]['params'] if len(resk_tuple) > 5 and isinstance(resk_tuple[5], dict) and 'params' in resk_tuple[5] else None, + 'map': thematic_fig, + 'net_html': net_html, + 'words': words, + 'clusters': clusters, + 'documentToClusters': document_to_clusters, + # PATCH 2: nclust was always None because of the out-of-range access + # above, so the guard `if res1['nclust'] == 0` never triggered. + # → derive nclust directly from the clusters DataFrame row count. 
+ 'nclust': len(clusters) if clusters is not None and not clusters.empty else 0, + 'net': None, + 'subgraphs': None, + 'params': None, } - # If the tuple is actually the results dict, just use it directly - if isinstance(resk_tuple, dict): - resk = resk_tuple - # Only filter 'params' if it exists and is a DataFrame + if 'params' in resk and isinstance(resk['params'], pd.DataFrame): resk['params'] = resk['params'][resk['params']['params'] != "minfreq"] + res.append(resk) net.append(resk['net_html']) - + K = len(list_df) if K < 2: print("Error") return None - + + # PATCH 3: inc_matrix was being concatenated inside the loop, so INC, + # edges, and nodes were recreated at every iteration — only the last + # iteration's values survived. Additionally, if the loop body never ran + # (K < 2 already handled above, but defensive), edges/nodes would be + # undefined outside the loop. + # → moved pd.concat and all downstream processing outside the loop so + # that all periods are accumulated before building the final result. 
inc_matrix = [] for k in range(1, K): res1 = res[k - 1] @@ -137,13 +188,10 @@ def thematic_evolution(M, field="ID", years=None, n=250, min_freq=2, size=0.5, n print(f"\nNo topics in the period {k - 1} with this set of input parameters\n\n") return {"check": False} - # Ensure we do not append the period label multiple times def append_period(label, period): label = str(label) - # Remove any trailing '--' if already present if label.endswith(f"--{period}"): return label - # Remove any repeated period labels (e.g., --2016-2017--2016-2017) parts = label.split('--') if len(parts) > 1 and parts[-1] == period: label = '--'.join(parts[:-1]) @@ -155,7 +203,6 @@ def append_period(label, period): res2['words']['Cluster_Label'] = res2['words']['Cluster_Label'].apply(lambda x: append_period(x, Y[k])) res2['clusters']['label'] = res2['clusters']['Cluster'].apply(lambda x: append_period(x, Y[k])) - # Step 1: Add len and tot columns to clusters cluster1 = res1['words'].groupby('Cluster_Label').apply(lambda x: x.assign( len=len(x), tot=x['Occurrences'].sum() )).reset_index(drop=True) @@ -163,138 +210,110 @@ def append_period(label, period): len=len(x), tot=x['Occurrences'].sum() )).reset_index(drop=True) - # Step 2: Inner join on Words A = pd.merge(cluster1, cluster2, on="Words", suffixes=(".x", ".y")) - # Step 3: For each pair of clusters, compute min, Occ, tot A['min'] = A[['Occurrences.x', 'Occurrences.y']].min(axis=1) A['Occ'] = A['Occurrences.x'] A['tot'] = A[['tot.x', 'tot.y']].min(axis=1) - # Step 4: Group and summarize as in R B = ( A.groupby(['Cluster_Label.x', 'Cluster_Label.y']) .apply(lambda row: pd.Series({ - "CL1": row['Cluster.x'].iloc[0], - "CL2": row['Cluster.y'].iloc[0], - "Words": ";".join(row['Words']), - "sum": row['min'].sum(), - "Inc_Weighted": row['min'].sum() / row['tot'].min() if row['tot'].min() > 0 else 0, - "Inc_index": len(row['Words']) / min(row['len.x'].iloc[0], row['len.y'].iloc[0]) if min(row['len.x'].iloc[0], row['len.y'].iloc[0]) > 0 else 0, - 
"Occ": row['Occ'].iloc[0], - "Tot": row['tot'].iloc[0], - "Stability": len(row['Words']) / (row['len.x'].iloc[0] + row['len.y'].iloc[0] - len(row['Words'])) if (row['len.x'].iloc[0] + row['len.y'].iloc[0] - len(row['Words'])) > 0 else 0 + "CL1": row['Cluster.x'].iloc[0], + "CL2": row['Cluster.y'].iloc[0], + "Words": ";".join(row['Words']), + "sum": row['min'].sum(), + "Inc_Weighted": row['min'].sum() / row['tot'].min() if row['tot'].min() > 0 else 0, + "Inc_index": len(row['Words']) / min(row['len.x'].iloc[0], row['len.y'].iloc[0]) if min(row['len.x'].iloc[0], row['len.y'].iloc[0]) > 0 else 0, + "Occ": row['Occ'].iloc[0], + "Tot": row['tot'].iloc[0], + "Stability": len(row['Words']) / (row['len.x'].iloc[0] + row['len.y'].iloc[0] - len(row['Words'])) if (row['len.x'].iloc[0] + row['len.y'].iloc[0] - len(row['Words'])) > 0 else 0 })) .reset_index() ) inc_matrix.append(B) - if not inc_matrix: - print("Error: No inclusion matrix was created.") - return None - - # Concatenate all inclusion matrices - INC = pd.concat(inc_matrix, ignore_index=True) - - # Edges dataframe - edges = INC[['Cluster_Label.x', 'Cluster_Label.y', 'Inc_index', 'Inc_Weighted', 'Stability']].copy() - - # Nodes dataframe - unique_labels = pd.unique(edges[['Cluster_Label.x', 'Cluster_Label.y']].values.ravel()) - nodes = pd.DataFrame({'name': unique_labels}) - nodes['group'] = nodes['name'] - - # Assign numeric IDs to nodes - nodes = nodes.reset_index(drop=True) - nodes['id'] = nodes.index - - # Map cluster labels to node IDs for 'from' and 'to' - label_to_id = dict(zip(nodes['name'], nodes['id'])) - edges['from'] = edges['Cluster_Label.x'].map(label_to_id) - edges['to'] = edges['Cluster_Label.y'].map(label_to_id) - - # Rename columns as in R - edges = edges.rename(columns={ - "Cluster_Label.x": "from_label", - "Cluster_Label.y": "to_label", - "Inc_index": "Inclusion", - "Inc_Weighted": "Inc_Weighted", - "Stability": "Stability" - }) - edges['from'] = edges['from'].astype(int) - edges['to'] = 
edges['to'].astype(int) - - # For colors and slices - # nodes: separate name and group by '--', and assign slice as factor 1:K - nodes = nodes.copy() - nodes['name'] = nodes['name'].astype(str) - split_cols = nodes['name'].str.split('--', n=1, expand=True) - split_cols = split_cols.reindex(columns=[0, 1], fill_value='') - nodes['name'] = split_cols[0] - nodes['group'] = split_cols[1] - nodes['slice'] = nodes['group'].apply(lambda x: Y.index(x) + 1 if x in Y else 1) - nodes['label'] = nodes['name'] + '--' + nodes['group'] - - Nodes = pd.DataFrame() - for i in range(1, K + 1): - nodes_i = nodes[nodes['slice'] == i].copy() - clusters_df = res[i - 1]['clusters'] - color_col = 'color' if 'color' in clusters_df.columns else None - name_col = 'name' if 'name' in clusters_df.columns else ( - 'Cluster_Label' if 'Cluster_Label' in clusters_df.columns else None - ) - if color_col and name_col: - clusters_df = clusters_df[[color_col, name_col]].copy() - clusters_df = clusters_df.rename(columns={name_col: 'name'}) - else: - if not color_col: - clusters_df['color'] = "#D3D3D3" - if not name_col: - clusters_df['name'] = clusters_df.index.astype(str) - clusters_df = clusters_df[['color', 'name']].copy() - merged = nodes_i.merge(clusters_df, left_on='name', right_on='name', how='left') - Nodes = pd.concat([Nodes, merged], ignore_index=True) - - # Add 'sum' column: for each label, get max sum from both CL1 and CL2 - sums_CL1 = INC[['CL1', 'sum']].rename(columns={'CL1': 'label'}) - sums_CL2 = INC[['CL2', 'sum']].rename(columns={'CL2': 'label'}) - sums = pd.concat([sums_CL1, sums_CL2], ignore_index=True) - sums['label'] = sums['label'].astype(str) - Nodes['label'] = Nodes['label'].astype(str) - sums = sums.groupby('label', as_index=False)['sum'].max() - Nodes = Nodes.merge(sums, left_on='label', right_on='label', how='left') - - # Normalize sum within each slice, avoid division by zero - Nodes['sum'] = Nodes.groupby('slice')['sum'].transform(lambda x: x / x.sum() if x.sum() > 0 else 
0) - - # Prepare params as DataFrame + # PATCH 3 (continued): all downstream processing moved outside the loop. + if not inc_matrix: + print("Error: No inclusion matrix was created.") + return None + + INC = pd.concat(inc_matrix, ignore_index=True) + + edges = INC[['Cluster_Label.x', 'Cluster_Label.y', 'Inc_index', 'Inc_Weighted', 'Stability']].copy() + + unique_labels = pd.unique(edges[['Cluster_Label.x', 'Cluster_Label.y']].values.ravel()) + nodes = pd.DataFrame({'name': unique_labels}) + nodes['group'] = nodes['name'] + nodes = nodes.reset_index(drop=True) + nodes['id'] = nodes.index + + label_to_id = dict(zip(nodes['name'], nodes['id'])) + edges['from'] = edges['Cluster_Label.x'].map(label_to_id) + edges['to'] = edges['Cluster_Label.y'].map(label_to_id) + + edges = edges.rename(columns={ + "Cluster_Label.x": "from_label", + "Cluster_Label.y": "to_label", + "Inc_index": "Inclusion", + "Inc_Weighted": "Inc_Weighted", + "Stability": "Stability" + }) + edges['from'] = edges['from'].astype(int) + edges['to'] = edges['to'].astype(int) + + nodes = nodes.copy() + nodes['name'] = nodes['name'].astype(str) + split_cols = nodes['name'].str.split('--', n=1, expand=True) + split_cols = split_cols.reindex(columns=[0, 1], fill_value='') + nodes['name'] = split_cols[0] + nodes['group'] = split_cols[1] + nodes['slice'] = nodes['group'].apply(lambda x: Y.index(x) + 1 if x in Y else 1) + nodes['label'] = nodes['name'] + '--' + nodes['group'] + + Nodes = pd.DataFrame() + for i in range(1, K + 1): + nodes_i = nodes[nodes['slice'] == i].copy() + clusters_df = res[i - 1]['clusters'] + color_col = 'color' if 'color' in clusters_df.columns else None + name_col = 'name' if 'name' in clusters_df.columns else ( + 'Cluster_Label' if 'Cluster_Label' in clusters_df.columns else None + ) + if color_col and name_col: + clusters_df = clusters_df[[color_col, name_col]].copy() + clusters_df = clusters_df.rename(columns={name_col: 'name'}) + else: + if not color_col: + clusters_df['color'] = 
"#D3D3D3" + if not name_col: + clusters_df['name'] = clusters_df.index.astype(str) + clusters_df = clusters_df[['color', 'name']].copy() + merged = nodes_i.merge(clusters_df, left_on='name', right_on='name', how='left') + Nodes = pd.concat([Nodes, merged], ignore_index=True) + + sums_CL1 = INC[['CL1', 'sum']].rename(columns={'CL1': 'label'}) + sums_CL2 = INC[['CL2', 'sum']].rename(columns={'CL2': 'label'}) + sums = pd.concat([sums_CL1, sums_CL2], ignore_index=True) + sums['label'] = sums['label'].astype(str) + Nodes['label'] = Nodes['label'].astype(str) + sums = sums.groupby('label', as_index=False)['sum'].max() + Nodes = Nodes.merge(sums, left_on='label', right_on='label', how='left') + Nodes['sum'] = Nodes.groupby('slice')['sum'].transform(lambda x: x / x.sum() if x.sum() > 0 else 0) + params = { - "field": field, - "years": years, - "n": n, - "minFreq": min_freq, - "size": size, - "ngrams": ngrams, - "stemming": stemming, - "n_labels": n_labels, - "repel": repel, - "remove_terms": remove_terms, - "synonyms": synonyms, - "cluster": cluster + "field": field, "years": years, "n": n, "minFreq": min_freq, + "size": size, "ngrams": ngrams, "stemming": stemming, + "n_labels": n_labels, "repel": repel, + "remove_terms": remove_terms, "synonyms": synonyms, "cluster": cluster } params_df = pd.DataFrame(list(params.items()), columns=['params', 'values']) results = { - "Nodes": Nodes, - "Edges": edges, - "Data": INC, - "check": True, - "TM": res, - "Net": net, - "params": params_df + "Nodes": Nodes, "Edges": edges, "Data": INC, + "check": True, "TM": res, "Net": net, "params": params_df } - + return results @@ -303,54 +322,97 @@ def timeslice(M, breaks=None, k=5): Splits a bibliographic DataFrame into time intervals. Args: - M (pd.DataFrame): Bibliographic DataFrame with a 'PY' (Publication Year) column. - breaks (list or None): Numeric vector of two or more break points. If not provided, it is calculated automatically. 
- k (int): Number of intervals to split the DataFrame into (used only if `breaks` is not provided). Default is 5. + M (pd.DataFrame): Bibliographic DataFrame with a 'PY' column. + breaks (list or None): Break points. If not provided, calculated automatically. + k (int): Number of intervals (used only if breaks is not provided). Returns: dict: Dictionary containing DataFrames for each sub-period. """ - M = M.get() + # PATCH: M may be a Shiny reactive Value or a plain DataFrame + M = M.get() if hasattr(M, 'get') and callable(M.get) and not isinstance(M, pd.DataFrame) else M + M = M.copy() - # Convert the 'PY' column to numeric M['PY'] = pd.to_numeric(M['PY'], errors='coerce') - - # Calculate breakpoints if not provided + + # PATCH: if PY is entirely NaN (e.g. PubMed pubdate parsing failure), + # cannot build bins — return empty dict gracefully. + if M['PY'].isna().all(): + print("No valid PY values found. Cannot split into time slices.") + return {} + if breaks is None or (isinstance(breaks, list) and len(breaks) == 0): breaks = np.floor(np.linspace(M['PY'].min() - 1, M['PY'].max(), k + 1)) else: breaks = [M['PY'].min() - 1] + breaks + [M['PY'].max()] + print("breaks:", breaks) + + # PATCH: remove duplicate/out-of-order break points. This happens when the + # user-chosen Cutting Year coincides with (or is outside) the dataset's + # min/max year, which previously caused pd.cut() to raise + # "bins must increase monotonically". If after deduplication there aren't + # enough edges left to form at least 2 periods, return {} gracefully + # instead of crashing — callers already handle an empty dict by showing + # an empty result instead of an error. + breaks = sorted(set(breaks)) + if len(breaks) < 3: + print( + f"Cutting Year(s) too close to (or outside) the dataset's year range " + f"{breaks}. Cannot build at least 2 time periods — choose a Cutting " + f"Year strictly inside the data range." 
+ ) + return {} - # print("breaks:", breaks) - - # Split the data into intervals + # PATCH: drop NaN PY rows before cutting to avoid non-monotonic bin errors. + M = M.dropna(subset=['PY']) M['interval'] = pd.cut(M['PY'], bins=breaks, right=False) - - # Get the interval levels + intervals = M['interval'].cat.categories indices = M['interval'].cat.codes - - # Split the DataFrame based on intervals - split_df = {str(interval): M[M['interval'] == interval].drop(columns=['interval']) for interval in intervals} - + + split_df = { + str(interval): M[M['interval'] == interval].drop(columns=['interval']) + for interval in intervals + } + + # PATCH: drop empty periods. A valid (non-duplicate) bin can still contain + # zero rows — e.g. when the Cutting Year falls outside the actual data + # range, the period between the out-of-range edges and the real data start + # is "valid" for pd.cut but empty. Downstream code (e.g. min()/max() on + # each period's PY values) crashes on an empty sequence if this isn't + # filtered out here. + split_df = {label: sub_df for label, sub_df in split_df.items() if not sub_df.empty} + + if len(split_df) < 2: + print( + f"Only {len(split_df)} non-empty time period(s) found for the chosen " + f"Cutting Year(s). At least 2 are required for thematic evolution — " + f"choose a Cutting Year strictly inside the dataset's year range." + ) + return {} + return split_df def normalize_to_minus1_1(values): values = np.array(values) - return 2 * (values - values.min()) / (values.max() - values.min()) - 1 + # PATCH 5: if all values are equal, max - min is 0 and the division + # produces NaN everywhere. → return zeros when the range is 0. 
+ value_range = values.max() - values.min() + if value_range == 0: + return np.zeros_like(values, dtype=float) + return 2 * (values - values.min()) / value_range - 1 def plot_thematic_evolution( Nodes, Edges, min_flow=0, - measure="weighted", # "inclusion", "stability", "weighted" + measure="weighted", label_size=5, edge_scale=10, node_scale=30 ): - # Choose the metric for edge weight if measure == "inc_index": edge_weight_var = "Inclusion" elif measure == "inc_weight_word_occ": @@ -360,28 +422,21 @@ def plot_thematic_evolution( else: edge_weight_var = "Inc_Weighted" - # Amplify stability for visualization Edges = Edges.copy() Edges["Stability"] = Edges["Stability"] * 10 - # X coordinates for time slices unique_slices = sorted(Nodes["slice"].unique()) - - # X coordinates + Nodes = Nodes.copy() - # Map slices to an evenly spaced grid between 0.2 and 0.7 unique_slices = sorted(Nodes["slice"].unique()) x_positions = np.linspace(0.1, 0.9, len(unique_slices)) x_positions_dict = {s: x for s, x in zip(unique_slices, x_positions)} Nodes["x"] = Nodes["slice"].map(x_positions_dict) - # Y coordinates to avoid overlap Nodes["y"] = Nodes.groupby("slice").cumcount() Nodes["y"] = Nodes.groupby("slice")["y"].transform(lambda y: normalize_to_minus1_1(y)) - # Scale y for better vertical separation Nodes["y"] = Nodes["y"] * 400 - # Prepare nodes for visualization Nodes_vis = Nodes.copy() Nodes_vis["shape"] = "box" Nodes_vis["size"] = Nodes_vis["sum"] * node_scale @@ -392,42 +447,31 @@ def plot_thematic_evolution( num_nodes = len(Nodes_vis) cmap = plt.get_cmap("tab20" if num_nodes <= 20 else "hsv") - # Use a pastel colormap for lighter, more pleasant colors pastel_cmap = plt.get_cmap("Pastel1" if num_nodes <= 9 else "tab20c") colors = [pastel_cmap(i % pastel_cmap.N) for i in range(num_nodes)] - # Convert RGBA colors to hex strings Nodes_vis["color"] = [to_hex(c) for c in colors] - # Ensure id exists and is unique if "id" not in Nodes_vis.columns: Nodes_vis = 
Nodes_vis.reset_index(drop=True) Nodes_vis["id"] = Nodes_vis.index - # Normalize x coordinates to a suitable vis.js scale (e.g., 0-1000) Nodes_vis["x"] = Nodes_vis["x"] * 1000 - # Center y at 0 and scale to 0-800 Nodes_vis["y"] = (Nodes_vis["y"] - Nodes_vis["y"].mean()) + 400 - # Prepare final nodes Nodes_vis = Nodes_vis[ ["id", "label", "title", "group", "color", "x", "y", "shape", "size", "value", "fixed_x", "fixed_y"] ] Nodes_vis = pd.concat([Nodes_vis], ignore_index=True) - # Prepare edges Edges_vis = Edges.copy() Edges_vis["width"] = Edges_vis[edge_weight_var] * edge_scale Edges_vis["value"] = Edges_vis[edge_weight_var] Edges_vis = Edges_vis[Edges_vis["value"] >= min_flow].copy() Edges_vis["color"] = [{"color": "#D3D3D3", "highlight": "#35343370", "hover": "#35343370"}] * len(Edges_vis) - - # Remove self-loop edges Edges_vis = Edges_vis[Edges_vis["from"] != Edges_vis["to"]].copy() - # Build the network net = Network(height="98vh", width="100%", notebook=True, cdn_resources="in_line", directed=True) for _, node in Nodes_vis.iterrows(): - # Ensure title is always a string and not NaN/float node_title = node["title"] if "title" in node and pd.notnull(node["title"]) else node["label"] if not isinstance(node_title, str): node_title = str(node_title) @@ -443,7 +487,6 @@ def plot_thematic_evolution( fixed={"x": node["fixed_x"], "y": node["fixed_y"]}, ) - # Ensure arrows go from 'from' to 'to' (correct direction) for _, edge in Edges_vis.iterrows(): net.add_edge( int(edge["from"]), @@ -455,7 +498,6 @@ def plot_thematic_evolution( arrows="to" ) - # Configure physics and interactions net.toggle_physics(False) net.set_options(""" var options = { @@ -483,4 +525,4 @@ def plot_thematic_evolution( def rgba_to_hex(rgba): - return '#%02x%02x%02x' % tuple(int(255 * c) for c in rgba[:3]) + return '#%02x%02x%02x' % tuple(int(255 * c) for c in rgba[:3]) \ No newline at end of file diff --git a/functions/get_thematicmap.py b/functions/get_thematicmap.py index 68d1f37d6..64df02dc1 
100644 --- a/functions/get_thematicmap.py +++ b/functions/get_thematicmap.py @@ -22,13 +22,25 @@ def get_thematic_map(df, field="ID", n=250, minfreq=5, ngrams=1, stemming=False, subgraphs: Whether to show subgraphs. Returns: - A tuple containing the HTML file name and a DataFrame with the extracted terms. + A tuple containing the Plotly figure, HTML file name, words DataFrame, + clusters DataFrame, and document-to-cluster mapping DataFrame. """ - - map, graph_path, words, clusters, documentToClusters = thematic_map( + + # PATCH 1: thematic_map returns None when NetMatrix is empty — unpacking + # directly would crash with TypeError: cannot unpack non-iterable NoneType. + # → capture the full result first, check for None, and return a safe empty + # tuple before attempting to unpack. + # PATCH 2: the variable name `map` shadowed the Python builtin map() + # function — renamed to `thematic_map_result` to avoid the collision. + result = thematic_map( df, field=field, n=n, minfreq=minfreq, ngrams=ngrams, stemming=stemming, size=size, n_labels=n_labels, community_repulsion=community_repulsion, repel=repel, remove_terms=remove_terms, synonyms=synonyms, cluster=cluster, subgraphs=subgraphs ) - - return map, graph_path, words, clusters, documentToClusters + + if result[0] is None: + return None, None, pd.DataFrame(), pd.DataFrame(), pd.DataFrame() + + thematic_map_result, graph_path, words, clusters, documentToClusters = result + + return thematic_map_result, graph_path, words, clusters, documentToClusters \ No newline at end of file diff --git a/functions/get_threefieldplot.py b/functions/get_threefieldplot.py index b7a4a1514..74140d751 100644 --- a/functions/get_threefieldplot.py +++ b/functions/get_threefieldplot.py @@ -4,7 +4,8 @@ def get_three_field_plot(df, left_field, middle_field, right_field, left_field_items, middle_field_items, right_field_items): """ - Generate a three-field plot (Sankey diagram) to visualize the main items of three fields and their 
relationships. + Generate a three-field plot (Sankey diagram) to visualize the main items + of three fields and their relationships. Args: df: A DataFrame object containing the data. @@ -14,9 +15,9 @@ def get_three_field_plot(df, left_field, middle_field, right_field, left_field_i left_field_items: Number of items to plot for the left field. middle_field_items: Number of items to plot for the middle field. right_field_items: Number of items to plot for the right field. - + Returns: - A Plotly figure object representing the three-field plot. + A Plotly FigureWidget representing the three-field Sankey diagram. """ fields = [left_field, middle_field, right_field] n = [left_field_items, middle_field_items, right_field_items] @@ -30,22 +31,39 @@ def get_three_field_plot(df, left_field, middle_field, right_field, left_field_i if "TI_TM" in fields: df = term_extraction(df, field="TI") - # Document x Attribute matrix Field LEFT + # PATCH: cocMatrix returns None when the field is empty (e.g. PubMed DE is + # always empty from eSummary API) — accessing .shape on None crashes with + # AttributeError. Return an empty figure gracefully if any matrix is None or empty. 
+ # Document x Attribute matrix — LEFT field WL = cocMatrix(df, fields[0], binary=True, n=n[0]) + if WL is None or WL.empty: + return go.FigureWidget(go.Figure()) n1 = min(n[0], WL.shape[1]) TopL = WL.columns.tolist() - # Document x Attribute matrix Field MIDDLE + # Document x Attribute matrix — MIDDLE field WM = cocMatrix(df, fields[1], binary=True, n=n[1]) + if WM is None or WM.empty: + return go.FigureWidget(go.Figure()) n2 = min(n[1], WM.shape[1]) TopM = WM.columns.tolist() - # Document x Attribute matrix Field RIGHT + # Document x Attribute matrix — RIGHT field WR = cocMatrix(df, fields[2], binary=True, n=n[2]) + if WR is None or WR.empty: + return go.FigureWidget(go.Figure()) n3 = min(n[2], WR.shape[1]) TopR = WR.columns.tolist() - # Co-Occurrence Matrices + # PATCH 1: if cocMatrix returns an empty DataFrame for any field, n1/n2/n3 + # is 0 and reassigning LM.index/columns with a mismatched range crashes + # with ValueError: Length mismatch. + # → return an empty figure early if any of the three matrices is empty. 
+ if n1 == 0 or n2 == 0 or n3 == 0: + empty_fig = go.FigureWidget(go.Figure()) + return empty_fig + + # Co-occurrence matrices LM = WL.T.dot(WM) MR = WM.T.dot(WR) @@ -54,31 +72,28 @@ def get_three_field_plot(df, left_field, middle_field, right_field, left_field_i MR.index = range(n1 + 1, n1 + n2 + 1) MR.columns = range(n1 + n2 + 1, n1 + n2 + n3 + 1) - # Melting matrices to get edges + # Melt matrices to get edge lists def melt_matrix(matrix): var1 = np.repeat(matrix.index.values, matrix.shape[1]) var2 = np.tile(matrix.columns.values, matrix.shape[0]) values = matrix.values.flatten() - melted_df = pd.DataFrame({'Var1': var1, 'Var2': var2, 'Value': values}) - return melted_df + return pd.DataFrame({'Var1': var1, 'Var2': var2, 'Value': values}) LMm = melt_matrix(LM) LMm["group"] = None MRm = melt_matrix(MR) MRm["group"] = None - # Concatenate edge data Edges = pd.concat([LMm, MRm], ignore_index=True) Edges['Var1'] = Edges['Var1'].astype(int) Edges['Var2'] = Edges['Var2'].astype(int) Edges.columns = ["from", "to", "Value", "group"] Edges = Edges.dropna(subset=['to', 'from']) - Edges['from'] = Edges['from'] - 1 # Make indices 0-based + Edges['from'] = Edges['from'] - 1 Edges['to'] = Edges['to'] - 1 Edges = Edges.drop(columns=['group']) - Edges = Edges[Edges["Value"] >= 1] # Filter edges with weight >= min.flow + Edges = Edges[Edges["Value"] >= 1] - # Same as before up to where Nodes are created Nodes = pd.DataFrame({ "Nodes": [*TopL, *TopM, *TopR], "group": [fields[0]] * len(TopL) + [fields[1]] * len(TopM) + [fields[2]] * len(TopR), @@ -89,36 +104,40 @@ def melt_matrix(matrix): Edges.rename(columns={"Value": "weight"}, inplace=True) Edges = Edges[Edges["weight"] >= min_flow] - # Set x positions for nodes based on level Kx = len(Nodes['group'].unique()) Ky = len(Nodes) - Nodes['coordX'] = np.repeat(np.linspace(0, 1, Kx), Nodes['level'].value_counts().sort_index().values) + level_counts = Nodes['level'].value_counts().sort_index() + Kx = len(level_counts) + 
Nodes['coordX'] = np.repeat(np.linspace(0, 1, Kx), level_counts.values) Nodes['coordY'] = np.repeat(0.1, Ky) - # Set custom base colors for nodes by group for better distinction group_colors = { - fields[0]: "#3288BD", # Blue - fields[1]: "#F46D43", # Orange - fields[2]: "#66C2A5", # Green + fields[0]: "#3288BD", + fields[1]: "#F46D43", + fields[2]: "#66C2A5", } - # Calculate node weights (sum of incoming and outgoing edge weights) node_weights = pd.concat([ Edges.groupby('from')['weight'].sum(), Edges.groupby('to')['weight'].sum() ], axis=1).fillna(0).sum(axis=1) Nodes['weight'] = Nodes['id'].map(node_weights).fillna(0) - # Function to add opacity to a hex color based on node weight (higher weight = less transparent) def hex_to_rgba(hex_color, opacity): hex_color = hex_color.lstrip('#') rgb = tuple(int(hex_color[i:i + 2], 16) for i in (0, 2, 4)) return f'rgba({rgb[0]},{rgb[1]},{rgb[2]},{opacity:.2f})' - # Normalize weights to [0.3, 1.0] for opacity (avoid fully transparent nodes) min_opacity, max_opacity = 0.3, 1.0 - if Nodes['weight'].max() > 0: - norm_weights = (Nodes['weight'] - Nodes['weight'].min()) / (Nodes['weight'].max() - Nodes['weight'].min()) + + # PATCH 2: the original guard checked max > 0 but not max != min — if all + # nodes share the same weight, max - min is 0 and the normalization produces + # NaN in every opacity value. + # → added a second condition to ensure the range is non-zero before dividing. + weight_min = Nodes['weight'].min() + weight_max = Nodes['weight'].max() + if weight_max > 0 and weight_max != weight_min: + norm_weights = (Nodes['weight'] - weight_min) / (weight_max - weight_min) opacities = norm_weights * (max_opacity - min_opacity) + min_opacity else: opacities = np.full(len(Nodes), min_opacity) @@ -128,23 +147,29 @@ def hex_to_rgba(hex_color, opacity): for g, o in zip(Nodes['group'], opacities) ] - # Shorten long labels and add line breaks for better visibility def wrap_label(label, width=45): return "
".join(textwrap.wrap(str(label), width=width)) Nodes['wrapped_label'] = Nodes['Nodes'].apply(lambda x: wrap_label(x, width=35)) - # Identify and remove nodes with empty edges + # Remove isolated nodes (not connected to any edge) ind = set(Nodes['id']) - set(Edges['from']).union(set(Edges['to'])) if ind: Nodes = Nodes[~Nodes['id'].isin(ind)] Nodes['idnew'] = range(len(Nodes)) id_map = dict(zip(Nodes['id'], Nodes['idnew'])) - Edges['from'] = Edges['from'].map(id_map) - Edges['to'] = Edges['to'].map(id_map) + + # PATCH 3: if id_map does not cover all values in Edges['from'] or + # Edges['to'] (e.g. isolated nodes still referenced in edges after + # filtering), .map() produces NaN — the Sankey crashes with float + # indices instead of int. + # → drop edges whose endpoints are not in id_map before remapping, + # then cast to int to ensure valid Sankey indices. + Edges = Edges[Edges['from'].isin(id_map) & Edges['to'].isin(id_map)] + Edges['from'] = Edges['from'].map(id_map).astype(int) + Edges['to'] = Edges['to'].map(id_map).astype(int) Nodes['id'] = Nodes['idnew'] - # Create figure fig = go.Figure(data=[go.Sankey( arrangement="snap", node=dict( @@ -167,7 +192,6 @@ def wrap_label(label, width=45): ) )]) - # Add group annotations at the top of each column for level, field in enumerate(fields, start=1): group_nodes = Nodes[Nodes['level'] == level] if not group_nodes.empty: @@ -178,12 +202,11 @@ def wrap_label(label, width=45): text=f"{wrap_label(field, width=18)}", showarrow=False, xanchor='center', - font=dict(color=group_colors[field], family="Arial", size=15) # Font size + font=dict(color=group_colors[field], family="Arial", size=15) ) - # Update layout for aesthetics and readability fig.update_layout( - font=dict(size=11, color='Black'), # Font size + font=dict(size=11, color='Black'), margin=dict(l=80, r=80, b=50, t=120, pad=4), height=820, plot_bgcolor='white', @@ -198,4 +221,4 @@ def wrap_label(label, width=45): 'displaylogo': False } - return fig + return fig \ No 
newline at end of file diff --git a/functions/get_treemap.py b/functions/get_treemap.py index 1f3f765f0..f7a4a9976 100644 --- a/functions/get_treemap.py +++ b/functions/get_treemap.py @@ -1,9 +1,10 @@ from www.services import * +import ast def get_treemap(df, ngram, num_of_words, word_type, file_upload_terms, file_upload_synonyms, field_separator_frequent=';'): """ - Generate a plot and table of the most frequent words. + Generate a treemap plot and table of the most frequent words. Args: df: A DataFrame object containing the data. @@ -14,7 +15,7 @@ def get_treemap(df, ngram, num_of_words, word_type, file_upload_terms, file_uplo file_upload_synonyms: File containing synonyms. Returns: - A Plotly figure object and a DataFrame of the most frequent words. + A Plotly FigureWidget object and a DataFrame of the most frequent words. """ # Load stopwords and synonyms @@ -45,6 +46,10 @@ def get_treemap(df, ngram, num_of_words, word_type, file_upload_terms, file_uplo table = word_counts.sort_values(by='Occurrences', ascending=False) word_counts = word_counts.sort_values(by='Occurrences', ascending=False).head(num_of_words) + # PATCH: safety check if word_counts is empty + if word_counts.empty: + return go.FigureWidget(go.Figure()), table + # Create TreeMap plot fig = px.treemap( word_counts, @@ -54,7 +59,6 @@ def get_treemap(df, ngram, num_of_words, word_type, file_upload_terms, file_uplo color_continuous_scale=[(0, "lightblue"), (1, "darkblue")], ) - # Update layout fig.update_layout( margin=dict(l=10, r=10, t=40, b=10), height=800, @@ -63,7 +67,6 @@ def get_treemap(df, ngram, num_of_words, word_type, file_upload_terms, file_uplo showlegend=False ) - # Add text to each cell fig.data[0].texttemplate = "%{label}
%{value} Occurrences
%{percentParent:.2%}" fig = go.FigureWidget(fig) fig._config = fig._config | {'modeBarButtonsToRemove': ['pan', 'select', 'lasso2d', 'toImage'], @@ -71,38 +74,76 @@ def get_treemap(df, ngram, num_of_words, word_type, file_upload_terms, file_uplo return fig, table + def table_tag(df, tag, ngrams=1, remove_terms=None, synonyms=None): """ Extract and count words from a specified field in the DataFrame. """ - M = df.get() - + # PATCH: df may be a Shiny reactive Value or a plain DataFrame + M = df.get() if hasattr(df, 'get') and callable(df.get) and not isinstance(df, pd.DataFrame) else df + M = M.copy() + # Remove duplicates M = M.drop_duplicates(subset='SR') - + # Get text data based on tag if tag in ['AB', 'TI']: - text_data = term_extraction(df, field=tag, stemming=False, verbose=False, - ngrams=ngrams, remove_terms=remove_terms, synonyms=synonyms) - text_data = text_data.get() + # PATCH: pass plain DataFrame to term_extraction — it does not accept reactives + df_plain = df.get() if hasattr(df, 'get') and callable(df.get) and not isinstance(df, pd.DataFrame) else df + + # PATCH: missing abstracts/titles sometimes arrive as the literal + # string "nan" rather than a true NaN (e.g. after a CSV/JSON + # round-trip). term_extraction() would otherwise tokenize that + # string as a real word, producing a fake "nan" term that drowns + # out or masks the genuine terms. Drop both true NaN and the + # literal "nan" string (case-insensitive, ignoring whitespace) + # before extracting terms. + df_plain = df_plain[df_plain[tag].notna()] + df_plain = df_plain[ + ~df_plain[tag].astype(str).str.strip().str.lower().eq('nan') + ] + + if df_plain.empty: + return Counter() + + text_data = term_extraction(df_plain, field=tag, stemming=False, verbose=False, + ngrams=ngrams, remove_terms=remove_terms, synonyms=synonyms) text_data = text_data[f"{tag}_TM"] else: + # PATCH: some tags (e.g. 
'WC' - Subject Categories) are not present at all + # in the standardized schema for non-WoS sources (OpenAlex, PubMed). + # Return an empty result instead of raising a raw KeyError. + if tag not in M.columns: + return Counter() text_data = M[tag] # Handle list columns (DE and ID) if tag in ['DE', 'ID']: - text_data = text_data.dropna().apply(lambda x: ', '.join(eval(x) if isinstance(x, str) else x)) + def safe_parse(x): + if isinstance(x, list): + return x + try: + return ast.literal_eval(x) + except (ValueError, SyntaxError): + return [] + + text_data = text_data.dropna().apply(lambda x: ', '.join(safe_parse(x))) # Process words if tag in ['DE', 'ID']: words = text_data.dropna().astype(str).str.cat(sep=', ').upper() words = [word.strip() for word in words.split(',') if word and word.strip()] else: - words = [item for sublist in text_data for item in sublist] - - # Apply n-grams if needed - # if ngrams > 1 and tag not in ['DE', 'ID']: - # words = [' '.join(words[i:i+ngrams]) for i in range(len(words)-ngrams+1)] + # PATCH: filter only list elements before iterating to avoid TypeError, + # and strip any stray literal "nan" tokens (case-insensitive) that may + # have survived inside the extracted term lists. 
+ words = [ + item + for sublist in text_data + if isinstance(sublist, list) + for item in sublist + if str(item).strip().lower() != 'nan' + ] # Replace synonyms if synonyms: @@ -112,9 +153,11 @@ def table_tag(df, tag, ngrams=1, remove_terms=None, synonyms=None): # Count words word_counts = Counter(words) - # Remove specified terms - if remove_terms and tag in ['DE', 'ID']: - word_counts = {word: count for word, count in word_counts.items() - if word.upper() not in [term.upper() for term in remove_terms]} + # PATCH: apply remove_terms to all tags + if remove_terms: + word_counts = { + word: count for word, count in word_counts.items() + if word.upper() not in [term.upper() for term in remove_terms] + } - return word_counts + return word_counts \ No newline at end of file diff --git a/functions/get_trendtopics.py b/functions/get_trendtopics.py index 1d2f1df3a..923b22d44 100644 --- a/functions/get_trendtopics.py +++ b/functions/get_trendtopics.py @@ -4,21 +4,8 @@ def get_trend_topics(df, ngram, field_tt, time_window, file_upload_terms_tt, file_upload_synonyms_tt, word_minimum_frequency, number_of_words_year): """ Generate a plot of trend topics over time. - - Args: - df: A DataFrame object containing the data. - ngram: The number of n-grams to consider. - field_tt: The field to analyze for trend topics. - time_window: The time window to consider. - file_upload_terms_tt: File containing terms to remove. - file_upload_synonyms_tt: File containing synonyms. - word_minimum_frequency: The minimum frequency of words to consider. - number_of_words_year: The number of words to display per year. - - Returns: - A Plotly figure object representing the trend topics over time. 
""" - + # Load terms to remove remove_terms = None if file_upload_terms_tt: @@ -36,26 +23,48 @@ def get_trend_topics(df, ngram, field_tt, time_window, file_upload_terms_tt, fil values = terms[1:] synonyms[key] = values - # Set ngrams based on word_type + # Set ngrams based on field_tt ngrams = int(ngram) if field_tt in ['TI', 'AB'] else 1 - # Extract terms + # PATCH: extract plain DataFrame before passing to term_extraction + df_plain = df.get() if hasattr(df, 'get') and callable(df.get) and not isinstance(df, pd.DataFrame) else df + if field_tt in ["TI", "AB"]: - df = term_extraction(df, field=field_tt, stemming=False, verbose=False, - ngrams=ngrams, remove_terms=remove_terms, synonyms=synonyms) + # PATCH: missing abstracts/titles sometimes arrive as the literal + # string "nan" rather than a true NaN (e.g. after a CSV/JSON + # round-trip). term_extraction() would otherwise tokenize that + # string as a real word, producing a fake "NAN" term that drowns + # out or masks the genuine terms. Drop both true NaN and the + # literal "nan" string (case-insensitive, ignoring whitespace) + # before extracting terms. 
+ df_plain = df_plain[df_plain[field_tt].notna()] + df_plain = df_plain[ + ~df_plain[field_tt].astype(str).str.strip().str.lower().eq('nan') + ] + + if df_plain.empty: + return go.FigureWidget(go.Figure()), pd.DataFrame() + + df_plain = term_extraction(df_plain, field=field_tt, stemming=False, verbose=False, + ngrams=ngrams, remove_terms=remove_terms, synonyms=synonyms) field = f"{field_tt}_TM" else: field = field_tt # Get trend topics - trend_topics = field_by_year(df, field, time_window, word_minimum_frequency, number_of_words_year, remove_terms, synonyms) + trend_topics = field_by_year(df_plain, field, time_window, word_minimum_frequency, number_of_words_year, remove_terms, synonyms) + + # PATCH: safety check if trend_topics is empty + if trend_topics is None or trend_topics.empty: + return go.FigureWidget(go.Figure()), pd.DataFrame() # Plot - fig = px.scatter(trend_topics, x='year_med', y='item', size='freq', hover_data=['year_q1', 'year_q3'], height=800) + fig = px.scatter(trend_topics, x='year_med', y='item', size='freq', + hover_data=['year_q1', 'year_q3'], height=800) fig.update_layout( - xaxis_title='Year', - yaxis_title='Term', - showlegend=False, + xaxis_title='Year', + yaxis_title='Term', + showlegend=False, plot_bgcolor='white', xaxis=dict(showgrid=False), yaxis=dict(showgrid=True, gridcolor='lightgrey'), @@ -80,39 +89,90 @@ def get_trend_topics(df, ngram, field_tt, time_window, file_upload_terms_tt, fil for i in range(len(trend_topics)): fig.add_shape( type='line', - x0=trend_topics['year_q1'].iloc[i], + x0=trend_topics['year_q1'].iloc[i], y0=trend_topics['item'].iloc[i], - x1=trend_topics['year_q3'].iloc[i], + x1=trend_topics['year_q3'].iloc[i], y1=trend_topics['item'].iloc[i], - line=dict(color='lightblue', width=5), # Adjust width proportionallyù + line=dict(color='lightblue', width=5), layer='below' ) - fig.update_traces(marker=dict(color='dodgerblue', opacity=1), selector=dict(mode='markers')) # Ensure no opacity and bring to front + 
fig.update_traces(marker=dict(color='dodgerblue', opacity=1), selector=dict(mode='markers')) fig = go.FigureWidget(fig) fig._config = fig._config | {'modeBarButtonsToRemove': ['pan', 'select', 'lasso2d', 'toImage'], 'displaylogo': False} return fig, trend_topics + def field_by_year(df, field, timespan, min_freq, n_items, remove_terms=None, synonyms=None): + """ + Compute trend topics statistics (median year, Q1, Q3, frequency) per term. + """ + # PATCH: df may be a Shiny reactive Value or a plain DataFrame + df = df.get() if hasattr(df, 'get') and callable(df.get) and not isinstance(df, pd.DataFrame) else df + df = df.copy() + + # PATCH: documents with a missing/empty value for the analysed field (e.g. no + # abstract, or an empty term list after extraction) must not be passed to + # cocMatrix(). Otherwise the missing value gets tokenized as the literal + # string "nan" and shows up as a fake "NAN" term in the plot, drowning out + # or masking the real terms. + df = df.dropna(subset=[field]) + df = df[df[field].apply(lambda x: len(x) > 0 if isinstance(x, (list, str)) else bool(x))] + + # PATCH: strip any stray literal "nan" tokens that may have survived + # inside extracted term lists (case-insensitive), then drop rows that + # become empty as a result. 
+ def _strip_nan_tokens(x): + if isinstance(x, list): + return [t for t in x if str(t).strip().lower() != 'nan'] + return x + + df[field] = df[field].apply(_strip_nan_tokens) + df = df[df[field].apply(lambda x: len(x) > 0 if isinstance(x, (list, str)) else bool(x))] + + if df.empty: + return pd.DataFrame() + # Create co-occurrence matrix A = cocMatrix(df, Field=field, binary=False, remove_terms=remove_terms, synonyms=synonyms) - n = A.sum(axis=0).to_numpy() # Convert to 1D array - df = df.get() - # Calculate quantiles - trend_med = pd.DataFrame(A.values).apply(lambda x: pd.Series(np.round(np.quantile(np.repeat(df['PY'], x), [0.25, 0.5, 0.75]))), axis=0).T + # PATCH: safety check if cocMatrix returns None or empty + if A is None or A.empty: + return pd.DataFrame() + + n = A.sum(axis=0).to_numpy() + + # PATCH: PY is stored as string in ETL output — convert to numeric + # before passing to np.quantile to avoid TypeError on string subtraction. + df['PY'] = pd.to_numeric(df['PY'], errors='coerce') + + # PATCH: skip columns with zero total frequency when computing quantiles + def safe_quantile(x): + repeated = np.repeat(df['PY'].values, x.astype(int)) + if len(repeated) == 0: + return pd.Series([np.nan, np.nan, np.nan]) + return pd.Series(np.round(np.quantile(repeated, [0.25, 0.5, 0.75]))) + + trend_med = pd.DataFrame(A.values).apply(safe_quantile, axis=0).T trend_med.columns = ['year_q1', 'year_med', 'year_q3'] trend_med['freq'] = n trend_med['item'] = A.columns - # Filter by timespan and frequency - if timespan is None or len(timespan) != 2: + trend_med = trend_med.dropna(subset=['year_med']) + + if trend_med.empty: + return trend_med + + # PATCH: timespan may be passed as an integer (time_window) rather than a + # [start, end] list — len() on an int crashes with TypeError. + # Treat any non-list value as missing and fall back to the data range. 
+ if timespan is None or not isinstance(timespan, (list, tuple)) or len(timespan) != 2: timespan = [trend_med['year_med'].min(), trend_med['year_med'].max()] trend_med = trend_med[(trend_med['year_med'] >= timespan[0]) & (trend_med['year_med'] <= timespan[1])] trend_med = trend_med[trend_med['freq'] >= min_freq] trend_med = trend_med.groupby('year_med').apply(lambda x: x.nlargest(n_items, 'freq')).reset_index(drop=True) - return trend_med + return trend_med \ No newline at end of file diff --git a/functions/get_wordcloud.py b/functions/get_wordcloud.py index e902f3bd6..79cf2f3ae 100644 --- a/functions/get_wordcloud.py +++ b/functions/get_wordcloud.py @@ -1,28 +1,29 @@ from www.services import * +import ast def is_legible_on_white(color): - """Restituisce True se il colore è leggibile su sfondo bianco""" - r, g, b = mcolors.to_rgb(color) # Converti in valori 0-1 - luminance = 0.299 * r + 0.587 * g + 0.114 * b # Calcola la luminanza - - return 0.2 < luminance < 0.6 # Esclude colori troppo chiari o troppo scuri + """Returns True if the color is legible on a white background.""" + r, g, b = mcolors.to_rgb(color) + luminance = 0.299 * r + 0.587 * g + 0.114 * b + return 0.2 < luminance < 0.6 def get_wordcloud(df, ngram, num_of_words_wc, field_wc, file_upload_terms_wc, file_upload_synonyms_wc): """ - Generate a plot and table of the most frequent words. - + Generate a word cloud and table of the most frequent words. + Args: df: A DataFrame object containing the data. - num_of_words: The number of top frequent words to display. - word_type: The type of words to analyze (e.g., 'TI', 'AB'). - field_separator_frequent: The separator used in the field. - file_upload_terms: File containing terms to remove. - file_upload_synonyms: File containing synonyms. - + ngram: N-gram size for text fields. + num_of_words_wc: The number of top frequent words to display. + field_wc: The type of words to analyze (e.g., 'TI', 'AB'). + file_upload_terms_wc: File containing terms to remove. 
+ file_upload_synonyms_wc: File containing synonyms. + Returns: - A Plotly figure object and a DataFrame of the most frequent words. + The filename of the generated HTML word cloud and a DataFrame of + the most frequent words. """ # Load stopwords and synonyms @@ -41,7 +42,7 @@ def get_wordcloud(df, ngram, num_of_words_wc, field_wc, file_upload_terms_wc, fi values = terms[1:] synonyms[key] = values - # Set ngrams based on word_type + # Set ngrams based on field_wc ngrams = int(ngram) if field_wc in ['TI', 'AB'] else 1 # Get word counts @@ -56,39 +57,52 @@ def get_wordcloud(df, ngram, num_of_words_wc, field_wc, file_upload_terms_wc, fi word_frequencies = dict(zip(word_counts["Words"], word_counts["Occurrences"])) G = nx.Graph() - + colors = [c for c in mcolors.CSS4_COLORS.values() if is_legible_on_white(c)] - + if not colors: + colors = ["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd"] + sorted_words = sorted(word_frequencies.items(), key=lambda x: x[1], reverse=True) - center_word = sorted_words[0][0] + if not sorted_words: + tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".html") + with open(tmp.name, 'w', encoding="utf-8") as f: + f.write("

No words to display.

") + return tmp.name.split(os.sep)[-1], table + + center_word = sorted_words[0][0] compact_radius = radius * 0.6 for word, count in sorted_words: - size = max(500, min(2000, count * 2.5)) - font_size = max(20, min(120, count * 1.5)) + size = max(500, min(2000, count * 2.5)) + font_size = max(20, min(120, count * 1.5)) color = random.choice(colors) - - theta = random.uniform(0, 2 * math.pi) - r = compact_radius * math.sqrt(random.uniform(0, 1)) + + theta = random.uniform(0, 2 * math.pi) + r = compact_radius * math.sqrt(random.uniform(0, 1)) pos_x = r * math.cos(theta) pos_y = r * math.sin(theta) - - G.add_node(word, label=word, title=f"{word}: {count}", color="rgba(0,0,0,0)", - font={"size": font_size, "color": color, "strokeWidth": 1, "face": "Arial"}, x=pos_x, y=pos_y) - # Creazione della rete interattiva con Pyvis + G.add_node(word, label=word, title=f"{word}: {count}", color="rgba(0,0,0,0)", + font={"size": font_size, "color": color, "strokeWidth": 1, "face": "Arial"}, + x=pos_x, y=pos_y) + g = Network(width="100%", height="98vh", bgcolor="white", font_color="black") g.from_nx(G) - + for n in g.nodes: n["size"] = G.nodes[n["id"]]["size"] - n["font"] = {"size": G.nodes[n["id"]]["font"]["size"], "color": G.nodes[n["id"]]["font"]["color"], "strokeWidth": 1, "face": "Arial"} + n["font"] = { + "size": G.nodes[n["id"]]["font"]["size"], + "color": G.nodes[n["id"]]["font"]["color"], + "strokeWidth": 1, + "face": "Arial" + } n["shape"] = "text" - - g.force_atlas_2based(gravity=-30, central_gravity=0.01, spring_length=60, spring_strength=0.08, damping=0.9) - - # Save the HTML file + + g.force_atlas_2based(gravity=-30, central_gravity=0.01, spring_length=60, + spring_strength=0.08, damping=0.9) + tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".html") html_path = tmp.name with open(html_path, 'w', encoding="utf-8") as f: @@ -96,9 +110,8 @@ def get_wordcloud(df, ngram, num_of_words_wc, field_wc, file_upload_terms_wc, fi new_css = " .card {\n border: none;\n }" 
updated_html = html.replace("", new_css + "\n ") updated_html = updated_html.replace("1px solid lightgray", "none") - f.write(updated_html) - + return html_path.split(os.sep)[-1], table @@ -106,34 +119,52 @@ def table_tag(df, tag, ngrams=1, remove_terms=None, synonyms=None): """ Extract and count words from a specified field in the DataFrame. """ - M = df.get() - + # PATCH: df may be a Shiny reactive Value or a plain DataFrame + M = df.get() if hasattr(df, 'get') and callable(df.get) and not isinstance(df, pd.DataFrame) else df + M = M.copy() + # Remove duplicates M = M.drop_duplicates(subset='SR') - + # Get text data based on tag if tag in ['AB', 'TI']: - text_data = term_extraction(df, field=tag, stemming=False, verbose=False, - ngrams=ngrams, remove_terms=remove_terms, synonyms=synonyms) - text_data = text_data.get() + # PATCH: pass plain DataFrame to term_extraction — it does not accept reactives + df_plain = df.get() if hasattr(df, 'get') and callable(df.get) and not isinstance(df, pd.DataFrame) else df + text_data = term_extraction(df_plain, field=tag, stemming=False, verbose=False, + ngrams=ngrams, remove_terms=remove_terms, synonyms=synonyms) text_data = text_data[f"{tag}_TM"] else: + # PATCH: some tags (e.g. 'WC' - Subject Categories) are not present at all + # in the standardized schema for non-WoS sources (OpenAlex, PubMed). + # Return an empty result instead of raising a raw KeyError. 
+ if tag not in M.columns: + return Counter() text_data = M[tag] # Handle list columns (DE and ID) if tag in ['DE', 'ID']: - text_data = text_data.dropna().apply(lambda x: ', '.join(eval(x) if isinstance(x, str) else x)) + def safe_parse(x): + if isinstance(x, list): + return x + try: + return ast.literal_eval(x) + except (ValueError, SyntaxError): + return [] + + text_data = text_data.dropna().apply(lambda x: ', '.join(safe_parse(x))) # Process words if tag in ['DE', 'ID']: words = text_data.dropna().astype(str).str.cat(sep=', ').upper() words = [word.strip() for word in words.split(',') if word and word.strip()] else: - words = [item for sublist in text_data for item in sublist] - - # Apply n-grams if needed - # if ngrams > 1 and tag not in ['DE', 'ID']: - # words = [' '.join(words[i:i+ngrams]) for i in range(len(words)-ngrams+1)] + # PATCH: filter only list elements before iterating to avoid TypeError + words = [ + item + for sublist in text_data + if isinstance(sublist, list) + for item in sublist + ] # Replace synonyms if synonyms: @@ -143,9 +174,11 @@ def table_tag(df, tag, ngrams=1, remove_terms=None, synonyms=None): # Count words word_counts = Counter(words) - # Remove specified terms - if remove_terms and tag in ['DE', 'ID']: - word_counts = {word: count for word, count in word_counts.items() - if word.upper() not in [term.upper() for term in remove_terms]} + # PATCH: apply remove_terms to all tags + if remove_terms: + word_counts = { + word: count for word, count in word_counts.items() + if word.upper() not in [term.upper() for term in remove_terms] + } - return word_counts + return word_counts \ No newline at end of file diff --git a/functions/get_wordfrequency.py b/functions/get_wordfrequency.py index 1f2b81a06..a43f9cf86 100644 --- a/functions/get_wordfrequency.py +++ b/functions/get_wordfrequency.py @@ -15,7 +15,8 @@ def get_word_frequency(df, ngram, field_wf, file_upload_terms_wf, file_upload_sy top_words: The number of top words to display. 
Returns: - A Plotly figure object representing the word frequency over time. + A Plotly FigureWidget object representing the word frequency over time + and a DataFrame of word frequencies per year. """ # Load terms to remove remove_terms = None @@ -34,25 +35,51 @@ def get_word_frequency(df, ngram, field_wf, file_upload_terms_wf, file_upload_sy values = terms[1:] synonyms[key] = values - # Set ngrams based on word_type + # Set ngrams based on field_wf ngrams = int(ngram) if field_wf in ['TI', 'AB'] else 1 - data = term_extraction(df, field=field_wf, stemming=False, verbose=False, - ngrams=ngrams, remove_terms=remove_terms, synonyms=synonyms) - data = data.get() + + # PATCH: extract plain DataFrame before passing to term_extraction + df_plain = df.get() if hasattr(df, 'get') and callable(df.get) and not isinstance(df, pd.DataFrame) else df + # PATCH: term_extraction crashes with ValueError when the field column is + # entirely empty (e.g. PubMed DE is always empty from eSummary API) because + # sklearn's vectorizer receives an empty vocabulary. + # Wrap in try/except and return empty results gracefully. + try: + data = term_extraction(df_plain, field=field_wf, stemming=False, verbose=False, + ngrams=ngrams, remove_terms=remove_terms, synonyms=synonyms) + except ValueError: + return go.FigureWidget(go.Figure()), pd.DataFrame() + tm_col = f"{field_wf}_TM" + if tm_col in data.columns and data[tm_col].apply(lambda x: len(x) if isinstance(x, list) else 0).sum() == 0: + return go.FigureWidget(go.Figure()), pd.DataFrame() if field_wf == 'TI': print(data[f"{field_wf}_TM"]) # Calculate word frequency + # PATCH: top_words may be passed as a plain int rather than a [start, end] + # list — indexing an int crashes with TypeError. + # Normalize to a [0, n] range if a plain int is given. 
+ if isinstance(top_words, int): + top_words = [0, top_words] + if field_wf in ['AB', 'TI']: - word_freq = keyword_growth(data, tag=f"{field_wf}_TM", top=top_words[1], cdf=(occurrences == 'cumulate'), remove_terms=remove_terms, synonyms=synonyms) + word_freq = keyword_growth(data, tag=f"{field_wf}_TM", top=top_words[1], cdf=(occurrences == 'cumulate'), + remove_terms=remove_terms, synonyms=synonyms) else: - word_freq = keyword_growth(data, tag=field_wf, top=top_words[1], cdf=(occurrences == 'cumulate'), remove_terms=remove_terms, synonyms=synonyms) + word_freq = keyword_growth(data, tag=field_wf, top=top_words[1], cdf=(occurrences == 'cumulate'), + remove_terms=remove_terms, synonyms=synonyms) + + # PATCH 2: top_words[1] was used both as the max number of terms in + # keyword_growth and as a column slice index. If top_words[0] >= number of + # available columns, or top_words has fewer than 2 elements, this crashes + # with IndexError. Added bounds clamping to avoid out-of-range slicing. + available_cols = [c for c in word_freq.columns if c != 'Year'] + start = max(0, min(top_words[0], len(available_cols))) + end = max(0, min(top_words[1] + 1, len(available_cols))) + selected_cols = available_cols[start:end] + word_freq = word_freq[['Year'] + selected_cols] - - # Select terms between top_words[1] and top_words[2] - word_freq = word_freq[['Year'] + word_freq.columns[top_words[0]:top_words[1] + 1].tolist()] - # Reshape the data for plotting word_freq_melted = word_freq.melt(id_vars=['Year'], var_name='Term', value_name='Frequency') @@ -98,9 +125,24 @@ def get_word_frequency(df, ngram, field_wf, file_upload_terms_wf, file_upload_sy return fig, word_freq -# Funzioni ausiliarie + def trim_years(w, year_range, cdf=True): - """Funzione per calcolare frequenze cumulative o annuali.""" + """ + Calculate cumulative or annual frequencies aligned to a year range. + + Args: + w: A pandas Series indexed by year with frequency values. + year_range: The range of years to align to. 
+ cdf: If True, compute cumulative frequencies. + + Returns: + A pandas Series with frequencies aligned to year_range. + """ + # PATCH 5: if year_range is empty (e.g. after filtering), return an empty + # Series immediately instead of producing an inconsistent zero-length result. + if len(year_range) == 0: + return pd.Series([], dtype=float) + W = np.zeros(len(year_range)) Y = np.array(list(w.index)) w_values = np.array(w) @@ -121,39 +163,65 @@ def trim_years(w, year_range, cdf=True): def keyword_growth(df, tag, sep=";", top=10, cdf=True, remove_terms=None, synonyms=None): """ - Simula la funzione KeywordGrowth in R. - df: dataframe con i dati. - tag: colonna da analizzare. - sep: separatore per il parsing. - top: numero massimo di termini da considerare. - cdf: se True, calcola occorrenze cumulative. - remove_terms: lista di termini da rimuovere. - synonyms: dizionario {termine_sostituto: [lista_di_sinonimi]}. + Compute keyword frequency growth over time. + + Args: + df: DataFrame with bibliometric data. + tag: Column to analyze. + sep: Separator for string parsing. + top: Maximum number of terms to consider. + cdf: If True, compute cumulative occurrences. + remove_terms: List of terms to remove. + synonyms: Dict {replacement_term: [list_of_synonyms]}. + + Returns: + A DataFrame with one column per top term and one row per year. """ - # Parsing e filtraggio df = df.dropna(subset=[tag]) - expanded = [item.upper() for sublist in df[tag].apply(lambda x: x.split(sep) if isinstance(x, str) else x) for item in sublist] - years = df.loc[df.index.repeat(df[tag].apply(lambda x: len(x.split(sep)) if isinstance(x, str) else len(x))), 'PY'].values + + # PATCH 4: iterating over elements without type checking — if an element is + # neither a string nor a list (e.g. None or float NaN after dropna on other + # columns), iterating over it crashes with TypeError. + # → skip elements that are not string or list before expanding. 
+ def safe_split(x): + if isinstance(x, str): + return x.split(sep) + if isinstance(x, list): + return x + return [] + + expanded = [item.upper() for sublist in df[tag].apply(safe_split) for item in sublist] + years = df.loc[ + df.index.repeat(df[tag].apply(lambda x: len(x.split(sep)) if isinstance(x, str) else len(x) if isinstance(x, list) else 0)), + 'PY' + ].values data = pd.DataFrame({'Term': expanded, 'Year': years}) - - # Rimuovi terms + + # Remove terms if remove_terms: data = data[~data['Term'].str.upper().isin([term.upper() for term in remove_terms])] - - # Gestione dei sinonimi + + # Handle synonyms if synonyms: for main_term, syns in synonyms.items(): data['Term'] = data['Term'].replace(syns, main_term.upper()) - - # Aggregazione + + # PATCH 3: if data is empty after filtering (all terms removed or no valid + # rows), data['Year'].min() and .max() return NaN and range(NaN, NaN) + # crashes with TypeError. + # → return an empty DataFrame with just a Year column instead of crashing. 
+ if data.empty: + return pd.DataFrame(columns=['Year']) + + # Aggregation freq = data.groupby(['Term', 'Year']).size().reset_index(name='Freq') - year_range = range(data['Year'].min(), data['Year'].max() + 1) - - # Selezione dei termini più frequenti + year_range = range(int(data['Year'].min()), int(data['Year'].max()) + 1) + + # Select most frequent terms top_terms = freq.groupby('Term')['Freq'].sum().nlargest(top).index freq = freq[freq['Term'].isin(top_terms)] - # Costruzione del dataframe finale + # Build final DataFrame results = pd.DataFrame({'Year': year_range}) for term in top_terms: term_freq = freq[freq['Term'] == term].set_index('Year')['Freq'] diff --git a/functions/get_worldmapcollaboration.py b/functions/get_worldmapcollaboration.py index 9edafa879..ce7a5b322 100644 --- a/functions/get_worldmapcollaboration.py +++ b/functions/get_worldmapcollaboration.py @@ -6,112 +6,276 @@ import plotly.express as px import plotly.graph_objects as go + def get_world_map_collaboration(df, edges_min=1, edgesize=5): - # Estrai metadati dai paesi (assumi che tu abbia già AU_CO processato) + + # Extract metadata M = df + df = metaTagExtraction(df, "AU_CO") - df = df.get() + # PATCH: metaTagExtraction may return a reactive or a plain DataFrame + df = df.get() if hasattr(df, 'get') and callable(df.get) and not isinstance(df, pd.DataFrame) else df + + # ---------------- SAFE COUNTRY COLUMN PATCH ---------------- + df["AU_CO"] = df["AU_CO"].fillna("").apply( + lambda x: x if isinstance(x, list) else [x] + ) - # Normalizza e conta le occorrenze dei paesi (come in get_countries_production) - df["AU_CO"] = df["AU_CO"].apply(lambda x: x if isinstance(x, list) else [x]) df = df.explode("AU_CO") + df["AU_CO"] = df["AU_CO"].astype(str).str.strip() + + df = df[df["AU_CO"] != ""] + + # ---------------- COUNTRY NORMALIZATION ---------------- def clean_country_names(country): + corrections = { "USA": "UNITED STATES OF AMERICA", "UK": "UNITED KINGDOM", "SOUTH KOREA": "KOREA", } - return 
corrections.get(str(country).upper().strip(), str(country).upper().strip()) + + return corrections.get( + str(country).upper().strip(), + str(country).upper().strip() + ) df["AU_CO"] = df["AU_CO"].apply(clean_country_names) - # Calcola la frequenza dei paesi - country_counts = df["AU_CO"].value_counts().reset_index() + # ---------------- SAFE COUNTRY COUNTS PATCH ---------------- + country_counts = ( + df["AU_CO"] + .value_counts() + .reset_index() + ) + country_counts.columns = ["Tab", "Freq"] - # Costruisci matrice di collaborazione - net = biblionetwork(M, analysis="collaboration", network="countries") + country_counts["Freq"] = pd.to_numeric( + country_counts["Freq"], + errors="coerce" + ).fillna(0) + + # ---------------- SAFE NETWORK PATCH ---------------- + net = biblionetwork( + M, + analysis="collaboration", + network="countries" + ) + + if net is None or len(net) == 0: + empty_fig = go.FigureWidget(go.Figure()) + return empty_fig, pd.DataFrame() + net_df = pd.DataFrame(net) - # Costruisci rete + # ---------------- BUILD NETWORK ---------------- G = nx.from_pandas_adjacency(net_df) + COedges = [] + for u, v, d in G.edges(data=True): + if u != v: - COedges.append({'From': u, 'To': v, 'count': net_df.loc[u, v]}) + + COedges.append({ + 'From': u, + 'To': v, + 'count': net_df.loc[u, v] + }) + COedges = pd.DataFrame(COedges) + if not COedges.empty: - COedges = COedges[COedges['From'] != COedges['To']] - COedges['key'] = COedges.apply(lambda x: tuple(sorted([x['From'], x['To']])), axis=1) - COedges = COedges.groupby('key').agg({'From': 'first', 'To': 'first', 'count': 'sum'}).reset_index(drop=True) + + COedges = COedges[ + COedges['From'] != COedges['To'] + ] + + COedges['key'] = COedges.apply( + lambda x: tuple(sorted([x['From'], x['To']])), + axis=1 + ) + + COedges = ( + COedges + .groupby('key') + .agg({ + 'From': 'first', + 'To': 'first', + 'count': 'sum' + }) + .reset_index(drop=True) + ) + tab = COedges.copy() - COedges = COedges[COedges['count'] >= 
edges_min] + + COedges = COedges[ + COedges['count'] >= edges_min + ] + else: - tab = pd.DataFrame(columns=['From', 'To', 'count']) - # Carica geometrie mondo + tab = pd.DataFrame( + columns=['From', 'To', 'count'] + ) + + # ---------------- LOAD WORLD GEOMETRY ---------------- url = "https://naciscdn.org/naturalearth/110m/cultural/ne_110m_admin_0_countries.zip" + world = gpd.read_file(url) - world['Nations'] = world['SOVEREIGNT'].str.upper().str.strip() + + world['Nations'] = ( + world['SOVEREIGNT'] + .str.upper() + .str.strip() + ) + world['Nations'] = world['Nations'].replace({ "USA": "UNITED STATES OF AMERICA", "UK": "UNITED KINGDOM", "SOUTH KOREA": "KOREA" }) - world = world.dissolve(by="Nations").reset_index() - # Unisci i dati dei paesi con le frequenze corrette - country_prod = world.merge(country_counts, how='left', left_on='Nations', right_on='Tab') - country_prod = country_prod.drop_duplicates(subset=['Nations']) - country_prod['Freq'] = country_prod['Freq'].fillna(0) + world = world.dissolve( + by="Nations" + ).reset_index() + + # ---------------- MERGE COUNTRY FREQUENCIES ---------------- + country_prod = world.merge( + country_counts, + how='left', + left_on='Nations', + right_on='Tab' + ) + + country_prod = country_prod.drop_duplicates( + subset=['Nations'] + ) + + country_prod['Freq'] = ( + country_prod['Freq'] + .replace([float('inf'), float('-inf')], pd.NA) + .fillna(0) + ) - # Ottieni coordinate centroidi + # ---------------- SAFE CENTROID PATCH ---------------- countries_coords = world[['Nations', 'geometry']].copy() - countries_coords['Longitude'] = countries_coords['geometry'].centroid.x - countries_coords['Latitude'] = countries_coords['geometry'].centroid.y - # Aggiorna manualmente le coordinate di UNITED KINGDOM e FRANCIA - countries_coords.loc[countries_coords['Nations'] == 'UNITED KINGDOM', ['Longitude', 'Latitude']] = [-3.4360, 55.3781] - countries_coords.loc[countries_coords['Nations'] == 'FRANCE', ['Longitude', 'Latitude']] = [2.2137, 
46.6034] + countries_coords['Longitude'] = ( + countries_coords.geometry.centroid.x + ) + + countries_coords['Latitude'] = ( + countries_coords.geometry.centroid.y + ) + + countries_coords['Longitude'] = pd.to_numeric( + countries_coords['Longitude'], + errors='coerce' + ).fillna(0) - # Aggiungi Singapore manualmente se non presente + countries_coords['Latitude'] = pd.to_numeric( + countries_coords['Latitude'], + errors='coerce' + ).fillna(0) + + # ---------------- MANUAL COORDINATE FIXES ---------------- + countries_coords.loc[ + countries_coords['Nations'] == 'UNITED KINGDOM', + ['Longitude', 'Latitude'] + ] = [-3.4360, 55.3781] + + countries_coords.loc[ + countries_coords['Nations'] == 'FRANCE', + ['Longitude', 'Latitude'] + ] = [2.2137, 46.6034] + + # ---------------- SINGAPORE PATCH ---------------- if 'SINGAPORE' not in countries_coords['Nations'].values: - # Coordinate approssimative per Singapore + singapore_row = pd.DataFrame([{ 'Nations': 'SINGAPORE', 'geometry': None, 'Longitude': 103.8198, 'Latitude': 1.3521 }]) - countries_coords = pd.concat([countries_coords, singapore_row], ignore_index=True) + countries_coords = pd.concat( + [countries_coords, singapore_row], + ignore_index=True + ) + + # ---------------- COUNTRY NAME FIX ---------------- def fix_country_name_for_merge(country): + if country == "USA": return "UNITED STATES OF AMERICA" + if country == "UK": return "UNITED KINGDOM" + if country == "SOUTH KOREA": return "KOREA" + return country + # ---------------- EDGE COORDINATES ---------------- if not COedges.empty: - COedges['From'] = COedges['From'].apply(fix_country_name_for_merge) - COedges['To'] = COedges['To'].apply(fix_country_name_for_merge) - COedges = COedges.merge(countries_coords, left_on='From', right_on='Nations', how='left') - COedges = COedges.rename(columns={'Longitude': 'Longitude_x', 'Latitude': 'Latitude_x'}) - COedges = COedges.merge(countries_coords, left_on='To', right_on='Nations', how='left', suffixes=('', '_y')) - COedges 
= COedges.rename(columns={'Longitude': 'Longitude_y', 'Latitude': 'Latitude_y'}) - # Replace NaN or infinite values in coordinates with 0 (o altro valore di default) - for col in ['Longitude_x', 'Latitude_x', 'Longitude_y', 'Latitude_y']: - COedges[col] = COedges[col].replace([float('inf'), float('-inf')], pd.NA) - COedges[col] = COedges[col].fillna(0) - country_prod['Freq'] = country_prod['Freq'].replace([float('inf'), float('-inf')], pd.NA).fillna(0) + COedges['From'] = COedges['From'].apply( + fix_country_name_for_merge + ) + + COedges['To'] = COedges['To'].apply( + fix_country_name_for_merge + ) + + COedges = COedges.merge( + countries_coords, + left_on='From', + right_on='Nations', + how='left' + ) + + COedges = COedges.rename(columns={ + 'Longitude': 'Longitude_x', + 'Latitude': 'Latitude_x' + }) + + COedges = COedges.merge( + countries_coords, + left_on='To', + right_on='Nations', + how='left', + suffixes=('', '_y') + ) + + COedges = COedges.rename(columns={ + 'Longitude': 'Longitude_y', + 'Latitude': 'Latitude_y' + }) + for col in [ + 'Longitude_x', + 'Latitude_x', + 'Longitude_y', + 'Latitude_y' + ]: - # Crea mappa base + COedges[col] = ( + COedges[col] + .replace( + [float('inf'), float('-inf')], + pd.NA + ) + .fillna(0) + ) + + # ---------------- BASE MAP ---------------- geojson_data = country_prod.__geo_interface__ + fig = px.choropleth( country_prod, geojson=geojson_data, @@ -123,23 +287,46 @@ def fix_country_name_for_merge(country): color_continuous_scale=px.colors.sequential.Blues, ) - # Aggiungi edges (collaborazioni) + # ---------------- SAFE EDGE WIDTH PATCH ---------------- if not COedges.empty: + for _, row in COedges.iterrows(): - width = max(0.5, (row['count'] / edges_min) * edgesize) # garantisce spessore minimo + + safe_count = max(row['count'], 1) + + width = max( + 0.5, + (safe_count / max(edges_min, 1)) * edgesize + ) + fig.add_trace( go.Scattergeo( - lon=[row['Longitude_x'], row['Longitude_y']], - lat=[row['Latitude_x'], 
row['Latitude_y']], + lon=[ + row['Longitude_x'], + row['Longitude_y'] + ], + lat=[ + row['Latitude_x'], + row['Latitude_y'] + ], mode='lines', - line=dict(width=width, color='firebrick'), + line=dict( + width=width, + color='firebrick' + ), opacity=0.4, hoverinfo='text', - text=f"Collaboration between {row['From']} and {row['To']}: {row['count']}", + text=( + f"Collaboration between " + f"{row['From']} and " + f"{row['To']}: " + f"{row['count']}" + ), showlegend=False ) ) + # ---------------- MAP SETTINGS ---------------- fig.update_geos( showcoastlines=True, showland=True, @@ -148,17 +335,18 @@ def fix_country_name_for_merge(country): countrycolor="gray", fitbounds="locations", visible=False, - projection_type="natural earth", # Imposta una proiezione statica - lataxis_range=[-60, 85], # Limita la latitudine per evitare lo scroll verticale - lonaxis_range=[-180, 180], # Limita la longitudine per evitare lo scroll orizzontale + projection_type="natural earth", + lataxis_range=[-60, 85], + lonaxis_range=[-180, 180], resolution=110, - center=dict(lat=10, lon=0), # Centra la mappa - scope="world" # Limita la visualizzazione al mondo + center=dict(lat=10, lon=0), + scope="world" ) fig.update_layout( margin=dict(l=0, r=0, t=0, b=0), height=850, + geo=dict( lakecolor='white', projection_scale=1, @@ -173,7 +361,9 @@ def fix_country_name_for_merge(country): lonaxis_range=[-180, 180], scope="world" ), + coloraxis_showscale=True, + coloraxis=dict( colorbar=dict( orientation='h', @@ -183,12 +373,27 @@ def fix_country_name_for_merge(country): x=0.5 ) ), + showlegend=False ) - fig.update_traces(hovertemplate=None) + fig.update_traces( + hovertemplate=None + ) + fig = go.FigureWidget(fig) - fig._config = fig._config | {'modeBarButtonsToRemove': ['pan', 'select', 'lasso2d', 'toImage'], - 'displaylogo': False} + + fig._config = ( + fig._config | + { + 'modeBarButtonsToRemove': [ + 'pan', + 'select', + 'lasso2d', + 'toImage' + ], + 'displaylogo': False + } + ) return fig, tab \ No 
newline at end of file diff --git a/limitations.md b/limitations.md new file mode 100644 index 000000000..8b1918994 --- /dev/null +++ b/limitations.md @@ -0,0 +1,60 @@ +# OpenAlex Data-Characteristic Limitations + +This document records dashboard behaviors that stem from inherent gaps between OpenAlex's data model and the Web of Science (WoS) schema the dashboard was originally built for, rather than from bugs in the ETL pipeline or dashboard code. + +## Keywords Plus (`ID`) — structurally absent from OpenAlex + +OpenAlex has no equivalent of WoS's proprietary Keywords Plus algorithm, so the `ID` field is populated as an empty list for every record (consistent with PubMed, where it is equally absent). This propagates into every panel that depends on `ID` as a text source: + +- Most Frequent Words +- WordCloud +- TreeMap +- Words' Frequency over Time +- Trend Topics +- Co-occurrence Network +- Thematic Map +- Thematic Evolution +- Factorial Approach + +All of the above return empty results when Keywords Plus is selected as the field. This is not a processing failure; there is no underlying text to analyze. + +## Subject Categories (`WC`) — no OpenAlex equivalent exists + +Unlike `ID`, which is explicitly created and filled with an empty value, `WC` is never added to the standardized schema for OpenAlex at all, since WoS-style subject category classification has no corresponding field in either source API. Selecting Subject Categories in Most Frequent Words / WordCloud / TreeMap now returns an empty result rather than crashing: `table_tag()` checks `if tag not in M.columns` before accessing the column, so the missing field degrades the same way `ID` does above (see "Keywords Plus" — no underlying text to analyze, so the panel is empty by design, not by failure). + +## Author Institutions (`AU_UN`) — affiliation string format incompatible with WoS-style parsing + +Collaboration Network produces no output when Field is set to Institutions.
The institution-extraction logic scans comma-separated segments of the affiliation string for WoS-convention tags (e.g. `UNIV`, `INST`, `COLL`); OpenAlex's `raw_affiliation_strings` don't follow that same comma-segmented structure, so the heuristic largely fails to isolate clean institution names. This is the same underlying affiliation-format mismatch already documented for author-country extraction, just manifesting in a different downstream feature. + +# PubMed Data-Characteristic Limitations + +This document records dashboard behaviors that stem from inherent gaps between what PubMed's API returns and the Web of Science (WoS) schema the dashboard was originally built for, rather than from bugs in the ETL pipeline or dashboard code. + +## Cited References (`CR`) — recovered for only a small fraction of records + +PubMed's reference list is captured for roughly 7.5% of records in the 200-row test sample. Every analysis that depends on matching cited references within the sample itself, rather than simply storing them, is sensitive to this sparsity: +- Sources' Local Impact +- Most Local Cited Authors +- Authors' Local Impact +- Co-citation Network +- Cluster by Coupling +- Historiograph +- Three-Field Plot (when Cited Sources is selected) +With so few within-sample citation links available, these panels have nothing to build a network or score from. This is not a processing failure; there is no underlying reference data to match against. + +## Keywords Plus (`ID`) — structurally absent from PubMed + +PubMed has no equivalent of WoS's proprietary Keywords Plus algorithm, so the `ID` field is populated as an empty list for every record (consistent with OpenAlex, where it is equally absent).
This propagates into every panel that depends on `ID` as a text source: +- Most Frequent Words +- WordCloud +- TreeMap +- Words' Frequency over Time +- Co-occurrence Network +- Thematic Map +- Factorial Approach +- Historiograph +- Three-Field Plot (when Keywords Plus is selected) +All of the above return empty results when Keywords Plus is selected as the field. This is not a processing failure; there is no underlying text to analyze. + +## Publication Year (`PY`) distribution — narrow and skewed in the test sample +Thematic Evolution produces no output for any field, including Titles, even though that field is fully populated with text. The 200-row test sample spans only 4 distinct publication years (2023–2026), with 139 of the 200 rows concentrated in 2024 alone, leaving the year-binning step without enough spread across periods to form usable time slices. This is a property of the test sample's composition rather than a defect in the field content or the binning logic itself. diff --git a/patching_documented.md b/patching_documented.md new file mode 100644 index 000000000..28b338962 --- /dev/null +++ b/patching_documented.md @@ -0,0 +1,483 @@ +# Patching Documentation + +> **Update note (post-verification):** every mention below of *"OpenAlex: CR contains URLs"* described the state of the ETL **before** `resolve_openalex_references()` was added to `standardizer.py`. Direct inspection of `test_openalex_200.csv` (the 200-row OpenAlex test set) shows `CR` fully resolved into `"Author, Year, Journal"`-style strings for 200/200 rows, with zero raw `openalex.org` URLs remaining. The eight entries below that referenced the old behavior have been corrected accordingly; each corrected line is marked **[UPDATED]**. + +## Services + +### `www/services/biblionetwork.py` +**Status:** PASS (all fields, both sources) +**Patches applied:** +- None check on input M: if M is None, prints a message and returns None gracefully instead of crashing downstream.
+- None checks on cocMatrix return values: each branch checks if WA, WCR, WSO, WCO etc. are None before attempting matrix multiplication — returns None gracefully if any required matrix is missing. +- M.get() → isinstance check in final cleanup: after computing NetMatrix, unwrap M with isinstance check before accessing M.columns. Reason: M may already be a plain DataFrame after term_extraction. +- db_name default changed from hardcoded "web_of_science" to "": unknown sources no longer silently get treated as WoS. +- db_name normalization to lowercase: `str(M["DB"].iloc[0]).lower()` ensures consistent comparison regardless of DB value casing. +- Scopus reference filter now checks `db_name == "scopus"` (lowercase) to match the normalized db_name. +- label_short — added "openalex" and "pubmed" to the WoS branch: both sources produce SR strings in the same "Author, Year, Journal" format, so they are routed to the same label shortening logic. +- label_short — unknown sources: labels returned unchanged instead of crashing. + +### `cocmatrix.py` +**Status:** PASS (all fields, both sources) +**Patches applied:** +- df.get() → isinstance check at the top: unwrap Shiny reactive or use plain DataFrame directly. Reason: pandas .get() requires a column name as argument, crashes without one. Fix: `df.get() if hasattr(df, 'get') and callable(df.get) and not isinstance(df, pd.DataFrame) else df`. +- None/empty check on input: if M is None or empty, prints a message and returns None gracefully. +- SR column fallback: if LABEL is not in columns, falls back to SR as the index — prints a message and returns None if SR is also missing. +- Field existence check: if the requested field is not a column in M, prints a message and returns None instead of crashing with KeyError. +- CR field safety: `DOI;` → `DOI` replacement applied only when CR contains lists, avoiding TypeError on non-list entries. +- Empty matrix guard: if uniqueField is empty after filtering, prints "Matrix is empty!!" 
and returns None gracefully instead of creating a zero-column matrix. +- reduceRefs type check: skips non-string entries in refs list with `isinstance(ref, str)` check to avoid AttributeError on None or numeric values. + +### `couplingmap.py` +**Status:** PASS (both sources) +**Patches applied:** +- couplingMap — df.get() → isinstance check: after metaTagExtraction, unwrap result with isinstance check to get plain DataFrame M. +- couplingMap — network() None guard: network() returns None when the matrix is empty (e.g. empty CR for PubMed, or any source with too few citation links for the chosen field). Return None gracefully instead of crashing on `Net['graph']`. **[UPDATED]** +- couplingMap — normalizeCitationScore() None guard: normalizeCitationScore may return None if localCitations fails. Return None gracefully. +- couplingMap — empty cluster filter guard: if df is empty after the frequency filter (`df['freq'] >= minfreq`), return None gracefully instead of crashing on downstream computations. +- normalizeCitationScore — localCitations None guard: localCitations may return None if histNetwork finds no citations. Return None gracefully. +- normalizeCitationScore — isinstance check for reactive unwrapping in global impact branch. +- localCitations — df.get() → isinstance check after metaTagExtraction. +- localCitations — None/empty check on M after unwrapping. +- localCitations — histNetwork None guard: histNetwork may return None when no local citations are found. Return None gracefully. +- localCitations — zero LCS guard: if all LCS values are 0, return None to avoid propagating empty results downstream. +- network — isinstance check for df_plain before passing to term_extraction or biblionetwork. +- network — None guard on NetMatrix: if NetMatrix is None or matrix is empty, print message and return None gracefully. +- labeling — removed reactive.Value wrapper: df is already a plain DataFrame when passed to term_extraction inside labeling. 
+ +### `format_functions.py` +**Status:** PASS (import check only) +**Patches applied:** +- PATCH 1 — columns NameError guard in process_single_file: `columns` was referenced without being defined in local scope, causing NameError. Fix: use `globals().get('columns', [])` to safely fall back to an empty list if columns is not defined. +- PATCH 2 — entry.get() TypeError guard in process_single_file: entries from bibtexparser may not support .get() with a default — wrapped in try/except to avoid silent KeyError or AttributeError crashes. +- PATCH 3 — author name unpacking guard in format_au_column for Scopus BibTeX: original code used `surname, names = person.split(", ")` without checking the number of parts — if the string contains no comma+space the unpacking crashes with ValueError. Fix: guard with `len(parts) == 2` check before unpacking. +- biblio_json — ETL CSV passthrough: added support for standardized CSV files produced by the ETL pipeline. If the CSV contains the standard WoS-like columns (TI, AU, PY, SO, SR, DB), it is passed through directly as JSON without re-parsing through the old source-specific formatters. + +### `histnetwork.py` +**Status:** PASS (both sources) +**Patches applied:** +- histNetwork — df.get() → isinstance check: original code called df.get() without arguments, crashing on a plain pandas DataFrame. Fix: `if isinstance(df, pd.DataFrame): M = df.copy() else: M = df.get().copy()`. +- histNetwork — None/empty check on M after unwrapping: if M is None or empty, return None gracefully. +- histNetwork — DB column missing guard: if DB column is absent, return None gracefully instead of crashing on `M['DB'].iloc[0]`. +- histNetwork — DI missing guard: if DI column is absent, fill with empty strings before processing. +- histNetwork — CR missing guard: if CR column is absent, print message and return None gracefully. +- histNetwork — CR list normalization: ensure CR entries are always lists before processing, handling string and NaN cases. 
+- histNetwork — TC and PY numeric conversion: `pd.to_numeric(..., errors='coerce')` applied to both to avoid arithmetic errors on string values. +- histNetwork — DB routing extended: added "OPENALEX" and "PUBMED" to the `wos()` branch. Both sources produce SR and DI fields in the format `wos()` expects, and OpenAlex's CR is resolved into the same "Author, Year, Journal" format via `resolve_openalex_references()` upstream in `standardizer.py` (verified 200/200 clean on the OpenAlex test set), so the same matching logic applies cleanly to both sources without a crash or an accuracy penalty. **[UPDATED]** +- wos — required columns check: if PY or CR are missing, print message and return None gracefully. +- wos — empty CR_df early return: if no valid references were parsed (e.g. empty CR for PubMed, or a record whose references failed to resolve), return early with LCS=0 for all documents and NetMatrix=None instead of hanging. **[UPDATED]** +- wos — SR_FULL missing guard: if SR_FULL column is absent, fill with empty strings before building LABEL. +- wos — optional columns guard: if TI, DE, or ID are missing, fill with empty strings before building histData. +- wos — reactive.Value(M) removed before cocMatrix call: reactive.Value is a Shiny-specific object that crashes outside a running Shiny application. Fix: pass M directly since cocMatrix already handles plain DataFrames. +- scopus — required columns check: if CR or SR are missing, print message and return None gracefully. +- scopus — optional columns guard: if AU, BP, EP, SR_FULL, TI, DE, ID, or DI are missing, fill with safe defaults before processing. 
+ +### `metatagextraction.py` +**Status:** PASS after patching (all fields, both sources) +**Patches applied:** +- metaTagExtraction — isinstance check replacing `hasattr(df, "get")`: original code used `hasattr(df, "get")` to detect Shiny reactive objects, but pandas DataFrames also have `.get()`, so the check always resolved to True and called `df.get()` without arguments — crashing because pandas `.get()` requires a column name. Fix: `if isinstance(df, pd.DataFrame): M = df.copy() else: M = df.get().copy()`. +- SR — infinite loop fix: original while loop caused an infinite loop in pandas >= 2.0 when deduplicating SR values. Fix: replaced with a dict-based seen-counter that iterates over the index once, appending -b, -c, etc. for duplicates. +- SR — NaN guard before deduplication loop: added `.fillna("").astype(str).reset_index(drop=True)` before the seen-counter loop to prevent NaN values from being stored as keys and producing malformed SR strings. +- SR — JI empty string fallback: `M.loc[no_art, "JI"] = M.loc[no_art, "SO"]` fills rows where JI is "" with SO, preventing ", , " gaps in the SR string when JI is missing. +- SR — DB case normalization in author formatting: `M["DB"].iloc[0].lower() == "scopus"` normalizes the DB value to lowercase before comparison, making the author name reformatting robust to mixed-case DB values like "Scopus" or "SCOPUS". +- CR_SO — None replaced with "" for empty rows: original returned None for articles with no parsed cited sources (`lambda l: ";".join(l) if l else None`). None in a string column crashes downstream `.str.*` operations. Fix: `lambda l: ";".join(l) if l else ""`. +- AU_CO / AU1_CO — fillna float NaN guard: `M["C1"].fillna(M["RP"])` can produce numpy.float64 NaN when both C1 and RP are missing, making the cell non-iterable and crashing the country extraction loop. 
Fix: added `.infer_objects(copy=False)` and a follow-up `.apply(lambda x: x if isinstance(x, list) else ([] if pd.isna(x) else [x]))` to guarantee every cell is a list before iteration. +- AU_CO / AU1_CO — empty list fallback when both C1 and RP are missing: the explicit for loop after fillna sets `C1.at[i] = []` when the cell is still an empty list and RP is also NaN, preventing downstream iteration over None or float. +- AU_CO / AU1_CO — country name normalization before regex search: "RUSSIAN FEDERATION" is not present in countries.txt (listed as "RUSSIA"), so matches silently failed. Fix: applied `.replace("RUSSIAN FEDERATION", "RUSSIA")` and equivalent aliases (UNITED STATES → USA, ENGLAND / SCOTLAND / WALES / NORTH IRELAND → UNITED KINGDOM) to the input string before the regex search, not only to the output list. +- AU1_CO — None replaced with "" for country not found: original returned None when no country matched. Fix: `if pd.notna(country) else ""`. Note: downstream consumers checking `if country is None` must be updated to `if not country` to catch the empty string. +- AU_UN — `M.loc[condition, "AU_UN"]` replacing `M["AU_UN"].loc[...]`: original assignment syntax triggered SettingWithCopyWarning and could silently fail to modify the underlying DataFrame in some pandas versions. Fix: `M.loc[M["C3"].notna() & (M["C3"] != ""), "AU_UN"] = M["C3"]`. +- AU_UN — None replaced with "" in replace dict: original used `replace({"NOTDECLARED": None, "NOTREPORTED": None})`, which inserts None into a string column and crashes subsequent `.str.*` calls. Fix: `replace({"NOTDECLARED": "", "NOTREPORTED": ""})`. + +### `networkplot.py` +**Status:** PASS (all sources) +**Patches applied:** +- network_plot — empty graph guard on entry: after building bsk_network from NetMatrix, if the graph has no vertices or deg is empty, return None immediately instead of crashing on subsequent operations. 
+- network_plot — deg recomputed after degree-based filtering: after `delete_vertices()` in the degree branch, deg and `bsk_network.vs["deg"]` were stale. Fix: recompute both immediately after deletion. +- network_plot — deg recomputed after n-based filtering: same stale-deg issue in the n branch. Fix: recompute both immediately after deletion. +- network_plot — empty graph guard after filtering: after either filtering branch, check `len(bsk_network.vs) == 0` and return None gracefully before attempting simplification or clustering. +- network_plot — deg recomputed after isolate removal: after `delete_vertices(isolates)`, deg and `bsk_network.vs["deg"]` were stale. Fix: recompute both immediately after deletion. +- network_plot — empty graph guard after isolate removal: after removing isolates, check `len(bsk_network.vs) == 0` and return None gracefully before attempting clustering. +- network_plot — safe deg attribute access in label filtering: `bsk_network.vs["deg"]` raises a KeyError if the attribute was never set (e.g. after external filtering). Fix: `deg_vals = bsk_network.vs["deg"] if "deg" in bsk_network.vs.attributes() else bsk_network.degree()`. +- clustering_network — try/except around all clustering calls: several igraph community detection algorithms (spinglass, leading_eigenvector, infomap) raise exceptions on small, disconnected, or unweighted graphs. Fix: wrapped the entire if/elif chain in `try/except Exception`, falling back to a single-cluster assignment (`membership = [0] * n`) so the rest of the pipeline can continue. +- switch_layout — division-by-zero guard in coordinate normalization: when all nodes share the same layout coordinate on an axis (e.g. a single-node graph or perfectly collinear layout), range_coords is zero and normalization produces NaN. Fix: `range_coords[range_coords == 0] = 1` before dividing. + +### `tabletag.py` +**Status:** PASS (both sources). No patches required. 
+ +### `termextraction.py` +**Status:** PASS (both sources) +**Patches applied:** +- term_extraction — reactive vs DataFrame detection fixed: original used `hasattr(df, 'get')` to detect Shiny reactive objects, but pandas DataFrames also have a `.get()` method, causing `df.get()` to be called without arguments on plain DataFrames and crashing. Fix: `is_reactive = hasattr(df, 'get') and callable(df.get) and not isinstance(df, pd.DataFrame)`, then `M = df.get() if is_reactive else df.copy()`. +- term_extraction — reactive return path: original always called `df.set(M)` and returned df regardless of whether df was reactive. For plain DataFrames `df.set()` does not exist and crashes. Fix: `if is_reactive: df.set(M); return df else: return M` — only the reactive path calls `.set()`. + +### `thematicmap.py` +**Status:** PASS (both sources) +**Patches applied:** +- thematic_map — reactive vs DataFrame detection fixed: original used `hasattr(df, 'get')` which is True for plain pandas DataFrames too. Fix: `not isinstance(df, pd.DataFrame)` guard added so `df.get()` is only called on actual Shiny reactive objects; plain DataFrames are copied directly. +- thematic_map — M_plain extracted for term_extraction calls: term_extraction expects a plain DataFrame, not a reactive wrapper. Fix: M_plain is unwrapped from the reactive object before being passed to term_extraction in the TI and AB branches. +- thematic_map — TI branch: term_extraction run on M_plain, then result wrapped back in reactive.Value before passing to biblionetwork, and `m["TI_TM"]` updated so cluster_assignment can access it downstream. +- thematic_map — AB branch: same pattern as TI — term_extraction run on M_plain, result wrapped in reactive.Value for biblionetwork, and `m["AB_TM"]` updated for cluster_assignment. +- thematic_map — NetMatrix empty/None guard: biblionetwork can return None or an empty DataFrame when the keyword column is absent or has no co-occurrences (e.g. 
PubMed DE is always empty from the eSummary API). Fix: `if NetMatrix is not None and not NetMatrix.empty` check before calling network_plot, returning a graceful `None, None, pd.DataFrame(), pd.DataFrame(), pd.DataFrame()` tuple otherwise. +- thematic_map — Net None guard: network_plot can return None on small or empty graphs. Fix: explicit `if Net is None` check after the network_plot call, returning the same safe empty tuple. +- thematic_map — node_colors None guard: `net.vs['color']` can contain None entries if clustering produced uncolored nodes. Fix: `node_colors = ["#D3D3D3" if c is None else c for c in node_colors]` applied immediately after extraction. +- thematic_map — DI missing guard in cluster_assignment: if DI is absent from the DataFrame, the column selection `['DI', 'AU', 'TI', 'SO', 'PY', 'TC', 'TCpY', 'NTC', 'SR']` crashes with a KeyError. Fix: `if 'DI' not in M.columns: M['DI'] = np.nan` before the assign block. +- thematic_map — TC non-numeric guard in cluster_assignment: `M['TC'] / (year - M['PY'])` crashes if TC contains strings or NaN. Fix: `pd.to_numeric(M['TC'], errors='coerce').fillna(0)` applied before the arithmetic. +- thematic_map — PY non-numeric guard in cluster_assignment: same arithmetic crashes if PY is stored as a string. Fix: `pd.to_numeric(M['PY'], errors='coerce')` applied before TCpY calculation. +- cluster_assignment — field column missing guard: if the requested field (or its derived _TM variant) is absent from M, the function crashes immediately on `M[field]`. Fix: `if field not in M.columns: return pd.DataFrame()` early return. +- cluster_assignment — filtered_df empty guard raised as ValueError: after filtering sEij_df by `df_lab['words']`, if no rows survive (e.g. all keywords were too infrequent or filtered out), the subsequent `.groupby().agg()` produces a silent empty result or crashes. Fix: explicit `if filtered_df.empty: raise ValueError(...)` with a descriptive message before the aggregation block. 
+ +## Functions + +### `get_affiliationproductionovertime.py` +**Status:** PASS (both sources) +**Patches applied:** +- metaTagExtraction called before use to derive AU_UN column +- Reactive/DataFrame check — correctly uses `not isinstance(df, pd.DataFrame)` +- Safety check: AU_UN missing after extraction → returns empty figure +- Safety check: AFFY empty after filtering → returns empty figure +- Safety check: AffOverTime empty → returns empty figure + +### `get_annualproduction.py` +**Status:** PASS (both sources) +**Patches applied:** +- Reactive/DataFrame check — correctly uses `not isinstance(df, pd.DataFrame)` +- PY column forced to int safely with `pd.to_numeric(errors="coerce").fillna(0)` + +### `get_authorlocalimpact.py` +**Status:** PASS (both sources) +**Patches applied:** +- Line 16: `df = df.get()` → fixed with isinstance check. Reason: pandas `.get()` requires a column name, crashes without one. Fix: `isinstance(df, pd.DataFrame)` check. + +### `get_authorproductionovertime.py` +**Status:** PASS (both sources) +**Patches applied:** +- Line 19: `data = df.get()` → fixed with isinstance check. + +### `get_averagecitations.py` +**Status:** PASS (both sources) +**Patches applied:** +- Line 14: `data = df.get()` → fixed with isinstance check. +- Line 32: `current_year - table["PY"]` → TypeError. Reason: PY is stored as a string in the standardized DataFrame, but the subtraction requires numeric values. Fix: added `pd.to_numeric(table["PY"], errors="coerce")` before the calculation. + +### `get_bradfordlaw.py` +**Status:** PASS (both sources) +**Patches applied:** +- Line 15: `data = df.get()` → fixed with isinstance check. + +### `get_citedcountries.py` +**Status:** PASS (both sources) +**Patches applied:** +- Reactive/DataFrame check — correctly uses `not isinstance(df, pd.DataFrame)` +- Filter for empty AU1_CO strings added — dropna alone does not catch empty strings +- Line 110: safety check added before `int(max_x // 10)`.
Reason: PubMed has no affiliation data, so x_values is empty, `x_values.max()` returns NaN, and `int(NaN)` crashes. Fix: return empty figure if x_values is empty or max_x is NaN. +**Known limitations:** +- PubMed returns empty results — eSummary API provides no affiliation data + +### `get_clusteringcoupling.py` +**Status:** PASS (both sources) +**Patches applied:** +- Safety check: couplingMap returns None when network is empty → returns empty figures instead of crashing +**Known limitations:** +- PubMed: CR is empty from the eSummary API, so the coupling map cannot be built +- **[UPDATED]** OpenAlex: CR is resolved into WoS-style "Author, Year, Journal" citation strings via `resolve_openalex_references()` (verified 200/200 clean on the test set) — this limitation no longer applies to OpenAlex. + +### `get_co_occurence_network.py` +**Status:** PASS (both sources) +**Patches applied:** +- field_by_year() line 425: PY converted to numeric before percentile calculation. Reason: PY is stored as a string, but `np.percentile` requires numeric values. Fix: `pd.to_numeric(M['PY'], errors='coerce').values` +**Warnings (non-blocking):** +- Line 437: `n[col_idx]` uses deprecated integer indexing on Series. Will break in future pandas versions. Fix: change to `n.iloc[col_idx]` + +### `get_cocitation.py` +**Status:** PASS (both sources) +**Known limitations:** +- PubMed: co-citation matrix empty — CR not returned by eSummary API +- **[UPDATED]** OpenAlex: CR is resolved into WoS-style citation strings via `resolve_openalex_references()`, verified clean on the 200-row test set — co-citation results are no longer limited by raw URLs for this source.
+ +### `get_collaborationnetwork.py` +**Status:** PASS (both sources) +**Patches applied:** +- Reactive/DataFrame check — correctly uses `not isinstance(df, pd.DataFrame)` before calling `.get()` +- Safety check: network_plot returns None when graph is empty → returns empty figures instead of crashing +**Notes:** +- Field argument accepts "COL_AU", "COL_UN", "COL_CO" +- Tested with COL_AU (author collaboration network) +- COL_UN and COL_CO depend on AU_UN and AU_CO — each is guarded with a column-presence check (`if "AU_UN" not in m.columns` / `if "AU_CO" not in m.columns`) before calling metaTagExtraction, so ETL-native values (e.g. `parse_openalex_countries()`'s `AU_CO` for OpenAlex) are preserved when already present. + +### `get_correspondingauthorcountries.py` +**Status:** PASS (both sources) +**Patches applied:** +- Reactive/DataFrame check — correctly uses `not isinstance(df, pd.DataFrame)` before calling `.get()` +- Filter for empty AU1_CO strings — dropna alone does not catch empty strings +- Safety check after filtering — if all countries were blank, returns empty figure instead of crashing +**Known limitations:** +- Results will be empty for PubMed and limited for OpenAlex because affiliation data (C1) is often missing, so AU1_CO cannot be derived + +### `get_countriesproduction.py` +**Status:** PASS (both sources) +**Patches applied:** +- Reactive/DataFrame check — correctly uses `not isinstance(df, pd.DataFrame)` before calling `.get()` +- Filter for empty AU_CO strings after explode — prevents empty country strings from being counted +**Known limitations:** +- Results will be limited for OpenAlex and empty for PubMed because affiliation data (C1) is often missing, so AU_CO cannot be derived + +### `get_countriesproductionovertime.py` +**Status:** PASS (both sources) +**Patches applied:** +- Reactive/DataFrame check — correctly uses `not isinstance(df, pd.DataFrame)` before calling `.get()` +- Safety check: AFFY empty after filtering → returns empty figure +- Safety check: AffOverTime empty → returns empty figure
+**Known limitations:** +- Results will be limited for OpenAlex and empty for PubMed because affiliation data (C1) is often missing, so AU_CO cannot be derived + +### `get_factorialanalysis.py` +**Status:** PASS (both sources) +**Patches applied:** +- Line 82: Reactive/DataFrame check — correctly uses `not isinstance(df, pd.DataFrame)` before calling `.get()` +- Line 91: df_plain passed to conceptual_structure instead of original df — ensures plain DataFrame is used, not the reactive wrapper +- (line 244): safety check if all Dim2 values are equal — range is 0 and label_offset would cause division by zero +- (line 614): safety check if `results.get()` returns None — neither 'df' nor 'res' key exists in results +- (line 593): safety check if all terms filtered out by min_degree — CW would be empty DataFrame +- (line 637): safety check if n_clusters greater than number of available terms +- (line 818): safety check if all points equidistant from centroid +- Line 549: `CW.loc` crashes when CW is None. Reason: cocMatrix returns None when ID field is empty (Keywords Plus always empty for OpenAlex and PubMed). Fix: added None check before `CW.loc` call, returns empty result instead of crashing. +**Known limitations:** +- ID (Keywords Plus) always empty for OpenAlex and PubMed so conceptual_structure produces empty results for both sources + +### `get_filters.py` +**Status:** PASS (both sources) +**Patches applied:** +- PY column forced to numeric safely with `pd.to_numeric(errors="coerce").fillna(0).astype(int)` +- TC column forced to numeric safely with same pattern +- Line 15: `data = df.get()` → fixed with isinstance check. Reason: pandas `.get()` requires a column name as argument, crashes without one. Fix: `isinstance(df, pd.DataFrame)` check: if it's a DataFrame → copy it directly; if it's a Shiny reactive object → use `.get()` to unwrap it. 
+**Notes:** +- `get_filtered_table()` in the same file is not testable in isolation because it requires Shiny input objects (`input.year_slider()`, `input.languages()`, etc.) that are only available inside the dashboard + +### `get_frequentwords.py` +**Status:** PASS (all word types, both sources) +**Patches applied:** +- Reactive/DataFrame check — correctly uses `not isinstance(df, pd.DataFrame)` before calling `.get()` +- Same reactive/DataFrame check for df_plain passed to term_extraction +- `safe_parse()` replaces `eval()` for DE/ID columns — handles malformed strings without crashing +- filter with `isinstance(sublist, list)` before iterating — avoids TypeError on None or str in TI/AB path +- remove_terms applied to all tags, not just DE/ID — fixes silent bug where stopword removal was skipped for TI/AB +- wrapped `term_extraction()` call in try/except ValueError — returns `{}` when vocabulary is empty +- table_tag — missing-column guard for WoS-proprietary fields that are absent from non-WoS sources (e.g. WC): added `if tag not in M.columns: return {}` before direct column access. Fixes `Error in analysis: 'WC'`. +- table_tag — literal "nan" string filtering for AB/TI fields: same root cause and fix as in `get_treemap.py` — drop rows where the field is NaN or the literal string "nan" before `term_extraction()`, plus stray-token stripping in the extracted list. Fixes a leftover "nan" bubble (200 occurrences, 100%) shown on Abstracts. +- get_frequent_words — empty-result safety guard before plotting: this file was missing the `if word_counts.empty: return ...` check already present in `get_wordcloud.py` and `get_treemap.py`. Without it, an empty/degenerate result could leave a stale plot bubble visible in the UI instead of rendering cleanly empty. Fix: added the same guard, returning an empty FigureWidget and table when no words are found. +**Known limitations:** +- AB/PubMed returns empty results — PubMed eSummary API does not return abstracts, so the vocabulary is empty. Not an ETL bug.
+ +### `get_historiograph.py` +**Status:** PASS (both sources) +**Patches applied:** +- Replaced two `raise ValueError` blocks after `histNetwork()` returns None with a graceful return: empty DataFrame and temp HTML file path instead of crashing, consistent with the pattern used in `get_clusteringcoupling.py` and `get_citedcountries.py`. Removed redundant first `if hist_results is None` check — the second condition already covers it. +- `node_label="ID"` branch: replaced unsafe `eval()` on Author_Keywords with a safe parser that handles list, semicolon-separated, and comma-separated formats without crashing on non-Python strings. +- `node_label="DE"` branch: same safe parser applied to KeywordsPlus field for the same reason. +**Known limitations:** +- PubMed: CR is empty from eSummary API, so histNetwork has nothing to build a citation graph from. +- **[UPDATED]** OpenAlex: CR is resolved into WoS-style "Author, Year, Journal" citation strings via `resolve_openalex_references()` before reaching this function — verified 200/200 clean on the test set — so historiograph output is supported for OpenAlex. Actual output quality still depends on how many of those references match other documents within the sample. + +### `get_localcitedauthors.py` +**Status:** PASS (both sources) +**Patches applied:** +- Reactive/DataFrame check — correctly uses `not isinstance(df, pd.DataFrame)` before calling `.get()` to unwrap Shiny reactive objects +- Early return if all LCS values are 0 — avoids hanging on PubMed data, where CR is empty and histNetwork cannot build a citation graph. **[UPDATED]** +**Known limitations:** +- PubMed: CR is empty from eSummary API, LCS is always 0, function returns empty result. +- **[UPDATED]** OpenAlex: CR is resolved into WoS-style citation strings via `resolve_openalex_references()`, verified 200/200 clean on the test set — local cited authors output is supported for OpenAlex. 
+ +### `get_localciteddocuments.py` +**Status:** PASS (both sources) +**Patches applied:** +- Line 16: `M = df.get()` → fixed with isinstance check. Reason: pandas `.get()` requires a column name as argument, crashes without one. Fix: `isinstance(df, pd.DataFrame)` check: if it's a DataFrame → use it directly; if it's a Shiny reactive object → use `.get()` to unwrap it. +**Known limitations:** +- PubMed: CR is empty from eSummary API, LCS is always 0, function returns empty result. +- **[UPDATED]** OpenAlex: CR is resolved into WoS-style citation strings via `resolve_openalex_references()`, verified 200/200 clean on the test set — local cited documents output is supported for OpenAlex. + +### `get_localcitedreferences.py` +**Status:** PASS (both sources) +**Patches applied:** +- Line 19: `data = df.get()` → fixed with isinstance check. Reason: pandas `.get()` requires a column name as argument, crashes without one. Fix: `data = df if isinstance(df, pd.DataFrame) else df.get()`. +- After filtering step: added early return when source_counts is empty. Reason: PubMed CR is always empty, causing max_x to be NaN and crashing downstream with `ValueError: cannot convert float NaN to integer` when computing x-axis ticks. Fix: return `(go.Figure(), empty_df)` gracefully. + +### `get_localcitedsources.py` +**Status:** PASS (both sources) +**Patches applied:** +- Line 10: `data = df.get().copy()` → fixed with isinstance check. Reason: pandas `.get()` requires a column name as argument, crashes without one. Fix: `data = df.copy() if isinstance(df, pd.DataFrame) else df.get().copy()`. + +### `get_lotkalaw.py` +**Status:** PASS (both sources) +**Patches applied:** +- Line 17: `data = df.get()` → fixed with isinstance check. Reason: pandas `.get()` requires a column name as argument, crashes without one. Fix: `data = df if isinstance(df, pd.DataFrame) else df.get()`. 
+ +### `get_maininformations.py` +**Status:** PASS (both sources) +**Patches applied:** +- Line 10: `data = df.get()` → fixed with isinstance check. Reason: pandas `.get()` requires a column name as argument, crashes without one. Fix: `data = df if isinstance(df, pd.DataFrame) else df.get()`. + +### `get_referencesspectroscopy.py` +**Status:** PASS (both sources) +**Patches applied:** +- Line 21: `df = df.get()` → fixed with isinstance check. Reason: pandas `.get()` requires a column name as argument, crashes without one. Fix: `df = df if isinstance(df, pd.DataFrame) else df.get()`. +- CR list conversion: CR column entries are joined into semicolon-separated strings before processing if they are lists, as produced by the ETL pipeline. +- Empty table guard: if no references fall within the year range, returns `(empty FigureWidget, empty DataFrame, empty DataFrame)` gracefully instead of crashing downstream. + +### `get_relevantaffiliations.py` +**Status:** PASS (both sources) +**Patches applied:** +- `df.get()` → fixed with isinstance check. Reason: pandas `.get()` requires a column name as argument, crashes without one. Fix: `df.get() if hasattr(df, 'get') and callable(df.get) and not isinstance(df, pd.DataFrame) else df`. +- metaTagExtraction return handling: AU_UN is a derived field that must be extracted before use, so `metaTagExtraction(df, Field="AU_UN")` is called only when `disambiguation == "yes"`. +- Safety check after extraction: if data is None or empty, returns empty figure and empty DataFrame gracefully. +- Missing AU_UN column guard: if AU_UN is absent after extraction in disambiguation mode, returns empty figure and empty DataFrame gracefully. +- Missing C1 column guard: if C1 is absent in non-disambiguation mode, returns empty figure and empty DataFrame gracefully. +- Empty affiliations guard: if affiliations is empty after explode, returns empty figure and empty DataFrame gracefully. 
+ +### `get_relevantauthors.py` +**Status:** PASS (both sources) +**Patches applied:** +- Line 14: `data = df.get()` → fixed with isinstance check. Reason: pandas `.get()` requires a column name as argument, crashes without one. Fix: `data = df if isinstance(df, pd.DataFrame) else df.get()`. +- None check before `df.get()`: if df is None, returns `(None, empty DataFrame)` gracefully. +- Empty data check after unwrapping: if data is None or empty, returns `(None, empty DataFrame)` gracefully. +- AU column guard: if AU is missing, fills with empty lists to avoid KeyError downstream. +- AU list format guard: ensures AU entries are always lists, handling string and NaN cases. +- Empty authors check: if no authors are found after flattening, returns `(None, empty DataFrame)` gracefully. + +### `get_relevantsources.py` +**Status:** PASS (both sources) +**Patches applied:** +- Line 17: `df.get()` → fixed with isinstance check. Reason: pandas `.get()` requires a column name as argument, crashes without one. Fix: `data = df if isinstance(df, pd.DataFrame) else df.get()`. + +### `get_sourceslocalimpact.py` +**Status:** PASS (both sources) +**Patches applied:** +- Line 18: `df.get()` → fixed with isinstance check. Reason: pandas `.get()` requires a column name as argument, crashes without one. Fix: `data = df if isinstance(df, pd.DataFrame) else df.get()`. +- TC and PY numeric casting: `pd.to_numeric(..., errors='coerce')` applied to both TC and PY before index calculations to avoid arithmetic errors on string values. + +### `get_table.py` +**Status:** function uses Shiny UI components. +**Patches applied:** +- Line 68: `data = df.get()` → fixed with isinstance check. Reason: pandas `.get()` requires a column name as argument, crashes without one. Fix: `data = df if isinstance(df, pd.DataFrame) else df.get()`. 
+- Second `df.get()` call in return statement: replaced with `data`, which is already the unwrapped DataFrame from patch 1, avoiding a redundant and potentially crashing second call. +- `data.map(lambda x: x == [])` → replaced with a per-column apply using isinstance check. Reason: applying a lambda cell-by-cell across the entire DataFrame raises TypeError on non-list cells (int, float) in some pandas versions. Fix: `count_empty_lists` function checks `isinstance(x, list) and len(x) == 0` safely per column. + +### `get_thematicevolution.py` / `get_thematic_evolution` +**Patches applied:** +- thematic_evolution – Lines 93–98: removed `reactive.Value(Mk)` wrapper — passing Mk directly to thematic_map. Reason: reactive.Value is a Shiny-specific object that crashes outside a running Shiny application with "No current reactive context". thematic_map already handles plain DataFrames via its own isinstance check. +- thematic_evolution – Lines 87–88: added early return when timeslice returns empty dict. Reason: timeslice returns `{}` when PY is all NaN (PubMed), causing the subsequent for loop to silently skip and results to be None, crashing on `results['Nodes']` downstream. +- get_thematic_evolution – Line 45: added None check on results after the `thematic_evolution()` call. Reason: thematic_evolution returns None when PY is all NaN or no topics are found — accessing `results['Nodes']` on None crashes with TypeError. +- get_thematic_evolution – missing 'Nodes' key guard: thematic_evolution can also return `{"check": False}` (no 'Nodes' key) when one or more periods have zero topic clusters — typically because the chosen field is empty for the data source (e.g. Keywords Plus ID is exclusive to Web of Science and is always empty for OpenAlex/PubMed). Fix: check `not results.get("check", True) or "Nodes" not in results` before unpacking, instead of crashing with `KeyError: 'Nodes'`. 
+- get_thematic_evolution – empty-result HTML generation: in both fallback cases above (results is None and missing 'Nodes'), the function previously returned None for the HTML network path, which the UI rendered as a broken "Not Found" page. Fix: generate a valid but empty pyvis.Network graph (no nodes/edges) and save it as a temporary HTML file, so the Map tab renders a blank canvas instead of an error. +- get_thematic_evolution – TM return value: the third return value (TM, consumed by the "Time Slice 1/2" tabs) was set to None in the fallback cases above, causing "object of type 'NoneType' has no len()" in the UI, which calls `len()` on it. Fix: return an empty list `[]` instead of None. +- timeslice – NaN PY guard: if PY is entirely NaN, return `{}` gracefully instead of crashing in `pd.cut`. +- timeslice – dropna before pd.cut: drop rows with NaN PY before cutting to avoid non-monotonic bin errors. +- timeslice – sorted breaks: wrap break points with `sorted(set(breaks))` to guarantee strictly increasing, duplicate-free bin edges regardless of whether the user-provided Cutting Year falls outside the actual PY range of the data (previous cause of "bins must increase monotonically"). If fewer than 3 unique edges remain, return `{}` instead of calling `pd.cut`. +- timeslice – empty-period guard: even after deduplication, an out-of-range Cutting Year can produce a bin that is valid for `pd.cut` but contains zero rows. Downstream code (`min()`/`max()` on each period's PY values) crashed with "min() arg is an empty sequence" on such empty periods. Fix: filter out empty sub-DataFrames after splitting; if fewer than 2 non-empty periods remain, return `{}`. +- normalize_to_minus1_1 – if all values are equal, return zeros instead of dividing by zero (range = 0 produces NaN everywhere). +- thematic_evolution – resk_tuple unpacking: thematic_map returns exactly 5 values; original code tried to access index 5 which is always out of range. 
+- thematic_evolution – nclust derivation: nclust is now derived directly from the row count of the clusters DataFrame instead of always being None. +- thematic_evolution – inc_matrix accumulation: moved `pd.concat` and downstream processing outside the loop so all periods are accumulated before building the final result. +**Known limitations:** +- Keywords Plus (ID) as Text Source: always empty for OpenAlex/PubMed data (exclusive to Web of Science). With the patches above, this no longer crashes — it produces an empty Map/Table/Time Slice result instead. Use TI, AB, or DE for these data sources. +- PubMed: if PY parsing from the eSummary pubdate field fails entirely, the function returns gracefully (an empty network graph + empty table + empty list, per the patches above). +- OpenAlex: DE keywords are sparse; thematic evolution output may be minimal depending on the year range chosen. + +### `get_thematicmap.py` +**Status:** PASS (both sources) +**Patches applied:** +- None check on thematic_map return value: thematic_map returns None when NetMatrix is empty — unpacking directly would crash with `TypeError: cannot unpack non-iterable NoneType`. Fix: capture the full result first, check it for None, and return a safe empty tuple before unpacking. +- Variable rename: `map` shadowed the Python builtin `map()` function — renamed to `thematic_map_result` to avoid the collision. + +### `get_threefieldplot.py` +**Status:** PASS (both sources) +**Patches applied:** +- None/empty check after each cocMatrix call: cocMatrix returns None when the field is empty (e.g. PubMed DE is always empty from eSummary API) — accessing `.shape` on None crashes with AttributeError. Fix: return empty FigureWidget gracefully if any of the three matrices is None or empty. +- Early return when n1, n2, or n3 is 0: if cocMatrix returns an empty DataFrame for any field, reassigning LM.index/columns with a mismatched range crashes with `ValueError: Length mismatch`. Fix: return empty FigureWidget early. 
+- opacity normalization guard: original guard checked `weight_max > 0` but not `weight_max != weight_min` — if all nodes share the same weight, max - min is 0 and normalization produces NaN in every opacity value. Fix: added second condition to ensure range is non-zero before dividing, falling back to min_opacity for all nodes. +- isolated node remapping: if id_map does not cover all values in `Edges['from']` or `Edges['to']`, `.map()` produces NaN — the Sankey crashes with float indices instead of int. Fix: drop edges whose endpoints are not in id_map before remapping, then cast to int. + +### `get_treemap.py` +**Status:** PASS (both sources) +**Patches applied:** +- table_tag — `df.get()` → fixed with isinstance check. Reason: pandas `.get()` requires a column name as argument, crashes without one. Fix: `df.get() if hasattr(df, 'get') and callable(df.get) and not isinstance(df, pd.DataFrame) else df`. +- table_tag — plain DataFrame passed to term_extraction: term_extraction does not accept Shiny reactive objects — extract plain DataFrame before passing for AB/TI fields. +- table_tag — list filter before iterating: for non-DE/ID fields, added `isinstance(sublist, list)` check before iterating to avoid TypeError when sublist is a string or NaN. +- table_tag — remove_terms applied to all tags: original code only applied remove_terms for some tags. Fix: apply remove_terms filter to the final word_counts dict regardless of tag. +- get_treemap — safety check on empty word_counts: if table_tag returns an empty dict (e.g. PubMed DE is always empty), word_counts DataFrame is empty and `px.treemap` crashes. Fix: return empty FigureWidget and empty table gracefully. +- table_tag — missing-column guard for non-WoS-proprietary fields (e.g. WC): same fix as in `get_wordcloud.py` — `if tag not in M.columns: return Counter()` before direct column access. Fixes `Error in analysis: 'WC'` on Subject Categories. 
+- table_tag — literal "nan" string filtering for AB/TI fields: missing abstracts/titles sometimes arrive as the literal string "nan" rather than a true NaN (e.g. after a CSV/JSON round-trip). `term_extraction()` was tokenizing that string as a real word, producing a fake "nan" term that displaced or masked genuine terms (observed as a single "nan" block at 100% in the TreeMap on Abstracts). Fix: drop rows where the field is NaN or the literal string "nan" (case-insensitive) before calling `term_extraction()`, plus a second-layer filter stripping any stray "nan" tokens from already-extracted term lists. + +### `get_trendtopics.py` +**Status:** PASS (both sources) +**Patches applied:** +- get_trend_topics — isinstance check for `df.get()`: extract plain DataFrame before passing to term_extraction — it does not accept Shiny reactive objects. +- get_trend_topics — empty result guard: if field_by_year returns None or empty DataFrame, return empty FigureWidget and empty DataFrame gracefully instead of crashing on `px.scatter`. +- field_by_year — isinstance check for `df.get()`: same pattern — unwrap reactive or use plain DataFrame directly. +- field_by_year — cocMatrix None/empty guard: cocMatrix returns None when the field is empty (e.g. PubMed DE is always empty) — return empty DataFrame gracefully. +- field_by_year — PY numeric conversion: PY is stored as string in ETL output — convert to numeric with `pd.to_numeric(..., errors='coerce')` before passing to `np.quantile` to avoid `TypeError: unsupported operand type(s) for -: 'str' and 'str'`. +- field_by_year — safe_quantile empty array guard: if `np.repeat` produces an empty array (zero-frequency term), return `[nan, nan, nan]` gracefully instead of crashing in `np.quantile`. +- field_by_year — timespan type guard: timespan may be passed as an integer (time_window) rather than a `[start, end]` list — `len()` on an int crashes with TypeError. 
Fix: check `isinstance(timespan, (list, tuple))` before calling `len()`, fall back to data range if not a valid list. +- get_trend_topics — literal "nan" string filtering before term extraction: same root cause as above — for AB/TI fields, missing values stored as the literal string "nan" were being tokenized as a real term, producing a single fake "NAN" point on the plot that displaced genuine terms. Fix: drop rows where field_tt is NaN or the literal string "nan" (case-insensitive) before calling `term_extraction()`; return an empty result gracefully if no rows remain. +- field_by_year — second-layer "nan" token stripping: even after the upstream row-level filter, a stray "nan" token could in principle survive inside an extracted term list alongside valid terms. Added a filter (dropna + list-comprehension check) to strip any such literal "nan" tokens before building the co-occurrence matrix with `cocMatrix()`. + +### `get_wordcloud.py` +**Status:** PASS (both sources) +**Patches applied:** +- table_tag — isinstance check for `df.get()`: unwrap Shiny reactive or use plain DataFrame directly. Reason: pandas `.get()` requires a column name as argument, crashes without one. +- table_tag — plain DataFrame passed to term_extraction: term_extraction does not accept Shiny reactive objects — extract plain DataFrame before passing for AB/TI fields. +- table_tag — list filter before iterating: for non-DE/ID fields, added `isinstance(sublist, list)` check before iterating to avoid TypeError when sublist is a string or NaN. +- table_tag — remove_terms applied to all tags: original code only applied remove_terms for some tags. Fix: apply remove_terms filter to the final word_counts dict regardless of tag. +- get_wordcloud — empty word list guard: if sorted_words is empty (e.g. PubMed DE is always empty), write a minimal HTML file and return gracefully instead of crashing downstream. +- table_tag — missing-column guard for non-WoS-proprietary fields (e.g. 
WC - Subject Categories): added `if tag not in M.columns: return Counter()` before `text_data = M[tag]`. Reason: WC is never created in the standardized schema for OpenAlex/PubMed (no WoS equivalent), so direct column access raised a raw `KeyError: 'WC'` instead of degrading gracefully like the ID/DE paths. + +### `get_wordfrequency.py` +**Status:** PASS (both sources) +**Patches applied:** +- get_word_frequency — isinstance check for `df.get()`: extract plain DataFrame before passing to term_extraction — it does not accept Shiny reactive objects. +- get_word_frequency — term_extraction empty vocabulary guard: term_extraction crashes with `ValueError: empty vocabulary` when the field column is entirely empty (e.g. PubMed DE is always empty from eSummary API). Fix: wrap in try/except and return empty FigureWidget and empty DataFrame gracefully. +- get_word_frequency — empty TM column guard: if term_extraction succeeds but the TM column contains no terms, return empty results gracefully. +- get_word_frequency — top_words type normalization: top_words may be passed as a plain int rather than a `[start, end]` list — indexing an int crashes with TypeError. Fix: normalize to `[0, n]` if a plain int is given. +- get_word_frequency — column slice bounds clamping: if `top_words[0] >=` number of available columns, slicing crashes with IndexError. Fix: clamp start and end to valid range before slicing. +- keyword_growth — empty data guard: if data is empty after filtering, `data['Year'].min()` returns NaN and `range(NaN, NaN)` crashes with TypeError. Fix: return empty DataFrame with just a Year column. +- keyword_growth — safe split with type check: iterating over elements without type checking crashes with TypeError on non-string/non-list elements. Fix: `safe_split` returns empty list for unexpected types. +- trim_years — empty year range guard: if year_range is empty, return empty Series immediately instead of producing inconsistent results. 
+- get_word_frequency — missing-column guard before term_extraction: added `if field_wf not in df_plain.columns: return go.FigureWidget(go.Figure()), pd.DataFrame()`. Reason: unlike the other three files, this function calls `term_extraction()` unconditionally for any field_wf with no existence check, and only caught ValueError — a missing column (e.g. WC, if ever exposed in this widget) would have raised an unhandled KeyError. +- get_word_frequency — broadened exception handling: extended the existing try/except ValueError around `term_extraction()` to also catch KeyError, as a defensive fallback alongside the explicit check above. +- get_word_frequency — literal "nan" string filtering for AB/TI fields: same fix pattern as in `get_treemap.py`/`get_frequentwords.py`/`get_trendtopics.py` — drop rows where the field is NaN or the literal string "nan" before calling `term_extraction()`. +- keyword_growth — literal "nan" token stripping: added a filter (`data['Term'].astype(str).str.strip().str.lower() != 'nan'`) to remove stray "nan" tokens from the expanded term list, covering both free-text (AB/TI) and delimiter-separated (e.g. DE/ID) fields that route through this function. +- get_word_frequency — empty/degenerate keyword_growth result guard: added a check (`word_freq.empty or list(word_freq.columns) == ['Year']`) before attempting column slicing and plotting, to avoid building a chart from a result with no actual term columns. + +### `get_worldmapcollaboration.py` +**Status:** PASS (both sources) +**Patches applied:** +- metaTagExtraction return handling: after calling `metaTagExtraction(df, "AU_CO")`, unwrap result with isinstance check — metaTagExtraction may return a Shiny reactive or a plain DataFrame. +- AU_CO safe fill: `fillna("")` applied before exploding AU_CO to avoid NaN propagation when AU_CO is missing or empty. 
+- Country normalization: a corrections dict maps common country-name variants (USA, UK, SOUTH KOREA) to the standardized names used in the world geometry dataset. +- Network None/empty guard: if biblionetwork returns None or an empty result, return empty FigureWidget and empty DataFrame gracefully. +- Safe centroid computation: Longitude and Latitude converted with `pd.to_numeric(..., errors='coerce').fillna(0)` to avoid NaN coordinates crashing edge drawing. +- Manual coordinate fixes for UK and France (their computed centroids fall in the ocean or in overseas territories). +- Singapore patch: Singapore is absent from the 110m Natural Earth dataset — added manually with hardcoded coordinates. +- Safe edge width: `max(row['count'], 1)` prevents division by zero when computing edge width. +**Known limitations:** +- AU_CO is a derived column not produced by the ETL pipeline — metaTagExtraction cannot extract it from OpenAlex or PubMed data, so the collaboration map always returns an empty figure for both sources. + +### `get_citeddocuments.py` +**Status:** PASS (both sources) +**Patches applied:** +- `data = df.get()` → fixed with isinstance check. Reason: pandas `.get()` requires a column name as argument, crashes without one. Fix: `data = df if isinstance(df, pd.DataFrame) else df.get()`. +- None check before unwrapping: if df is None, returns `(None, empty DataFrame)` gracefully. +- Empty data check after unwrapping: if data is None or empty, returns `(None, empty DataFrame)` gracefully. +- Required columns guard: if SR, TC, or PY are missing, fills with safe defaults (0 for numeric, "" for strings). +- TC and PY numeric conversion: `pd.to_numeric(..., errors='coerce')` applied to both to avoid arithmetic errors on string values. +- Division by zero prevention in TCperYear: `max((current_year + 1 - row['PY']), 1)` prevents division by zero for documents with missing or future PY. +- Safe normalization: NormalizedTC groupby transform checks for zero or NaN mean before dividing. 
+- Empty tab guard: if groupby aggregation produces an empty table, returns `(None, empty DataFrame)` gracefully. + +### `get_sourcesproduction.py` +**Status:** PASS (both sources) +**Patches applied:** +- Line 18: `data = df.get()` → fixed with isinstance check. Reason: pandas `.get()` requires a column name as argument, crashes without one. Fix: `data = df if isinstance(df, pd.DataFrame) else df.get()`. +- PY string extraction for `data["PY"]`: PubMed PY may contain full date strings (e.g. "2026 Jun 6") instead of plain year integers — `astype(int)` crashes on these. Fix: extract first 4-digit year with `str.extract(r'(\d{4})')` and `pd.to_numeric` before casting to int. Rows with unparseable PY are dropped. +- WPY column name extraction for missing years: `WPY.columns` may also contain full date strings — extract 4-digit year from column names before comparing against the PY range to compute missing years. +- WPY column renaming before sort: `WPY.columns.astype(int)` crashes on full date strings. Fix: rename columns by extracting the first 4 characters as a year string, then sort using a safe `int(x) if x.isdigit() else 0` key. +**Known limitations:** +- PubMed: PY field from eSummary API returns full date strings (e.g. 
"2026 Jun 6") rather than 4-digit years — year extraction is required before any arithmetic on PY diff --git a/test.py b/test.py new file mode 100644 index 000000000..bfcc779f4 --- /dev/null +++ b/test.py @@ -0,0 +1,35 @@ +from www.services.api_retriever import retrieve +from www.services.standardizer import standardize +from www.services.validator import validate + +# Test OpenAlex +records = retrieve(query="machine learning", platform="openalex", total=50) +df = standardize(records, source="openalex") +df = validate(df) +print(df.head()) +df.to_csv("test_openalex.csv", index=False) +print("OpenAlex CSV generato") + +# Test OpenAlex (200 record) +records = retrieve(query="machine learning", platform="openalex", total=200) +df = standardize(records, source="openalex") +df = validate(df) +print(df.head()) +df.to_csv("test_openalex_200.csv", index=False) +print("OpenAlex CSV 200 generato") + +# Test PubMed +records = retrieve(query="machine learning", platform="pubmed", total=50) +df = standardize(records, source="pubmed") +df = validate(df) +print(df.head()) +df.to_csv("test_pubmed.csv", index=False) +print("PubMed CSV generato") + +# Test PubMed (200 record) +records = retrieve(query="machine learning", platform="pubmed", total=200) +df = standardize(records, source="pubmed") +df = validate(df) +print(df.head()) +df.to_csv("test_pubmed_200.csv", index=False) +print("PubMed CSV 200 generato") \ No newline at end of file diff --git a/test_openalex_200.csv b/test_openalex_200.csv new file mode 100644 index 000000000..9a486c387 --- /dev/null +++ b/test_openalex_200.csv @@ -0,0 +1,212 @@ +UT,DI,TI,PY,LA,DT,TC,SO,JI,AU,AF,C1,RP,AB,VL,IS,BP,EP,DE,AU_CO,CR,ID,PMID,DB,SR,SR_FULL +https://openalex.org/W2101234009,10.48550/arxiv.1201.0490,Scikit-learn: Machine Learning in Python,2012,en,preprint,63735,ARXIV (CORNELL UNIVERSITY),arXiv (Cornell University),"Fabián Pedregosa;Gaël Varoquaux;Alexandre Gramfort;Vincent Michel;Bertrand Thirion;Olivier Grisel;Mathieu Blondel;Müller, 
Andreas;Nothman, Joel;Louppe, Gilles;Peter Prettenhofer;Ron J. Weiss;Vincent Dubourg;Jake Vanderplas;Alexandre Passos;David Cournapeau;Matthieu Brucher;Matthieu Perrot;Édouard Duchesnay","Pedregosa, Fabian;Varoquaux, Gaël;Gramfort, Alexandre;Michel, Vincent;Thirion, Bertrand;Grisel, Olivier;Blondel, Mathieu;Müller, Andreas;Nothman, Joel;Louppe, Gilles;Prettenhofer, Peter;Weiss, Ron;Dubourg, Vincent;Vanderplas, Jake;Passos, Alexandre;Cournapeau, David;Brucher, Matthieu;Perrot, Matthieu;Duchesnay, Édouard","LNAO - Laboratoire de Neuroimagerie Assistée par Ordinateur (France);PARIETAL - Modelling brain structure, function and variability based on high-field MRI data (Neurospin, CEA Saclay, Bâtiment 145, 91191 Gif-sur-Yvette Cedex - France);LNAO - Laboratoire de Neuroimagerie Assistée par Ordinateur (France);PARIETAL - Modelling brain structure, function and variability based on high-field MRI data (Neurospin, CEA Saclay, Bâtiment 145, 91191 Gif-sur-Yvette Cedex - France);PARIETAL - Modelling brain structure, function and variability based on high-field MRI data (Neurospin, CEA Saclay, Bâtiment 145, 91191 Gif-sur-Yvette Cedex - France);LNAO - Laboratoire de Neuroimagerie Assistée par Ordinateur (France);PARIETAL - Modelling brain structure, function and variability based on high-field MRI data (Neurospin, CEA Saclay, Bâtiment 145, 91191 Gif-sur-Yvette Cedex - France);LNAO - Laboratoire de Neuroimagerie Assistée par Ordinateur (France);LNAO - Laboratoire de Neuroimagerie Assistée par Ordinateur (France);PARIETAL - Modelling brain structure, function and variability based on high-field MRI data (Neurospin, CEA Saclay, Bâtiment 145, 91191 Gif-sur-Yvette Cedex - France);Nuxeo (18-20 rue Soleillet 75020 Paris - France);Kobe University (Japan);Bauhaus-Universität Weimar (Geschwister-Scholl-Straße 8 99423 Weimar - Germany);Google Inc (Toronto - Canada);LAMI - Laboratoire de Mécanique et Ingénieries (IFMA. 
Campus des Cézeaux BP 265 63175 Aubière Cedex - France);University of Washington [Seattle] (Seattle, Washington 98105 - United States);Department of Mechanical and Industrial Engineering [UMass] (UMass Amherst College of Engineering Department of Mechanical and Industrial Engineering, 220 ELAB, University of Massachusetts Amherst, MA 01003-2210 - United States);Enthought Inc (515 Congress Avenue Suite 2100 Austin, TX 78701 - United States);TOTAL S.A. (France);LNAO (France);LNAO - Laboratoire de Neuroimagerie Assistée par Ordinateur (France)",,"Scikit-learn is a Python module integrating a wide range of state-of-the-art machine learning algorithms for medium-scale supervised and unsupervised problems. This package focuses on bringing machine learning to non-specialists using a general-purpose high-level language. Emphasis is put on ease of use, performance, documentation, and API consistency. It has minimal dependencies and is distributed under the simplified BSD license, encouraging its use in both academic and commercial settings. 
Source code, binaries, and documentation can be downloaded from http://scikit-learn.org.",,,,,Python (programming language);Documentation;Computer science;MIT License;Artificial intelligence;Machine learning;Programming language;License;Software engineering;Operating system,FR;JP;DE;CA;US,"Chang C, 2011, ACM TRANSACTIONS ON INTELLIGENT SYSTEMS AND TECHNOLOGY;Friedman J, 2010, PUBMED;Walt S, 2011, COMPUTING IN SCIENCE & ENGINEERING;Efron B, 2004, THE ANNALS OF STATISTICS;Fan R, 2008, ;Guyon I, 2004, EPRINTS SOTON (UNIVERSITY OF SOUTHAMPTON);Hanke M, 2009, NEUROINFORMATICS;Rokhlin V, 2009, SIAM JOURNAL ON MATRIX ANALYSIS AND APPLICATIONS;Omohundro S, 2009, ;Sören S, 2010, MAX PLANCK INSTITUTE FOR PLASMA PHYSICS;Michel V, 2011, PATTERN RECOGNITION;Zito T, 2008, FRONTIERS IN NEUROINFORMATICS;Dubois P, 2007, COMPUTING IN SCIENCE & ENGINEERING",,,OPENALEX,"Pedregosa F, 2012, ARXIV (CORNELL UNIVERSITY)","Pedregosa F, 2012, ARXIV (CORNELL UNIVERSITY)" +https://openalex.org/W3023540311,10.5860/choice.27-0936,"Genetic algorithms in search, optimization, and machine learning",1989,en,article,49340,CHOICE REVIEWS ONLINE,Choice Reviews Online,,,,,"From the Publisher: +This book brings together - in an informal and tutorial fashion - the computer techniques, mathematical tools, and research results that will enable both students and practitioners to apply genetic algorithms to problems in many fields. + +Major concepts are illustrated with running examples, and major algorithms are illustrated by Pascal computer programs. 
No prior knowledge of GAs or genetics is assumed, and only a minimum of computer programming and mathematics background is required.",27,02,27,0936,Computer science;Artificial intelligence;Machine learning;Quality control and genetic algorithms;Algorithm;Genetic algorithm;Meta-optimization,,,,,OPENALEX,"NA, 1989, CHOICE REVIEWS ONLINE","NA, 1989, CHOICE REVIEWS ONLINE" +https://openalex.org/W2125055259,,C4.5: Programs for Machine Learning,1992,en,book,23702,,,J. R. Quinlan,J. R. Quinlan,"Univ. of Sydney, Sydney, NSW, Australia#TAB#","J. R. Quinlan (corresponding author), Univ. of Sydney, Sydney, NSW, Australia#TAB#","Classifier systems play a major role in machine learning and knowledge-based systems, and Ross Quinlan's work on ID3 and C4.5 is widely acknowledged to have made some of the most significant contributions to their development. This book is a complete guide to the C4.5 system as implemented in C for the UNIX environment. It contains a comprehensive guide to the system's use , the source code (about 8,800 lines), and implementation notes. The source code and sample datasets are also available for download (see below). C4.5 starts with large sets of cases belonging to known classes. 
The cases,",,,,,Computer science;Unix;Classifier (UML);Machine learning;Artificial intelligence;Source code;Workstation;Software;Decision tree;Sample (material);Software engineering;Data mining;Programming language;Operating system,AU,,,,OPENALEX,"Quinlan J, 1992, ","Quinlan J, 1992, " +https://openalex.org/W3120740533,,UCI Machine Learning Repository,2007,en,article,24350,MEDICAL ENTOMOLOGY AND ZOOLOGY,Medical Entomology and Zoology,Arthur Asuncion,Arthur Asuncion,,"Arthur Asuncion (corresponding author), ",,,,,,Computer science;Artificial intelligence,,,,,OPENALEX,"Asuncion A, 2007, MEDICAL ENTOMOLOGY AND ZOOLOGY","Asuncion A, 2007, MEDICAL ENTOMOLOGY AND ZOOLOGY" +https://openalex.org/W1570448133,10.1016/c2009-0-19715-5,Data Mining: Practical Machine Learning Tools and Techniques,2011,en,book,25716,ELSEVIER EBOOKS,Elsevier eBooks,Ian H. Witten;Eibe Frank,"Witten, I. H. (Ian H.) 62970 author;Frank, Eibe. 62960 author",,,"As with any burgeoning technology that enjoys commercial attention, the use of data mining is surrounded by a great deal of hype. Exaggerated reports tell of secrets that can be uncovered by setting algorithms loose on oceans of data. But there is no magic in machine learning, no hidden power, no alchemy. Instead there is an identifiable body of practical techniques that can extract useful information from raw data. This book describes these techniques and shows how they work. The book is a major revision of the first edition that appeared in 1999. While the basic core remains the same, it has been updated to reflect the changes that have taken place over five years, and now has nearly double the references. 
The highlights for the new edition include thirty new technique sections; an enhanced Weka machine learning workbench, which now features an interactive interface; comprehensive information on neural networks; a new section on Bayesian networks; plus much more.",,,,,Computer science;Machine learning;Data science;Data mining;Artificial intelligence,,"1959- B, 1994, CHOICE REVIEWS ONLINE;Cristianini N, 2000, CAMBRIDGE UNIVERSITY PRESS EBOOKS;, 2014, APPLE ACADEMIC PRESS EBOOKS;Agrawal R, 1994, VERY LARGE DATA BASES;Wolpert D, 1992, NEURAL NETWORKS;Devroye L, 1996, STOCHASTIC MODELLING AND APPLIED PROBABILITY;McCallum A, 1998, ;Kira K, 1992, ELSEVIER EBOOKS;Hall M, 2000, RESEARCH COMMONS (THE UNIVERSITY OF WAIKATO);Egan J, 1975, ;Cavnar W, 1994, ;Pyle D, 1999, ;Kohavi R, 1996, ;Frank E, 1998, RESEARCH COMMONS (THE UNIVERSITY OF WAIKATO);Kimball R, 1996, ;Zaki M, 1997, ;Kushmerick N, 1997, ;Cypher A, 1993, ;Cheeseman P, 1996, ;Shafer J, 1996, ;Mehta M, 1996, LECTURE NOTES IN COMPUTER SCIENCE;Wang Y, 1996, RESEARCH COMMONS (THE UNIVERSITY OF WAIKATO);Liu H, 1996, ;Almuallim H, 1991, NATIONAL CONFERENCE ON ARTIFICIAL INTELLIGENCE;Drucker H, 1997, INTERNATIONAL CONFERENCE ON MACHINE LEARNING;Kerber R, 1992, NATIONAL CONFERENCE ON ARTIFICIAL INTELLIGENCE;Schapire R, 1997, QUT EPRINTS (QUEENSLAND UNIVERSITY OF TECHNOLOGY);Langley P, 1995, MEDICAL ENTOMOLOGY AND ZOOLOGY;Kong E, 1995, ELSEVIER EBOOKS;Kononenko I, 1995, ;Zheng Z, 2000, MACHINE LEARNING;Brodley C, 1996, ;Ting K, 1997, RESEARCH COMMONS (THE UNIVERSITY OF WAIKATO);Ramon J, 2000, INTERNATIONAL CONFERENCE ON MACHINE LEARNING;Almuallim H, 1992, ;Webb G, 1999, ;Kohavi R, 1996, ;Moore A, 1994, ELSEVIER EBOOKS;Bigus J, 1996, ;Kibler D, 1987, ELSEVIER EBOOKS;Omohundro S, 1987, COMPLEX SYSTEMS;Dhar V, 1996, ;Chevaleyre Y, 2001, LECTURE NOTES IN COMPUTER SCIENCE;Ramakrishnan R, 2000, ASSOCIATION FOR COMPUTING MACHINERY EBOOKS;Cherkauer K, 1996, ;Mann T, 1993, ;Holmes G, 1995, RESEARCH COMMONS (THE UNIVERSITY OF 
WAIKATO);Domingos P, 1997, INTERNATIONAL CONFERENCE ON MACHINE LEARNING;Maron O, 1998, DSPACE@MIT (MASSACHUSETTS INSTITUTE OF TECHNOLOGY);Giraud-Carrier C, 1996, ;Efron B, 1994, ;Ester M, 1996, ;Fisher R, 1936, ANNALS OF EUGENICS;Swets J, 1988, SCIENCE;Friedman N, 1997, MACHINE LEARNING;Cohen W, 1995, ELSEVIER EBOOKS;Dietterich T, 2000, MACHINE LEARNING;Fayyad U, 1993, INTERNATIONAL JOINT CONFERENCE ON ARTIFICIAL INTELLIGENCE;Dietterich T, 1995, JOURNAL OF ARTIFICIAL INTELLIGENCE RESEARCH;Srikant R, 1996, LECTURE NOTES IN COMPUTER SCIENCE;John G, 1994, ELSEVIER EBOOKS;Pelleg D, 2000, ;Dougherty J, 1995, ELSEVIER EBOOKS;Quinlan J, 1996, JOURNAL OF ARTIFICIAL INTELLIGENCE RESEARCH;Piateski G, 1991, MIT PRESS EBOOKS;Berry M, 1997, UNIVERSITY OF MARIBOR DIGITAL LIBRARY (UNIVERSITY OF MARIBOR);Kubát M, 1998, MACHINE LEARNING;Kass R, 1995, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Sahami M, 1998, ;Langley P, 1992, ;Freund Y, 1998, ;Cleary J, 1995, ELSEVIER EBOOKS;Freund Y, 1999, ;Kohavi R, 1995, LECTURE NOTES IN COMPUTER SCIENCE;Langley P, 1994, ELSEVIER EBOOKS;Cendrowska J, 1987, INTERNATIONAL JOURNAL OF MAN-MACHINE STUDIES;Beck, 1986, PUBMED;Frank E, 1999, RESEARCH COMMONS (UNIVERSITY OF WAIKATO);Cabena P, 1997, UNIVERSITY OF MARIBOR DIGITAL LIBRARY (UNIVERSITY OF MARIBOR);Frank E, 1998, MACHINE LEARNING;Breiman L, 1999, MACHINE LEARNING;Holmes G, 1999, LECTURE NOTES IN COMPUTER SCIENCE;Appelt D, 1999, AI COMMUNICATIONS;Demiröz G, 1997, LECTURE NOTES IN COMPUTER SCIENCE;Flach P, 2001, MACHINE LEARNING;Brownston L, 1985, CERN DOCUMENT SERVER (EUROPEAN ORGANIZATION FOR NUCLEAR RESEARCH);Moore A, 1998, JOURNAL OF ARTIFICIAL INTELLIGENCE RESEARCH;Ting K, 1997, RESEARCH COMMONS (UNIVERSITY OF WAIKATO);Kohavi R, 1997, ;Oates T, 1997, ;Witten I, 1999, ;, 1996, COMPUTERS & MATHEMATICS WITH APPLICATIONS;Saitta L, 1998, MACHINE LEARNING;Frank E, 1999, RESEARCH COMMONS (UNIVERSITY OF WAIKATO);RICHARDS D, 1998, INTERNATIONAL JOURNAL OF HUMAN-COMPUTER STUDIES;Ricci⋆ F, 1998, 
LECTURE NOTES IN COMPUTER SCIENCE;Kohavi R, 1995, ;John G, 2013, ARXIV (CORNELL UNIVERSITY);Moore A, 2021, ;Paynter G, 2000, RESEARCH COMMONS (UNIVERSITY OF WAIKATO);Cohen J, 1960, EDUCATIONAL AND PSYCHOLOGICAL MEASUREMENT;Cortes C, 1995, MACHINE LEARNING;Kumar D, 1995, CHOICE REVIEWS ONLINE;Cover T, 1967, IEEE TRANSACTIONS ON INFORMATION THEORY;Brin S, 1998, COMPUTER NETWORKS AND ISDN SYSTEMS;Kohavi R, 1997, ARTIFICIAL INTELLIGENCE;Sebastiani F, 2002, ACM COMPUTING SURVEYS;Freund Y, 1996, ;Lawson C, 1995, SOCIETY FOR INDUSTRIAL AND APPLIED MATHEMATICS EBOOKS;Friedman J, 2000, THE ANNALS OF STATISTICS;Ho T, 1998, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Ripley B, 1996, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Han J, 2000, ACM SIGMOD RECORD;Blum A, 1998, ;Stevens S, 1946, SCIENCE;Zhang T, 1996, ;Cooper G, 1992, MACHINE LEARNING;Dietterich T, 1997, ARTIFICIAL INTELLIGENCE;Friedman J, 1977, ACM TRANSACTIONS ON MATHEMATICAL SOFTWARE;Nigam K, 2000, MACHINE LEARNING;Hoerl A, 1970, TECHNOMETRICS;Koestler A, 1967, ;Schölkopf B, 1999, ;Brin S, 1997, ;Fisher D, 1987, MACHINE LEARNING;Vitter J, 1985, ACM TRANSACTIONS ON MATHEMATICAL SOFTWARE;Cessie S, 1992, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES C (APPLIED STATISTICS);Agrawal R, 1993, IEEE TRANSACTIONS ON KNOWLEDGE AND DATA ENGINEERING;Niknian M, 1995, TECHNOMETRICS;Domingos P, 1999, ;Hastie T, 1998, THE ANNALS OF STATISTICS;Nigam K, 2000, ;Day W, 1984, JOURNAL OF CLASSIFICATION;Hochbaum D, 1985, MATHEMATICS OF OPERATIONS RESEARCH;Utgoff P, 1989, MACHINE LEARNING;Webb G, 2000, MACHINE LEARNING;Marill T, 1963, IEEE TRANSACTIONS ON INFORMATION THEORY;Wang J, 2000, COGPRINTS (UNIVERSITY OF SOUTHAMPTON);Langley P, 1995, COMMUNICATIONS OF THE ACM;Mitchell T, 1994, COMMUNICATIONS OF THE ACM;Aha D, 1992, INTERNATIONAL JOURNAL OF MAN-MACHINE STUDIES;Fürnkranz J, 1994, ELSEVIER EBOOKS;Liu H, 1997, IEEE TRANSACTIONS ON KNOWLEDGE AND DATA ENGINEERING;Vafaie H, 2003, ;Gaines B, 1995, JOURNAL OF INTELLIGENT 
INFORMATION SYSTEMS;Wettschereck D, 1995, MACHINE LEARNING;Littlestone N, 1990, ;Bay S, 1999, INTELLIGENT DATA ANALYSIS;Rexine J, 1986, THE AMERICAN JOURNAL OF PHILOLOGY;Frank E, 2000, ;, 1989, CHOICE REVIEWS ONLINE;Vapnik V, 1995, ;Han J, 2012, CHOICE REVIEWS ONLINE;Ryzin J, 1986, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Breiman L, 1996, MACHINE LEARNING;Quinlan J, 1986, MACHINE LEARNING;Agrawal R, 1993, ;Duda R, 1973, ;Guyon I, 2002, MACHINE LEARNING;Kleinberg J, 1999, JOURNAL OF THE ACM;Piepel G, 1989, TECHNOMETRICS;Hartigan J, 1975, ;Ankerst M, 1999, ACM SIGMOD RECORD;Heckerman D, 1995, MACHINE LEARNING;BAYES, 1958, BIOMETRIKA;Chen⋆ M, 1996, IEEE TRANSACTIONS ON KNOWLEDGE AND DATA ENGINEERING;Liu B, 1998, ;Yan X, 2003, ;Holte R, 1993, MACHINE LEARNING;Atkinson A, 1995, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES A (STATISTICS IN SOCIETY);Dumais S, 1998, ;Littlestone N, 1988, MACHINE LEARNING;Stone P, 2000, AUTONOMOUS ROBOTS;Maron O, 1997, ;Breiman L, 1996, MACHINE LEARNING;Provost F, 1997, ;Witten I, 1999, MORGAN KAUFMANN PUBLISHERS INC. EBOOKS;Friedman J, 1996, MEDICAL ENTOMOLOGY AND ZOOLOGY;Califf M, 1997, ;Buntine W, 1992, STATISTICS AND COMPUTING;Utgoff P, 1997, MACHINE LEARNING;Freitag D, 2000, MACHINE LEARNING;Cardie C, 1993, ELSEVIER EBOOKS;John G, 1995, ;Drummond C, 2000, ;Salzberg S, 1991, MACHINE LEARNING;Gluck M, 1985, ESCHOLARSHIP (CALIFORNIA DIGITAL LIBRARY);Schölkopf B, 1998, ;Huffman S, 1996, LECTURE NOTES IN COMPUTER SCIENCE;Moore A, 2000, ;John G, 1997, ;Martin B, 1995, RESEARCH COMMONS (THE UNIVERSITY OF WAIKATO);Li M, 1992, JOURNAL OF COMPUTER AND SYSTEM SCIENCES;Jabbour K, 1988, IEEE TRANSACTIONS ON POWER SYSTEMS;Mooney U, 2000, ;Komarek P, 2000, ;Gennari J, 1988, ;Bengio Y, 1999, ÉRUDIT DOCUMENTS AND DATA REPOSITORY (ÉRUDIT CONSORTIUM, UNIVERSITY OF MONTREAL);Bouckaert R, 1995, DATA ARCHIVING AND NETWORKED SERVICES (DANS);C.J.C. 
B, 1998, ;, 2015, ;Johns M, 1959, ",,,OPENALEX,"Witten I, 2011, ELSEVIER EBOOKS","Witten I, 2011, ELSEVIER EBOOKS" +https://openalex.org/W1663973292,10.1117/1.2819119,Pattern Recognition and Machine Learning,2007,en,article,22083,JOURNAL OF ELECTRONIC IMAGING,Journal of Electronic Imaging,Nasser M. Nasrabadi,Nasser M. Nasrabadi,West Virginia Univ. (United States);Microsoft,"Nasser M. Nasrabadi (corresponding author), West Virginia Univ. (United States); Microsoft","The Journal of Electronic Imaging (JEI), copublished bimonthly with the Society for Imaging Science and Technology, publishes peer-reviewed papers that cover research and applications in all areas of electronic imaging science and technology.",16,4,049901,049901,Computer science;Imaging science;Cover (algebra);Data science;Artificial intelligence;Engineering,US;GB,"Hastie T, 2001, SPRINGER SERIES IN STATISTICS;Ripley B, 1996, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Robert C, 2004, SPRINGER TEXTS IN STATISTICS;Suykens J, 2002, WORLD SCIENTIFIC EBOOKS;Waserman L, 2003, ",,,OPENALEX,"Nasrabadi N, 2007, JOURNAL OF ELECTRONIC IMAGING","Nasrabadi N, 2007, JOURNAL OF ELECTRONIC IMAGING" +https://openalex.org/W1639032689,,"Genetic Algorithms in Search, Optimization and Machine Learning",1988,en,book,17773,,,David E. Goldberg,David E. Goldberg,,"David E. Goldberg (corresponding author), ","David Goldberg's Genetic Algorithms in Search, Optimization and Machine Learning is by far the bestselling introduction to genetic algorithms. Goldberg is one of the preeminent researchers in the field--he has published over 100 research articles on genetic algorithms and is a student of John Holland, the father of genetic algorithms--and his deep understanding of the material shines through. The book contains a complete listing of a simple genetic algorithm in Pascal, which C programmers can easily understand. 
The book covers all of the important topics in the field, including crossover, mutation, classifier systems, and fitness scaling, giving a novice with a computer science background enough information to implement a genetic algorithm and describe genetic algorithms to a friend.",,,,,Pascal (unit);Computer science;Genetic programming;Genetic algorithm;Machine learning;Artificial intelligence;Quality control and genetic algorithms;Theoretical computer science;Algorithm;Programming language;Meta-optimization,,,,,OPENALEX,"Goldberg D, 1988, ","Goldberg D, 1988, " +https://openalex.org/W1583837637,10.1145/1273496,Proceedings of the 24th international conference on Machine learning,2007,en,preprint,11733,,,,,,,"This volume contains the papers accepted to the 24th International Conference on Machine Learning (ICML 2007), which was held at Oregon State University in Corvalis, Oregon, from June 20th to 24th, 2007. ICML is the annual conference of the International Machine Learning Society (IMLS), and provides a venue for the presentation and discussion of current research in the field of machine learning. These proceedings can also be found online at: http://www.machinelearning.org. This year there were 522 submissions to ICML. There was a very thorough review process, in which each paper was reviewed by three program committee (PC) members. Authors were able to respond to the initial reviews, and the PC members could then modify their reviews based on online discussions and the content of this author response. For the first time this year there were two discussion periods led by the senior program committee (SPC), one just before and one after the submission of author responses. At the end of the second discussion period, the SPC members gave their recommendations and provided a summary review for each of their papers. 
Also for the first time, authors were asked to submit a list of changes with their final accepted papers, which was checked by the SPCs to ensure that reviewer comments had been addressed. Apart from the length restrictions on papers and the compressed time frame, the review process for ICML resembles that of many journal publications. In total, 150 papers were accepted to ICML this year, including a very small number of papers which were initially conditionally accepted, yielding an overall acceptance rate of 29%. ICML attracts submissions from machine learning researchers around the globe. The 150 accepted papers this year were geographically distributed as follows: 66 papers had a first author from the US, 32 from Europe, 19 from China or Hong Kong, 11 from Canada, 6 from India, 5 each from Australia and Japan, 3 from Israel, and 1 each from Korea, Russia and Taiwan. In addition to the main program of accepted papers, which includes both a talk and poster presentation for each paper, the ICML program included 3 workshops and 8 tutorials on machine learning topics which are currently of broad interest. We were also extremely pleased to have David Heckerman (Microsoft Research), Joshua Tenenbaum (Massachussetts Institute of Technology), and Bernhard Schölkopf (Max Planck Institute for Biological Cybernetics) as the invited speakers this year. Thanks to sponsorship by the Machine Learning Journal, we were able to award a number of outstanding student paper prizes. We were fortunate this year that ICML was co-located with the International Conference on Inductive Logic Programming (ILP 2007). 
ICML and ILP held joint sessions on the first day of ICML 2007.",,,,,Presentation (obstetrics);Library science;Computer science;Medical education;Medicine,,,,,OPENALEX,"NA, 2007, ","NA, 2007, " +https://openalex.org/W1746819321,10.7551/mitpress/3206.001.0001,Gaussian Processes for Machine Learning,2005,en,book,10489,THE MIT PRESS EBOOKS,The MIT Press eBooks,Carl Edward Rasmussen;Christopher K. I. Williams,Carl Edward Rasmussen;Christopher K. I. Williams,"Max Planck Institute for Biological Cybernetics, Max Planck Society;School of Informatics",,"A comprehensive and self-contained introduction to Gaussian processes, which provide a principled, practical, probabilistic approach to learning in kernel machines.",,,,,Machine learning;Artificial intelligence;Computer science;Online machine learning;Gaussian process;Probabilistic logic;Relevance vector machine;Support vector machine;Kernel method;Artificial neural network;Gaussian,DE,"1959- B, 1994, CHOICE REVIEWS ONLINE;Salton G, 1988, INFORMATION PROCESSING & MANAGEMENT;Platt J, 1998, THE MIT PRESS EBOOKS;Wiener N, 1949, THE MIT PRESS EBOOKS;Neal R, 1996, LECTURE NOTES IN STATISTICS;Cristianini N, 2000, EPRINTS SOTON (UNIVERSITY OF SOUTHAMPTON);Suykens J, 2002, WORLD SCIENTIFIC EBOOKS;Devroye L, 1996, STOCHASTIC MODELLING AND APPLIED PROBABILITY;1950- G, 2004, ELSEVIER EBOOKS;Wendland H, 2004, CAMBRIDGE UNIVERSITY PRESS EBOOKS;, 2000, APPLIED PHYSICS LETTERS;Györfi L, 2002, SPRINGER SERIES IN STATISTICS;Girosi F, 1995, NEURAL COMPUTATION;Bartlett P, 2006, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Geisser S, 1979, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Saunders C, 1998, EPRINTS SOTON (UNIVERSITY OF SOUTHAMPTON);Tipping M, 2003, ;Smola A, 2000, ;Winkler G, 1995, ;König H, 1986, OPERATOR THEORY;Silverman B, 1984, THE ANNALS OF STATISTICS;Seeger M, 2003, INFOSCIENCE (ECOLE POLYTECHNIQUE FÉDÉRALE DE LAUSANNE);Collobert R, 2000, APPLIED PHYSICS LETTERS;Wong E, 1979, ;Ritter K, 2000, LECTURE NOTES IN 
MATHEMATICS;Kammler D, 2008, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Teh Y, 2005, INFOSCIENCE (ECOLE POLYTECHNIQUE FÉDÉRALE DE LAUSANNE);McAllester D, 2003, MACHINE LEARNING;Rasmussen C, 2003, ;Gibbs M, 1997, ;Seeger M, 2005, INFOSCIENCE (ECOLE POLYTECHNIQUE FÉDÉRALE DE LAUSANNE);Stitson M, 1998, THE MIT PRESS EBOOKS;Grenander U, 1990, MEDICAL ENTOMOLOGY AND ZOOLOGY;Plaskota L, 1996, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Gao F, 2000, THE ANNALS OF STATISTICS;Zhu H, 1997, ASTON PUBLICATIONS EXPLORER (ASTON UNIVERSITY);Silverman B, 1978, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES C (APPLIED STATISTICS);Seeger M, 2004, INFOSCIENCE (ECOLE POLYTECHNIQUE FÉDÉRALE DE LAUSANNE);Arató M, 1982, LECTURE NOTES IN CONTROL AND INFORMATION SCIENCES;Pontil M, 2000, LECTURE NOTES IN COMPUTER SCIENCE;Micchelli C, 1979, DEFENSE TECHNICAL INFORMATION CENTER (DTIC);Choi T, 2018, FIGSHARE;Cornford D, 2002, JOURNAL OF NONPARAMETRIC STATISTICS;Szeliski R, 1987, NATIONAL CONFERENCE ON ARTIFICIAL INTELLIGENCE;Grünwald P, 2004, LECTURE NOTES IN COMPUTER SCIENCE;Hawkins D, 1989, COMMUNICATIONS IN STATISTICS - SIMULATION AND COMPUTATION;Minka T, 2001, DSPACE@MIT (MASSACHUSETTS INSTITUTE OF TECHNOLOGY);Hinton G, 1997, ;Neal R, 1997, ARXIV.ORG;Mandelbrot B, 1983, AMERICAN JOURNAL OF PHYSICS;Geman S, 1984, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Sacks J, 1989, STATISTICAL SCIENCE;Hoerl A, 2000, TECHNOMETRICS;Aronszajn N, 1950, TRANSACTIONS OF THE AMERICAN MATHEMATICAL SOCIETY;Valiant L, 1984, ;Arfken G, 1967, AMERICAN JOURNAL OF PHYSICS;Green P, 1994, ;, 1992, CHOICE REVIEWS ONLINE;Bishop C, 1998, NEURAL COMPUTATION;Kimeldorf G, 1971, JOURNAL OF MATHEMATICAL ANALYSIS AND APPLICATIONS;Hand D, 2007, DRUG SAFETY;Mazur P, 1959, PHYSICA;Zhang T, 2004, THE ANNALS OF STATISTICS;Mackay D, 1992, NEURAL COMPUTATION;Schoenberg I, 1938, TRANSACTIONS OF THE AMERICAN MATHEMATICAL SOCIETY;Diaconis P, 1986, THE ANNALS OF STATISTICS;Wahba G, 1985, THE ANNALS OF STATISTICS;Schoenberg I, 1964, 
PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Girard A, 2002, ;Rasmussen C, 2001, ;Solak E, 2002, ;O’Sullivan F, 1986, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Baxter L, 1996, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES D (THE STATISTICIAN);Mackay D, 2000, IEEE TRANSACTIONS ON NEURAL NETWORKS;Dawid A, 1976, BIOMETRICS;Meir R, 2003, ;Cox D, 1984, SIAM JOURNAL ON NUMERICAL ANALYSIS;Hansen L, 1997, OPEN SYSTEMS & INFORMATION DYNAMICS;Shepp L, 1966, THE ANNALS OF MATHEMATICAL STATISTICS;Eskin E, 2002, ;Kashyap R, 1981, IEEE TRANSACTIONS ON INFORMATION THEORY;Cox D, 1990, THE ANNALS OF STATISTICS;Luo Z, 1997, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Williams C, 1998, NEURAL COMPUTATION;Freedman D, 1999, THE ANNALS OF STATISTICS;Poggio T, 1988, JOURNAL OF COMPLEXITY;Tsuda K, 2002, NEURAL COMPUTATION;Wahba G, 1995, MONTHLY WEATHER REVIEW;Rasmussen C, 2005, ;Schwaighofer A, 2002, ;Kohn R, 1987, SIAM JOURNAL ON SCIENTIFIC AND STATISTICAL COMPUTING;Blight B, 1975, BIOMETRIKA;Williams C, 2002, CAMBRIDGE UNIVERSITY ENGINEERING DEPARTMENT PUBLICATIONS DATABASE;Wood S, 1998, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Williams C, 2002, NEURAL INFORMATION PROCESSING SYSTEMS;Stein M, 1991, ANNALS OF THE INSTITUTE OF STATISTICAL MATHEMATICS;Girosi F, 1991, DEFENSE TECHNICAL INFORMATION CENTER (DTIC);Thompson P, 1956, TELLUS A DYNAMIC METEOROLOGY AND OCEANOGRAPHY;Seeger M, 2003, ERA;Vapnik V, 1995, ;Wu Y, 1999, TECHNOMETRICS;Cressie N, 1992, TERRA NOVA;Bell J, 1978, MATHEMATICS OF COMPUTATION;Ripley B, 1996, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Müller H, 1991, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Rousseeuw P, 1984, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Dickey D, 1991, TECHNOMETRICS;Poggio T, 1990, PROCEEDINGS OF THE IEEE;Øksendal B, 2003, UNIVERSITEXT;Jones D, 2001, JOURNAL OF GLOBAL OPTIMIZATION;Williams C, 2000, ;Wahba G, 1975, NUMERISCHE MATHEMATIK;Duchon J, 1977, LECTURE NOTES IN MATHEMATICS;Williams C, 1995, ASTON PUBLICATIONS EXPLORER 
(ASTON UNIVERSITY);Cristianini N, 2006, STUDIES IN FUZZINESS AND SOFT COMPUTING;Collins M, 2002, THE MIT PRESS EBOOKS;Williams C, 1998, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Csató L, 2002, NEURAL COMPUTATION;Drineas P, 2005, ;, 2000, APPLIED PHYSICS LETTERS;Fine S, 2002, ;Herbrich R, 2002, NEURAL INFORMATION PROCESSING SYSTEMS;Chu W, 2005, CAMBRIDGE UNIVERSITY ENGINEERING DEPARTMENT PUBLICATIONS DATABASE;Farlie D, 1964, OR;Smola A, 2000, ;Paciorek C, 2003, ;Vishwanathan S, 2004, THE MIT PRESS EBOOKS;Faul A, 2002, THE MIT PRESS EBOOKS;Mackay D, 1999, NEURAL COMPUTATION;Simard P, 1991, NEURAL INFORMATION PROCESSING SYSTEMS;Opper M, 2000, NEURAL COMPUTATION;Ghahramani Z, 2002, NEURAL INFORMATION PROCESSING SYSTEMS;Sundararajan S, 2001, NEURAL COMPUTATION;Rasmussen C, 2000, NEURAL INFORMATION PROCESSING SYSTEMS;Micchelli C, 2004, UCL DISCOVERY (UNIVERSITY COLLEGE LONDON);Zhu J, 2001, ;Vijayakumar S, 2002, AUTONOMOUS ROBOTS;Murray‐Smith R, 2001, ENLIGHTEN (JURNAL BIMBINGAN DAN KONSELING ISLAM);Sollich P, 2004, ;Seeger M, 1999, INFOSCIENCE (ECOLE POLYTECHNIQUE FÉDÉRALE DE LAUSANNE);Sollich P, 1998, RESEARCH PORTAL (KING'S COLLEGE LONDON);Opper M, 1998, ASTON PUBLICATIONS EXPLORER (ASTON UNIVERSITY);Vivarelli F, 1998, NEURAL INFORMATION PROCESSING SYSTEMS;Ferrari‐Trecate G, 1998, NEURAL INFORMATION PROCESSING SYSTEMS;Saunders C, 2002, EPRINTS SOTON (UNIVERSITY OF SOUTHAMPTON);Malzahn D, 2002, THE MIT PRESS EBOOKS;Csató L, 2002, THE MIT PRESS EBOOKS;Kuß M, 2005, ;Boyd S, 2004, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Golub G, 2012, JOHNS HOPKINS UNIVERSITY PRESS EBOOKS;Press W, 1994, ;Duda R, 1973, ;Kölbig K, 1995, MATHEMATICS OF COMPUTATION;Brown R, 1991, BIOMETRICS;, 2004, CHOICE REVIEWS ONLINE;Cox D, 1966, PHYSICS TODAY;Ripley B, 1981, ;Schölkopf B, 2001, MPG.PURE (MAX PLANCK SOCIETY);Platt J, 2000, THE MIT PRESS EBOOKS;Iaglom A, 1987, SPRINGER EBOOKS;Lawrence N, 2003, ;O’Hagan A, 1978, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL 
METHODOLOGY);Tsokos C, 1992, TECHNOMETRICS;, 1964, ELSEVIER EBOOKS;Goldberg P, 1997, ASTON PUBLICATIONS EXPLORER (ASTON UNIVERSITY);Seeger M, 2003, JOURNAL OF MACHINE LEARNING RESEARCH;Boyle P, 2004, ;Berk R, 1996, TECHNOMETRICS;Jaakkola T, 1999, ;Dym H, 1989, REGIONAL CONFERENCE SERIES IN MATHEMATICS;Kailath T, 1971, IEEE TRANSACTIONS ON INFORMATION THEORY;Yang C, 2004, ;Blake I, 1973, IEEE TRANSACTIONS ON INFORMATION THEORY;Watkins C, 2000, THE MIT PRESS EBOOKS;Fowlkes C, 2005, ;Hájek J, 1958, CZECHOSLOVAK MATHEMATICAL JOURNAL;Quiñonero-Candela J, 2004, MPG.PURE (MAX PLANCK SOCIETY)",,,OPENALEX,"Rasmussen C, 2005, THE MIT PRESS EBOOKS","Rasmussen C, 2005, THE MIT PRESS EBOOKS" +https://openalex.org/W1503398984,,Machine learning a probabilistic perspective,2012,en,book,9328,,,Kevin P. Murphy,Kevin P. Murphy,,"Kevin P. Murphy (corresponding author), ","Today's Web-enabled deluge of electronic data calls for automated methods of data analysis. Machine learning provides these, developing methods that can automatically detect patterns in data and then use the uncovered patterns to predict future data. This textbook offers a comprehensive and self-contained introduction to the field of machine learning, based on a unified, probabilistic approach. The coverage combines breadth and depth, offering necessary background material on such topics as probability, optimization, and linear algebra as well as discussion of recent developments in the field, including conditional random fields, L1 regularization, and deep learning. The book is written in an informal, accessible style, complete with pseudo-code for the most important algorithms. All topics are copiously illustrated with color images and worked examples drawn from such application domains as biology, text processing, computer vision, and robotics. 
Rather than providing a cookbook of different heuristic methods, the book stresses a principled model-based approach, often using the language of graphical models to specify models in a concise and intuitive way. Almost all the models described have been implemented in a MATLAB software package--PMTK (probabilistic modeling toolkit)--that is freely available online. The book is suitable for upper-level undergraduates with an introductory-level college math background and beginning graduate students.",,,,,Computer science;Probabilistic logic;Artificial intelligence;Field (mathematics);Conditional random field;Heuristic;Machine learning;Graphical model;Regularization (linguistics);Software;Programming language,,,,,OPENALEX,"Murphy K, 2012, ","Murphy K, 2012, " +https://openalex.org/W1901616594,10.1126/science.aaa8415,"Machine learning: Trends, perspectives, and prospects",2015,en,review,9574,SCIENCE,Science,Michael I. Jordan;Tom M. Mitchell,M. I. Jordan;T. M. Mitchell,"Department of Electrical Engineering and Computer Sciences, Department of Statistics, University of California, Berkeley, CA, USA;Machine Learning Department, Carnegie Mellon University, Pittsburgh, PA, USA;Machine Learning Department, Carnegie Mellon University, Pittsburgh, PA, USA#TAB#","T. M. Mitchell (corresponding author), Machine Learning Department, Carnegie Mellon University, Pittsburgh, PA, USA; Machine Learning Department, Carnegie Mellon University, Pittsburgh, PA, USA#TAB#","Machine learning addresses the question of how to build computers that improve automatically through experience. It is one of today's most rapidly growing technical fields, lying at the intersection of computer science and statistics, and at the core of artificial intelligence and data science. Recent progress in machine learning has been driven both by the development of new learning algorithms and theory and by the ongoing explosion in the availability of online data and low-cost computation. 
The adoption of data-intensive machine-learning methods can be found throughout science, technology and commerce, leading to more evidence-based decision-making across many walks of life, including health care, manufacturing, education, financial modeling, policing, and marketing.",349,6245,255,260,Intersection (aeronautics);Computer science;Artificial intelligence;Core (optical fiber);Data science;Machine learning;Big data;Computation;Lying;Engineering;Data mining,US,"Krizhevsky A, 2017, COMMUNICATIONS OF THE ACM;Mnih V, 2015, NATURE;Sutton R, 1998, IEEE TRANSACTIONS ON NEURAL NETWORKS;Hinton G, 2006, SCIENCE;Hastie T, 2013, ;Schmidhuber J, 2014, NEURAL NETWORKS;Boyd S, 2011, FOUNDATIONS AND TRENDS® IN MACHINE LEARNING;Hinton G, 2012, IEEE SIGNAL PROCESSING MAGAZINE;Schultz W, 1997, SCIENCE;Murphy K, 2012, ;Dwork C, 2006, LECTURE NOTES IN COMPUTER SCIENCE;Bengio Y, 2009, FOUNDATIONS AND TRENDS® IN MACHINE LEARNING;Blei D, 2012, COMMUNICATIONS OF THE ACM;Valiant L, 1984, ;Valiant L, 1984, COMMUNICATIONS OF THE ACM;Taylor M, 2009, JOURNAL OF MACHINE LEARNING RESEARCH;Thrun S, 1998, ;Sra S, 2011, THE MIT PRESS EBOOKS;Mahoney M, 2012, CHAPMAN & HALL/CRC DATA MINING AND KNOWLEDGE DISCOVERY SERIES;Kleiner A, 2014, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Wehbe L, 2014, PLOS ONE;Duan R, 2014, JOURNAL OF THE ACM;Berthet Q, 2013, THE ANNALS OF STATISTICS;Blum A, 2013, JOURNAL OF THE ACM;Boyd M, 2011, FOUNDATIONS AND TRENDS® IN MACHINE LEARNING;Chandrasekaran V, 2013, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Duchi J, 2014, JOURNAL OF THE ACM;Balcan M, 2012, ARXIV (CORNELL UNIVERSITY);Decatur S, 2000, SIAM JOURNAL ON COMPUTING;Yaylalı E, 2011, WILEY ENCYCLOPEDIA OF OPERATIONS RESEARCH AND MANAGEMENT SCIENCE",,,OPENALEX,"Jordan M, 2015, SCIENCE","Jordan M, 2015, SCIENCE" +https://openalex.org/W4212863985,10.1007/978-0-387-45528-0,Pattern Recognition and Machine Learning,2006,en,book,9862,,,,,,,,,,,,Computer science;Artificial 
intelligence;Pattern recognition (psychology),,,,,OPENALEX,"NA, 2006, ","NA, 2006, " +https://openalex.org/W2084812512,,UCI Repository of machine learning databases,1998,en,article,10548,MEDICAL ENTOMOLOGY AND ZOOLOGY,Medical Entomology and Zoology,Catherine Blake,Catherine Blake,,"Catherine Blake (corresponding author), ",,,,,,Computer science;Database;Artificial intelligence,,,,,OPENALEX,"Blake C, 1998, MEDICAL ENTOMOLOGY AND ZOOLOGY","Blake C, 1998, MEDICAL ENTOMOLOGY AND ZOOLOGY" +https://openalex.org/W2997591727,10.5555/1953048.2078195,Scikit-learn: Machine Learning in Python,2011,en,article,8220,JOURNAL OF MACHINE LEARNING RESEARCH,Journal of Machine Learning Research,PedregosaFabian;VaroquauxGaël;GramfortAlexandre;MichelVincent;ThirionBertrand;GriselOlivier;BlondelMathieu;PrettenhoferPeter;WeissRon;DubourgVincent;VanderplasJake;PassosAlexandre;CournapeauDavid;BrucherMatthieu;PerrotMatthieu;DuchesnayÉdouard,PedregosaFabian;VaroquauxGaël;GramfortAlexandre;MichelVincent;ThirionBertrand;GriselOlivier;BlondelMathieu;PrettenhoferPeter;WeissRon;DubourgVincent;VanderplasJake;PassosAlexandre;CournapeauDavid;BrucherMatthieu;PerrotMatthieu;DuchesnayÉdouard,,,Scikit-learn is a Python module integrating a wide range of state-of-the-art machine learning algorithms for medium-scale supervised and unsupervised problems. This package focuses on bringing mach...,,,,,Python (programming language);Computer science;Artificial intelligence;Machine learning;Programming language,,,,,OPENALEX,"PedregosaFabian, 2011, JOURNAL OF MACHINE LEARNING RESEARCH","PedregosaFabian, 2011, JOURNAL OF MACHINE LEARNING RESEARCH" +https://openalex.org/W2953384591,10.48550/arxiv.1605.08695,TensorFlow: A system for large-scale machine learning,2016,en,preprint,8824,ARXIV (CORNELL UNIVERSITY),arXiv (Cornell University),Martı́n Abadi;Paul Barham;Jianmin Chen;Zhifeng Chen;Andy Davis;Jay B. 
Dean;Matthieu Devin;Sanjay Ghemawat;Geoffrey Irving;Michael Isard;Manjunath Kudlur;Josh Levenberg;Rajat Monga;Sherry Moore;Derek G. Murray;Benoit Steiner;Paul A. Tucker;Vijay Vasudevan;Pete Warden;Martin Wicke;Yuan Yu;Xiaoqiang Zheng,"Abadi, Martín;Barham, Paul;Chen, Jianmin;Chen, Zhifeng;Davis, Andy;Dean, Jeffrey;Devin, Matthieu;Ghemawat, Sanjay;Irving, Geoffrey;Isard, Michael;Kudlur, Manjunath;Levenberg, Josh;Monga, Rajat;Moore, Sherry;Murray, Derek G.;Steiner, Benoit;Tucker, Paul;Vasudevan, Vijay;Warden, Pete;Wicke, Martin;Yu, Yuan;Zheng, Xiaoqiang",[Google Brain];[Google Brain];[Google Brain];[Google Brain];[Google Brain];[Google Brain];[Google Brain];[Google Brain];[Google Brain];[Google Brain];[Google Brain];[Google Brain];[Google Brain];[Google Brain];[Google Brain];[Google Brain];[Google Brain];[Google Brain];[Google Brain];[Google Brain];[Google Brain];[Google Brain],,"TensorFlow is a machine learning system that operates at large scale and in heterogeneous environments. TensorFlow uses dataflow graphs to represent computation, shared state, and the operations that mutate that state. It maps the nodes of a dataflow graph across many machines in a cluster, and within a machine across multiple computational devices, including multicore CPUs, general-purpose GPUs, and custom designed ASICs known as Tensor Processing Units (TPUs). This architecture gives flexibility to the application developer: whereas in previous ""parameter server"" designs the management of shared state is built into the system, TensorFlow enables developers to experiment with novel optimizations and training algorithms. TensorFlow supports a variety of applications, with particularly strong support for training and inference on deep neural networks. Several Google services use TensorFlow in production, we have released it as an open-source project, and it has become widely used for machine learning research. 
In this paper, we describe the TensorFlow dataflow model in contrast to existing systems, and demonstrate the compelling performance that TensorFlow achieves for several real-world applications.",,,,,Scale (ratio);Computer science;Artificial intelligence;Machine learning;Cartography;Geography,US,"Russakovsky O, 2015, INTERNATIONAL JOURNAL OF COMPUTER VISION;Rumelhart D, 1986, NATURE;Mnih V, 2015, NATURE;淳司 柴, 2017, JOURNAL OF JAPAN SOCIETY FOR FUZZY THEORY AND INTELLIGENT INFORMATICS;Dean J, 2008, COMMUNICATIONS OF THE ACM;Hinton G, 2012, IEEE SIGNAL PROCESSING MAGAZINE;Zaharia M, 2012, ;Sutskever I, 2013, ;Dean J, 2012, ;, 2000, APPLIED PHYSICS LETTERS;Team T, 2016, ARXIV (CORNELL UNIVERSITY);Le Q, 2013, ;Hindman B, 2011, UC BERKELEY;Verma A, 2015, ;Karpathy A, 2014, ;Jordan M, 1997, ADVANCES IN PSYCHOLOGY;Chu C, 2007, THE MIT PRESS EBOOKS;Larochelle H, 2009, ;Li M, 2014, OPERATING SYSTEMS DESIGN AND IMPLEMENTATION;Burrows M, 2006, OPERATING SYSTEMS DESIGN AND IMPLEMENTATION;Ragan‐Kelley J, 2013, ;Jean S, 2015, ;Hinton G, 1989, ESCHOLARSHIP (CALIFORNIA DIGITAL LIBRARY);Li M, 2014, ;Yu Y, 2008, ;Chilimbi T, 2014, OPERATING SYSTEMS DESIGN AND IMPLEMENTATION;Collobert R, 2002, INFOSCIENCE (ECOLE POLYTECHNIQUE FÉDÉRALE DE LAUSANNE);Smola A, 2010, PROCEEDINGS OF THE VLDB ENDOWMENT;Byrd R, 2012, MATHEMATICAL PROGRAMMING;Cui H, 2016, ;Heigold G, 2013, ;Rossbach C, 2013, ;Chung E, 2013, ;Ovtcharov K, 2015, ;Ioffe S, 2024, ARXIV (CORNELL UNIVERSITY);He K, 2015, ARXIV (CORNELL UNIVERSITY);Chen T, 2015, ARXIV (CORNELL UNIVERSITY);Chetlur S, 2014, ARXIV (CORNELL UNIVERSITY);Krizhevsky A, 2014, ARXIV (CORNELL UNIVERSITY);Józefowicz R, 2016, ARXIV (CORNELL UNIVERSITY);Ba J, 2014, ARXIV (CORNELL UNIVERSITY);Xinghao P, 2017, ARXIV (CORNELL UNIVERSITY);Szegedy C, 2015, ARXIV (CORNELL UNIVERSITY);Nair A, 2015, ARXIV (CORNELL UNIVERSITY);Vinyals O, 2014, ARXIV (CORNELL UNIVERSITY);Dai A, 2015, ARXIV (CORNELL UNIVERSITY);Crankshaw D, 2014, ARXIV (CORNELL UNIVERSITY);Lavin A, 2015, 
ARXIV (CORNELL UNIVERSITY);Moritz P, 2015, ARXIV (CORNELL UNIVERSITY);Mikolov T, 2013, ARXIV (CORNELL UNIVERSITY);Jia Y, 2014, ARXIV (CORNELL UNIVERSITY);Sutskever I, 2014, ARXIV (CORNELL UNIVERSITY);Szegedy C, 2014, ARXIV (CORNELL UNIVERSITY);Niu F, 2011, ARXIV (CORNELL UNIVERSITY)",,,OPENALEX,"Abadi M, 2016, ARXIV (CORNELL UNIVERSITY)","Abadi M, 2016, ARXIV (CORNELL UNIVERSITY)" +https://openalex.org/W2118020653,10.1145/505282.505283,Machine learning in automated text categorization,2002,en,review,7898,ACM COMPUTING SURVEYS,ACM Computing Surveys,Fabrizio Sebastiani,Fabrizio Sebastiani,"Consiglio Nazionale delle Ricerche, Pisa, Italy","Fabrizio Sebastiani (corresponding author), Consiglio Nazionale delle Ricerche, Pisa, Italy","The automated categorization (or classification) of texts into predefined categories has witnessed a booming interest in the last 10 years, due to the increased availability of documents in digital form and the ensuing need to organize them. In the research community the dominant approach to this problem is based on machine learning techniques: a general inductive process automatically builds a classifier by learning, from a set of preclassified documents, the characteristics of the categories. The advantages of this approach over the knowledge engineering approach (consisting in the manual definition of a classifier by domain experts) are a very good effectiveness, considerable savings in terms of expert labor power, and straightforward portability to different domains. This survey discusses the main approaches to text categorization that fall within the machine learning paradigm. 
We will discuss in detail issues pertaining to three different problems, namely, document representation, classifier construction, and classifier evaluation.",34,1,1,47,Computer science;Categorization;Software portability;Artificial intelligence;Classifier (UML);Machine learning;Text categorization;Natural language processing,IT,"John G, 1994, ELSEVIER EBOOKS;Lewis D, 1998, LECTURE NOTES IN COMPUTER SCIENCE;Lang K, 1995, ELSEVIER EBOOKS;Cavnar W, 1994, ;Lewis D, 1994, ELSEVIER EBOOKS;Jones K, 1997, MORGAN KAUFMANN PUBLISHERS INC. EBOOKS;Koller D, 1997, ;McCallum A, 1998, ;Klinkenberg R, 2000, ;Cohen W, 1996, ;Lewis D, 1991, SCHOLARWORKS@UMASSAMHERST (UNIVERSITY OF MASSACHUSETTS AMHERST);Cohen W, 1999, ACM TRANSACTIONS ON INFORMATION SYSTEMS;Yang Y, 2002, JOURNAL OF INTELLIGENT INFORMATION SYSTEMS;Scott S, 1999, ;Galavotti L, 2000, LECTURE NOTES IN COMPUTER SCIENCE;Liere R, 1997, ;Caropreso M, 2001, ;Mladenić D, 1998, LECTURE NOTES IN COMPUTER SCIENCE;Weigend A, 1999, INFORMATION RETRIEVAL;Wermter S, 1996, LECTURE NOTES IN COMPUTER SCIENCE;Taira H, 1999, ;Willett P, 1988, ;Roussinov D, 1998, UA CAMPUS REPOSITORY (THE UNIVERSITY OF ARIZONA);Fuhr N, 1991, ;Diao Y, 2000, LECTURE NOTES IN COMPUTER SCIENCE;Escudero G, 2000, EUROPEAN CONFERENCE ON MACHINE LEARNING;Frasconi P, 2002, JOURNAL OF INTELLIGENT INFORMATION SYSTEMS;Myers K, 2000, ;Joachims T, 2002, JOURNAL OF INTELLIGENT INFORMATION SYSTEMS;Forsyth R, 1999, ;Cohen W, 1995, ELSEVIER EBOOKS;, 2002, PROGRAM ELECTRONIC LIBRARY AND INFORMATION SYSTEMS;Lam W, 1997, INTERNATIONAL JOINT CONFERENCE ON ARTIFICIAL INTELLIGENCE;Fuhr N, 1985, ;Fuhr N, 1984, INTERNATIONAL ACM SIGIR CONFERENCE ON RESEARCH AND DEVELOPMENT IN INFORMATION RETRIEVAL;Attardi G, 1998, ZENODO (CERN EUROPEAN ORGANIZATION FOR NUCLEAR RESEARCH);Hull D, 1997, ;Sable C, 1999, LECTURE NOTES IN COMPUTER SCIENCE;Lim J, 1999, ;Manning C, 1999, ;McCallum A, 2022, ;Roth D, 1998, ARXIV.ORG;Dagan I, 1997, ARXIV.ORG;Rodríguez M, 1997, ARXIV.ORG;Lewis D, 1994, ACM 
TRANSACTIONS ON OFFICE INFORMATION SYSTEMS;Knorz G, 1982, INTERNATIONAL ACM SIGIR CONFERENCE ON RESEARCH AND DEVELOPMENT IN INFORMATION RETRIEVAL;Fangmeyer H, 1968, IFIP CONGRESS;Díaz A, 1998, ;Lewis D, 1999, INTERNATIONAL ACM SIGIR CONFERENCE ON RESEARCH AND DEVELOPMENT IN INFORMATION RETRIEVAL;Tong R, 1992, TEXT RETRIEVAL CONFERENCE;Salton G, 1988, INFORMATION PROCESSING & MANAGEMENT;Yang Y, 1999, ;Schapire R, 2000, MACHINE LEARNING;Robertson S, 1976, JOURNAL OF THE AMERICAN SOCIETY FOR INFORMATION SCIENCE;Chakrabarti S, 1998, COMPUTER NETWORKS AND ISDN SYSTEMS;Weikum G, 2002, ACM SIGMOD RECORD;Tumer K, 1996, CONNECTION SCIENCE;Maron M, 1961, JOURNAL OF THE ACM;Gale W, 1992, COMPUTERS AND THE HUMANITIES;Schütze H, 1995, ;Ng H, 1997, ;Larkey L, 1996, ;Yang Y, 1994, ACM TRANSACTIONS ON INFORMATION SYSTEMS;Crestani F, 1998, ACM COMPUTING SURVEYS;Schapire R, 1998, ;Masand B, 1992, ;Weiss S, 1999, IEEE INTELLIGENT SYSTEMS AND THEIR APPLICATIONS;Fuhr N, 1991, ACM TRANSACTIONS ON INFORMATION SYSTEMS;Singhal A, 1996, INFORMATION PROCESSING & MANAGEMENT;Yang Y, 1995, ;Lam W, 1998, ;Wong S, 1995, ACM TRANSACTIONS ON INFORMATION SYSTEMS;Hull D, 1994, ;Dörre J, 1999, ;Tzeras K, 1993, ;Fürnkranz J, 1999, LECTURE NOTES IN COMPUTER SCIENCE;Iwayama M, 1995, ;Merkl D, 1998, NEUROCOMPUTING;Cooper W, 1995, ACM TRANSACTIONS ON INFORMATION SYSTEMS;Fuhr N, 1989, INFORMATION PROCESSING & MANAGEMENT;Singhal A, 1997, ;Kim Y, 2000, ;Frasconi P, 2001, ;Biebricher P, 1988, ;Iyer R, 2000, ;Knight K, 1999, COMMUNICATIONS OF THE ACM;Li P, 1996, CONNECTION SCIENCE;Papka R, 1998, ;Yu K, 1998, ;Cooper W, 1991, ;Klingbiel P, 1973, INFORMATION STORAGE AND RETRIEVAL;Field B, 1975, JOURNAL OF DOCUMENTATION;Guthrie L, 1994, ;Sable C, 2000, INTERNATIONAL JOURNAL ON DIGITAL LIBRARIES;Liddy E, 1994, ACM TRANSACTIONS ON INFORMATION SYSTEMS;Klingbiel P, 1973, INFORMATION STORAGE AND RETRIEVAL;Robertson S, 1984, JOURNAL OF DOCUMENTATION;Hoyle W, 1973, INFORMATION STORAGE AND RETRIEVAL;Wong J, 1996, ACM 
SIGIR FORUM;Cleverdon C, 1984, INFORMATION SERVICES & USE;Domingos P, 1997, MACHINE LEARNING;Nigam K, 2000, MACHINE LEARNING;Joachims T, 1999, ;Yang Y, 1999, INFORMATION RETRIEVAL;Lewis D, 1994, ARXIV (CORNELL UNIVERSITY);Belkin N, 1992, COMMUNICATIONS OF THE ACM;Schütze H, 1998, ;Joachims T, 1997, ;Apté C, 1994, ACM TRANSACTIONS ON INFORMATION SYSTEMS;Saračević T, 1975, JOURNAL OF THE AMERICAN SOCIETY FOR INFORMATION SCIENCE;Dumais S, 2000, ;Chakrabarti S, 1998, ;Baker L, 1998, ;Lewis D, 1992, ;Lewis D, 1996, ;Rijsbergen C, 1977, JOURNAL OF DOCUMENTATION;Androutsopoulos I, 2000, ;Lewis D, 1995, ACM SIGIR FORUM;Li Y, 1998, THE COMPUTER JOURNAL;Nigam K, 1998, ;Larkey L, 1998, ;Chakrabarti S, 1998, THE VLDB JOURNAL;Larkey L, 1999, ;Lam S, 2003, ;Borko H, 1963, JOURNAL OF THE ACM;Creecy R, 1992, COMMUNICATIONS OF THE ACM;Oh H, 2000, ;Attardi G, 1999, CINECA IRIS INSTITUTIAL RESEARCH INFORMATION SYSTEM (UNIVERSITY OF PISA);Hayes P, 2002, ;Amati G, 1999, INFORMATION PROCESSING & MANAGEMENT;Heaps H, 1973, INFORMATION AND CONTROL;Riloff E, 1994, ACM TRANSACTIONS ON INFORMATION SYSTEMS;Cohen W, 1998, ;Hull D, 1996, ;Ruiz M, 1999, ;Gövert N, 1999, ;Ragas H, 1998, ;Li H, 1999, ;Clack C, 1997, ;Hamill K, 1980, JOURNAL OF THE AMERICAN SOCIETY FOR INFORMATION SCIENCE;Fuhr N, 1994, ACM TRANSACTIONS ON INFORMATION SYSTEMS;Moulinier I, 1996, LECTURE NOTES IN COMPUTER SCIENCE;Rau L, 1991, ;Goodman M, 1990, INNOVATIVE APPLICATIONS OF ARTIFICIAL INTELLIGENCE;Junker M, 1998, INTERNATIONAL JOURNAL ON DOCUMENT ANALYSIS AND RECOGNITION (IJDAR);Gray W, 1971, INFORMATION STORAGE AND RETRIEVAL;Brückner T, 1997, TEXT RETRIEVAL CONFERENCE;, 1999, INDUSTRIAL ROBOT THE INTERNATIONAL JOURNAL OF ROBOTICS RESEARCH AND APPLICATION;Deerwester S, 1990, JOURNAL OF THE AMERICAN SOCIETY FOR INFORMATION SCIENCE;Joachims T, 1998, LECTURE NOTES IN COMPUTER SCIENCE;Salton G, 1975, COMMUNICATIONS OF THE ACM;Yang Y, 1997, ;Drucker H, 1999, IEEE TRANSACTIONS ON NEURAL NETWORKS;Dumais S, 1998, ;Subashini D, 
1993, JOURNAL OF COMPUTATIONAL AND APPLIED MATHEMATICS;Willett P, 1988, INFORMATION PROCESSING & MANAGEMENT;Kessler B, 1997, ;Lewis D, 1995, ;Lam W, 1999, IEEE TRANSACTIONS ON KNOWLEDGE AND DATA ENGINEERING;Escudero G, 2000, LECTURE NOTES IN COMPUTER SCIENCE;Raghavan V, 2004, CHAPMAN & HALL/CRC COMPUTER AND INFORMATION SCIENCE SERIES;Robertson S, 1988, TAYLOR GRAHAM PUBLISHING EBOOKS;Riloff E, 1995, ;Ruiz M, 1999, ACM SIGIR FORUM;Moulinier I, 1996, ;Sebastiani F, 2000, ;Denoyer L, 2001, ;Tauritz D, 2000, INFORMATION SCIENCES;Ignatow G, 2017, ;Kessler B, 1997, ARXIV.ORG;Dagan I, 1997, ARXIV (CORNELL UNIVERSITY);Institute N, 2020, DEFINITIONS;Escudero G, 2000, ARXIV.ORG",,,OPENALEX,"Sebastiani F, 2002, ACM COMPUTING SURVEYS","Sebastiani F, 2002, ACM COMPUTING SURVEYS" +https://openalex.org/W1504694836,,Programs for Machine Learning,1994,en,article,5807,,,Steven L. Salzberg;Alberto M. Segre,Steven L. Salzberg;Alberto M. Segre,Johns Hopkins University (,,"Algorithms for constructing decision trees are among the most well known and widely used of all machine learning methods. Among decision tree algorithms, J. Ross Quinlan's ID3 and its successor, C4.5, are probably the most popular in the machine learning community. These algorithms and variations on them have been the subject of numerous research papers since Quinlan introduced ID3. Until recently, most researchers looking for an introduction to decision trees turned to Quinlan's seminal 1986 Machine Learning journal article [Quinlan, 1986]. In his new book, C4.5: Programs for Machine Learning, Quinlan has put together a definitive, much needed description of his complete system, including the latest developments. 
As such, this book will be a welcome addition to the library of many researchers and students.",,,,,Successor cardinal;Artificial intelligence;Computer science;Decision tree;Machine learning;Subject (documents);ID3 algorithm;Decision tree learning;Incremental decision tree;World Wide Web;Mathematics,US,"Gordon A, 1984, BIOMETRICS;Ryzin J, 1986, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Quinlan J, 1986, MACHINE LEARNING;Mingers J, 1989, MACHINE LEARNING;Quinlan J, 1989, ELSEVIER EBOOKS;Schaffer C, 1992, ELSEVIER EBOOKS",,,OPENALEX,"Salzberg S, 1994, ","Salzberg S, 1994, " +https://openalex.org/W1601795611,10.1108/03684920710743466,Pattern Recognition and Machine Learning,2007,en,article,8434,KYBERNETES,Kybernetes,,,,,,36,2,275,275,Computer science;Cybernetics;Artificial intelligence;Machine learning,,,,,OPENALEX,"NA, 2007, KYBERNETES","NA, 2007, KYBERNETES" +https://openalex.org/W2271840356,10.48550/arxiv.1603.04467,TensorFlow: Large-Scale Machine Learning on Heterogeneous Distributed Systems,2016,en,preprint,9777,ARXIV (CORNELL UNIVERSITY),arXiv (Cornell University),Martı́n Abadi;Ashish Agarwal;Paul Barham;Eugene Brevdo;Zhifeng Chen;Craig Citro;Gregory S. Corrado;Andy Davis;Jay B. Dean;Matthieu Devin;Sanjay Ghemawat;Ian Goodfellow;Andrew Harp;Geoffrey Irving;Michael Isard;Yangqing Jia;Rafał Józefowicz;Łukasz Kaiser;Manjunath Kudlur;Josh Levenberg;Dan Mané;Rajat Monga;Sherry Moore;Derek G. Murray;Chris Olah;Mike Schuster;Jonathon Shlens;Benoit Steiner;Ilya Sutskever;Kunal Talwar;Paul A. 
Tucker;Vincent Vanhoucke;Vijay Vasudevan;Fernanda Viégas;Oriol Vinyals;Pete Warden;Martin Wattenberg;Martin Wicke;Yuan Yu;Xiaoqiang Zheng,"Abadi, Martín;Agarwal, Ashish;Barham, Paul;Brevdo, Eugene;Chen, Zhifeng;Citro, Craig;Corrado, Greg S.;Davis, Andy;Dean, Jeffrey;Devin, Matthieu;Ghemawat, Sanjay;Goodfellow, Ian;Harp, Andrew;Irving, Geoffrey;Isard, Michael;Jia, Yangqing;Jozefowicz, Rafal;Kaiser, Lukasz;Kudlur, Manjunath;Levenberg, Josh;Mane, Dan;Monga, Rajat;Moore, Sherry;Murray, Derek;Olah, Chris;Schuster, Mike;Shlens, Jonathon;Steiner, Benoit;Sutskever, Ilya;Talwar, Kunal;Tucker, Paul;Vanhoucke, Vincent;Vasudevan, Vijay;Viegas, Fernanda;Vinyals, Oriol;Warden, Pete;Wattenberg, Martin;Wicke, Martin;Yu, Yuan;Zheng, Xiaoqiang",,,"TensorFlow is an interface for expressing machine learning algorithms, and an implementation for executing such algorithms. A computation expressed using TensorFlow can be executed with little or no change on a wide variety of heterogeneous systems, ranging from mobile devices such as phones and tablets up to large-scale distributed systems of hundreds of machines and thousands of computational devices such as GPU cards. The system is flexible and can be used to express a wide variety of algorithms, including training and inference algorithms for deep neural network models, and it has been used for conducting research and for deploying machine learning systems into production across more than a dozen areas of computer science and other fields, including speech recognition, computer vision, robotics, information retrieval, natural language processing, geographic information extraction, and computational drug discovery. This paper describes the TensorFlow interface and an implementation of that interface that we have built at Google. 
The TensorFlow API and a reference implementation were released as an open-source package under the Apache 2.0 license in November, 2015 and are available at www.tensorflow.org.",,,,,Scale (ratio);Computer science;Artificial intelligence;Machine learning;Distributed computing;Geography;Cartography,,"Hochreiter S, 1997, NEURAL COMPUTATION;Szegedy C, 2015, ;Rumelhart D, 1986, NATURE;Jia Y, 2014, ;Hinton G, 2012, IEEE SIGNAL PROCESSING MAGAZINE;Karpathy A, 2014, ;Zaharia M, 2012, ;Dean J, 2012, ;Isard M, 2007, ;Frome A, 2013, ;Dongarra J, 1990, ACM TRANSACTIONS ON MATHEMATICAL SOFTWARE;Verma A, 2015, ;Vinyals O, 2015, NEURAL INFORMATION PROCESSING SYSTEMS;Recht B, 2011, NEURAL INFORMATION PROCESSING SYSTEMS;Ragan‐Kelley J, 2013, ;Ranzato M, 2012, INTERNATIONAL CONFERENCE ON MACHINE LEARNING;Murray D, 2013, ;Chilimbi T, 2014, OPERATING SYSTEMS DESIGN AND IMPLEMENTATION;Collobert R, 2002, INFOSCIENCE (ECOLE POLYTECHNIQUE FÉDÉRALE DE LAUSANNE);Zeiler M, 2013, ;Arvind A, 1990, IEEE TRANSACTIONS ON COMPUTERS;Yu D, 2014, ;Chambers C, 2010, ;Heigold G, 2013, ;Murray D, 2011, ;Rossbach C, 2013, ;Angelova A, 2015, ;Click C, 1995, ;Gónzalez-Domínguez J, 2014, NEURAL NETWORKS;Mikolov T, 2013, ARXIV (CORNELL UNIVERSITY);Ioffe S, 2024, ARXIV (CORNELL UNIVERSITY);Sutskever I, 2014, ARXIV (CORNELL UNIVERSITY);Niu F, 2011, ARXIV (CORNELL UNIVERSITY);Chetlur S, 2014, ARXIV (CORNELL UNIVERSITY);Krizhevsky A, 2014, ARXIV (CORNELL UNIVERSITY);Ba J, 2014, ARXIV (CORNELL UNIVERSITY);Goodfellow I, 2013, ARXIV (CORNELL UNIVERSITY);Nair A, 2015, ARXIV (CORNELL UNIVERSITY);Vinyals O, 2014, ARXIV (CORNELL UNIVERSITY);Ramsundar B, 2015, ARXIV (CORNELL UNIVERSITY);Le Q, 2011, ARXIV (CORNELL UNIVERSITY);Maddison C, 2014, ARXIV (CORNELL UNIVERSITY)",,,OPENALEX,"Abadi M, 2016, ARXIV (CORNELL UNIVERSITY)-a","Abadi M, 2016, ARXIV (CORNELL UNIVERSITY)" +https://openalex.org/W3163993681,10.1038/s42254-021-00314-5,Physics-informed machine learning,2021,en,review,6571,NATURE REVIEWS PHYSICS,Nature Reviews 
Physics,George Em Karniadakis;Ioannis G. Kevrekidis;Lu Lu;Paris Perdikaris;Sifan Wang;Liu Yang,George Em Karniadakis;Ioannis G. Kevrekidis;Lu Lu;Paris Perdikaris;Sifan Wang;Liu Yang,"Division of Applied Mathematics, Brown University Providence, RI, USA;Department of Applied Mathematics and Statistics, Johns Hopkins University, Baltimore, MD, USA;Department of Chemical and Biomolecular Engineering, Johns Hopkins University, Baltimore, MD, USA;Department of Mathematics, Massachusetts Institute of Technology, Cambridge, MA, USA;Department of Mechanical Engineering and Applied Mechanics, University of Pennsylvania, Philadelphia, PA, USA;Graduate Group in Applied Mathematics and Computational Science, University of Pennsylvania, Philadelphia, PA, USA;Division of Applied Mathematics, Brown University Providence, RI, USA",,,3,6,422,440,Computer science;Artificial intelligence;Machine learning;Multiphysics;Inference;Artificial neural network;Physical law;Field (mathematics);Discretization;Kernel method;Deep learning;Theoretical computer science;Mathematics;Support vector machine;Finite element method,US,"He K, 2016, ;Plimpton S, 1995, JOURNAL OF COMPUTATIONAL PHYSICS;淳司 柴, 2017, JOURNAL OF JAPAN SOCIETY FOR FUZZY THEORY AND INTELLIGENT INFORMATICS;Ganin Y, 2017, ADVANCES IN COMPUTER VISION AND PATTERN RECOGNITION;Abadi M, 2016, OPERATING SYSTEMS DESIGN AND IMPLEMENTATION;Byrd R, 1995, SIAM JOURNAL ON SCIENTIFIC COMPUTING;Behler J, 2007, PHYSICAL REVIEW LETTERS;Xiu D, 2002, SIAM JOURNAL ON SCIENTIFIC COMPUTING;Brunton S, 2016, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;LeCun Y, 1998, HAL (LE CENTRE POUR LA COMMUNICATION SCIENTIFIQUE DIRECTE);Bronstein M, 2017, IEEE SIGNAL PROCESSING MAGAZINE;Schmidt M, 2009, SCIENCE;Lagaris I, 1998, IEEE TRANSACTIONS ON NEURAL NETWORKS;Zhang L, 2018, PHYSICAL REVIEW LETTERS;Sirignano J, 2018, JOURNAL OF COMPUTATIONAL PHYSICS;Alnæs M, 2015, DEPARTMENT OF EARTH SCIENCES EPRINTS REPOSITORY;Stuart A, 2010, ACTA NUMERICA;Rackauckas C, 2017, 
JOURNAL OF OPEN RESEARCH SOFTWARE;Bruna J, 2013, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Ling J, 2016, JOURNAL OF FLUID MECHANICS;Lusch B, 2018, NATURE COMMUNICATIONS;Kimeldorf G, 1970, THE ANNALS OF MATHEMATICAL STATISTICS;Hart J, 2006, EARTH-SCIENCE REVIEWS;Jasak H, 2007, ;Hachmann J, 2011, THE JOURNAL OF PHYSICAL CHEMISTRY LETTERS;Mallat S, 2016, PHILOSOPHICAL TRANSACTIONS OF THE ROYAL SOCIETY A MATHEMATICAL PHYSICAL AND ENGINEERING SCIENCES;Weinan E, 2017, COMMUNICATIONS IN MATHEMATICS AND STATISTICS;Raissi M, 2017, JOURNAL OF COMPUTATIONAL PHYSICS;Poggio T, 2017, INTERNATIONAL JOURNAL OF AUTOMATION AND COMPUTING;Blum A, 1992, NEURAL NETWORKS;Marzouk Y, 2006, JOURNAL OF COMPUTATIONAL PHYSICS;Lee J, 2016, CONFERENCE ON LEARNING THEORY;Micchelli C, 1977, ;Raissi M, 2017, JOURNAL OF COMPUTATIONAL PHYSICS;Raissi M, 2018, SIAM JOURNAL ON SCIENTIFIC COMPUTING;González‐García R, 1998, COMPUTERS & CHEMICAL ENGINEERING;Diaconis P, 1988, ;Owhadi H, 2015, MULTISCALE MODELING AND SIMULATION;Stoudenmire E, 2016, NEURAL INFORMATION PROCESSING SYSTEMS;Owhadi H, 2006, COMMUNICATIONS ON PURE AND APPLIED MATHEMATICS;McFall K, 2009, IEEE TRANSACTIONS ON NEURAL NETWORKS;Rico-Martı́nez R, 1992, CHEMICAL ENGINEERING COMMUNICATIONS;Larkin F, 1972, ROCKY MOUNTAIN JOURNAL OF MATHEMATICS;Beidokhti R, 2009, JOURNAL OF THE FRANKLIN INSTITUTE;Rovelli C, 2011, FOUNDATIONS OF PHYSICS;Rico-Martı́nez R, 2002, ;Reisert M, 2007, ;Choromanska A, 2014, ARXIV (CORNELL UNIVERSITY);Kondor R, 2018, ARXIV (CORNELL UNIVERSITY);Kadri H, 2015, ARXIV (CORNELL UNIVERSITY);Zhu J, 2017, ;Raissi M, 2018, JOURNAL OF COMPUTATIONAL PHYSICS;Reichstein M, 2019, NATURE;Han J, 2018, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Belkin M, 2019, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Zhu Y, 2019, JOURNAL OF COMPUTATIONAL PHYSICS;Bianco S, 2018, IEEE ACCESS;Meng X, 2019, JOURNAL OF COMPUTATIONAL PHYSICS;Mei S, 2018, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Zhang D, 2019, JOURNAL 
OF COMPUTATIONAL PHYSICS;Xu Z, 2020, COMMUNICATIONS IN COMPUTATIONAL PHYSICS;Iten R, 2020, PHYSICAL REVIEW LETTERS;Tripathy R, 2018, JOURNAL OF COMPUTATIONAL PHYSICS;Yang Y, 2019, JOURNAL OF COMPUTATIONAL PHYSICS;Geneva N, 2019, JOURNAL OF COMPUTATIONAL PHYSICS;Pun G, 2019, NATURE COMMUNICATIONS;Kurth T, 2018, ;Gardner J, 2018, NEURAL INFORMATION PROCESSING SYSTEMS;Poole B, 2016, ARXIV (CORNELL UNIVERSITY);Frostig R, 2018, HAL (LE CENTRE POUR LA COMMUNICATION SCIENTIFIQUE DIRECTE);Reddy D, 2018, MODELING EARTH SYSTEMS AND ENVIRONMENT;Geiger M, 2020, JOURNAL OF STATISTICAL MECHANICS THEORY AND EXPERIMENT;Wu J, 2019, JOURNAL OF COMPUTATIONAL PHYSICS;Karumuri S, 2019, JOURNAL OF COMPUTATIONAL PHYSICS;Grohs P, 2023, MEMOIRS OF THE AMERICAN MATHEMATICAL SOCIETY;He J, 2020, JOURNAL OF COMPUTATIONAL MATHEMATICS;Geiger M, 2019, PHYSICAL REVIEW. E;He J, 2019, SCIENCE CHINA MATHEMATICS;Winovich N, 2019, JOURNAL OF COMPUTATIONAL PHYSICS;Owhadi H, 2019, JOURNAL OF COMPUTATIONAL PHYSICS;Pang G, 2019, JOURNAL OF COMPUTATIONAL PHYSICS;Hy T, 2018, THE JOURNAL OF CHEMICAL PHYSICS;Spigler S, 2019, JOURNAL OF PHYSICS A MATHEMATICAL AND THEORETICAL;Yang Y, 2019, COMPUTATIONAL MECHANICS;Yang G, 2017, NEURAL INFORMATION PROCESSING SYSTEMS;Kemeth F, 2018, IEEE ACCESS;Tai K, 2019, INTERNATIONAL CONFERENCE ON MACHINE LEARNING;Winkens J, 2018, ;Huang D, 2019, ARXIV (CORNELL UNIVERSITY);Gal Y, 2015, ARXIV (CORNELL UNIVERSITY);Brock A, 2018, ARXIV (CORNELL UNIVERSITY);Jacot A, 2018, ARXIV (CORNELL UNIVERSITY);Cohen T, 2019, ARXIV (CORNELL UNIVERSITY);Rahaman N, 2018, ARXIV (CORNELL UNIVERSITY);Erichson N, 2019, ARXIV (CORNELL UNIVERSITY);Mattheakis M, 2019, ARXIV (CORNELL UNIVERSITY);Xu K, 2019, ARXIV (CORNELL UNIVERSITY);Jayanta M, 2021, DROPS (SCHLOSS DAGSTUHL – LEIBNIZ CENTER FOR INFORMATICS);Lu L, 2021, NATURE MACHINE INTELLIGENCE;Yu L, 2017, PROCEEDINGS OF THE AAAI CONFERENCE ON ARTIFICIAL INTELLIGENCE;Raissi M, 2020, SCIENCE;He X, 2020, KNOWLEDGE-BASED SYSTEMS;Jagtap A, 2020, COMPUTER 
METHODS IN APPLIED MECHANICS AND ENGINEERING;Yang L, 2020, JOURNAL OF COMPUTATIONAL PHYSICS;Newman D, 1998, MEDICAL ENTOMOLOGY AND ZOOLOGY;Elsken T, 2019, ARXIV (CORNELL UNIVERSITY);Kharazmi E, 2020, COMPUTER METHODS IN APPLIED MECHANICS AND ENGINEERING;Kissas G, 2019, COMPUTER METHODS IN APPLIED MECHANICS AND ENGINEERING;Alber M, 2019, NPJ DIGITAL MEDICINE;Tartakovsky A, 2020, WATER RESOURCES RESEARCH;Pfau D, 2020, PHYSICAL REVIEW RESEARCH;Yang L, 2020, SIAM JOURNAL ON SCIENTIFIC COMPUTING;Haghighat E, 2020, COMPUTER METHODS IN APPLIED MECHANICS AND ENGINEERING;Lu L, 2020, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Zhang D, 2020, SIAM JOURNAL ON SCIENTIFIC COMPUTING;Hospedales T, 2021, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Jagtap A, 2020, PROCEEDINGS OF THE ROYAL SOCIETY A MATHEMATICAL PHYSICAL AND ENGINEERING SCIENCES;Wang W, 2019, NPJ COMPUTATIONAL MATERIALS;Xu K, 2021, JOURNAL OF COMPUTATIONAL PHYSICS;Sheng H, 2020, JOURNAL OF COMPUTATIONAL PHYSICS;Nakata A, 2020, THE JOURNAL OF CHEMICAL PHYSICS;Chen F, 2020, THE JOURNAL OF OPEN SOURCE SOFTWARE;Owhadi H, 2019, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Fan D, 2019, SCIENCE ROBOTICS;Darbon J, 2020, JOURNAL OF COMPUTATIONAL PHYSICS;Basri R, 2019, ARXIV (CORNELL UNIVERSITY);Novak R, 2020, INTERNATIONAL CONFERENCE ON LEARNING REPRESENTATIONS;Jia J, 2019, ARXIV (CORNELL UNIVERSITY);Jo H, 2020, NETWORKS AND HETEROGENEOUS MEDIA;Tsai Y, 2019, ;Owhadi H, 2019, NOTICES OF THE AMERICAN MATHEMATICAL SOCIETY;Bettencourt J, 2019, ;Paszke A, 2019, ARXIV (CORNELL UNIVERSITY);Rackauckas C, 2020, RESEARCH SQUARE;Kharazmi E, 2019, ARXIV (CORNELL UNIVERSITY);Wang S, 2020, ARXIV (CORNELL UNIVERSITY);Jia X, 2020, ARXIV (CORNELL UNIVERSITY);Koryagin A, 2019, ARXIV (CORNELL UNIVERSITY);Xu K, 2020, ARXIV (CORNELL UNIVERSITY);Xu K, 2020, ARXIV (CORNELL UNIVERSITY);Xu K, 2019, ARXIV (CORNELL UNIVERSITY);Kunitski M, 2019, GSI REPOSITORY (GERMAN FEDERAL GOVERNMENT);Lagaris I, 1998, ;Jagtap A, 2020, COMMUNICATIONS IN 
COMPUTATIONAL PHYSICS;N. K, 2019, REPEC: RESEARCH PAPERS IN ECONOMICS;Kashefi A, 2021, PHYSICS OF FLUIDS;Shin Y, 2020, COMMUNICATIONS IN COMPUTATIONAL PHYSICS;Jin P, 2020, NEURAL NETWORKS;Liu Z, 2020, COMMUNICATIONS IN COMPUTATIONAL PHYSICS;Dong S, 2021, JOURNAL OF COMPUTATIONAL PHYSICS;Sirignano J, 2020, JOURNAL OF COMPUTATIONAL PHYSICS;Tancik M, 2020, NEURAL INFORMATION PROCESSING SYSTEMS;Mu S, 2019, UNIVERSITY OF REGENSBURG PUBLICATION SERVER (UNIVERSITY OF REGENSBURG);Wang S, 2020, JOURNAL OF COMPUTATIONAL PHYSICS;Mathews A, 2021, PHYSICAL REVIEW. E;Cai W, 2020, SIAM JOURNAL ON SCIENTIFIC COMPUTING;Li D, 2020, WATER RESOURCES RESEARCH;Reyes B, 2021, PHYSICAL REVIEW FLUIDS;Lagari P, 2020, INTERNATIONAL JOURNAL OF ARTIFICIAL INTELLIGENCE TOOLS;Wang B, 2020, COMMUNICATIONS IN COMPUTATIONAL PHYSICS;Li Z, 2020, CALTECHAUTHORS (CALIFORNIA INSTITUTE OF TECHNOLOGY);Arbabi H, 2020, JOM;Guo M, 2022, JOURNAL OF ENGINEERING MECHANICS;Nelsen N, 2021, SIAM JOURNAL ON SCIENTIFIC COMPUTING;Mishra S, 2020, ARXIV (CORNELL UNIVERSITY);Tancik M, 2020, ARXIV (CORNELL UNIVERSITY);Pun G, , REPEC: RESEARCH PAPERS IN ECONOMICS;Wujie W, 2019, DSPACE@MIT (MASSACHUSETTS INSTITUTE OF TECHNOLOGY);Wang S, 2020, ARXIV (CORNELL UNIVERSITY);Wight C, 2020, ARXIV (CORNELL UNIVERSITY);Mishra S, 2020, ARXIV (CORNELL UNIVERSITY);Mao Z, 2020, ARXIV (CORNELL UNIVERSITY);Xu K, 2020, ARXIV (CORNELL UNIVERSITY);Owhadi H, 2020, CALTECHAUTHORS (CALIFORNIA INSTITUTE OF TECHNOLOGY);Pfrommer S, 2020, ARXIV (CORNELL UNIVERSITY);Rotskoff G, 2020, ARXIV (CORNELL UNIVERSITY);Xu K, 2020, ARXIV (CORNELL UNIVERSITY);Lee J, 2020, ARXIV (CORNELL UNIVERSITY);Wang S, 2021, COMPUTER METHODS IN APPLIED MECHANICS AND ENGINEERING;Cai S, 2021, JOURNAL OF FLUID MECHANICS;Sard A, 1963, MATHEMATICAL SURVEYS AND MONOGRAPHS;Jia W, 2020, ;Li Z, 2021, CALTECHAUTHORS (CALIFORNIA INSTITUTE OF TECHNOLOGY);Cai S, 2021, JOURNAL OF COMPUTATIONAL PHYSICS;, 2017, AUERBACH PUBLICATIONS EBOOKS;Lanthaler S, 2022, TRANSACTIONS OF MATHEMATICS 
AND ITS APPLICATIONS;Hennigh O, 2021, LECTURE NOTES IN COMPUTER SCIENCE;Patel R, 2021, JOURNAL OF COMPUTATIONAL PHYSICS;Mao Z, 2021, JOURNAL OF COMPUTATIONAL PHYSICS;Hamzi B, 2021, PHYSICA D NONLINEAR PHENOMENA;Yang Y, 2020, PROCEEDINGS OF THE ROYAL SOCIETY A MATHEMATICAL PHYSICAL AND ENGINEERING SCIENCES;Yang L, 2022, SIAM JOURNAL ON SCIENTIFIC COMPUTING;Manickam I, 2021, ;Zhu W, 2020, AGU FALL MEETING ABSTRACTS;Hennigh O, 2020, ARXIV (CORNELL UNIVERSITY);Deng B, 2021, ARXIV (CORNELL UNIVERSITY)",,,OPENALEX,"Karniadakis G, 2021, NATURE REVIEWS PHYSICS","Karniadakis G, 2021, NATURE REVIEWS PHYSICS" +https://openalex.org/W1534477342,10.1007/3-540-45014-9_1,Ensemble Methods in Machine Learning,2000,en,book-chapter,7848,LECTURE NOTES IN COMPUTER SCIENCE,Lecture notes in computer science,Thomas G. Dietterich,Thomas G. Dietterich,"Oregon State University, Corvallis, Oregon, USA;Oregon State University>","Thomas G. Dietterich (corresponding author), Oregon State University, Corvallis, Oregon, USA; Oregon State University>",,,,1,15,Computer science;Overfitting;Ensemble learning;Boosting (machine learning);AdaBoost;Artificial intelligence;Machine learning;Classifier (UML);Bayesian probability;Pattern recognition (psychology);Artificial neural network,US,"Freund Y, 1997, JOURNAL OF COMPUTER AND SYSTEM SCIENCES;Breiman L, 1996, MACHINE LEARNING;, 1996, COMPUTERS & MATHEMATICS WITH APPLICATIONS;Freund Y, 1996, ;Hansen L, 1990, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Dietterich T, 2000, MACHINE LEARNING;Dietterich T, 1995, JOURNAL OF ARTIFICIAL INTELLIGENCE RESEARCH;Bauer E, 1999, MACHINE LEARNING;Schapire R, 1998, ;Hornik K, 1990, NEURAL NETWORKS;Neal R, 2011, ;Hyafil L, 1976, INFORMATION PROCESSING LETTERS;Tumer K, 1996, CONNECTION SCIENCE;Schapire R, 1997, QUT EPRINTS (QUEENSLAND UNIVERSITY OF TECHNOLOGY);Fisher D, 1997, ;Kolen J, 1990, ;Schapire R, 1997, ;Ali K, 1994, MACHINE LEARNING;Raviv Y, 1996, CONNECTION SCIENCE;Ali K, 1996, MACHINE 
LEARNING;Kwok S, 1990, MACHINE INTELLIGENCE AND PATTERN RECOGNITION;Parmanto B, 1995, ",,,OPENALEX,"Dietterich T, 2000, LECTURE NOTES IN COMPUTER SCIENCE","Dietterich T, 2000, LECTURE NOTES IN COMPUTER SCIENCE" +https://openalex.org/W2559394418,10.1038/nature23474,Quantum machine learning,2017,en,article,4437,NATURE,Nature,Jacob Biamonte;Péter Wittek;Nicola Pancotti;Patrick Rebentrost;Nathan Wiebe;Seth Lloyd,Jacob Biamonte;Peter Wittek;Nicola Pancotti;Patrick Rebentrost;Nathan Wiebe;Seth Lloyd,"Institute for Quantum Computing, University of Waterloo, Waterloo, N2L 3G1 Ontario, Canada;Quantum Complexity Science Initiative, Skolkovo Institute of Science and Technology, Skoltech Building 3, Moscow 143026, Russia;Quantum Complexity Science Initiative, Skolkovo Institute of Science and Technology, Skoltech Building 3, Moscow, 143026, Russia;ICFO-The Institute of Photonic Sciences, Castelldefels, Barcelona 08860 Spain;ICFO—The Institute of Photonic Sciences, Castelldefels, 08860, Barcelona, Spain;Max Planck Institute of Quantum Optics, 1 Hans-Kopfermannstrasse, D-85748 Garching, Germany;Max Planck Institute of Quantum Optics, 1 Hans-Kopfermannstrasse, Garching, D-85748, Germany;Massachusetts Institute of Technology, Research Laboratory of Electronics, Cambridge, Massachusetts 02139, USA;Massachusetts Institute of Technology, Research Laboratory of Electronics, Cambridge, 02139, Massachusetts, USA;Station Q Quantum Architectures and Computation Group, Microsoft Research, Redmond, Washington 98052, USA;Station Q Quantum Architectures and Computation Group, Microsoft Research, Redmond, 98052, Washington, USA;Massachusetts Institute of Technology, Department of Mechanical Engineering, Cambridge, Massachusetts 02139, USA;Department of Mechanical Engineering, Massachusetts Institute of Technology, Cambridge, 02139, Massachusetts, USA","Jacob Biamonte (corresponding author), Institute for Quantum Computing, University of Waterloo, Waterloo, N2L 3G1 Ontario, Canada; Quantum 
Complexity Science Initiative, Skolkovo Institute of Science and Technology, Skoltech Building 3, Moscow 143026, Russia; Quantum Complexity Science Initiative, Skolkovo Institute of Science and Technology, Skoltech Building 3, Moscow, 143026, Russia",,549,7671,195,202,Quantum machine learning;Computer science;Quantum;Software;Field (mathematics);Quantum computer;Artificial intelligence;Computer engineering;Programming language;Physics;Mathematics,RU;ES;DE;US,"Nielsen M, 2002, AMERICAN JOURNAL OF PHYSICS;Rosenblatt F, 1958, PSYCHOLOGICAL REVIEW;Harrow A, 2009, PHYSICAL REVIEW LETTERS;Lloyd S, 1996, SCIENCE;Rebentrost P, 2014, PHYSICAL REVIEW LETTERS;Lloyd S, 2014, NATURE PHYSICS;Schuld M, 2014, CONTEMPORARY PHYSICS;Giovannetti V, 2008, PHYSICAL REVIEW LETTERS;Brunner D, 2013, NATURE COMMUNICATIONS;Rønnow T, 2014, SCIENCE;Wiebe N, 2012, PHYSICAL REVIEW LETTERS;Aaronson S, 2015, NATURE PHYSICS;Wittek P, 2014, ;Temme K, 2011, NATURE;Dolde F, 2014, NATURE COMMUNICATIONS;Clader B, 2013, PHYSICAL REVIEW LETTERS;Ventura D, 2000, INFORMATION SCIENCES;Li Z, 2015, PHYSICAL REVIEW LETTERS;Wiebe N, 2014, PHYSICAL REVIEW LETTERS;Zeidler D, 2001, PHYSICAL REVIEW A;Cai X, 2015, PHYSICAL REVIEW LETTERS;Paparo G, 2014, PHYSICAL REVIEW X;Hentschel A, 2010, PHYSICAL REVIEW LETTERS;Biamonte J, 2008, PHYSICAL REVIEW A;Bisio A, 2010, PHYSICAL REVIEW A;Zahedinejad E, 2015, PHYSICAL REVIEW LETTERS;Low G, 2014, PHYSICAL REVIEW A;Anguita D, 2003, NEURAL NETWORKS;Lovett N, 2013, PHYSICAL REVIEW LETTERS;Sasaki M, 2001, PHYSICAL REVIEW A;Sentís G, 2012, SCIENTIFIC REPORTS;Whitfield J, 2012, EUROPHYSICS LETTERS (EPL);Neigovzen R, 2009, PHYSICAL REVIEW A;Wiebe N, 2015, NEW JOURNAL OF PHYSICS;Karimi K, 2011, QUANTUM INFORMATION PROCESSING;Dunjko V, 2015, NEW JOURNAL OF PHYSICS;Sentís G, 2015, EPJ QUANTUM TECHNOLOGY;Tezak N, 2015, EPJ QUANTUM TECHNOLOGY;Pons M, 2007, PHYSICAL REVIEW LETTERS;Freitas N, 2011, ;Lloyd S, 2016, NEW JOURNAL OF PHYSICS;Amstrup B, 1995, THE JOURNAL OF PHYSICAL 
CHEMISTRY;Farhi E, 2014, ARXIV (CORNELL UNIVERSITY);Lloyd S, 2013, ARXIV (CORNELL UNIVERSITY);Hermans M, 2015, ARXIV (CORNELL UNIVERSITY);Denchev V, 2012, ARXIV (CORNELL UNIVERSITY);Denchev V, 2015, ARXIV (CORNELL UNIVERSITY);Scherer A, 2015, ARXIV (CORNELL UNIVERSITY);LeCun Y, 2015, NATURE;Vapnik V, 1995, ;Law J, 2001, ACM SIGSOFT SOFTWARE ENGINEERING NOTES;Carleo G, 2017, SCIENCE;Le Q, 2013, ;Carrasquilla J, 2017, NATURE PHYSICS;Dunjko V, 2016, PHYSICAL REVIEW LETTERS;Serge G, 1996, ARXIV.ORG;Broecker P, 2017, SCIENTIFIC REPORTS;Schuld M, 2016, PHYSICAL REVIEW. A/PHYSICAL REVIEW, A;Wan K, 2017, NPJ QUANTUM INFORMATION;Lloyd S, 2016, NATURE COMMUNICATIONS;Schuld M, 2017, EUROPHYSICS LETTERS (EPL);Wossnig L, 2018, PHYSICAL REVIEW LETTERS;Benedetti M, 2016, PHYSICAL REVIEW. A/PHYSICAL REVIEW, A;Granade C, 2012, NEW JOURNAL OF PHYSICS;Kieferová M, 2017, PHYSICAL REVIEW. A/PHYSICAL REVIEW, A;Aı̈meur E, 2006, LECTURE NOTES IN COMPUTER SCIENCE;Yung M, 2012, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Rebentrost P, 2018, PHYSICAL REVIEW. A/PHYSICAL REVIEW, A;Arunachalam S, 2015, NEW JOURNAL OF PHYSICS;Zhao Z, 2019, PHYSICAL REVIEW. A/PHYSICAL REVIEW, A;Lau H, 2017, PHYSICAL REVIEW LETTERS;Palittapongarnpim P, 2017, NEUROCOMPUTING;Lamata L, 2017, SCIENTIFIC REPORTS;and A, 2017, QUANTUM INFORMATION AND COMPUTATION;Kimmel S, 2017, NPJ QUANTUM INFORMATION;Zahedinejad E, 2016, PHYSICAL REVIEW APPLIED;August M, 2017, PHYSICAL REVIEW. 
A/PHYSICAL REVIEW, A;O’Gorman B, 2015, THE EUROPEAN PHYSICAL JOURNAL SPECIAL TOPICS;Mavadia S, 2017, NATURE COMMUNICATIONS;Scherer A, 2017, QUANTUM INFORMATION PROCESSING;Banchi L, 2016, NPJ QUANTUM INFORMATION;Heras U, 2016, PHYSICAL REVIEW LETTERS;Alvarez-Rodriguez U, 2017, SCIENTIFIC REPORTS;Sentís G, 2017, ARXIV (CORNELL UNIVERSITY);Monràs A, 2017, PHYSICAL REVIEW LETTERS;Dumoulin V, 2014, PROCEEDINGS OF THE AAAI CONFERENCE ON ARTIFICIAL INTELLIGENCE;Wittek P, 2017, SCIENTIFIC REPORTS;Bisio A, 2011, PHYSICS LETTERS A;Somma R, 2016, BULLETIN OF THE AMERICAN PHYSICAL SOCIETY;Palittapongarnpim P, 2016, KTH PUBLICATION DATABASE DIVA (KTH ROYAL INSTITUTE OF TECHNOLOGY);Alvarez-Rodriguez U, 2016, ARXIV (CORNELL UNIVERSITY);Lu D, 2017, ARXIV (CORNELL UNIVERSITY);Adachi S, 2015, ARXIV (CORNELL UNIVERSITY);Kerenidis I, 2016, ARXIV (CORNELL UNIVERSITY);Adcock J, 2015, ARXIV (CORNELL UNIVERSITY);Marvian I, 2016, ARXIV (CORNELL UNIVERSITY);Dridi R, 2015, ARXIV (CORNELL UNIVERSITY);Schuld M, 2017, ARXIV (CORNELL UNIVERSITY);Vapnik V, 2000, ;Shor P, 1997, SIAM JOURNAL ON COMPUTING;Schuld M, 2014, ;Kerenidis I, 2017, DR-NTU (NANYANG TECHNOLOGICAL UNIVERSITY);Wiebe N, 2015, QUANTUM INFORMATION AND COMPUTATION;Wittek P, 2014, ELSEVIER EBOOKS;Wiebe N, 2016, NEURAL INFORMATION PROCESSING SYSTEMS;Banchi L, 2016, MPG.PURE (MAX PLANCK SOCIETY);Tiersch M, 2015, SCIENTIFIC REPORTS;Unai Á, 2018, LA REFERENCIA (RED FEDERADA DE REPOSITORIOS INSTITUCIONALES DE PUBLICACIONES CIENTÍFICAS);M F, 2013, OXFORD UNIVERSITY RESEARCH ARCHIVE (ORA) (UNIVERSITY OF OXFORD);Chatterjee R, 2017, QUANTUM INFORMATION AND COMPUTATION;Seth L, 2016, RWTH PUBLICATIONS (RWTH AACHEN);Kulchytskyy B, 2016, BULLETIN OF THE AMERICAN PHYSICAL SOCIETY;Adcock J, , BRISTOL RESEARCH (UNIVERSITY OF BRISTOL);Arunachalam S, 2017, ARXIV (CORNELL UNIVERSITY);Low G, 2014, DSPACE@MIT (MASSACHUSETTS INSTITUTE OF TECHNOLOGY)",,,OPENALEX,"Biamonte J, 2017, NATURE","Biamonte J, 2017, NATURE" 
+https://openalex.org/W2912213068,10.1145/3298981,Federated Machine Learning,2019,en,article,5867,ACM TRANSACTIONS ON INTELLIGENT SYSTEMS AND TECHNOLOGY,ACM Transactions on Intelligent Systems and Technology,Qiang Yang;Yang Liu;Tianjian Chen;Yongxin Tong,Qiang Yang;Yang Liu;Tianjian Chen;Yongxin Tong,"Hong Kong University of Science and Technology, Hong Kong, China;Webank, Shenzhen, China;Webank, Shenzhen, China;Beihang University, Beijing, China",,"Today’s artificial intelligence still faces two major challenges. One is that, in most industries, data exists in the form of isolated islands. The other is the strengthening of data privacy and security. We propose a possible solution to these challenges: secure federated learning. Beyond the federated-learning framework first proposed by Google in 2016, we introduce a comprehensive secure federated-learning framework, which includes horizontal federated learning, vertical federated learning, and federated transfer learning. We provide definitions, architectures, and applications for the federated-learning framework, and provide a comprehensive survey of existing works on this subject. 
In addition, we propose building data networks among organizations based on federated mechanisms as an effective solution to allowing knowledge to be shared without compromising user privacy.",10,2,1,19,Computer science;Federated learning;Transfer of learning;Artificial intelligence;Data science;Computer security,HK;CN,"Pan S, 2009, IEEE TRANSACTIONS ON KNOWLEDGE AND DATA ENGINEERING;Silver D, 2016, NATURE;Sweeney L, 2002, INTERNATIONAL JOURNAL OF UNCERTAINTY FUZZINESS AND KNOWLEDGE-BASED SYSTEMS;Abadi M, 2016, ;Goldreich O, 1987, ;Bonawitz K, 2017, ;Dwork C, 2008, LECTURE NOTES IN COMPUTER SCIENCE;Agrawal R, 2000, ACM SIGMOD RECORD;Yao A, 1982, FOUNDATIONS OF COMPUTER SCIENCE;Shokri R, 2015, ;Mohassel P, 2017, ;Rivest R, 1978, ;Dowlin N, 2016, INTERNATIONAL CONFERENCE ON MACHINE LEARNING;Hitaj B, 2017, ;Acar A, 2018, ACM COMPUTING SURVEYS;Vaidya J, 2002, ;Clifton C, 2010, ;Liu J, 2017, ;Bogdanov D, 2008, LECTURE NOTES IN COMPUTER SCIENCE;Vaidya J, 2003, ;Ho Q, 2013, PUBMED;Song S, 2013, ;Nikolaenko V, 2013, ;Araki T, 2016, ;Du W, 2004, ;Du W, 2002, SYRACUSE UNIVERSITY LIBRARIES (SYRACUSE UNIVERSITY);Du W, 2005, ;Zhang Q, 2015, IEEE TRANSACTIONS ON COMPUTERS;Vaidya J, 2004, ;Yuan J, 2013, IEEE TRANSACTIONS ON PARALLEL AND DISTRIBUTED SYSTEMS;Yu H, 2006, ;Yu H, 2006, LECTURE NOTES IN COMPUTER SCIENCE;Scannapieco M, 2007, ;Aono Y, 2016, ;Sanil A, 2004, ;Karr A, 2009, ;Hall R, 2011, ;Mohassel P, 2015, ;Bahmani R, 2017, LECTURE NOTES IN COMPUTER SCIENCE;McMahan H, 2017, ;Vaidya J, 2005, LECTURE NOTES IN COMPUTER SCIENCE;Wan L, 2007, ;Liang G, 2004, LECTURE NOTES IN COMPUTER SCIENCE;Faltings B, 2017, SYNTHESIS LECTURES ON ARTIFICIAL INTELLIGENCE AND MACHINE LEARNING;Konečný J, 2016, ARXIV (CORNELL UNIVERSITY);McMahan H, 2016, ARXIV (CORNELL UNIVERSITY);Hesamifard E, 2017, ARXIV (CORNELL UNIVERSITY);Chabanne H, 2017, IACR CRYPTOLOGY EPRINT ARCHIVE;Smith V, 2017, ARXIV (CORNELL UNIVERSITY);, 2007, THE MIT PRESS EBOOKS;Leen T, 2000, ;Agrawal R, 2000, ;Phong L, 2017, IEEE 
TRANSACTIONS ON INFORMATION FORENSICS AND SECURITY;Mohassel P, 2018, ;Riazi M, 2018, ;Bourse F, 2018, LECTURE NOTES IN COMPUTER SCIENCE;Kim M, 2018, JMIR MEDICAL INFORMATICS;Wang S, 2018, ;Rouhani B, 2018, 2018 55TH ACM/ESDA/IEEE DESIGN AUTOMATION CONFERENCE (DAC);Kim H, 2018, ;Melis L, 2018, ARXIV (CORNELL UNIVERSITY);Chen F, 2018, ;Cheng Y, 2019, ;Bourse F, 2018, IACR CRYPTOLOGY EPRINT ARCHIVE;Faltings B, 2017, SYNTHESIS LECTURES ON ARTIFICIAL INTELLIGENCE AND MACHINE LEARNING;Zhao Y, 2018, ARXIV (CORNELL UNIVERSITY);Elke E, 2019, SGEM INTERNATIONAL MULTIDISCIPLINARY SCIENTIFIC CONFERENCES ON SOCIAL SCIENCES AND ARTS;Bagdasaryan E, 2018, ARXIV (CORNELL UNIVERSITY);Lin Y, 2017, ARXIV (CORNELL UNIVERSITY);Geyer R, 2017, ARXIV (CORNELL UNIVERSITY);Hardy S, 2017, ARXIV (CORNELL UNIVERSITY);Hitaj B, 2017, ARXIV (CORNELL UNIVERSITY);Gascón A, 2016, IACR CRYPTOLOGY EPRINT ARCHIVE;, 1999, ;Nock R, 2018, ARXIV (CORNELL UNIVERSITY);Kilbertus N, 2025, WARWICK RESEARCH ARCHIVE PORTAL (UNIVERSITY OF WARWICK);Su L, 2018, ARXIV (CORNELL UNIVERSITY);Giacomelli I, 2017, IACR CRYPTOLOGY EPRINT ARCHIVE",,,OPENALEX,"Yang Q, 2019, ACM TRANSACTIONS ON INTELLIGENT SYSTEMS AND TECHNOLOGY","Yang Q, 2019, ACM TRANSACTIONS ON INTELLIGENT SYSTEMS AND TECHNOLOGY" +https://openalex.org/W2402144811,10.5555/3026877.3026899,TensorFlow: a system for large-scale machine learning,2016,en,article,6353,OPERATING SYSTEMS DESIGN AND IMPLEMENTATION,Operating Systems Design and Implementation,Martı́n Abadi;Paul Barham;Jianmin Chen;Zhifeng Chen;Andy Davis;Jay B. Dean;Matthieu Devin;Sanjay Ghemawat;Geoffrey Irving;Michael Isard;Manjunath Kudlur;Josh Levenberg;Rajat Monga;Sherry Moore;Derek G. Murray;Benoit Steiner;Paul A. Tucker;Vijay Vasudevan;Pete Warden;Martin Wicke;Yuan Yu;Xiaoqiang Zheng,Martı́n Abadi;Paul Barham;Jianmin Chen;Zhifeng Chen;Andy Davis;Jay B. Dean;Matthieu Devin;Sanjay Ghemawat;Geoffrey Irving;Michael Isard;Manjunath Kudlur;Josh Levenberg;Rajat Monga;Sherry Moore;Derek G. 
Murray;Benoit Steiner;Paul A. Tucker;Vijay Vasudevan;Pete Warden;Martin Wicke;Yuan Yu;Xiaoqiang Zheng,[Google Brain];[Google Brain];[Google Brain];[Google Brain];[Google Brain];[Google Brain];[Google Brain];[Google Brain];[Google Brain];[Google Brain];[Google Brain];[Google Brain];[Google Brain];[Google Brain];[Google Brain];[Google Brain];[Google Brain];[Google Brain];[Google Brain];[Google Brain];[Google Brain];[Google Brain],,"TensorFlow is a machine learning system that operates at large scale and in heterogeneous environments. Tensor-Flow uses dataflow graphs to represent computation, shared state, and the operations that mutate that state. It maps the nodes of a dataflow graph across many machines in a cluster, and within a machine across multiple computational devices, including multicore CPUs, general-purpose GPUs, and custom-designed ASICs known as Tensor Processing Units (TPUs). This architecture gives flexibility to the application developer: whereas in previous parameter server designs the management of shared state is built into the system, TensorFlow enables developers to experiment with novel optimizations and training algorithms. TensorFlow supports a variety of applications, with a focus on training and inference on deep neural networks. Several Google services use TensorFlow in production, we have released it as an open-source project, and it has become widely used for machine learning research. 
In this paper, we describe the TensorFlow dataflow model and demonstrate the compelling performance that TensorFlow achieves for several real-world applications.",,,265,283,Dataflow;Computer science;Artificial intelligence;Multi-core processor;Machine learning;Computer architecture;Deep learning;Scalability;Inference;Artificial neural network;Dataflow architecture;Computation;Distributed computing;Parallel computing;Programming language;Operating system,US,"He K, 2016, ;Hochreiter S, 1997, NEURAL COMPUTATION;Szegedy C, 2015, ;Russakovsky O, 2015, INTERNATIONAL JOURNAL OF COMPUTER VISION;Rumelhart D, 1986, NATURE;Mnih V, 2015, NATURE;Hinton G, 2012, IEEE SIGNAL PROCESSING MAGAZINE;Duchi J, 2010, ;Karpathy A, 2014, ;Zaharia M, 2012, ;Sutskever I, 2013, ;Dayalan M, 2018, INTERNATIONAL JOURNAL OF RESEARCH AND ENGINEERING;Dean J, 2012, ;Isard M, 2007, ;, 2000, APPLIED PHYSICS LETTERS;Team T, 2016, ARXIV (CORNELL UNIVERSITY);Verma A, 2015, ;Jordan M, 1997, ADVANCES IN PSYCHOLOGY;Larochelle H, 2009, ;Li M, 2014, OPERATING SYSTEMS DESIGN AND IMPLEMENTATION;Lavin A, 2016, ;Jean S, 2015, ;Hinton G, 1989, ESCHOLARSHIP (CALIFORNIA DIGITAL LIBRARY);Yu Y, 2008, ;Ranzato M, 2012, INTERNATIONAL CONFERENCE ON MACHINE LEARNING;Chilimbi T, 2014, OPERATING SYSTEMS DESIGN AND IMPLEMENTATION;Collobert R, 2002, INFOSCIENCE (ECOLE POLYTECHNIQUE FÉDÉRALE DE LAUSANNE);Smola A, 2010, PROCEEDINGS OF THE VLDB ENDOWMENT;Zeiler M, 2013, ;Byrd R, 2012, MATHEMATICAL PROGRAMMING;Cui H, 2016, ;Heigold G, 2013, ;McSherry F, 2015, ;Rossbach C, 2013, ;Chung E, 2013, ;Chelba C, 2014, ;Angelova A, 2015, ;Gónzalez-Domínguez J, 2014, NEURAL NETWORKS;Murray D, 2016, COMMUNICATIONS OF THE ACM;Wu Y, 2016, ARXIV (CORNELL UNIVERSITY);Pascanu R, 2012, ARXIV (CORNELL UNIVERSITY);Chen T, 2015, ARXIV (CORNELL UNIVERSITY);Chetlur S, 2014, ARXIV (CORNELL UNIVERSITY);Mnih V, 2014, ARXIV (CORNELL UNIVERSITY);Krizhevsky A, 2014, ARXIV (CORNELL UNIVERSITY);Józefowicz R, 2016, ARXIV (CORNELL UNIVERSITY);Xinghao P, 2017, 
ARXIV (CORNELL UNIVERSITY);Nair A, 2015, ARXIV (CORNELL UNIVERSITY);Maddison C, 2014, ARXIV (CORNELL UNIVERSITY);Ioffe S, 2024, ARXIV (CORNELL UNIVERSITY);Mikolov T, 2013, ARXIV (CORNELL UNIVERSITY);Sutskever I, 2014, ARXIV (CORNELL UNIVERSITY);Niu F, 2011, ARXIV (CORNELL UNIVERSITY)",,,OPENALEX,"Abadi M, 2016, OPERATING SYSTEMS DESIGN AND IMPLEMENTATION","Abadi M, 2016, OPERATING SYSTEMS DESIGN AND IMPLEMENTATION" +https://openalex.org/W2155653793,10.1016/s0031-3203(96)00142-2,The use of the area under the ROC curve in the evaluation of machine learning algorithms,1997,en,article,7227,PATTERN RECOGNITION,Pattern Recognition,Andrew P. Bradley,Andrew P. Bradley,"Cooperative Research Centre for Sensor Signal and Information Processing, Department of Electrical and Computer Engineering, The University of Queensland, QLD 4072, Australia","Andrew P. Bradley (corresponding author), Cooperative Research Centre for Sensor Signal and Information Processing, Department of Electrical and Computer Engineering, The University of Queensland, QLD 4072, Australia",,30,7,1145,1159,Algorithm;Receiver operating characteristic;Machine learning;Artificial intelligence;Perceptron;Computer science;Discriminant;Multilayer perceptron;Mathematics;Artificial neural network,AU,"Gordon A, 1984, BIOMETRICS;Quinlan J, 1992, ;Hanley J, 1982, RADIOLOGY;Ryzin J, 1986, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Efron B, 1979, THE ANNALS OF STATISTICS;Fukunaga K, 1990, ELSEVIER EBOOKS;Green D, 1966, ;Ingleby J, 1967, JOURNAL OF SOUND AND VIBRATION;Hanley J, 1983, RADIOLOGY;Fukunaga K, 1990, ACADEMIC PRESS PROFESSIONAL, INC. 
EBOOKS;, 2009, ;Devijver P, 1982, PRENTICE-HALL INTERNATIONAL EBOOKS;Beckman R, 1973, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Mullin A, 1963, AMERICAN MATHEMATICAL MONTHLY;Nguyen D, 1990, ;Swets J, 1982, ;Ziegel E, 2000, TECHNOMETRICS;Kavzoğlu T, 2024, ;Wolberg W, 1990, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Loader C, 1999, STATISCTICS AND COMPUTING/STATISTICS AND COMPUTING;Weiss S, 1991, ;Dorfman D, 1969, JOURNAL OF MATHEMATICAL PSYCHOLOGY;Smith J, 1988, PUBMED CENTRAL;Moore B, 1982, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES A (GENERAL);Swets J, 1979, INVESTIGATIVE RADIOLOGY;Detrano R, 1989, THE AMERICAN JOURNAL OF CARDIOLOGY;Gennari J, 1989, ARTIFICIAL INTELLIGENCE;Robinson J, 1990, TECHNOMETRICS;Schaffer C, 1993, MACHINE LEARNING;Picinbono B, 1995, IEEE TRANSACTIONS ON AEROSPACE AND ELECTRONIC SYSTEMS;Ivakhnenko A, 1976, CYBERNETICS AND SYSTEMS ANALYSIS;, 1989, CHOICE REVIEWS ONLINE;Schaffer C, 1993, MACHINE LEARNING;Friedman J, 1994, ;Cherkassky V, 1996, CERN DOCUMENT SERVER (EUROPEAN ORGANIZATION FOR NUCLEAR RESEARCH);Nelson L, 1994, JOURNAL OF QUALITY TECHNOLOGY;Walker R, 2002, ;Ray M, 1994, THE ANNALS OF THORACIC SURGERY;Lovell B, 1996, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Raubertas R, 1994, MEDICAL DECISION MAKING;Bradley A, 2002, ;Rumelhart D, 1985, ;Gennari J, 1988, ;Twomey J, 2002, IEEE INTERNATIONAL CONFERENCE ON NEURAL NETWORKS",,,OPENALEX,"Bradley A, 1997, PATTERN RECOGNITION","Bradley A, 1997, PATTERN RECOGNITION" +https://openalex.org/W1506806321,,Pattern Recognition and Machine Learning (Information Science and Statistics),2006,en,book,8357,SPRINGER EBOOKS,Springer eBooks,Chris Bishop,Chris Bishop,,"Chris Bishop (corresponding author), ",,,,,,Artificial intelligence;Computer science;Statistics;Pattern recognition (psychology);Machine learning;Mathematics,,,,,OPENALEX,"Bishop C, 2006, SPRINGER EBOOKS","Bishop C, 2006, SPRINGER EBOOKS" +https://openalex.org/W2009086942,10.1198/tech.2007.s518,Pattern 
Recognition and Machine Learning,2007,en,article,4652,TECHNOMETRICS,Technometrics,Radford M. Neal,Radford M Neal,University of Toronto,"Radford M Neal (corresponding author), University of Toronto","(2007). Pattern Recognition and Machine Learning. Technometrics: Vol. 49, No. 3, pp. 366-366.",49,3,366,366,Artificial intelligence;Computer science;Machine learning;Pattern recognition (psychology),CA,,,,OPENALEX,"Neal R, 2007, TECHNOMETRICS","Neal R, 2007, TECHNOMETRICS" +https://openalex.org/W2131241448,10.48550/arxiv.1206.2944,Practical Bayesian Optimization of Machine Learning Algorithms,2012,en,preprint,5669,ARXIV (CORNELL UNIVERSITY),arXiv (Cornell University),Jasper Snoek;Hugo Larochelle;Ryan P. Adams,"Snoek, Jasper;Larochelle, Hugo;Adams, Ryan P.","[Department of Computer Science, University of Toronto];Department of Computer Science, University of Sherbrooke,;School of Engineering & Applied Sciences Harvard University",,"Machine learning algorithms frequently require careful tuning of model hyperparameters, regularization terms, and optimization parameters. Unfortunately, this tuning is often a ""black art"" that requires expert experience, unwritten rules of thumb, or sometimes brute-force search. Much more appealing is the idea of developing automatic approaches which can optimize the performance of a given learning algorithm to the task at hand. In this work, we consider the automatic tuning problem within the framework of Bayesian optimization, in which a learning algorithm's generalization performance is modeled as a sample from a Gaussian process (GP). The tractable posterior distribution induced by the GP leads to efficient use of the information gathered by previous experiments, enabling optimal choices about what parameters to try next. Here we show how the effects of the Gaussian process prior and the associated inference procedure can have a large impact on the success or failure of Bayesian optimization. 
We show that thoughtful choices can lead to results that exceed expert-level performance in tuning machine learning algorithms. We also describe new algorithms that take into account the variable cost (duration) of learning experiments and that can leverage the presence of multiple cores for parallel experimentation. We show that these proposed algorithms improve on previous automatic procedures and can reach or surpass human expert-level optimization on a diverse set of contemporary algorithms including latent Dirichlet allocation, structured SVMs and convolutional neural networks.",,,,,Bayesian optimization;Computer science;Bayesian probability;Machine learning;Artificial intelligence;Optimization algorithm;Algorithm;Mathematical optimization;Mathematics,CA;US,"Rasmussen C, 2005, THE MIT PRESS EBOOKS;Bergstra J, 2012, ;Kennedy M, 2001, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Cireşan D, 2012, ;Bergstra J, 2011, ;Hutter F, 2011, LECTURE NOTES IN COMPUTER SCIENCE;Jones D, 2001, JOURNAL OF GLOBAL OPTIMIZATION;Hoffman M, 2010, ;Kushner H, 1964, JOURNAL OF BASIC ENGINEERING;Kumar M, 2010, NEURAL INFORMATION PROCESSING SYSTEMS;Bonilla E, 2007, EDINBURGH RESEARCH EXPLORER;Yu C, 2009, ;Saxe A, 2011, ;Bull A, 2011, JOURNAL OF MACHINE LEARNING RESEARCH;Teh Y, 2005, INFOSCIENCE (ECOLE POLYTECHNIQUE FÉDÉRALE DE LAUSANNE);Mahendran N, 2012, ;Miller K, 2012, HAL (LE CENTRE POUR LA COMMUNICATION SCIENTIFIQUE DIRECTE);Krizhevsky A, 2024, ;Brochu E, 2010, ARXIV (CORNELL UNIVERSITY);Srinivas N, 2009, INFOSCIENCE (ECOLE POLYTECHNIQUE FÉDÉRALE DE LAUSANNE);Coates A, 2024, ;Murray I, 2010, ARXIV (CORNELL UNIVERSITY);Ginsbourger D, 2011, HAL (LE CENTRE POUR LA COMMUNICATION SCIENTIFIQUE DIRECTE)",,,OPENALEX,"Snoek J, 2012, ARXIV (CORNELL UNIVERSITY)","Snoek J, 2012, ARXIV (CORNELL UNIVERSITY)" +https://openalex.org/W2884430236,10.1038/s41586-018-0337-2,Machine learning for molecular and materials science,2018,en,review,4527,NATURE,Nature,Keith T. 
Butler;Daniel W. Davies;Hugh Cartwright;Olexandr Isayev;Aron Walsh,Keith T. Butler;Daniel W. Davies;Hugh Cartwright;Olexandr Isayev;Aron Walsh,"ISIS Facility, Rutherford Appleton Laboratory, Harwell Campus, Harwell, UK;Department of Chemistry, University of Bath, Bath, UK;Department of Chemistry, Oxford University, Oxford, UK;Eshelman School of Pharmacy, University of North Carolina at Chapel Hill, Chapel Hill, NC, USA. olexandr@olexandrisayev.com;Eshelman School of Pharmacy, University of North Carolina at Chapel Hill, Chapel Hill, NC, USA;Department of Materials Science and Engineering, Yonsei University, Seoul, South Korea. a.walsh@imperial.ac.uk;Department of Materials, Imperial College London, London, UK. a.walsh@imperial.ac.uk;Department of Materials Science and Engineering, Yonsei University, Seoul, South Korea","Aron Walsh (corresponding author), Department of Materials Science and Engineering, Yonsei University, Seoul, South Korea. a.walsh@imperial.ac.uk; Department of Materials, Imperial College London, London, UK. 
a.walsh@imperial.ac.uk; Department of Materials Science and Engineering, Yonsei University, Seoul, South Korea",,559,7715,547,555,Computer science;Field (mathematics);Data science;Characterization (materials science);Domain (mathematical analysis);Artificial intelligence;Cognitive science;Nanotechnology;Machine learning;Psychology;Materials science,GB;US;KR,"Kohn W, 1965, PHYSICAL REVIEW;Schmidhuber J, 2014, NEURAL NETWORKS;Jain A, 2013, APL MATERIALS;Shawe‐Taylor J, 2004, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Sterling T, 2015, JOURNAL OF CHEMICAL INFORMATION AND MODELING;Harrow A, 2009, PHYSICAL REVIEW LETTERS;Bartók A, 2010, PHYSICAL REVIEW LETTERS;Lake B, 2015, SCIENCE;Schmidt M, 2009, SCIENCE;Drori I, 2022, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Saal J, 2013, JOM;Rupp M, 2012, PHYSICAL REVIEW LETTERS;Hansch C, 1964, JOURNAL OF THE AMERICAN CHEMICAL SOCIETY;Curtarolo S, 2013, NATURE MATERIALS;Tropsha A, 2010, MOLECULAR INFORMATICS;Dirac P, 1929, PROCEEDINGS OF THE ROYAL SOCIETY OF LONDON SERIES A CONTAINING PAPERS OF A MATHEMATICAL AND PHYSICAL CHARACTER;Lejaeghere K, 2016, SCIENCE;Aspuru‐Guzik A, 2005, SCIENCE;Ghiringhelli L, 2015, PHYSICAL REVIEW LETTERS;Hand D, 2001, INTERNATIONAL STATISTICAL REVIEW;Pilania G, 2013, SCIENTIFIC REPORTS;Fourches D, 2010, JOURNAL OF CHEMICAL INFORMATION AND MODELING;Billinge S, 2007, SCIENCE;Corey E, 1969, SCIENCE;Hachmann J, 2011, THE JOURNAL OF PHYSICAL CHEMISTRY LETTERS;Snyder J, 2012, PHYSICAL REVIEW LETTERS;Havu V, 2009, JOURNAL OF COMPUTATIONAL PHYSICS;Hautier G, 2010, CHEMISTRY OF MATERIALS;Schütt K, 2014, PHYSICAL REVIEW B;Calderón C, 2015, COMPUTATIONAL MATERIALS SCIENCE;Faber F, 2016, PHYSICAL REVIEW LETTERS;Shakhnarovich G, 2005, ;Kalinin S, 2015, NATURE MATERIALS;Handley C, 2010, THE JOURNAL OF PHYSICAL CHEMISTRY A;Franceschetti A, 1999, NATURE;Christensen R, 2015, CATALYSIS SCIENCE & TECHNOLOGY;Fleuren W, 2015, METHODS;Kireeva N, 2012, MOLECULAR INFORMATICS;Pople J, 1999, ANGEWANDTE CHEMIE INTERNATIONAL EDITION;Bonchev D, 
1994, MEDICAL ENTOMOLOGY AND ZOOLOGY;Albuquerque V, 2008, NONDESTRUCTIVE TESTING AND EVALUATION;Wicker J, 2014, CRYSTENGCOMM;Kühn C, 1996, THE JOURNAL OF PHYSICAL CHEMISTRY;Walsh A, 2015, NATURE CHEMISTRY;, 2013, ;Oprea T, 2006, DRUG DISCOVERY TODAY TECHNOLOGIES;Kiselyova N, 1998, JOURNAL OF ALLOYS AND COMPOUNDS;Duvenaud D, 2015, ARXIV (CORNELL UNIVERSITY);Jastrzȩbski S, 2016, ARXIV (CORNELL UNIVERSITY);Biamonte J, 2017, NATURE;Smith J, 2017, CHEMICAL SCIENCE;Segler M, 2018, NATURE;Raccuglia P, 2016, NATURE;, 2021, ENCYCLOPEDIA OF THE UN SUSTAINABLE DEVELOPMENT GOALS;Rudy S, 2017, SCIENCE ADVANCES;Wellendorff J, 2012, PHYSICAL REVIEW B;Carrasquilla J, 2017, NATURE PHYSICS;Steane A, 1998, REPORTS ON PROGRESS IN PHYSICS;Agrawal A, 2016, APL MATERIALS;Mardirossian N, 2016, THE JOURNAL OF CHEMICAL PHYSICS;Gómez‐Bombarelli R, 2016, NATURE MATERIALS;Altae-Tran H, 2017, ACS CENTRAL SCIENCE;Reiher M, 2017, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Behler J, 2017, ANGEWANDTE CHEMIE INTERNATIONAL EDITION;Faber F, 2017, JOURNAL OF CHEMICAL THEORY AND COMPUTATION;Isayev O, 2017, NATURE COMMUNICATIONS;Szymkuć S, 2016, ANGEWANDTE CHEMIE INTERNATIONAL EDITION;Segler M, 2017, CHEMISTRY - A EUROPEAN JOURNAL;Brockherde F, 2017, NATURE COMMUNICATIONS;Liu B, 2017, ACS CENTRAL SCIENCE;Pulido A, 2017, NATURE;Kim E, 2017, CHEMISTRY OF MATERIALS;Dunjko V, 2016, PHYSICAL REVIEW LETTERS;Ward L, 2017, PHYSICAL REVIEW. B./PHYSICAL REVIEW. 
B;Oliynyk A, 2016, CHEMISTRY OF MATERIALS;Klucznik T, 2018, CHEM;Duan Y, 2017, NEURAL INFORMATION PROCESSING SYSTEMS;Hill J, 2016, MRS BULLETIN;Davies D, 2016, CHEM;Pensak D, 1977, ACS SYMPOSIUM SERIES;Legrain F, 2017, THE JOURNAL OF PHYSICAL CHEMISTRY B;Ziatdinov M, 2017, NPJ COMPUTATIONAL MATERIALS;Dragone V, 2017, NATURE COMMUNICATIONS;Wilkinson K, 2014, JOURNAL OF CHEMICAL THEORY AND COMPUTATION;Norbert J, 2011, STUDIES IN COMPUTATIONAL INTELLIGENCE;Seko A, 2018, ;Arita M, 2014, JOURNAL OF CHEMICAL THEORY AND COMPUTATION;Coudert F, 2017, CHEMISTRY OF MATERIALS;Tetko I, 2016, MOLECULAR INFORMATICS;Cole J, 2016, ACTA CRYSTALLOGRAPHICA SECTION B STRUCTURAL SCIENCE CRYSTAL ENGINEERING AND MATERIALS;Boyd D, 2013, ACS SYMPOSIUM SERIES;Moot T, 2016, MATERIALS DISCOVERY;Pillong M, 2017, CRYSTENGCOMM;Trabesinger A, 2017, NATURE;Hui Y, 2019, ADVANCES IN INTELLIGENT SYSTEMS AND COMPUTING;Guimaraes G, 2017, ARXIV (CORNELL UNIVERSITY);Nam J, 2016, ARXIV (CORNELL UNIVERSITY);Calderón C, 2015, ARXIV (CORNELL UNIVERSITY);F B, 2017, MPG.PURE (MAX PLANCK SOCIETY);R.B. M, 1993, JOURNAL OF MOLECULAR STRUCTURE THEOCHEM;Rokach L, 2009, ",,,OPENALEX,"Butler K, 2018, NATURE","Butler K, 2018, NATURE" +https://openalex.org/W2913668833,,Proceedings of the 25th international conference on Machine learning,2008,en,article,5550,,,William W. Cohen;Andrew McCallum;Sam T. Roweis,William W. Cohen;Andrew McCallum;Sam T. Roweis,"Carnegie Mellon University;University of Massachusetts, Amherst;University of Toronto and Google#TAB#",,"This volume contains the papers accepted to the 25th International Conference on Machine Learning (ICML 2008). ICML is the annual conference of the International Machine Learning Society (IMLS), and provides a venue for the presentation and discussion of current research in the field of machine learning. These proceedings can also be found online at http://www.machinelearning.org. 
+ +This year, ICML was held July 5..9 at the University of Helsinki, in Helsinki, Finland, and was co-located with COLT-2008, the 21st Annual Conference on Computational Learning Theory, and UAI-2008, the 24th Conference on Uncertainty in Artificial Intelligence. No less than 583 papers were submitted to ICML 2008. There was a very thorough review process, in which each paper was reviewed double-blind by three program committee (PC) members. Authors were able to respond to the initial reviews, and the PC members could then modify their reviews based on online discussions and the content of this author response. There were two discussion periods led by the senior program committee (SPC), one just before and one after the submission of author responses. At the end of the second discussion period, the SPC members gave their recommendations and provided a summary review for each of their papers. Some papers were checked by the SPCs to ensure that reviewer comments had been addressed. Apart from the length restrictions on papers and the compressed time frame, the review process for ICML resembles that of many journal publications. In total, 158 papers were accepted to ICML this year, including a small number of papers which were initially conditionally accepted, yielding an overall acceptance rate of 27%. + +ICML authors presented their papers both orally and in a poster session, allowing time for detailed discussions with any interested attendees of the conference. Each day of the main conference included one or two invited talks by a prominent researcher. We were very fortunate to be able to host Michael Collins, of the Massachusetts Institute of Technology; Andrew Ng, of Stanford University; and Luc De Raedt, of the Katholieke Universiteit Leuven, and John Winn of Microsoft Research Cambridge. 
In addition to the technical talks, ICML- 2008 also included nine tutorials held before the main conference, presented by Alex Smola, Arthur Gretton, and Kenji Fukumizu; Bert Kappen and Marc Toussaint; Neil Lawrence; MartinWainwright; Ralf Herbrich and Thore Graepel; Andreas Krause and Carlos Guestrin; Shai Shalev-Shwartz and Yoram Singer; Rob Fergus; and Matthias Seeger. This year our workshops were organized jointly with COLT and UAI as part of a special overlap day, consisting of eleven workshops selected and arranged collaboratively by the respective workshop chairs of the three conferences. This day provided a rich opportunity for interaction among the attendees of the conferences. + +This year, ICML enlarged its award offerings to match several other well-established conferences. We hope these will help build our community, celebrate our advances, and encourage applications and long-term thinking. In addition to our previously traditional Paper and Student Paper awards, we also gave awards for Application Paper and 10-year Best Paper (for the best paper of ICML 1998, optionally given in conjunction with a co-located conference). We thank the Machine Learning Journal for sponsoring some of our paper awards.",,,,,Presentation (obstetrics);Library science;Computer science;Medical education;Artificial intelligence;Medicine,US;CA,,,,OPENALEX,"Cohen W, 2008, ","Cohen W, 2008, " +https://openalex.org/W2934399013,10.1056/nejmra1814259,Machine Learning in Medicine,2019,en,review,3966,NEW ENGLAND JOURNAL OF MEDICINE,New England Journal of Medicine,Alvin Rajkomar;Jay B. Dean;Isaac S. 
Kohane,Alvin Rajkomar;Jeffrey Dean;Isaac Kohane,"From Google, Mountain View, CA (A.R., J.D.) and the Department of Biomedical Informatics, Harvard Medical School, Boston (I.K.);Google, Mountain View, CA;From Google, Mountain View, CA (A.R., J.D.) and the Department of Biomedical Informatics, Harvard Medical School, Boston (I.K.);Google, Mountain View, CA;From Google, Mountain View, CA (A.R., J.D.) and the Department of Biomedical Informatics, Harvard Medical School, Boston (I.K.);Department of Biomedical Informatics, Harvard Medical School, Boston",,"Interview with Dr. Isaac Kohane on machine learning in medicine. (16:31)Download In this view of the future of medicine, patient–provider interactions are informed and supported by massive amounts of data from interactions with similar patients. These data are collected and curated to provide the latest evidence-based assessment and recommendations.",380,14,1347,1358,Download;Computer science;Data science;Medicine;Medical education;Artificial intelligence;World Wide Web,US,"Esteva A, 2017, NATURE;Gulshan V, 2016, JAMA;Haynes A, 2009, NEW ENGLAND JOURNAL OF MEDICINE;Kermany D, 2018, CELL;Bejnordi B, 2017, JAMA;Rajkomar A, 2018, NPJ DIGITAL MEDICINE;Ting D, 2017, JAMA;Beam A, 2018, JAMA;Poplin R, 2018, NATURE BIOMEDICAL ENGINEERING;Halevy A, 2009, IEEE INTELLIGENT SYSTEMS;Castelvecchi D, 2016, NATURE;Sinsky C, 2016, ANNALS OF INTERNAL MEDICINE;Bates D, 2014, HEALTH AFFAIRS;Cabitza F, 2017, JAMA;Arndt B, 2017, THE ANNALS OF FAMILY MEDICINE;Mandel J, 2016, JOURNAL OF THE AMERICAN MEDICAL INFORMATICS ASSOCIATION;Frieden T, 2017, NEW ENGLAND JOURNAL OF MEDICINE;Muntner P, 2014, JAMA;Krumholz H, 2014, HEALTH AFFAIRS;Hersh W, 2013, MEDICAL CARE;Krause J, 2018, OPHTHALMOLOGY;Elmore J, 2017, BMJ;Jong M, 2017, THE LANCET;Lyell D, 2016, JOURNAL OF THE AMERICAN MEDICAL INFORMATICS ASSOCIATION;Schneeweiß S, 2014, NEW ENGLAND JOURNAL OF MEDICINE;Mandl K, 2001, BMJ;Cohen I, 2014, HEALTH AFFAIRS;Lindenauer P, 2012, JAMA;Kesselheim 
A, 2011, HEALTH AFFAIRS;Obermeyer Z, 2017, NEW ENGLAND JOURNAL OF MEDICINE;Erickson S, 2017, ANNALS OF INTERNAL MEDICINE;Schwartz W, 1970, NEW ENGLAND JOURNAL OF MEDICINE;Hill R, 2013, THE AMERICAN JOURNAL OF EMERGENCY MEDICINE;Amarasingham R, 2014, HEALTH AFFAIRS;Mamykina L, 2016, ACADEMIC MEDICINE;Shuren J, 2016, JAMA;Fiore L, 2016, NEW ENGLAND JOURNAL OF MEDICINE;Bakris G, 2018, NEW ENGLAND JOURNAL OF MEDICINE;Escobar G, 2016, JOURNAL OF HOSPITAL MEDICINE;McGlynn E, 2015, JAMA;Mandl K, 2016, NEW ENGLAND JOURNAL OF MEDICINE;Sniderman A, 2015, JAMA;Steinhubl S, 2018, THE LANCET;Schuster M, 2017, JAMA;Slack W, 1966, NEW ENGLAND JOURNAL OF MEDICINE;Oxentenko A, 2010, ARCHIVES OF INTERNAL MEDICINE;Reis B, 2009, BMJ;Gabriels K, 2018, JOURNAL OF MEDICAL INTERNET RESEARCH;Sittig D, 2015, JOURNAL OF THE AMERICAN MEDICAL INFORMATICS ASSOCIATION;Rajkomar A, 2016, JMIR MEDICAL INFORMATICS;LeCun Y, 2015, NATURE;, 1996, COMPUTERS & MATHEMATICS WITH APPLICATIONS;Topol E, 2018, NATURE MEDICINE;Fauw J, 2018, NATURE MEDICINE;Alvin R, 2018, ;Ford I, 2016, NEW ENGLAND JOURNAL OF MEDICINE;Gianfrancesco M, 2018, JAMA INTERNAL MEDICINE;Rajkomar A, 2018, ANNALS OF INTERNAL MEDICINE;Chilamkurthy S, 2018, THE LANCET;Wang P, 2019, GUT;Hinton G, 2018, JAMA;Grinfeld J, 2018, NEW ENGLAND JOURNAL OF MEDICINE;Denis F, 2019, JAMA;Tison G, 2018, JAMA CARDIOLOGY;Mori Y, 2018, ANNALS OF INTERNAL MEDICINE;Steiner D, 2018, THE AMERICAN JOURNAL OF SURGICAL PATHOLOGY;Liu Y, 2018, ARCHIVES OF PATHOLOGY & LABORATORY MEDICINE;Morawski K, 2018, JAMA INTERNAL MEDICINE;Howe J, 2018, JAMA;Fraser H, 2018, THE LANCET;Kale M, 2018, BMJ;Das J, 2018, BMJ;Schwartz W, 1987, NEW ENGLAND JOURNAL OF MEDICINE;Galloway C, 2018, JOURNAL OF THE AMERICAN COLLEGE OF CARDIOLOGY;Berwick D, 2018, JAMA;Auerbach A, 2018, ANNALS OF INTERNAL MEDICINE;Kannan A, 2018, ;Lee V, 2018, JAMA",,,OPENALEX,"Rajkomar A, 2019, NEW ENGLAND JOURNAL OF MEDICINE","Rajkomar A, 2019, NEW ENGLAND JOURNAL OF MEDICINE" 
+https://openalex.org/W2149684865,10.1007/bfb0026683,Text categorization with Support Vector Machines: Learning with many relevant features,1998,en,book-chapter,7995,LECTURE NOTES IN COMPUTER SCIENCE,Lecture notes in computer science,Thorsten Joachims,Thorsten Joachims,"Universität Dortmund, Inforinatik LS8, Baroper Str. 301, 44221, Dortmund, Germany","Thorsten Joachims (corresponding author), Universität Dortmund, Inforinatik LS8, Baroper Str. 301, 44221, Dortmund, Germany",,,,137,142,Support vector machine;Computer science;Machine learning;Artificial intelligence;Categorization;Text categorization;Task (project management);Variety (cybernetics);Empirical research;Relevance vector machine;Mathematics,DE,"Vapnik V, 1995, ;Cortes C, 1995, MACHINE LEARNING;Quinlan J, 1992, ;Salton G, 1988, INFORMATION PROCESSING & MANAGEMENT;Joachims T, 1998, LECTURE NOTES IN COMPUTER SCIENCE;Salzberg S, 1994, ;Yang Y, 1997, ;Rocchio J, 1971, MEDICAL ENTOMOLOGY AND ZOOLOGY;Vapnik V, 1995, MEDICAL ENTOMOLOGY AND ZOOLOGY;Yang Y, 1999, INFORMATION RETRIEVAL;Joachims T, 1997, ;Kivinen J, 1997, ARTIFICIAL INTELLIGENCE;Kivinen J, 1995, ",,,OPENALEX,"Joachims T, 1998, LECTURE NOTES IN COMPUTER SCIENCE","Joachims T, 1998, LECTURE NOTES IN COMPUTER SCIENCE" +https://openalex.org/W1485009520,10.48550/arxiv.1506.04214,Convolutional LSTM Network: A Machine Learning Approach for Precipitation Nowcasting,2015,en,preprint,6648,ARXIV (CORNELL UNIVERSITY),arXiv (Cornell University),Xingjian Shi;Zhourong Chen;Hao Wang;Dit‐Yan Yeung;Wai Kin Wong;Wang‐chun Woo,"Shi, Xingjian;Chen, Zhourong;Wang, Hao;Yeung, Dit-Yan;Wong, Wai-kin;Woo, Wang-chun","Department of Computer Science and Engineering Hong Kong University of Science and Technology;Department of Computer Science and Engineering Hong Kong University of Science and Technology;Department of Computer Science and Engineering Hong Kong University of Science and Technology;Department of Computer Science and Engineering Hong Kong University of Science and 
Technology;Hong Kong Observatory Hong Kong, China;Hong Kong Observatory Hong Kong, China",,"The goal of precipitation nowcasting is to predict the future rainfall intensity in a local region over a relatively short period of time. Very few previous studies have examined this crucial and challenging weather forecasting problem from the machine learning perspective. In this paper, we formulate precipitation nowcasting as a spatiotemporal sequence forecasting problem in which both the input and the prediction target are spatiotemporal sequences. By extending the fully connected LSTM (FC-LSTM) to have convolutional structures in both the input-to-state and state-to-state transitions, we propose the convolutional LSTM (ConvLSTM) and use it to build an end-to-end trainable model for the precipitation nowcasting problem. Experiments show that our ConvLSTM network captures spatiotemporal correlations better and consistently outperforms FC-LSTM and the state-of-the-art operational ROVER algorithm for precipitation nowcasting.",,,,,Nowcasting;Computer science;Convolutional neural network;Artificial intelligence;Precipitation;State (computer science);Machine learning;Perspective (graphical);Pattern recognition (psychology);Algorithm;Meteorology;Geography,HK;CN,"Hochreiter S, 1997, NEURAL COMPUTATION;Long J, 2015, ;Cho K, 2014, ;Goodfellow I, 2016, MIT PRESS EBOOKS;Donahue J, 2015, ;Karpathy A, 2015, ;Elena L, 2013, DROPS (SCHLOSS DAGSTUHL – LEIBNIZ CENTER FOR INFORMATICS);Brox T, 2004, LECTURE NOTES IN COMPUTER SCIENCE;Sun J, 2013, BULLETIN OF THE AMERICAN METEOROLOGICAL SOCIETY;Germann U, 2002, MONTHLY WEATHER REVIEW;Bridson R, 2011, ;Bridson R, 2015, ;Klein B, 2015, ;Sakaino H, 2012, IEEE TRANSACTIONS ON GEOSCIENCE AND REMOTE SENSING;Woo W, 2014, 27TH CONFERENCE ON SEVERE LOCAL STORMS;Douglas R, 1990, AMERICAN METEOROLOGICAL SOCIETY EBOOKS;Sutskever I, 2014, ARXIV (CORNELL UNIVERSITY);Xu K, 2015, ARXIV (CORNELL UNIVERSITY);Pascanu R, 2012, ARXIV (CORNELL UNIVERSITY);Cho K, 
2014, ARXIV (CORNELL UNIVERSITY);Long J, 2014, ARXIV (CORNELL UNIVERSITY);Srivastava N, 2015, ARXIV (CORNELL UNIVERSITY);Donahue J, 2014, ;Bastien F, 2012, ARXIV (CORNELL UNIVERSITY);Ranzato M, 2014, ARXIV (CORNELL UNIVERSITY)",,,OPENALEX,"Shi X, 2015, ARXIV (CORNELL UNIVERSITY)","Shi X, 2015, ARXIV (CORNELL UNIVERSITY)" +https://openalex.org/W1502922572,10.1007/978-3-540-28650-9_4,Gaussian Processes in Machine Learning,2004,en,book-chapter,5117,LECTURE NOTES IN COMPUTER SCIENCE,Lecture notes in computer science,Carl Edward Rasmussen,Carl Edward Rasmussen,"Max Planck Institute for Biological Cybernetics, 72076, Tübingen, Germany","Carl Edward Rasmussen (corresponding author), Max Planck Institute for Biological Cybernetics, 72076, Tübingen, Germany",,,,63,71,Computer science;Hyperparameter;Gaussian process;Focus (optics);Machine learning;Artificial intelligence;Process (computing);Gaussian;Marginal likelihood;Simple (philosophy);Kriging;Marginal distribution;Algorithm;Mathematical optimization;Random variable;Statistics;Mathematics,DE,"Jordan M, 1998, ;Williams C, 1998, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Csató L, 2002, NEURAL COMPUTATION;Williams C, 1998, ",,,OPENALEX,"Rasmussen C, 2004, LECTURE NOTES IN COMPUTER SCIENCE","Rasmussen C, 2004, LECTURE NOTES IN COMPUTER SCIENCE" +https://openalex.org/W114517082,10.1007/978-3-7908-2604-3_16,Large-Scale Machine Learning with Stochastic Gradient Descent,2010,en,book-chapter,5628,,,Léon Bottou,Léon Bottou,"NEC Labs America, Princeton, NJ, 08542, USA","Léon Bottou (corresponding author), NEC Labs America, Princeton, NJ, 08542, USA","During the last decade, the data sizes have grown faster than the speed of processors. In this context, the capabilities of statistical machine learning methods is limited by the computing time rather than the sample size. A more precise analysis uncovers qualitatively different tradeoffs for the case of small-scale and large-scale learning problems. 
The large-scale case involves the computational complexity of the underlying optimization algorithm in non-trivial ways. Unlikely optimization algorithms such as stochastic gradient descent show amazing performance for large-scale problems. In particular, second order stochastic gradient and averaged stochastic gradient are asymptotically efficient after a single pass on the training set.",,,177,186,Stochastic gradient descent;Computer science;Scale (ratio);Stochastic optimization;Gradient descent;Set (abstract data type);Online machine learning;Context (archaeology);Sample (material);Artificial intelligence;Algorithm;Mathematical optimization;Machine learning;Mathematics;Active learning (machine learning);Artificial neural network,US,"Tibshirani R, 1996, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Cortes C, 1995, MACHINE LEARNING;MacQueen J, 1967, DEFENSE TECHNICAL INFORMATION CENTER (DTIC);, 2007, THE MIT PRESS EBOOKS;Lafferty J, 2001, SCHOLARLY COMMONS (UNIVERSITY OF PENNSYLVANIA);Dennis J, 1996, SOCIETY FOR INDUSTRIAL AND APPLIED MATHEMATICS EBOOKS;Vapnik V, 2015, ;Lewis D, 2004, GOLDSMITHS (UNIVERSITY OF LONDON);Vapnik V, 1971, THEORY OF PROBABILITY AND ITS APPLICATIONS;Joachims T, 2006, ;Polyak B, 1992, SIAM JOURNAL ON CONTROL AND OPTIMIZATION;Bottou L, 2011, THE MIT PRESS EBOOKS;Tsybakov A, 2004, THE ANNALS OF STATISTICS;Bordes A, 2009, HAL (LE CENTRE POUR LA COMMUNICATION SCIENTIFIQUE DIRECTE);Lin C, 2007, ;Shalev‐Shwartz S, 2008, ;Massart P, 2000, ANNALES DE LA FACULTÉ DES SCIENCES DE TOULOUSE MATHÉMATIQUES;Bottou L, 2005, APPLIED STOCHASTIC MODELS IN BUSINESS AND INDUSTRY;Lee W, 1998, IEEE TRANSACTIONS ON INFORMATION THEORY;Bousquet O, 2002, OPENGREY (INSTITUT DE L'INFORMATION SCIENTIFIQUE ET TECHNIQUE);Murata N, 1999, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Rumelhart D, 1985, ;Widrow B, 1960, ;Xu W, 2011, ARXIV (CORNELL UNIVERSITY)",,,OPENALEX,"Bottou L, 2010, ","Bottou L, 2010, " 
+https://openalex.org/W2945976633,10.1038/s42256-019-0048-x,Stop explaining black box machine learning models for high stakes decisions and use interpretable models instead,2019,en,article,8965,NATURE MACHINE INTELLIGENCE,Nature Machine Intelligence,Cynthia Rudin,Cynthia Rudin,"Duke University;Duke University, Durham, NC, USA","Cynthia Rudin (corresponding author), Duke University; Duke University, Durham, NC, USA",,1,5,206,215,Black box;Harm;Computer science;Key (lock);Criminal justice;Artificial intelligence;Economic Justice;Machine learning;Data science;Criminology;Psychology;Computer security;Political science;Social psychology;Law,US,"Ryzin J, 1986, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Miller G, 1956, PSYCHOLOGICAL REVIEW;Miller G, 1994, PSYCHOLOGICAL REVIEW;Ganin Y, 2017, ADVANCES IN COMPUTER VISION AND PATTERN RECOGNITION;Fayyad U, 1996, ;Goodman B, 2017, AI MAGAZINE;Murdoch W, 2019, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Holte R, 1993, MACHINE LEARNING;Zech J, 2018, PLOS MEDICINE;Cowan N, 2010, CURRENT DIRECTIONS IN PSYCHOLOGICAL SCIENCE;Mittelstadt B, 2019, ;Hand D, 2006, STATISTICAL SCIENCE;Freitas A, 2014, ACM SIGKDD EXPLORATIONS NEWSLETTER;Lou Y, 2013, ;Lou Y, 2012, ;Huysmans J, 2010, DECISION SUPPORT SYSTEMS;Li O, 2018, PROCEEDINGS OF THE AAAI CONFERENCE ON ARTIFICIAL INTELLIGENCE;Ustun B, 2017, JAMA PSYCHIATRY;Brennan T, 2008, CRIMINAL JUSTICE AND BEHAVIOR;Flores A, 2016, FEDERAL PROBATION;Razavian N, 2015, BIG DATA;Zeng J, 2016, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES A (STATISTICS IN SOCIETY);Wang F, 2015, INTERNATIONAL CONFERENCE ON ARTIFICIAL INTELLIGENCE AND STATISTICS;Rudin C, 2018, ARXIV (CORNELL UNIVERSITY);Goodman B, 2016, ARXIV (CORNELL UNIVERSITY);Tollenaar N, 2012, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES A (STATISTICS IN SOCIETY);Rudin C, 2018, INFORMS JOURNAL ON APPLIED ANALYTICS;Gupta M, 2016, JOURNAL OF MACHINE LEARNING RESEARCH;Rudin C, 2020, HARVARD DATA SCIENCE REVIEW;Rüping S, 2006, ;Rudin C, 
2010, MACHINE LEARNING;Ustun B, 2017, ;Auer P, 1995, ELSEVIER EBOOKS;Carrizosa E, 2009, INFORMS JOURNAL ON COMPUTING;Wang J, 2018, ;Gallagher N, 2017, NEURAL INFORMATION PROCESSING SYSTEMS;Chang A, 2012, MACHINE LEARNING;Goel A, 2017, AI MAGAZINE;Wang F, 2018, BIOSTATISTICS;Mannshardt E, 2018, SIGNIFICANCE;Neri P, 2018, INFORMS JOURNAL ON APPLIED ANALYTICS;, , VIEW;Chen C, 2018, ARXIV (CORNELL UNIVERSITY);Li O, 2017, ARXIV (CORNELL UNIVERSITY);Chen C, 2018, ARXIV (CORNELL UNIVERSITY);Carrière M, 2017, ARXIV (CORNELL UNIVERSITY);Sokolovska N, 2018, BASE INSTITUTIONNELLE DE RECHERCHE DE L'UNIVERSITÉ PARIS-DAUPHINE (BIRD) (UNIVERSITY PARIS-DAUPHINE);, 2017, AUERBACH PUBLICATIONS EBOOKS",,,OPENALEX,"Rudin C, 2019, NATURE MACHINE INTELLIGENCE","Rudin C, 2019, NATURE MACHINE INTELLIGENCE" +https://openalex.org/W1680797894,10.1007/978-0-387-30164-8,Encyclopedia of Machine Learning,2010,en,book,3463,,,Claude Sammut,"Nijssen, Siegfried",University Of New south Wales,"Nijssen, Siegfried (corresponding author), University Of New south Wales",,,,,,Encyclopedia;Computer science;Artificial intelligence;Data science;Library science,AU,,,,OPENALEX,"Sammut C, 2010, ","Sammut C, 2010, " +https://openalex.org/W2619383789,10.1109/tpami.2018.2798607,Multimodal Machine Learning: A Survey and Taxonomy,2018,en,article,4218,IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE,IEEE Transactions on Pattern Analysis and Machine Intelligence,Tadas Baltrušaitis;Chaitanya Ahuja;Louis‐Philippe Morency,Tadas Baltrusaitis;Chaitanya Ahuja;Louis-Philippe Morency,"Microsoft Corporation, Cambridge, United Kingdom;Language Technologies Institute, Carnegie Mellon University, Pittsburgh, PA;Language Technologies Institute, Carnegie Mellon University, Pittsburgh, PA",,"Our experience of the world is multimodal - we see objects, hear sounds, feel texture, smell odors, and taste flavors. 
Modality refers to the way in which something happens or is experienced and a research problem is characterized as multimodal when it includes multiple such modalities. In order for Artificial Intelligence to make progress in understanding the world around us, it needs to be able to interpret such multimodal signals together. Multimodal machine learning aims to build models that can process and relate information from multiple modalities. It is a vibrant multi-disciplinary field of increasing importance and with extraordinary potential. Instead of focusing on specific multimodal applications, this paper surveys the recent advances in multimodal machine learning itself and presents them in a common taxonomy. We go beyond the typical early and late fusion categorization and identify broader challenges that are faced by multimodal machine learning, namely: representation, translation, alignment, fusion, and co-learning. This new taxonomy will enable researchers to better understand the state of the field and identify directions for future research.",41,2,423,443,Multimodal learning;Computer science;Artificial intelligence;Modalities;Taxonomy (biology);Categorization;Multimodality;Field (mathematics);Machine learning;Human–computer interaction;World Wide Web,GB;US,"Glorot X, 2010, ;Zhu Y, 2015, ;Salakhutdinov R, 2009, ;Plummer B, 2015, ;Kalchbrenner N, 2013, ;Hodosh M, 2013, JOURNAL OF ARTIFICIAL INTELLIGENCE RESEARCH;Andrew G, 2013, ;Ghahramani Z, 1997, MACHINE LEARNING;Yao L, 2015, ;Sutton C, 2007, THE MIT PRESS EBOOKS;Weston J, 2011, ;Pan Y, 2016, ;Wang W, 2015, ;Kumar S, 2012, ;Mitchell M, 2012, CONFERENCE OF THE EUROPEAN CHAPTER OF THE ASSOCIATION FOR COMPUTATIONAL LINGUISTICS;Louwerse M, 2010, TOPICS IN COGNITIVE SCIENCE;Kahou S, 2015, JOURNAL ON MULTIMODAL USER INTERFACES;Martin A, 2014, IT UNIVERSITY OF COPENHAGEN (IT UNIVERSITY OF COPENHAGEN);Li S, 2011, ;Kojima A, 2002, INTERNATIONAL JOURNAL OF COMPUTER VISION;Yang Y, 2011, ;Xu R, 2015, PROCEEDINGS OF THE 
AAAI CONFERENCE ON ARTIFICIAL INTELLIGENCE;Castellano G, 2008, LECTURE NOTES IN COMPUTER SCIENCE;Mroueh Y, 2015, ;Glodek M, 2011, LECTURE NOTES IN COMPUTER SCIENCE;Cour T, 2008, LECTURE NOTES IN COMPUTER SCIENCE;Gupta A, 2021, PROCEEDINGS OF THE AAAI CONFERENCE ON ARTIFICIAL INTELLIGENCE;Morvant E, 2014, LECTURE NOTES IN COMPUTER SCIENCE;Bojanowski P, 2015, ;Feng Y, 2010, EDINBURGH RESEARCH EXPLORER (UNIVERSITY OF EDINBURGH);Wei Y, 2016, ACM TRANSACTIONS ON INTELLIGENT SYSTEMS AND TECHNOLOGY;Chen Y, 2004, ;Sjölander K, 2003, ;Wu Z, 2005, LECTURE NOTES IN COMPUTER SCIENCE;Naim I, 2014, PROCEEDINGS OF THE AAAI CONFERENCE ON ARTIFICIAL INTELLIGENCE;Khapra M, 2010, ;Deena S, 2009, LECTURE NOTES IN COMPUTER SCIENCE;Kingma D, 2014, UVA-DARE (UNIVERSITY OF AMSTERDAM);Simonyan K, 2014, ARXIV (CORNELL UNIVERSITY);Ioffe S, 2015, ARXIV (CORNELL UNIVERSITY);Xu K, 2015, ARXIV (CORNELL UNIVERSITY);Chorowski J, 2015, ARXIV (CORNELL UNIVERSITY);Chen X, 2015, ARXIV (CORNELL UNIVERSITY);Kiros R, 2014, ARXIV (CORNELL UNIVERSITY);Mao J, 2014, ARXIV (CORNELL UNIVERSITY);Gao H, 2015, ARXIV (CORNELL UNIVERSITY);Torabi A, 2015, ARXIV (CORNELL UNIVERSITY);McFee B, 2010, ARXIV (CORNELL UNIVERSITY);Christoudias C, 2012, ARXIV (CORNELL UNIVERSITY);Lebret R, 2015, ARXIV (CORNELL UNIVERSITY);Vinyals O, 2015, ;McGurk H, 1976, NATURE;Blum A, 1998, ;Hotelling H, 1936, BIOMETRIKA;Karpathy A, 2015, ;Vedantam R, 2015, ;Antol S, 2015, ;Brown P, 1993, ;Farhadi A, 2010, LECTURE NOTES IN COMPUTER SCIENCE;Atrey P, 2010, MULTIMEDIA SYSTEMS;Kulkarni G, 2013, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Vogel S, 1996, ;Juang B, 1991, TECHNOMETRICS;D’Mello S, 2015, ACM COMPUTING SURVEYS;Feng F, 2014, ;Valstar M, 2013, ;Bronstein M, 2010, ;Kim Y, 2013, ;Lienhart R, 1998, PROCEEDINGS OF SPIE, THE INTERNATIONAL SOCIETY FOR OPTICAL ENGINEERING/PROCEEDINGS OF SPIE;Kruskal J, 1983, SIAM REVIEW;Yao B, 2010, PROCEEDINGS OF THE IEEE;Wöllmer M, 2012, IMAGE AND VISION COMPUTING;Ouyang W, 2014, 
;Evangelopoulos G, 2013, IEEE TRANSACTIONS ON MULTIMEDIA;Socher R, 2010, ;Li Y, 2014, NEUROCOMPUTING;Zitnick C, 2013, ;Kong C, 2014, ;Huang J, 2013, ;Sargin M, 2007, IEEE TRANSACTIONS ON MULTIMEDIA;Zhou F, 2012, ;Mei H, 2016, PROCEEDINGS OF THE AAAI CONFERENCE ON ARTIFICIAL INTELLIGENCE;Liu F, 2013, IEEE JOURNAL OF BIOMEDICAL AND HEALTH INFORMATICS;Rohrbach A, 2015, LECTURE NOTES IN COMPUTER SCIENCE;Chen S, 2015, ;Sarah T, 2012, ;Baltrušaitis T, 2013, ;Chen J, 2014, ;Zen H, 2012, IEEE TRANSACTIONS ON AUDIO SPEECH AND LANGUAGE PROCESSING;Tapaswi M, 2015, ;Arora R, 2013, ;Feng F, 2014, NEUROCOMPUTING;Bourlard H, 2002, ;Yeh Y, 2012, IEEE TRANSACTIONS ON MULTIMEDIA;Noulas A, 2011, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Jiang X, 2014, PATTERN RECOGNITION LETTERS;Haubold A, 2007, ;Tapaswi M, 2014, INTERNATIONAL JOURNAL OF MULTIMEDIA INFORMATION RETRIEVAL;Wu D, 2014, ;Klein B, 2014, ARXIV (CORNELL UNIVERSITY);Hochreiter S, 1997, NEURAL COMPUTATION;Srivastava N, 2014, ;淳司 柴, 2017, JOURNAL OF JAPAN SOCIETY FOR FUZZY THEORY AND INTELLIGENT INFORMATICS;Papineni K, 2001, ;Hinton G, 2006, NEURAL COMPUTATION;Hardoon D, 2004, NEURAL COMPUTATION;Denkowski M, 2014, ;Frome A, 2013, ;Farhadi A, 2009, 2009 IEEE CONFERENCE ON COMPUTER VISION AND PATTERN RECOGNITION;Gönen M, 2011, ;Rasiwasia N, 2010, ;Zen H, 2009, SPEECH COMMUNICATION;Suk H, 2014, NEUROIMAGE;Ordóñez V, 2011, ;James A, 2014, INFORMATION FUSION;Bruni E, 2014, JOURNAL OF ARTIFICIAL INTELLIGENCE RESEARCH;Carletta J, 2006, LECTURE NOTES IN COMPUTER SCIENCE;Gehler P, 2009, ;Pomianos G, 2003, PROCEEDINGS OF THE IEEE;Bigham J, 2010, ;Lai P, 2000, INTERNATIONAL JOURNAL OF NEURAL SYSTEMS;Quattoni A, 2007, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Regneri M, 2013, TRANSACTIONS OF THE ASSOCIATION FOR COMPUTATIONAL LINGUISTICS;Snoek C, 2004, MULTIMEDIA TOOLS AND APPLICATIONS;Anagnostopoulos C, 2012, ARTIFICIAL INTELLIGENCE REVIEW;Weston J, 2010, MACHINE LEARNING;Coyne B, 2001, ;Bucak S, 
2014, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;McKeown G, 2010, ;Schuller B, 2011, LECTURE NOTES IN COMPUTER SCIENCE;Devlin J, 2015, ;Bojanowski P, 2014, LECTURE NOTES IN COMPUTER SCIENCE;Wu Z, 2014, ;Elliott D, 2014, ;Mason R, 2014, ;Ramírez G, 2011, LECTURE NOTES IN COMPUTER SCIENCE;Anderson R, 2013, ;Song Y, 2012, ;Slaney M, 2000, NEURAL INFORMATION PROCESSING SYSTEMS;Lan Z, 2013, MULTIMEDIA TOOLS AND APPLICATIONS;Chang A, 2015, ;Gurban M, 2008, ;Krogel M, 2004, MACHINE LEARNING;Garg A, 2003, PROCEEDINGS OF THE IEEE;Christoudias C, 2006, ;Cosi P, 2002, ;Bahdanau D, 2014, ARXIV (CORNELL UNIVERSITY);Sutskever I, 2014, ARXIV (CORNELL UNIVERSITY);Socher R, 2013, ARXIV (CORNELL UNIVERSITY);Karpathy A, 2014, ARXIV (CORNELL UNIVERSITY);Krizhevsky A, 2017, COMMUNICATIONS OF THE ACM;Lowe D, 2004, INTERNATIONAL JOURNAL OF COMPUTER VISION;Lafferty J, 2001, SCHOLARLY COMMONS (UNIVERSITY OF PENNSYLVANIA);Bengio Y, 2013, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Hinton G, 2012, IEEE SIGNAL PROCESSING MAGAZINE;Graves A, 2013, ;Zeng Z, 2008, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Young P, 2014, TRANSACTIONS OF THE ASSOCIATION FOR COMPUTATIONAL LINGUISTICS;Ngiam J, 2011, ;Hinton G, 2012, ;Lin C, 2003, ;Hunt A, 2002, ;Brand M, 2002, ;Socher R, 2014, TRANSACTIONS OF THE ASSOCIATION FOR COMPUTATIONAL LINGUISTICS;Palatucci M, 2018, FIGSHARE;Soleymani M, 2011, IEEE TRANSACTIONS ON AFFECTIVE COMPUTING;Bregler C, 1997, ;Zhang D, 2014, PROCEEDINGS OF THE AAAI CONFERENCE ON ARTIFICIAL INTELLIGENCE;Malinowski M, 2015, ;Guadarrama S, 2013, ;Nicolaou M, 2011, IEEE TRANSACTIONS ON AFFECTIVE COMPUTING;Jia X, 2015, ;Harvey D, 2009, ;Bruni E, 2012, INSTITUTIONAL RESEARCH INFORMATION SYSTEM (UNIVERSITÀ DEGLI STUDI DI TRENTO);Кузнецова П, 2012, ;Hendricks L, 2016, ;Elliott D, 2013, ;Kiela D, 2014, ;Левин, 2003, ;Silberer C, 2014, ;Nefian A, 2002, IEEE INTERNATIONAL CONFERENCE ON ACOUSTICS SPEECH AND SIGNAL PROCESSING;Yuhas B, 
1989, IEEE COMMUNICATIONS MAGAZINE;Wöllmer M, 2010, ;Zhou F, 2009, ;Thomason J, 2014, ;Sikka K, 2013, ;Qin T, 2008, ;Yu H, 2013, ;Wang D, 2015, INTERNATIONAL CONFERENCE ON ARTIFICIAL INTELLIGENCE;Sarkar A, 2001, ;Song Y, 2012, ;Nakov P, 2012, JOURNAL OF ARTIFICIAL INTELLIGENCE RESEARCH;Yagcioglu S, 2015, ;Shariat S, 2011, ;Masuko T, 2002, ;Reiter S, 2007, ;Mikolov T, 2013, ARXIV (CORNELL UNIVERSITY);Simonyan K, 2014, ARXIV (CORNELL UNIVERSITY);Ratnaparkhi A, 2000, ARXIV.ORG;Feichtenhofer C, 2016, ;Chan W, 2016, ;Fukui A, 2016, ;Mao J, 2016, ;Yu L, 2016, LECTURE NOTES IN COMPUTER SCIENCE;Jiang Q, 2017, ;Trigeorgis G, 2016, ;Wang L, 2016, ;Yu H, 2016, ;Poria S, 2015, ;Müller M, 2008, ;Fan Y, 2014, ;Bernardi R, 2016, JOURNAL OF ARTIFICIAL INTELLIGENCE RESEARCH;Rohrbach A, 2017, INTERNATIONAL JOURNAL OF COMPUTER VISION;, 2009, ENCYCLOPEDIA OF BIOMETRICS;Cao Y, 2016, ;Nojavanasghari B, 2016, ;Shutova E, 2016, ;Srivastava N, 2012, ;Mahasseni B, 2016, ;Rajagopalan S, 2016, LECTURE NOTES IN COMPUTER SCIENCE;Gebru I, 2017, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Baroni M, 2015, LANGUAGE AND LINGUISTICS COMPASS;Lazaridou A, 2014, ;Kiela D, 2015, ;Silberer C, 2012, EDINBURGH RESEARCH EXPLORER (UNIVERSITY OF EDINBURGH);Gao H, 2015, ARXIV (CORNELL UNIVERSITY);Kiela D, 2015, ;Trigeorgis G, 2016, ;Jin Q, 2016, ;Song Y, 2016, INTERNATIONAL JOINT CONFERENCE ON ARTIFICIAL INTELLIGENCE;Naim I, 2015, ;Anguera X, 2014, ;Moon S, 2014, ;Oord A, 2016, ARXIV (CORNELL UNIVERSITY);Reed S, 2016, ARXIV (CORNELL UNIVERSITY);Oord A, 2016, ARXIV (CORNELL UNIVERSITY);Xiong C, 2016, ARXIV (CORNELL UNIVERSITY);Oord A, 2016, ARXIV (CORNELL UNIVERSITY);Collobert R, 2016, ARXIV (CORNELL UNIVERSITY);Lu J, 2016, ARXIV (CORNELL UNIVERSITY);Jayanta M, 2021, DROPS (SCHLOSS DAGSTUHL – LEIBNIZ CENTER FOR INFORMATICS);Labaca-Castro R, 2023, ;Barsalou L, 2007, ANNUAL REVIEW OF PSYCHOLOGY;Yang Z, 2016, ;, 2007, ;Xu H, 2016, LECTURE NOTES IN COMPUTER SCIENCE;Juang B, 1991, TECHNOMETRICS;Hu 
R, 2016, ;Xiong C, 2016, INTERNATIONAL CONFERENCE ON MACHINE LEARNING;Huang T, 2016, ;Zeng K, 2017, ;Torre F, 2011, ;Nefian, 2002, IEEE INTERNATIONAL CONFERENCE ON ACOUSTICS SPEECH AND SIGNAL PROCESSING;McFeeBrian, 2011, JOURNAL OF MACHINE LEARNING RESEARCH;Bourlard H, 1996, 4TH INTERNATIONAL CONFERENCE ON SPOKEN LANGUAGE PROCESSING (ICSLP 1996);Zhang H, 2016, ;Ghahramani Z, 1996, ;Mei H, 2015, ARXIV (CORNELL UNIVERSITY);Klein B, 2014, ARXIV (CORNELL UNIVERSITY)",,,OPENALEX,"Baltrušaitis T, 2018, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE","Baltrušaitis T, 2018, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE" +https://openalex.org/W2177870565,10.1161/circulationaha.115.001593,Machine Learning in Medicine,2015,en,review,3415,CIRCULATION,Circulation,Rahul C. Deo,Rahul C. Deo,"From Cardiovascular Research Institute, Department of Medicine and Institute for Human Genetics, University of California, San Francisco, and California Institute for Quantitative Biosciences, San Francisco","Rahul C. Deo (corresponding author), From Cardiovascular Research Institute, Department of Medicine and Institute for Human Genetics, University of California, San Francisco, and California Institute for Quantitative Biosciences, San Francisco","Spurred by advances in processing power, memory, storage, and an unprecedented wealth of data, computers are being asked to tackle increasingly complex learning tasks, often with astonishing success. Computers have now mastered a popular variant of poker, learned the laws of physics from experimental data, and become experts in video games - tasks that would have been deemed impossible not too long ago. In parallel, the number of companies centered on applying complex data analysis to varying industries has exploded, and it is thus unsurprising that some analytic companies are turning attention to problems in health care. 
The purpose of this review is to explore what problems in medicine might benefit from such learning approaches and use examples from the literature to introduce basic concepts in machine learning. It is important to note that seemingly large enough medical data sets and adequate learning algorithms have been available for many decades, and yet, although there are thousands of papers applying machine learning algorithms to medical data, very few have contributed meaningfully to clinical care. This lack of impact stands in stark contrast to the enormous relevance of machine learning to many other industries. Thus, part of my effort will be to identify what obstacles there may be to changing the practice of medicine through statistical learning approaches, and discuss how these might be overcome.",132,20,1920,1930,Medicine;Medical physics;Medical education;Intensive care medicine,US,"Breiman L, 2001, MACHINE LEARNING;Tibshirani R, 1996, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Friedman J, 2001, THE ANNALS OF STATISTICS;Hastie T, 2001, SPRINGER SERIES IN STATISTICS;Hastie T, 2009, SPRINGER SERIES IN STATISTICS;Breiman L, 1996, MACHINE LEARNING;Burges C, 1998, DATA MINING AND KNOWLEDGE DISCOVERY;Breiman L, 1996, MACHINE LEARNING;Cover T, 1967, IEEE TRANSACTIONS ON INFORMATION THEORY;Lee D, 1999, NATURE;GuyonIsabelle, 2003, JOURNAL OF MACHINE LEARNING RESEARCH;Bengio Y, 2009, FOUNDATIONS AND TRENDS® IN MACHINE LEARNING;Lip G, 2009, CHEST JOURNAL;Vapnik V, 1999, IEEE TRANSACTIONS ON NEURAL NETWORKS;Bengio Y, 2009, NOW PUBLISHERS, INC. 
EBOOKS;Olshausen B, 1997, VISION RESEARCH;Pitt B, 2014, NEW ENGLAND JOURNAL OF MEDICINE;Ishwaran H, 2008, THE ANNALS OF APPLIED STATISTICS;Lee H, 2007, THE MIT PRESS EBOOKS;D’Agostino R, 2001, JAMA;Woodruff P, 2009, AMERICAN JOURNAL OF RESPIRATORY AND CRITICAL CARE MEDICINE;Le Q, 2013, ;Corren J, 2011, NEW ENGLAND JOURNAL OF MEDICINE;Ishwaran H, 2008, ;O’Mahony C, 2013, EUROPEAN HEART JOURNAL;James M, 2012, PHYSIOLOGICAL REVIEWS;Shah S, 2014, CIRCULATION;Beck A, 2011, SCIENCE TRANSLATIONAL MEDICINE;Koren Y, 2009, ;Peña‐Castillo L, 2008, GENOME BIOLOGY;Abu‐Mostafa Y, 2012, ;Hsich E, 2010, CIRCULATION CARDIOVASCULAR QUALITY AND OUTCOMES;Cheng W, 2013, SCIENCE TRANSLATIONAL MEDICINE;Margolin A, 2013, SCIENCE TRANSLATIONAL MEDICINE;Udelson J, 2011, CIRCULATION;Kannel W, 1975, CIRCULATION;Cheng W, 2013, PLOS COMPUTATIONAL BIOLOGY;Gorodeski E, 2011, CIRCULATION CARDIOVASCULAR QUALITY AND OUTCOMES;Deo R, 2014, GENOME BIOLOGY;Yogatama D, 2014, ARXIV (CORNELL UNIVERSITY);Ausiello D, 2014, PUBMED",,,OPENALEX,"Deo R, 2015, CIRCULATION","Deo R, 2015, CIRCULATION" +https://openalex.org/W3135028703,10.1007/s42979-021-00592-x,"Machine Learning: Algorithms, Real-World Applications and Research Directions",2021,en,review,5114,SN COMPUTER SCIENCE,SN Computer Science,Iqbal H. Sarker,Iqbal H. Sarker,"Department of Computer Science and Engineering, Chittagong University of Engineering & Technology, 4349 Chattogram, Bangladesh;Swinburne University of Technology, Melbourne, VIC 3122 Australia;Swinburne University of Technology, Melbourne, VIC, 3122, Australia","Iqbal H. 
Sarker (corresponding author), Department of Computer Science and Engineering, Chittagong University of Engineering & Technology, 4349 Chattogram, Bangladesh; Swinburne University of Technology, Melbourne, VIC 3122 Australia; Swinburne University of Technology, Melbourne, VIC, 3122, Australia",,2,3,160,160,Computer science;Artificial intelligence;Machine learning;Key (lock);Big data;Data science;Data mining;Computer security,BD;AU,"LeCun Y, 1998, PROCEEDINGS OF THE IEEE;Szegedy C, 2015, ;Witten I, 2011, ELSEVIER EBOOKS;Gordon A, 1984, BIOMETRICS;Cios K, 1997, NEUROCOMPUTING;Ester M, 1996, ;He K, 2015, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Agrawal R, 1998, ;Hotelling H, 1933, JOURNAL OF EDUCATIONAL PSYCHOLOGY;Kaelbling L, 1996, JOURNAL OF ARTIFICIAL INTELLIGENCE RESEARCH;Hazeghi K, 1995, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Kohonen T, 1990, PROCEEDINGS OF THE IEEE;Freund Y, 1996, ;Han J, 2000, ACM SIGMOD RECORD;Zanella A, 2014, IEEE INTERNET OF THINGS JOURNAL;Dey A, 2001, PERSONAL AND UBIQUITOUS COMPUTING;Tavallaee M, 2009, ;Hinton G, 2012, LECTURE NOTES IN COMPUTER SCIENCE;Agrawal R, 1998, ;Park H, 2008, EXPERT SYSTEMS WITH APPLICATIONS;Xu D, 2015, ANNALS OF DATA SCIENCE;Zaki M, 2000, IEEE TRANSACTIONS ON KNOWLEDGE AND DATA ENGINEERING;Cessie S, 1992, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES C (APPLIED STATISTICS);Amit Y, 1997, NEURAL COMPUTATION;Wang W, 1997, ;, 1993, CHOICE REVIEWS ONLINE;Sneath P, 1957, MICROBIOLOGY;MTW, 1999, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Witten I, 1999, RESEARCH COMMONS (THE UNIVERSITY OF WAIKATO);Bishop C, 2007, ;Houtsma M, 2002, ;Agrawal R, 1998, KNOWLEDGE DISCOVERY AND DATA MINING;Rokach L, 2009, ;Zheng Y, 2015, ;Scheffer T, 2001, LECTURE NOTES IN COMPUTER SCIENCE;Zhu H, 2014, ACM TRANSACTIONS ON INTELLIGENT SYSTEMS AND TECHNOLOGY;Zhao Q, 2003, PALLIATIVE & SUPPORTIVE CARE;Flach P, 2001, MACHINE LEARNING;Cobuloglu H, 2015, EXPERT SYSTEMS WITH APPLICATIONS;McCallum A, 2005, 
QUEUE;Das A, 2001, ;Phithakkitnukoon S, 2011, ACM TRANSACTIONS ON AUTONOMOUS AND ADAPTIVE SYSTEMS;Zhu H, 2012, ;Zulkernain S, 2010, E-PUBLICATIONS@MARQUETTE (MARQUETTE UNIVERSITY);Amorim R, 2012, ;Sheng S, 2005, LECTURE NOTES IN COMPUTER SCIENCE;Bell A, 2005, QUEUE;Zulkernain S, 2010, LECTURE NOTES OF THE INSTITUTE FOR COMPUTER SCIENCES, SOCIAL INFORMATICS AND TELECOMMUNICATIONS ENGINEERING;Pedregosa F, 2012, ARXIV (CORNELL UNIVERSITY);John G, 2013, ARXIV (CORNELL UNIVERSITY);He K, 2016, ;Breiman L, 2001, MACHINE LEARNING;Krizhevsky A, 2017, COMMUNICATIONS OF THE ACM;Han J, 2012, CHOICE REVIEWS ONLINE;MacQueen J, 1967, DEFENSE TECHNICAL INFORMATION CENTER (DTIC);Chollet F, 2017, ;Silver D, 2016, NATURE;Quinlan J, 1986, MACHINE LEARNING;Agrawal R, 1993, ;Pearson K, 1901, THE LONDON EDINBURGH AND DUBLIN PHILOSOPHICAL MAGAZINE AND JOURNAL OF SCIENCE;Goodfellow I, 2016, MIT PRESS EBOOKS;Weiss K, 2016, JOURNAL OF BIG DATA;Wu X, 2007, KNOWLEDGE AND INFORMATION SYSTEMS;Aha D, 1991, MACHINE LEARNING;Ankerst M, 1999, ACM SIGMOD RECORD;Moustafa N, 2015, ;Fukunaga K, 1975, IEEE TRANSACTIONS ON INFORMATION THEORY;Eagle N, 2005, PERSONAL AND UBIQUITOUS COMPUTING;Carpenter G, 1987, COMPUTER VISION GRAPHICS AND IMAGE PROCESSING;Wagstaff K, 2001, ;Keerthi S, 2001, NEURAL COMPUTATION;Holte R, 1993, MACHINE LEARNING;Kamble S, 2018, PROCESS SAFETY AND ENVIRONMENTAL PROTECTION;Ravi K, 2015, KNOWLEDGE-BASED SYSTEMS;Xin Y, 2018, IEEE ACCESS;Rasmussen C, 1999, ;Mahdavinejad M, 2017, DIGITAL COMMUNICATIONS AND NETWORKS;Fatima M, 2017, JOURNAL OF INTELLIGENT LEARNING SYSTEMS AND APPLICATIONS;Ibáñez J, 2018, SENSORS;Liu H, 1998, ;Polydoros A, 2017, JOURNAL OF INTELLIGENT & ROBOTIC SYSTEMS;Ślusarczyk B, 2018, POLISH JOURNAL OF MANAGEMENT STUDIES;Ismail A, 2019, JOURNAL OF BIG DATA;López G, 2017, ADVANCES IN INTELLIGENT SYSTEMS AND COMPUTING;Lade P, 2017, IEEE INTELLIGENT SYSTEMS;Mohammed M, 2016, ;Balducci F, 2018, MACHINES;Nilashi M, 2017, COMPUTERS & CHEMICAL ENGINEERING;Safdar S, 2017, 
ARTIFICIAL INTELLIGENCE REVIEW;Srinivasan V, 2014, ;Mehrotra A, 2016, ;Sarker I, 2017, THE COMPUTER JOURNAL;Perveen S, 2018, IEEE ACCESS;Adnan N, 2017, WORLD JOURNAL OF SCIENCE TECHNOLOGY AND SUSTAINABLE DEVELOPMENT;Cai J, 2003, ;Essien A, 2019, ;Wu C, 2016, APPLIED INTELLIGENCE;Sarker I, 2016, ;Cao L, 2020, ARXIV (CORNELL UNIVERSITY);Ryzin J, 1986, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Breiman L, 1996, MACHINE LEARNING;Breiman L, 1996, MACHINE LEARNING;Quinlan J, 1986, MACHINE LEARNING;, 1991, CHOICE REVIEWS ONLINE;Agrawal R, 1993, ACM SIGMOD RECORD;Saeid M, 2017, VIEW;Kamble S, 2019, INTERNATIONAL JOURNAL OF PRODUCTION ECONOMICS;Oh Y, 2020, PUBMED;Lalmuanawma S, 2020, CHAOS SOLITONS & FRACTALS;Sharma R, 2020, COMPUTERS & OPERATIONS RESEARCH;Sarker I, 2020, JOURNAL OF BIG DATA;Agrawal R, 1998, ACM SIGMOD RECORD;Harmon S, 2020, NATURE COMMUNICATIONS;Jamshidi M, 2020, IEEE ACCESS;Sarker I, 2021, SN COMPUTER SCIENCE;Fujiyoshi H, 2019, IATSS RESEARCH;Kalan A, 2020, NATURE COMMUNICATIONS;Zheng T, 2016, INTERNATIONAL JOURNAL OF MEDICAL INFORMATICS;Ardabili S, 2020, ALGORITHMS;Alakuş T, 2020, CHAOS SOLITONS & FRACTALS;Mohamadou Y, 2020, APPLIED INTELLIGENCE;Shorten C, 2021, JOURNAL OF BIG DATA;Sarker I, 2020, SYMMETRY;Boukerche A, 2020, COMPUTER NETWORKS;Sarker I, 2019, JOURNAL OF BIG DATA;Sarker I, 2021, SN COMPUTER SCIENCE;Tao J, 2020, JOURNAL OF BIG DATA;Sarker I, 2020, MOBILE NETWORKS AND APPLICATIONS;Essien A, 2020, WORLD WIDE WEB;Kushwaha S, 2020, JOURNAL OF INDUSTRIAL INTEGRATION AND MANAGEMENT;Wei P, 2019, IEEE ACCESS;Sarker I, 2019, MOBILE NETWORKS AND APPLICATIONS;Sarker I, 2019, INTERNET OF THINGS;Sarker I, 2019, JOURNAL OF BIG DATA;Tsagkias M, 2020, ACM SIGIR FORUM;Marchand A, 2020, JOURNAL OF RETAILING;Sarker I, 2020, JOURNAL OF NETWORK AND COMPUTER APPLICATIONS;Sarker I, 2020, SYMMETRY;Scheffer T, 2005, INTELLIGENT DATA ANALYSIS;Sarker I, 2019, INTERNET OF THINGS;Sarker I, 2020, JOURNAL OF BIG DATA;Khadse V, 2018, ;Zi-kang H, 2020, ;Sarker I, 
2021, PREPRINTS.ORG;Sarker I, 2021, PREPRINTS.ORG;Sarle W, 1991, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Aha D, 1991, MACHINE LEARNING;Baxter L, 1995, TECHNOMETRICS;Mohammed M, 2016, ",,,OPENALEX,"Sarker I, 2021, SN COMPUTER SCIENCE","Sarker I, 2021, SN COMPUTER SCIENCE" +https://openalex.org/W2750384547,10.48550/arxiv.1708.07747,Fashion-MNIST: a Novel Image Dataset for Benchmarking Machine Learning Algorithms,2017,en,preprint,6089,ARXIV (CORNELL UNIVERSITY),arXiv (Cornell University),"Xiao, Han;Kashif Rasul;Roland Vollgraf","Xiao, Han;Rasul, Kashif;Vollgraf, Roland",,,"We present Fashion-MNIST, a new dataset comprising of 28x28 grayscale images of 70,000 fashion products from 10 categories, with 7,000 images per category. The training set has 60,000 images and the test set has 10,000 images. Fashion-MNIST is intended to serve as a direct drop-in replacement for the original MNIST dataset for benchmarking machine learning algorithms, as it shares the same image size, data format and the structure of training and testing splits. 
The dataset is freely available at https://github.com/zalandoresearch/fashion-mnist",,,,,MNIST database;Benchmarking;Artificial intelligence;Grayscale;Computer science;Set (abstract data type);Test set;Pattern recognition (psychology);Image (mathematics);Machine learning;Algorithm;Deep learning,,"LeCun Y, 1998, PROCEEDINGS OF THE IEEE;Wan L, 2013, INTERNATIONAL REVIEW OF CYTOLOGY;Cohen G, 2017, ARXIV (CORNELL UNIVERSITY)",,,OPENALEX,"Xiao H, 2017, ARXIV (CORNELL UNIVERSITY)","Xiao H, 2017, ARXIV (CORNELL UNIVERSITY)" +https://openalex.org/W2560674852,10.1145/1390156,Proceedings of the 25th international conference on Machine learning - ICML '08,2008,en,paratext,6716,,,,,,,,,,,,Computer science;Artificial intelligence,,,,,OPENALEX,"NA, 2008, ","NA, 2008, " +https://openalex.org/W1873332500,,Supervised Machine Learning: A Review of Classification Techniques,2007,en,review,4147,,,Sotiris Kotsiantis,Sotiris Kotsiantis,,"Sotiris Kotsiantis (corresponding author), ","The goal of supervised learning is to build a concise model of the distribution of class labels in terms of predictor features. The resulting classifier is then used to assign class labels to the testing instances where the values of the predictor features are known, but the value of the class label is unknown. This paper describes various supervised machine learning classification techniques. 
Of course, a single chapter cannot be a complete review of all supervised machine learning classification algorithms (also known induction classification algorithms), yet we hope that the references cited will cover the major theoretical issues, guiding the researcher in interesting research directions and suggesting possible bias combinations that have yet to be explored.",160,3,249,24,Machine learning;Artificial intelligence;Computer science;Classifier (UML);Supervised learning;One-class classification;Class (philosophy);Semi-supervised learning;Artificial neural network,GR,"Witten I, 2011, ELSEVIER EBOOKS;Gordon A, 1984, BIOMETRICS;Cristianini N, 2000, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Jain A, 1999, ACM COMPUTING SURVEYS;Sutton R, 1998, MIT PRESS EBOOKS;Friedman N, 1997, MACHINE LEARNING;Cohen W, 1995, ELSEVIER EBOOKS;Kononenko I, 1994, LECTURE NOTES IN COMPUTER SCIENCE;Dietterich T, 2000, MACHINE LEARNING;Jordan M, 1998, ;Frank E, 1998, RESEARCH COMMONS (THE UNIVERSITY OF WAIKATO);Freund Y, 1998, ;Murthy S, 1998, DATA MINING AND KNOWLEDGE DISCOVERY;Batista G, 2003, APPLIED ARTIFICIAL INTELLIGENCE;Cheng J, 2002, ARTIFICIAL INTELLIGENCE;Wettschereck D, 1997, ARTIFICIAL INTELLIGENCE REVIEW;Veropoulos K, 1999, ;Crammer K, 2002, MACHINE LEARNING;Genton M, 2002, ;Fürnkranz J, 1999, ARTIFICIAL INTELLIGENCE REVIEW;Cestnik B, 1990, EUROPEAN CONFERENCE ON ARTIFICIAL INTELLIGENCE;Cestnik B, 1987, MEDICAL ENTOMOLOGY AND ZOOLOGY;Gehrke J, 2000, DATA MINING AND KNOWLEDGE DISCOVERY;Breslow L, 1997, THE KNOWLEDGE ENGINEERING REVIEW;Kononenko I, 1991, LECTURE NOTES IN COMPUTER SCIENCE;Cheng J, 2001, LECTURE NOTES IN COMPUTER SCIENCE;Smyth P, 1991, ;F�rnkranz J, 2005, MACHINE LEARNING;Blum A, 1997, MACHINE LEARNING;Setiono R, 2000, APPLIED INTELLIGENCE;Elomaa T, 1999, MACHINE LEARNING;Rastogi R, 2000, DATA MINING AND KNOWLEDGE DISCOVERY;Siddique M, 2002, ;Yang Y, 2003, LECTURE NOTES IN COMPUTER SCIENCE;Mántaras R, 1998, DATA & KNOWLEDGE ENGINEERING;Bonarini A, 2000, LECTURE 
NOTES IN COMPUTER SCIENCE;Csiszár I, 1996, ;An A, 1999, LECTURE NOTES IN COMPUTER SCIENCE;Wall R, 2003, ARTIFICIAL INTELLIGENCE IN MEDICINE;Brůha I, 2000, INTELLIGENT DATA ANALYSIS;Elomaa T, 1999, LECTURE NOTES IN COMPUTER SCIENCE;Fürnkranz J, 2001, ;Zheng Z, 2000, MACHINE LEARNING;An A, 2000, LECTURE NOTES IN COMPUTER SCIENCE;Lindgren T, 2004, LECTURE NOTES IN COMPUTER SCIENCE;Kubát M, 2001, INTELLIGENT DATA ANALYSIS;Baik S, 2004, LECTURE NOTES IN COMPUTER SCIENCE;Vivarelli F, 2001, NEURAL NETWORKS;Cowell R, 2013, ARXIV (CORNELL UNIVERSITY);Vapnik V, 1995, ;Quinlan J, 1992, ;Burges C, 1998, DATA MINING AND KNOWLEDGE DISCOVERY;Cover T, 1967, IEEE TRANSACTIONS ON INFORMATION THEORY;Dietterich T, 1998, NEURAL COMPUTATION;, 2000, APPLIED PHYSICS LETTERS;Hodge J, 2004, ARTIFICIAL INTELLIGENCE REVIEW;Domingos P, 1997, MACHINE LEARNING;Mika S, 2003, ;Friedman J, 1989, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Guo G, 2003, LECTURE NOTES IN COMPUTER SCIENCE;Littlestone N, 1994, INFORMATION AND COMPUTATION;Clark P, 1989, MACHINE LEARNING;Yu L, 2004, ;Zhang G, 2000, IEEE TRANSACTIONS ON SYSTEMS MAN AND CYBERNETICS PART C (APPLICATIONS AND REVIEWS);Wilson D, 2000, MACHINE LEARNING;Lim T, 2000, MACHINE LEARNING;Vilalta R, 2002, ARTIFICIAL INTELLIGENCE REVIEW;Nadeau C, 2003, MACHINE LEARNING;Friedman N, 2003, MACHINE LEARNING;Hunt E, 1966, MEDICAL ENTOMOLOGY AND ZOOLOGY;Weigend A, 1990, ;Brighton H, 2002, DATA MINING AND KNOWLEDGE DISCOVERY;Zhang S, 2003, APPLIED ARTIFICIAL INTELLIGENCE;, 2000, APPLIED PHYSICS LETTERS;Brazdil P, 2003, MACHINE LEARNING;Ruggieri S, 2002, IEEE TRANSACTIONS ON KNOWLEDGE AND DATA ENGINEERING;Utgoff P, 1997, MACHINE LEARNING;Keerthi S, 2002, MACHINE LEARNING;Kononenko I, 1993, APPLIED ARTIFICIAL INTELLIGENCE;Castellano G, 1997, IEEE TRANSACTIONS ON NEURAL NETWORKS;Heckerman D, 2006, STUDIES IN FUZZINESS AND SOFT COMPUTING;Parekh R, 2000, IEEE TRANSACTIONS ON NEURAL NETWORKS;Bouckaert R, 2003, ;Reinartz T, 2002, DATA MINING AND KNOWLEDGE 
DISCOVERY;Fürnkranz J, 1997, MACHINE LEARNING;Yam J, 2001, IEEE TRANSACTIONS ON NEURAL NETWORKS;Dutton D, 1997, THE KNOWLEDGE ENGINEERING REVIEW;Kalousis A, 2004, MACHINE LEARNING;McSherry D, 1999, KNOWLEDGE-BASED SYSTEMS;Zhou Z, 2004, JOURNAL OF COMPUTER SCIENCE AND TECHNOLOGY;Camargo L, 2001, NEURAL COMPUTATION;Yen G, 2002, ;Yıldız O, 2007, PATTERN RECOGNITION LETTERS;Roy A, 2000, IEEE TRANSACTIONS ON FUZZY SYSTEMS;Basak J, 2004, NEURAL COMPUTATION;Kon M, 2000, NEURAL NETWORKS;Okamoto S, 2003, THEORETICAL COMPUTER SCIENCE;Zheng Z, 1998, KNOWLEDGE-BASED SYSTEMS;Sánchez J, 2002, LECTURE NOTES IN COMPUTER SCIENCE;Breiman L, 1996, MACHINE LEARNING;Mullin A, 1963, AMERICAN MATHEMATICAL MONTHLY;M. P, 1950, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES A (GENERAL);, 2001, STUDIES IN FUZZINESS AND SOFT COMPUTING;Bouckaert R, 2004, LECTURE NOTES IN COMPUTER SCIENCE;Gama J, 1999, INTELLIGENT DATA ANALYSIS;Flach P, 2000, ;Kivinen J, 2003, LECTURE NOTES IN COMPUTER SCIENCE;Rumelhart D, 1985, ;, 2019, ;Aha D, 1997, ",,,OPENALEX,"Kotsiantis S, 2007, ","Kotsiantis S, 2007, " +https://openalex.org/W4236362309,10.1017/cbo9781107298019,Understanding Machine Learning,2014,en,book,2923,CAMBRIDGE UNIVERSITY PRESS EBOOKS,Cambridge University Press eBooks,Shai Shalev‐Shwartz;Shai Ben-David,Shai Shalev-Shwartz;Shai Ben-David,"Hebrew University of Jerusalem;University of Waterloo, Ontario",,"Machine learning is one of the fastest growing areas of computer science, with far-reaching applications. The aim of this textbook is to introduce machine learning, and the algorithmic paradigms it offers, in a principled way. The book provides a theoretical account of the fundamentals underlying machine learning and the mathematical derivations that transform these principles into practical algorithms. Following a presentation of the basics, the book covers a wide array of central topics unaddressed by previous textbooks. 
These include a discussion of the computational complexity of learning and the concepts of convexity and stability; important algorithmic paradigms including stochastic gradient descent, neural networks, and structured output learning; and emerging theoretical concepts such as the PAC-Bayes approach and compression-based bounds. Designed for advanced undergraduates or beginning graduates, the text makes the fundamentals and algorithms of machine learning accessible to students and non-expert readers in statistics, computer science, mathematics and engineering.",,,,,Computer science;Artificial intelligence;Stability (learning theory);Algorithmic learning theory;Computational learning theory;Machine learning;Presentation (obstetrics);Stochastic gradient descent;Convexity;Online machine learning;Artificial neural network,IL;CA,,,,OPENALEX,"Shalev‐Shwartz S, 2014, CAMBRIDGE UNIVERSITY PRESS EBOOKS","Shalev‐Shwartz S, 2014, CAMBRIDGE UNIVERSITY PRESS EBOOKS" +https://openalex.org/W3198350258,10.1147/rd.33.0210,Some Studies in Machine Learning Using the Game of Checkers,1959,en,article,4352,IBM JOURNAL OF RESEARCH AND DEVELOPMENT,IBM Journal of Research and Development,Arthur L. Samuel,A. L. Samuel,,"A. L. Samuel (corresponding author), ","Two machine-learning procedures have been investigated in some detail using the game of checkers. Enough work has been done to verify the fact that a computer can be programmed so that it will learn to play a better game of checkers than can be played by the person who wrote the program. Furthermore, it can learn to do this in a remarkably short period of time (8 or 10 hours of machine-playing time) when given only the rules of the game, a sense of direction, and a redundant and incomplete list of parameters which are thought to have something to do with the game, but whose correct signs and relative weights are unknown and unspecified. 
The principles of machine learning verified by these experiments are, of course, applicable to many other situations.",3,3,210,229,Computer science;Artificial intelligence;Machine learning,,,,,OPENALEX,"Samuel A, 1959, IBM JOURNAL OF RESEARCH AND DEVELOPMENT","Samuel A, 1959, IBM JOURNAL OF RESEARCH AND DEVELOPMENT" +https://openalex.org/W2885770726,10.3390/s18082674,Machine Learning in Agriculture: A Review,2018,en,review,3036,SENSORS,Sensors,Κωνσταντίνος Λιάκος;Patrizia Busato;Dimitrios Moshou;Simon Pearson;Dionysis Bochtis,Konstantinos Liakos;Patrizia Busato;Dimitrios Moshou;Simon Pearson;Dionysis Bochtis,"Institute for Bio-Economy and Agri-Technology (IBO), Centre of Research and Technology—Hellas (CERTH), 6th km Charilaou-Thermi Rd, GR 57001 Thessaloniki, Greece;Department of Agriculture, Forestry and Food Sciences (DISAFA), Faculty of Agriculture, University of Turin, Largo Braccini 2, 10095 Grugliasco, Italy;Agricultural Engineering Laboratory, Faculty of Agriculture, Aristotle University of Thessaloniki, 54124 Thessaloniki, Greece;Institute for Bio-Economy and Agri-Technology (IBO), Centre of Research and Technology—Hellas (CERTH), 6th km Charilaou-Thermi Rd, GR 57001 Thessaloniki, Greece;Lincoln Institute for Agri-food Technology (LIAT), University of Lincoln, Brayford Way, Brayford Pool, Lincoln LN6 7TS, UK;Institute for Bio-Economy and Agri-Technology (IBO), Centre of Research and Technology—Hellas (CERTH), 6th km Charilaou-Thermi Rd, GR 57001 Thessaloniki, Greece","Dionysis Bochtis (corresponding author), Institute for Bio-Economy and Agri-Technology (IBO), Centre of Research and Technology—Hellas (CERTH), 6th km Charilaou-Thermi Rd, GR 57001 Thessaloniki, Greece","Machine learning has emerged with big data technologies and high-performance computing to create new opportunities for data intensive science in the multi-disciplinary agri-technologies domain. 
In this paper, we present a comprehensive review of research dedicated to applications of machine learning in agricultural production systems. The works analyzed were categorized in (a) crop management, including applications on yield prediction, disease detection, weed detection crop quality, and species recognition; (b) livestock management, including applications on animal welfare and livestock production; (c) water management; and (d) soil management. The filtering and classification of the presented articles demonstrate how agriculture will benefit from machine learning technologies. By applying machine learning to sensor data, farm management systems are evolving into real time artificial intelligence enabled programs that provide rich recommendations and insights for farmer decision support and action.",18,8,2674,2674,Agriculture;Artificial intelligence;Precision agriculture;Computer science;Machine learning;Big data;Livestock;Decision support system;Data science;Data mining,GR;IT;GB,"Dempster A, 1977, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Chang C, 2011, ACM TRANSACTIONS ON INTELLIGENT SYSTEMS AND TECHNOLOGY;Cortes C, 1995, MACHINE LEARNING;Hopfield J, 1982, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;McCulloch W, 1943, BULLETIN OF MATHEMATICAL BIOLOGY;Jang J, 1993, IEEE TRANSACTIONS ON SYSTEMS MAN AND CYBERNETICS;Lloyd S, 1982, IEEE TRANSACTIONS ON INFORMATION THEORY;Fisher R, 1936, ANNALS OF EUGENICS;Huang G, 2006, NEUROCOMPUTING;Rosenblatt F, 1958, PSYCHOLOGICAL REVIEW;Cleveland W, 1979, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Suykens J, 1999, NEURAL PROCESSING LETTERS;Kohonen T, 1990, PROCEEDINGS OF THE IEEE;Friedman J, 1991, THE ANNALS OF STATISTICS;Johnson S, 1967, PSYCHOMETRIKA;Specht D, 1991, IEEE TRANSACTIONS ON NEURAL NETWORKS;Riedmiller M, 2002, IEEE INTERNATIONAL CONFERENCE ON NEURAL NETWORKS;Suykens J, 2002, WORLD SCIENTIFIC EBOOKS;Broomhead D, 1988, COMPLEX SYSTEMS;Kong L, 2007, NUCLEIC ACIDS 
RESEARCH;Kass G, 1980, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES C (APPLIED STATISTICS);Fix E, 1989, INTERNATIONAL STATISTICAL REVIEW;Salakhutdinov R, 2009, ;Atkeson C, 1997, ARTIFICIAL INTELLIGENCE REVIEW;Samuel A, 2000, IBM JOURNAL OF RESEARCH AND DEVELOPMENT;Pal S, 1992, IEEE TRANSACTIONS ON NEURAL NETWORKS;Moshou D, 2004, COMPUTERS AND ELECTRONICS IN AGRICULTURE;Linnainmaa S, 1976, BIT NUMERICAL MATHEMATICS;Cao J, 2012, NEURAL PROCESSING LETTERS;Galvão R, 2008, CHEMOMETRICS AND INTELLIGENT LABORATORY SYSTEMS;Mackowiak S, 2015, GENOME BIOLOGY;Moshou D, 2005, REAL-TIME IMAGING;Asadi H, 2014, PLOS ONE;Melssen W, 2006, CHEMOMETRICS AND INTELLIGENT LABORATORY SYSTEMS;Dutta R, 2014, COMPUTERS AND ELECTRONICS IN AGRICULTURE;Sengupta S, 2013, BIOSYSTEMS ENGINEERING;Amatya S, 2015, BIOSYSTEMS ENGINEERING;Mohammadi K, 2015, COMPUTERS AND ELECTRONICS IN AGRICULTURE;Thomas P, 2002, BRITISH JOURNAL OF EDUCATIONAL TECHNOLOGY;Coopersmith E, 2014, COMPUTERS AND ELECTRONICS IN AGRICULTURE;Moshou D, 2006, PRECISION AGRICULTURE;Craninx M, 2007, COMPUTERS AND ELECTRONICS IN AGRICULTURE;Moshou D, 2013, BIOSYSTEMS ENGINEERING;Alonso J, 2013, COMPUTERS AND ELECTRONICS IN AGRICULTURE;Hossain M, 2011, JOURNAL OF HAND SURGERY (EUROPEAN VOLUME);Alonso J, 2014, COMPUTERS AND ELECTRONICS IN AGRICULTURE;Gastaldo P, 2014, ROBOTICS AND AUTONOMOUS SYSTEMS;Tryon R, 1957, PSYCHOMETRIKA;Neapolitan R, 1987, APPLIED ARTIFICIAL INTELLIGENCE;Smith D, 1978, SOCIETY;Breiman L, 2001, MACHINE LEARNING;LeCun Y, 2015, NATURE;Pearson K, 1901, THE LONDON EDINBURGH AND DUBLIN PHILOSOPHICAL MAGAZINE AND JOURNAL OF SCIENCE;Ferentinos K, 2018, COMPUTERS AND ELECTRONICS IN AGRICULTURE;Cox D, 1958, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Barboza F, 2017, EXPERT SYSTEMS WITH APPLICATIONS;Grinblat G, 2016, COMPUTERS AND ELECTRONICS IN AGRICULTURE;Pantazi X, 2015, COMPUTERS AND ELECTRONICS IN AGRICULTURE;Ebrahimi M, 2017, COMPUTERS AND ELECTRONICS IN AGRICULTURE;Morellos A, 
2016, BIOSYSTEMS ENGINEERING;Zhao Y, 2017, ENERGY ECONOMICS;Hansen M, 2018, COMPUTERS IN INDUSTRY;Fang K, 2017, GEOPHYSICAL RESEARCH LETTERS;Feng Y, 2017, COMPUTERS AND ELECTRONICS IN AGRICULTURE;Zhang B, 2017, CANCER LETTERS;Rhee J, 2017, AGRICULTURAL AND FOREST METEOROLOGY;Mehdizadeh S, 2017, COMPUTERS AND ELECTRONICS IN AGRICULTURE;Kang J, 2015, INTERNATIONAL JOURNAL OF RADIATION ONCOLOGY*BIOLOGY*PHYSICS;Cramer S, 2017, EXPERT SYSTEMS WITH APPLICATIONS;Chung C, 2016, COMPUTERS AND ELECTRONICS IN AGRICULTURE;Matthews S, 2017, SCIENTIFIC REPORTS;Zhou C, 2018, COMPUTERS AND ELECTRONICS IN AGRICULTURE;Pantazi X, 2016, BIOSYSTEMS ENGINEERING;Ramos P, 2017, COMPUTERS AND ELECTRONICS IN AGRICULTURE;Ali I, 2016, IEEE JOURNAL OF SELECTED TOPICS IN APPLIED EARTH OBSERVATIONS AND REMOTE SENSING;Bohanec M, 2016, EXPERT SYSTEMS WITH APPLICATIONS;Senthilnath J, 2016, BIOSYSTEMS ENGINEERING;Patil A, 2016, COMPUTERS AND ELECTRONICS IN AGRICULTURE;Belson W, 1959, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES C (APPLIED STATISTICS);Nahvi B, 2016, COMPUTERS AND ELECTRONICS IN AGRICULTURE;Maione C, 2015, COMPUTERS AND ELECTRONICS IN AGRICULTURE;Wildenhain J, 2015, CELL SYSTEMS;Su Y, 2017, SAUDI JOURNAL OF BIOLOGICAL SCIENCES;Aybar-Ruíz A, 2016, SOLAR ENERGY;Maione C, 2018, CRITICAL REVIEWS IN FOOD SCIENCE AND NUTRITION;Pantazi X, 2017, COMPUTERS AND ELECTRONICS IN AGRICULTURE;Binch A, 2017, COMPUTERS AND ELECTRONICS IN AGRICULTURE;Pantazi X, 2017, PRECISION AGRICULTURE;Ramírez-Morales I, 2016, COMPUTERS AND ELECTRONICS IN AGRICULTURE;Zhang M, 2017, COMPUTERS AND ELECTRONICS IN AGRICULTURE;Johann A, 2016, COMPUTERS AND ELECTRONICS IN AGRICULTURE;Pantazi X, 2017, COMPUTERS AND ELECTRONICS IN AGRICULTURE;Kung H, 2016, SUSTAINABILITY;Pegorini V, 2015, SENSORS;Richardson A, 2016, CLINICAL BIOCHEMISTRY;López-Cortés X, 2016, AQUACULTURE;Takahashi K, 2017, ROBOTICS AND AUTONOMOUS SYSTEMS;Fragni R, 2018, FOOD CONTROL;Hu H, 2017, COMPUTERS AND ELECTRONICS IN AGRICULTURE;Craninx M, 2006, 
PUBMED;Cortes C, 1995, MACHINE LEARNING;Breiman L, 1996, MACHINE LEARNING;, 1988, ELSEVIER EBOOKS;Hecht-Nielsen R, 1987, APPLIED OPTICS;, 1988, NEURAL NETWORKS;Strunk K, 2024, ",,,OPENALEX,"Λιάκος Κ, 2018, SENSORS","Λιάκος Κ, 2018, SENSORS" +https://openalex.org/W2535690855,10.1109/sp.2017.41,Membership Inference Attacks Against Machine Learning Models,2017,en,article,4215,,,Reza Shokri;Marco Stronati;Congzheng Song;Vitaly Shmatikov,Reza Shokri;Marco Stronati;Congzheng Song;Vitaly Shmatikov,Cornell Tech;INRIA;Cornell;Cornell Tech,,"We quantitatively investigate how machine learning models leak information about the individual data records on which they were trained. We focus on the basic membership inference attack: given a data record and black-box access to a model, determine if the record was in the model's training dataset. To perform membership inference against a target model, we make adversarial use of machine learning and train our own inference model to recognize differences in the target model's predictions on the inputs that it trained on versus the inputs that it did not train on. We empirically evaluate our inference techniques on classification models trained by commercial ""machine learning as a service"" providers such as Google and Amazon. Using realistic datasets and classification tasks, including a hospital discharge dataset whose membership is sensitive from the privacy perspective, we show that these models can be vulnerable to membership inference attacks. 
We then investigate the factors that influence this leakage and evaluate mitigation strategies.",,,3,18,Inference;Machine learning;Computer science;Artificial intelligence;Perspective (graphical);Focus (optics);Data modeling;Data mining;Adversarial system;Database,US;FR,"Srivastava N, 2014, ;Hastie T, 2013, ;Dwork C, 2006, LECTURE NOTES IN COMPUTER SCIENCE;Abadi M, 2016, ;Fredrikson M, 2015, ;Shokri R, 2015, ;Franklin J, 2005, THE MATHEMATICAL INTELLIGENCER;Homer N, 2008, PLOS GENETICS;Lindell Y, 2000, LECTURE NOTES IN COMPUTER SCIENCE;Chaudhuri K, 2011, PUBMED;Bassily R, 2014, ;Fredrikson M, 2014, PUBMED;Chaudhuri K, 2008, ;Jagannathan G, 2005, ;Hardt M, 2016, INTERNATIONAL CONFERENCE ON MACHINE LEARNING;Du W, 2004, ;Zhang J, 2012, PROCEEDINGS OF THE VLDB ENDOWMENT;Dwork C, 2011, ;Yang D, 2016, ACM TRANSACTIONS ON INTELLIGENT SYSTEMS AND TECHNOLOGY;Calandrino J, 2011, ;Bos J, 2014, JOURNAL OF BIOMEDICAL INFORMATICS;Rubinstein B, 2012, JOURNAL OF PRIVACY AND CONFIDENTIALITY;Sankararaman S, 2009, NATURE GENETICS;Vaidya J, 2007, THE VLDB JOURNAL;Barni M, 2011, IEEE TRANSACTIONS ON INFORMATION FORENSICS AND SECURITY;Duchi J, 2014, JOURNAL OF THE ACM;Schneider I, 2005, THE MATHEMATICAL INTELLIGENCER;Dwork C, 2015, ;Backes M, 2016, ;Zhu J, 2001, ;Dwork C, 2010, JOURNAL OF PRIVACY AND CONFIDENTIALITY;Diakonikolas I, 2015, EDINBURGH RESEARCH EXPLORER (UNIVERSITY OF EDINBURGH);Krizhevsky A, 2024, ;Hinton G, 2015, ARXIV (CORNELL UNIVERSITY);Tramèr F, 2016, ARXIV (CORNELL UNIVERSITY);Chaudhuri K, 2009, ARXIV (CORNELL UNIVERSITY);, 2016, IRIS RESEARCH PRODUCT CATALOG (SAPIENZA UNIVERSITY OF ROME);Hardt M, 2015, ARXIV (CORNELL UNIVERSITY);Xie P, 2014, ARXIV (CORNELL UNIVERSITY);Duchi J, 2012, ARXIV (CORNELL UNIVERSITY);Jain P, 2015, ARXIV (CORNELL UNIVERSITY);Kusner M, 2015, POLYPUBLIE (ÉCOLE POLYTECHNIQUE DE MONTRÉAL)",,,OPENALEX,"Shokri R, 2017, ","Shokri R, 2017, " +https://openalex.org/W1532362218,10.1007/11744023_34,Machine Learning for High-Speed Corner 
Detection,2006,en,book-chapter,4374,LECTURE NOTES IN COMPUTER SCIENCE,Lecture notes in computer science,Edward Rosten;Tom Drummond,Edward Rosten;Tom Drummond,"Department of Engineering, Cambridge University, UK;Department of Engineering, Cambridge University, UK",,,,,430,443,Detector;Scale-invariant feature transform;Computer science;Frame rate;Artificial intelligence;Corner detection;Frame (networking);Feature (linguistics);Computer vision;Process (computing);Speedup;Feature extraction;Pattern recognition (psychology);Image (mathematics);Telecommunications,GB,"Lowe D, 2004, INTERNATIONAL JOURNAL OF COMPUTER VISION;Quinlan J, 1986, MACHINE LEARNING;Harris C, 1988, ;Shi J, 1994, ;Haralock R, 1991, ADDISON-WESLEY LONGMAN PUBLISHING CO., INC. EBOOKS;Smith S, 1997, INTERNATIONAL JOURNAL OF COMPUTER VISION;Schmid C, 2000, INTERNATIONAL JOURNAL OF COMPUTER VISION;Mikolajczyk K, 2002, LECTURE NOTES IN COMPUTER SCIENCE;Rosten E, 2005, ;Mikolajczyk K, 2002, ;Sklar B, 1987, ;Moravec H, 2018, RESEARCH SHOWCASE @ CARNEGIE MELLON UNIVERSITY (CARNEGIE MELLON UNIVERSITY);Kitchen L, 1982, PATTERN RECOGNITION LETTERS;Brown M, 2002, ;Mokhtarian F, 1998, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Schaffalitzky F, 2002, LECTURE NOTES IN COMPUTER SCIENCE;Trajković M, 1998, IMAGE AND VISION COMPUTING;Schmid C, 2002, ;Wang H, 1995, IMAGE AND VISION COMPUTING;Zheng Z, 1999, PATTERN RECOGNITION LETTERS;Loy G, 2002, LECTURE NOTES IN COMPUTER SCIENCE;Trucco E, 1995, AI COMMUNICATIONS;Noble J, 1988, IMAGE AND VISION COMPUTING;Rosten E, 2005, LECTURE NOTES IN COMPUTER SCIENCE;Medioni G, 1987, COMPUTER VISION GRAPHICS AND IMAGE PROCESSING;Dias P, 2002, ;Cooper D, 1993, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Zuliani M, 2005, ;Guiducci A, 1988, PATTERN RECOGNITION LETTERS;Kenney C, 2003, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Langridge D, 1982, COMPUTER GRAPHICS AND IMAGE PROCESSING;Acharya R, 1987, COMPUTER VISION GRAPHICS AND 
IMAGE PROCESSING",,,OPENALEX,"Rosten E, 2006, LECTURE NOTES IN COMPUTER SCIENCE","Rosten E, 2006, LECTURE NOTES IN COMPUTER SCIENCE" +https://openalex.org/W2588003345,10.1371/journal.pone.0169748,SoilGrids250m: Global gridded soil information based on machine learning,2017,en,article,4654,PLOS ONE,PLoS ONE,Tomislav Hengl;Jorge Mendes de Jesus;G.B.M. Heuvelink;M. Ruiperez González;Milan Kilibarda;Aleksandar Blagotić;Wei Shangguan;Marvin N. Wright;Xiaoyuan Geng;Bernhard Bauer-Marschallinger;Mário Guevara;Rodrigo Vargas;R.A. MacMillan;N.H. Batjes;J.G.B. Leenaars;Eloi Ribeiro;Ichsani Wheeler;S. Mantel;Bas Kempen,Tomislav Hengl;Jorge Mendes de Jesus;Gerard B. M. Heuvelink;Maria Ruiperez Gonzalez;Milan Kilibarda;Aleksandar Blagotić;Wei Shangguan;Marvin N. Wright;Xiaoyuan Geng;Bernhard Bauer-Marschallinger;Mario Antonio Guevara;Rodrigo Vargas;Robert A. MacMillan;Niels H. Batjes;Johan G. B. Leenaars;Eloi Ribeiro;Ichsani Wheeler;Stephan Mantel;Bas Kempen,"ISRIC - World Soil Information, Wageningen, the Netherlands;ISRIC — World Soil Information, Wageningen, the Netherlands;ISRIC - World Soil Information, Wageningen, the Netherlands;ISRIC — World Soil Information, Wageningen, the Netherlands;ISRIC - World Soil Information, Wageningen, the Netherlands;ISRIC — World Soil Information, Wageningen, the Netherlands;ISRIC - World Soil Information, Wageningen, the Netherlands;ISRIC — World Soil Information, Wageningen, the Netherlands;Faculty of Civil Engineering, University of Belgrade, Belgrade, Serbia;GILab Ltd, Belgrade, Serbia;School of Atmospheric Sciences, Sun Yat-sen University, Guangzhou, China;Institut für Medizinische Biometrie und Statistik, Lübeck, Germany;Agriculture and Agri-Food Canada, Ottawa (Ontario), Canada;Department of Geodesy and Geoinformation, Vienna University of Technology, Vienna, Austria;University of Delaware, Newark (DE), United States of America;University of Delaware, Newark (DE), United States of America;LandMapper Environmental Solutions Inc., 
Edmonton (Alberta), Canada;ISRIC - World Soil Information, Wageningen, the Netherlands;ISRIC — World Soil Information, Wageningen, the Netherlands;ISRIC - World Soil Information, Wageningen, the Netherlands;ISRIC — World Soil Information, Wageningen, the Netherlands;ISRIC - World Soil Information, Wageningen, the Netherlands;ISRIC — World Soil Information, Wageningen, the Netherlands;Envirometrix Inc., Wageningen, the Netherlands;ISRIC - World Soil Information, Wageningen, the Netherlands;ISRIC — World Soil Information, Wageningen, the Netherlands;ISRIC - World Soil Information, Wageningen, the Netherlands;ISRIC — World Soil Information, Wageningen, the Netherlands","Tomislav Hengl (corresponding author), ISRIC - World Soil Information, Wageningen, the Netherlands; ISRIC — World Soil Information, Wageningen, the Netherlands","This paper describes the technical development and accuracy assessment of the most recent and improved version of the SoilGrids system at 250m resolution (June 2016 update). SoilGrids provides global predictions for standard numeric soil properties (organic carbon, bulk density, Cation Exchange Capacity (CEC), pH, soil texture fractions and coarse fragments) at seven standard depths (0, 5, 15, 30, 60, 100 and 200 cm), in addition to predictions of depth to bedrock and distribution of soil classes based on the World Reference Base (WRB) and USDA classification systems (ca. 280 raster layers in total). Predictions were based on ca. 150,000 soil profiles used for training and a stack of 158 remote sensing-based soil covariates (primarily derived from MODIS land products, SRTM DEM derivatives, climatic images and global landform and lithology maps), which were used to fit an ensemble of machine learning methods-random forest and gradient boosting and/or multinomial logistic regression-as implemented in the R packages ranger, xgboost, nnet and caret. 
The results of 10-fold cross-validation show that the ensemble models explain between 56% (coarse fragments) and 83% (pH) of variation with an overall average of 61%. Improvements in the relative accuracy considering the amount of variation explained, in comparison to the previous version of SoilGrids at 1 km spatial resolution, range from 60 to 230%. Improvements can be attributed to: (1) the use of machine learning instead of linear regression, (2) to considerable investments in preparing finer resolution covariate layers and (3) to insertion of additional soil profiles. Further development of SoilGrids could include refinement of methods to incorporate input uncertainties and derivation of posterior probability distributions (per pixel), and further automation of spatial modeling so that soil maps can be generated for potentially hundreds of soil variables. Another area of future research is the development of methods for multiscale merging of SoilGrids predictions with local and/or national gridded soil products (e.g. up to 50 m spatial resolution) so that increasingly more accurate, complete and consistent global soil information can be produced. 
SoilGrids are available under the Open Data Base License.",12,2,e0169748,e0169748,Random forest;Landform;Soil texture;Gradient boosting;Soil science;Environmental science;Soil map;Shuttle Radar Topography Mission;Ensemble learning;Spatial variability;Standard deviation;Land cover;Computer science;Remote sensing;Artificial intelligence;Cartography;Mathematics;Land use;Digital elevation model;Statistics;Geology;Soil water,NL;RS;CN;DE;CA;AT;US,"Venables W, 2002, STATISCTICS AND COMPUTING/STATISTICS AND COMPUTING;Hijmans R, 2005, INTERNATIONAL JOURNAL OF CLIMATOLOGY;Kühn M, 2008, JOURNAL OF STATISTICAL SOFTWARE;Cohen J, 1968, PSYCHOLOGICAL BULLETIN;Kühn M, 2013, ;Shannon C, 1998, PROCEEDINGS OF THE IEEE;Sing T, 2005, COMPUTER APPLICATIONS IN THE BIOSCIENCES;Chen J, 2014, ISPRS JOURNAL OF PHOTOGRAMMETRY AND REMOTE SENSING;Rabus B, 2003, ISPRS JOURNAL OF PHOTOGRAMMETRY AND REMOTE SENSING;Pendleton R, 1945, GEOGRAPHICAL REVIEW;Scharlemann J, 2014, CARBON MANAGEMENT;Meinshausen N, 2006, ;Goldewijk K, 2010, GLOBAL ECOLOGY AND BIOGEOGRAPHY;Fan Y, 2013, SCIENCE;Hengl T, 2014, PLOS ONE;Hartmann J, 2012, GEOCHEMISTRY GEOPHYSICS GEOSYSTEMS;Minasny B, 2006, COMPUTERS & GEOSCIENCES;Hall D, 2007, HYDROLOGICAL PROCESSES;Pebesma E, 2015, ;Statnikov A, 2008, BMC BIOINFORMATICS;Shangguan W, 2013, JOURNAL OF ADVANCES IN MODELING EARTH SYSTEMS;Sánchez P, 2009, SCIENCE;Raup B, 2006, GLOBAL AND PLANETARY CHANGE;Brus D, 2011, EUROPEAN JOURNAL OF SOIL SCIENCE;Carroll M, 2009, INTERNATIONAL JOURNAL OF DIGITAL EARTH;Arrouays D, 2014, ADVANCES IN AGRONOMY;Hugelius G, 2013, EARTH SYSTEM SCIENCE DATA;Stockmann U, 2015, GLOBAL FOOD SECURITY;Grunwald S, 2011, SOIL SCIENCE SOCIETY OF AMERICA JOURNAL;Batjes N, 2009, SOIL USE AND MANAGEMENT;Savtchenko A, 2004, ADVANCES IN SPACE RESEARCH;Wan Z, 2006, INTERNATIONAL JOURNAL OF ENVIRONMENTAL RESEARCH AND PUBLIC HEALTH;Kilibarda M, 2014, JOURNAL OF GEOPHYSICAL RESEARCH ATMOSPHERES;Fitzpatrick M, 2013, ECOSPHERE;Borda M, 2011, ;Malone B, 2014, 
GEODERMA;Sayre R, 2014, DIGITAL ACCESS TO LIBRARIES (UNIVERSITÉ CATHOLIQUE DE LOUVAIN (UCL), L'UNIVERSITÉ DE NAMUR (UNAMUR) AND THE UNIVERSITÉ SAINT-LOUIS (USL-B));Montanarella L, 2012, CURRENT OPINION IN ENVIRONMENTAL SUSTAINABILITY;Gasch C, 2015, SPATIAL STATISTICS;Bauer-Marschallinger B, 2014, COMPUTERS & GEOSCIENCES;Griffiths R, 2015, APPLIED SOIL ECOLOGY;Shani U, 2007, WATER RESOURCES RESEARCH;Cooper M, 2005, SOIL SCIENCE SOCIETY OF AMERICA JOURNAL;Knaus J, 2009, THE R JOURNAL;Láng V, 2012, GEODERMA;Pogson M, 2011, ENVIRONMENTAL MODELLING & SOFTWARE;Kidd D, 2015, SOIL RESEARCH;Mantel S, 2007, DATA ARCHIVING AND NETWORKED SERVICES (DANS);Baade J, 2014, ;Danielson J, 2011, ANTARCTICA A KEYSTONE IN A CHANGING WORLD;Chen T, 2016, ;Shannon C, 1949, PROCEEDINGS OF THE IRE;Anonim A, 2010, ;Wright M, 2017, JOURNAL OF STATISTICAL SOFTWARE;Giri C, 2010, GLOBAL ECOLOGY AND BIOGEOGRAPHY;Conrad O, 2015, GEOSCIENTIFIC MODEL DEVELOPMENT;Krämer W, 2003, COMPUTATIONAL STATISTICS & DATA ANALYSIS;FAO, 2023, ;Kühn M, 2008, ;Hengl T, 2015, PLOS ONE;Nachtergaele F, 2012, SOCIO-ENVIRONMENTAL SYSTEMS MODELING;Conrad O, 2015, ;Rome, 1977, ;Shangguan W, 2016, JOURNAL OF ADVANCES IN MODELING EARTH SYSTEMS;Sollich P, 1995, CERN DOCUMENT SERVER (EUROPEAN ORGANIZATION FOR NUCLEAR RESEARCH);Pelletier J, 2015, JOURNAL OF ADVANCES IN MODELING EARTH SYSTEMS;Brevik E, 2015, GEODERMA;Folberth C, 2016, NATURE COMMUNICATIONS;Domisch S, 2015, SCIENTIFIC DATA;Mulder V, 2015, GEODERMA;Krasilnikov P, 2009, ;Tóth G, 2013, JOINT RESEARCH CENTRE (EUROPEAN COMMISSION);Mira M, 2015, REMOTE SENSING OF ENVIRONMENT;Wiley P, 2007, ;Shepherd K, 2014, AGRICULTURAL SYSTEMS;Aksoy E, 2016, PLOS ONE;Searle R, 2014, ;Ribeiro E, 2015, ;Leenaars J, 2013, DATA ARCHIVING AND NETWORKED SERVICES (DANS);Faison E, 2016, PLOS ONE;Ribeiro E, 2015, ;Brevik E, 2015, GEODERMA;Jagger J, 2007, ELSEVIER EBOOKS;Hijmans R, 2010, ;Wright M, 2015, ",,,OPENALEX,"Hengl T, 2017, PLOS ONE","Hengl T, 2017, PLOS ONE" 
+https://openalex.org/W3153990350,10.1007/s12525-021-00475-2,Machine learning and deep learning,2021,en,article,2478,ELECTRONIC MARKETS,Electronic Markets,Christian Janiesch;Patrick Zschech;Kai Heinrich,Christian Janiesch;Patrick Zschech;Kai Heinrich,"Faculty of Business Management & Economics, University of Würzburg, Sanderring 2, 97070, Würzburg, Germany;Institute of Information Systems, Friedrich-Alexander University Erlangen-Nürnberg, Lange Gasse 20, 90403, Nürnberg, Germany;Faculty of Economics and Management, Otto-von-Guericke-Universität Magdeburg, Universitätsplatz 2, 39106, Magdeburg, Germany","Christian Janiesch (corresponding author), Faculty of Business Management & Economics, University of Würzburg, Sanderring 2, 97070, Würzburg, Germany","Abstract Today, intelligent systems that offer artificial intelligence capabilities often rely on machine learning. Machine learning describes the capacity of systems to learn from problem-specific training data to automate the process of analytical model building and solve associated tasks. Deep learning is a machine learning concept based on artificial neural networks. For many applications, deep learning models outperform shallow machine learning models and traditional data analysis approaches. In this article, we summarize the fundamentals of machine learning and deep learning to generate a broader understanding of the methodical underpinning of current intelligent systems. In particular, we provide a conceptual distinction between relevant terms and concepts, explain the process of automated analytical model building through machine learning and deep learning, and discuss the challenges that arise when implementing such intelligent systems in the field of electronic markets and networked business. 
These naturally go beyond technological aspects and highlight issues in human-machine interaction and artificial intelligence servitization.",31,3,685,695,Deep learning;Field (mathematics);Process (computing);Underpinning;Artificial neural network;Instance-based learning;Hyper-heuristic;Computational learning theory,DE,"LeCun Y, 2015, NATURE;Lowe D, 2004, INTERNATIONAL JOURNAL OF COMPUTER VISION;Dalal N, 2005, ;Viola P, 2005, ;Schmidhuber J, 2014, NEURAL NETWORKS;Demšar J, 2006, ;Jordan M, 2015, SCIENCE;Salton G, 1988, INFORMATION PROCESSING & MANAGEMENT;Rudin C, 2019, NATURE MACHINE INTELLIGENCE;Bishop C, 2006, SPRINGER EBOOKS;Searle J, 1980, BEHAVIORAL AND BRAIN SCIENCES;Adadi A, 2018, IEEE ACCESS;Silver D, 2018, SCIENCE;Gama J, 2014, ACM COMPUTING SURVEYS;Young T, 2018, IEEE COMPUTATIONAL INTELLIGENCE MAGAZINE;Eykholt K, 2018, ;Grigorescu S, 2019, JOURNAL OF FIELD ROBOTICS;Kotsiantis S, 2006, ARTIFICIAL INTELLIGENCE REVIEW;Pouyanfar S, 2018, ACM COMPUTING SURVEYS;Shmueli G, 2011, MIS QUARTERLY;Westerlund M, 2019, TECHNOLOGY INNOVATION MANAGEMENT REVIEW;Zhang Y, 2018, NPJ COMPUTATIONAL MATERIALS;Pan Z, 2019, IEEE ACCESS;Madani A, 2018, NPJ DIGITAL MEDICINE;Chen S, 2008, MATHEMATICS AND COMPUTERS IN SIMULATION;Shrestha Y, 2020, JOURNAL OF BUSINESS RESEARCH;Ahani A, 2019, INTERNATIONAL JOURNAL OF HOSPITALITY MANAGEMENT;Goyal D, 2015, CIRP JOURNAL OF MANUFACTURING SCIENCE AND TECHNOLOGY;Paula E, 2016, ;Peters M, 2013, MACHINE LEARNING;Howard A, 2017, ;Assaf R, 2019, ;Pentland B, 2020, MIS QUARTERLY;Kühl N, 2019, ELECTRONIC MARKETS;Liu Z, 2020, ;Balaji A, 2018, PROCEDIA COMPUTER SCIENCE;Wang S, 2020, IEEE TRANSACTIONS ON SERVICES COMPUTING;Ramaswamy S, 2018, PROCEDIA COMPUTER SCIENCE;Heinrich K, 2021, DECISION SUPPORT SYSTEMS;Fuchs D, 2018, ;Fischer M, 2020, ELECTRONIC MARKETS;Selz D, 2020, ELECTRONIC MARKETS;Wu M, 2018, ;Leijnen S, 2020, PROCEEDINGS;Wanner J, 2020, JOURNAL OF THE ASSOCIATION FOR INFORMATION SYSTEMS;Duin R, 1994, PATTERN RECOGNITION LETTERS;Heinrich 
K, 2019, JOURNAL OF THE ASSOCIATION FOR INFORMATION SYSTEMS;Heinrich K, 2020, JOURNAL OF THE ASSOCIATION FOR INFORMATION SYSTEMS;Haselton M, 2015, ;Widmer G, 1996, MACHINE LEARNING",,,OPENALEX,"Janiesch C, 2021, ELECTRONIC MARKETS","Janiesch C, 2021, ELECTRONIC MARKETS" +https://openalex.org/W1495061682,,Correlation-based Feature Selection for Machine Learning,1998,en,article,3503,,,Mark Hall,Mark Hall,,"Mark Hall (corresponding author), ","A central problem in machine learning is identifying a representative set of features from which to construct a classification model for a particular task. This thesis addresses the problem of feature selection for machine learning through a correlation based approach. The central hypothesis is that good feature sets contain features that are highly correlated with the class, yet uncorrelated with each other. A feature evaluation formula, based on ideas from test theory, provides an operational definition of this hypothesis. CFS (Correlation based Feature Selection) is an algorithm that couples this evaluation formula with an appropriate correlation measure and a heuristic search strategy. \n \nCFS was evaluated by experiments on artificial and natural datasets. Three machine learning algorithms were used: C4.5 (a decision tree learner), IB 1 (an instance based learner), and naive Bayes. Experiments on artificial datasets showed that CFS quickly identifies and screens irrelevant, redundant, and noisy features, and identifies relevant features as long as their relevance does not strongly depend on other features. On natural domains, CFS typically eliminated well over half the features. In most cases, classification accuracy using the reduced feature set equaled or bettered accuracy using the complete feature set. Feature selection degraded machine learning performance in cases where some features were eliminated which were highly predictive of very small areas of the instance space. 
\n \nFurther experiments compared CFS with a wrapper - a well known approach to feature selection that employs the target learning algorithm to evaluate feature sets. In many cases CFS gave comparable results to the wrapper, and in general, outperformed the wrapper on small datasets. CFS executes many times faster than the wrapper, which allows it to scale to larger datasets. \n \nTwo methods of extending CFS to handle feature interaction are presented and experimentally evaluated. The first considers pairs of features and the second incorporates feature weights calculated by the RELIEF algorithm. Experiments on artificial domains showed that both methods were able to identify interacting features. On natural domains, the pairwise method gave more reliable results than using weights provided by RELIEF.",,,,,Artificial intelligence;Feature selection;Machine learning;Feature (linguistics);Computer science;Pattern recognition (psychology);Heuristic;Naive Bayes classifier;Relevance (law);Correlation;Set (abstract data type);Decision tree;Data mining;Support vector machine;Mathematics,,"Holland J, 1992, THE MIT PRESS EBOOKS;Pawlak Z, 1991, OUR DIGITAL LIBRARY (WARSAW UNIVERSITY OF TECHNOLOGY);Salzberg S, 1994, ;Kira K, 1992, ELSEVIER EBOOKS;Kononenko I, 1994, LECTURE NOTES IN COMPUTER SCIENCE;Devijver P, 1982, PRENTICE-HALL INTERNATIONAL EBOOKS;Geisser S, 1975, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;John G, 1994, ELSEVIER EBOOKS;Dougherty J, 1995, ELSEVIER EBOOKS;Piateski G, 1991, MIT PRESS EBOOKS;Narendra, 1977, IEEE TRANSACTIONS ON COMPUTERS;Holmes G, 2002, ;Cleary J, 1995, ELSEVIER EBOOKS;Kohavi R, 1995, LECTURE NOTES IN COMPUTER SCIENCE;Domingos P, 1996, ;Quinlan J, 1989, INFORMATION AND COMPUTATION;Liu H, 1996, ;Langley P, 1994, ELSEVIER EBOOKS;Almuallim H, 1991, NATIONAL CONFERENCE ON ARTIFICIAL INTELLIGENCE;Schapire R, 1997, QUT EPRINTS (QUEENSLAND UNIVERSITY OF TECHNOLOGY);Caruana R, 1994, ELSEVIER EBOOKS;Skalak D, 1994, ELSEVIER EBOOKS;Schaffer C, 
1993, MACHINE LEARNING;Catlett J, 1991, LECTURE NOTES IN COMPUTER SCIENCE;, 1965, STUDENTS QUARTERLY JOURNAL;Sahami M, 1996, ;Kononenko I, 1991, LECTURE NOTES IN COMPUTER SCIENCE;Kohavi R, 1995, KNOWLEDGE DISCOVERY AND DATA MINING;Kononenko I, 1995, ;Kohavi R, 1995, ;Almuallim H, 1992, ;Michalski R, 1983, COMPUTER COMPACTS;Parsons T, 1986, MEDICAL ENTOMOLOGY AND ZOOLOGY;Moore A, 1994, ELSEVIER EBOOKS;Modrzejewski M, 1993, LECTURE NOTES IN COMPUTER SCIENCE;Wong A, 1987, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Pazzani M, 1996, LECTURE NOTES IN STATISTICS;Langley P, 1994, ;Singh M, 1996, INTERNATIONAL CONFERENCE ON MACHINE LEARNING;Hogarth R, 1977, ;Gamberger D, 1997, LECTURE NOTES IN COMPUTER SCIENCE;Provan G, 1996, LECTURE NOTES IN STATISTICS;Cherkauer K, 1996, ;Holmes G, 1995, RESEARCH COMMONS (THE UNIVERSITY OF WAIKATO);, 1997, THE MIT PRESS EBOOKS;Cleary J, 1996, RESEARCH COMMONS (THE UNIVERSITY OF WAIKATO);Langley P, 1994, ;Thrun S, 1991, ;Bainbridge D, 2002, ;Quinlan J, 1992, ;Ryzin J, 1986, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Pearl J, 1988, ;Press W, 1994, ;Quinlan J, 1986, MACHINE LEARNING;Kohavi R, 1997, ARTIFICIAL INTELLIGENCE;Freund Y, 1996, ;Rissanen J, 1978, AUTOMATICA;Wozniak P, 1991, TECHNOMETRICS;Aha D, 1991, MACHINE LEARNING;Domingos P, 1997, MACHINE LEARNING;Quinlan J, 1987, INTERNATIONAL JOURNAL OF MAN-MACHINE STUDIES;Merz C, 1996, MEDICAL ENTOMOLOGY AND ZOOLOGY;Clark P, 1989, MACHINE LEARNING;Holte R, 1993, MACHINE LEARNING;Allen D, 1974, TECHNOMETRICS;Liu H, 2002, ;Cost S, 1993, MACHINE LEARNING;Marill T, 1963, IEEE TRANSACTIONS ON INFORMATION THEORY;Raab G, 1991, BIOMETRICS;Langley P, 1995, COMMUNICATIONS OF THE ACM;Aha D, 1992, INTERNATIONAL JOURNAL OF MAN-MACHINE STUDIES;Friedman N, 1996, ;Salzberg S, 1991, MACHINE LEARNING;Wettschereck D, 1995, LECTURE NOTES IN COMPUTER SCIENCE;Kohavi R, 2002, ;Singer R, 1983, MYCOLOGIA;Creecy R, 1992, COMMUNICATIONS OF THE ACM;White A, 1994, MACHINE LEARNING;Kononenko I, 
1991, MACHINE LEARNING;John G, 1996, ;Vafaie H, 2002, ;Domingos P, 1997, ;Hutchinson A, 1994, ;Zajonc R, 1962, HUMAN RELATIONS;Cunningham S, 1999, RESEARCH COMMONS (UNIVERSITY OF WAIKATO);, 2021, ;Gennari J, 1988, ",,,OPENALEX,"Hall M, 1998, ","Hall M, 1998, " +https://openalex.org/W2594183968,10.1039/c7sc02664a,MoleculeNet: a benchmark for molecular machine learning,2017,en,article,2947,CHEMICAL SCIENCE,Chemical Science,Zhenqin Wu;Bharath Ramsundar;Evan N. Feinberg;Joseph Gomes;Caleb Geniesse;Aneesh Pappu;Karl Leswing;Vijay S. Pande,Zhenqin Wu;Bharath Ramsundar;Evan N. Feinberg;Joseph Gomes;Caleb Geniesse;Aneesh S. Pappu;Karl Leswing;Vijay Pande,"Department of Chemistry;Stanford;Stanford University;USA;Department of Chemistry, Stanford University, Stanford, CA 94305, USA;Department of Computer Science;Stanford;Stanford University;USA;Department of Computer Science, Stanford University, Stanford, CA 94305, USA;Program in Biophysics;Stanford;Stanford School of Medicine;USA;Program in Biophysics, Stanford School of Medicine, Stanford, CA 94305, USA;Department of Chemistry;Stanford;Stanford University;USA;Department of Chemistry, Stanford University, Stanford, CA 94305, USA;Program in Biophysics;Stanford;Stanford School of Medicine;USA;Program in Biophysics, Stanford School of Medicine, Stanford, CA 94305, USA;Department of Computer Science;Stanford;Stanford University;USA;Department of Computer Science, Stanford University, Stanford, CA 94305, USA;Schrodinger Inc;USA;Schrodinger Inc., USA;Department of Chemistry;Stanford;Stanford University;USA;Department of Chemistry, Stanford University, Stanford, CA 94305, USA","Vijay Pande (corresponding author), Department of Chemistry; Stanford; Stanford University; USA; Department of Chemistry, Stanford University, Stanford, CA 94305, USA","Molecular machine learning has been maturing rapidly over the last few years. 
Improved methods and the presence of larger datasets have enabled machine learning algorithms to make increasingly accurate predictions about molecular properties. However, algorithmic progress has been limited due to the lack of a standard benchmark to compare the efficacy of proposed methods; most new algorithms are benchmarked on different datasets making it challenging to gauge the quality of proposed methods. This work introduces MoleculeNet, a large scale benchmark for molecular machine learning. MoleculeNet curates multiple public datasets, establishes metrics for evaluation, and offers high quality open-source implementations of multiple previously proposed molecular featurization and learning algorithms (released as part of the DeepChem open source library). MoleculeNet benchmarks demonstrate that learnable representations are powerful tools for molecular machine learning and broadly offer the best performance. However, this result comes with caveats. Learnable representations still struggle to deal with complex tasks under data scarcity and highly imbalanced classification. 
For quantum mechanical and biophysical datasets, the use of physics-aware featurizations can be more important than choice of particular learning algorithm.",9,2,513,530,Benchmark (surveying);Computer science;Machine learning;Artificial intelligence;Scale (ratio);Physics;Geography,US,"Russakovsky O, 2015, INTERNATIONAL JOURNAL OF COMPUTER VISION;Cortes C, 1995, MACHINE LEARNING;Friedman J, 2001, THE ANNALS OF STATISTICS;Schmidhuber J, 2014, NEURAL NETWORKS;Miller G, 1995, COMMUNICATIONS OF THE ACM;Groom C, 2016, ACTA CRYSTALLOGRAPHICA SECTION B STRUCTURAL SCIENCE CRYSTAL ENGINEERING AND MATERIALS;Rogers D, 2010, JOURNAL OF CHEMICAL INFORMATION AND MODELING;Weininger D, 1988, JOURNAL OF CHEMICAL INFORMATION AND COMPUTER SCIENCES;Friedman J, 2000, THE ANNALS OF STATISTICS;Behler J, 2007, PHYSICAL REVIEW LETTERS;Berman H, 2003, NATURE STRUCTURAL & MOLECULAR BIOLOGY;Bemis G, 1996, JOURNAL OF MEDICINAL CHEMISTRY;Rupp M, 2012, PHYSICAL REVIEW LETTERS;Ramakrishnan R, 2014, SCIENTIFIC DATA;Gražulis S, 2009, JOURNAL OF APPLIED CRYSTALLOGRAPHY;Kearnes S, 2016, JOURNAL OF COMPUTER-AIDED MOLECULAR DESIGN;Ruddigkeit L, 2012, JOURNAL OF CHEMICAL INFORMATION AND MODELING;Kuhn M, 2015, NUCLEIC ACIDS RESEARCH;Bolton E, 2008, ANNUAL REPORTS IN COMPUTATIONAL CHEMISTRY;Ma J, 2015, JOURNAL OF CHEMICAL INFORMATION AND MODELING;Wang R, 2004, JOURNAL OF MEDICINAL CHEMISTRY;Delaney J, 2004, JOURNAL OF CHEMICAL INFORMATION AND COMPUTER SCIENCES;Wang R, 2005, JOURNAL OF MEDICINAL CHEMISTRY;Blum L, 2009, JOURNAL OF THE AMERICAN CHEMICAL SOCIETY;Zupan J, 1999, MEDICAL ENTOMOLOGY AND ZOOLOGY;Liu Z, 2014, BIOINFORMATICS;Wang Y, 2011, NUCLEIC ACIDS RESEARCH;Montavon G, 2013, NEW JOURNAL OF PHYSICS;Lusci A, 2013, JOURNAL OF CHEMICAL INFORMATION AND MODELING;Mobley D, 2014, JOURNAL OF COMPUTER-AIDED MOLECULAR DESIGN;Gasteiger J, 1993, ANGEWANDTE CHEMIE INTERNATIONAL EDITION IN ENGLISH;Mitchell J, 2014, WILEY INTERDISCIPLINARY REVIEWS COMPUTATIONAL MOLECULAR SCIENCE;Rohrer S, 2009, JOURNAL OF 
CHEMICAL INFORMATION AND MODELING;Durrant J, 2011, JOURNAL OF CHEMICAL INFORMATION AND MODELING;Jain A, 2008, JOURNAL OF COMPUTER-AIDED MOLECULAR DESIGN;Sheridan R, 2013, JOURNAL OF CHEMICAL INFORMATION AND MODELING;Ramakrishnan R, 2015, THE JOURNAL OF CHEMICAL PHYSICS;Varnek A, 2012, JOURNAL OF CHEMICAL INFORMATION AND MODELING;Schneider G, 1998, PROGRESS IN BIOPHYSICS AND MOLECULAR BIOLOGY;Da C, 2014, JOURNAL OF CHEMICAL INFORMATION AND MODELING;Mobley D, 2014, JOURNAL OF COMPUTER-AIDED MOLECULAR DESIGN;Novick P, 2013, PLOS ONE;, 1996, ELSEVIER EBOOKS;Shemetulskis N, 1996, JOURNAL OF CHEMICAL INFORMATION AND COMPUTER SCIENCES;Baskin I, 1997, JOURNAL OF CHEMICAL INFORMATION AND COMPUTER SCIENCES;Manyuhina O, 2007, PHYSICAL REVIEW LETTERS;Swamidass S, 2009, JOURNAL OF CHEMICAL INFORMATION AND MODELING;Kireev D, 1995, JOURNAL OF CHEMICAL INFORMATION AND COMPUTER SCIENCES;Pedregosa F, 2012, ARXIV (CORNELL UNIVERSITY);Ramsundar B, 2015, ARXIV (CORNELL UNIVERSITY);Breiman L, 2001, MACHINE LEARNING;LeCun Y, 2015, NATURE;Hastie T, 2009, SPRINGER SERIES IN STATISTICS;Cohen W, 2006, ;Richard A, 2016, CHEMICAL RESEARCH IN TOXICOLOGY;Montavon G, 2013, MPG.PURE (MAX PLANCK SOCIETY);Martins I, 2012, JOURNAL OF CHEMICAL INFORMATION AND MODELING;Gayvert K, 2016, CELL CHEMICAL BIOLOGY;Subramanian G, 2016, JOURNAL OF CHEMICAL INFORMATION AND MODELING;Ramsundar B, 2017, JOURNAL OF CHEMICAL INFORMATION AND MODELING;McGibbon R, 2017, THE JOURNAL OF CHEMICAL PHYSICS;Gilmer J, 2017, ARXIV (CORNELL UNIVERSITY)",,,OPENALEX,"Wu Z, 2017, CHEMICAL SCIENCE","Wu Z, 2017, CHEMICAL SCIENCE" +https://openalex.org/W2040884411,10.1016/s0004-3702(97)00063-5,Selection of relevant features and examples in machine learning,1997,en,article,3295,ARTIFICIAL INTELLIGENCE,Artificial Intelligence,Avrim Blum;Pat Langley,Avrim L. 
Blum;Pat Langley,"School of Computer Science, Carnegie Mellon University Pittsburgh, PA 15213-3891, USA;Institute for the Study of Learning and Expertise, 2164 Staunton Court, Palo Alto, CA 94306, USA;Intelligent Systems Laboratory, Daimler-Benz Research and Technology Center, 1510 Page Mill Road, Palo Alto, CA 94304, USA","Avrim L. Blum (corresponding author), School of Computer Science, Carnegie Mellon University Pittsburgh, PA 15213-3891, USA",,97,1-2,245,271,Computer science;Machine learning;Artificial intelligence;Focus (optics);Key (lock);Selection (genetic algorithm);Feature selection;Work (physics);Data science;Engineering,US,"Rosenfeld A, 1976, ARTIFICIAL INTELLIGENCE;Kira K, 1992, ELSEVIER EBOOKS;Kononenko I, 1994, LECTURE NOTES IN COMPUTER SCIENCE;Devijver P, 1982, PRENTICE-HALL INTERNATIONAL EBOOKS;Michalski R, 2013, ;Winston P, 1976, PATTERN RECOGNITION;John G, 1994, ELSEVIER EBOOKS;Angluin D, 1987, INFORMATION AND COMPUTATION;Kearns M, 1994, THE MIT PRESS EBOOKS;Meek C, 2004, ;Koller D, 1996, ;N. 
K, 1992, ELSEVIER EBOOKS;Kohavi R, 1995, LECTURE NOTES IN COMPUTER SCIENCE;Langley P, 1994, ELSEVIER EBOOKS;Almuallim H, 1991, NATIONAL CONFERENCE ON ARTIFICIAL INTELLIGENCE;Caruana R, 1994, ELSEVIER EBOOKS;Skalak D, 1994, ELSEVIER EBOOKS;Aha D, 1996, LECTURE NOTES IN STATISTICS;Shavlik J, 1991, ;Fortnow L, 2011, ;Lewis D, 1991, SCHOLARWORKS@UMASSAMHERST (UNIVERSITY OF MASSACHUSETTS AMHERST);Shrager J, 1990, MEDICAL ENTOMOLOGY AND ZOOLOGY;Moore A, 1994, ELSEVIER EBOOKS;Fisher D, 1991, ;Matheus C, 1989, ;Norton S, 1989, INTERNATIONAL JOINT CONFERENCE ON ARTIFICIAL INTELLIGENCE;Daelemans W, 1994, ;, 1995, THE MIT PRESS EBOOKS;Langley P, 1994, ;Langley P, 1993, INTERNATIONAL JOINT CONFERENCE ON ARTIFICIAL INTELLIGENCE;Aha D, 1990, ESCHOLARSHIP (CALIFORNIA DIGITAL LIBRARY);Singh M, 1996, INTERNATIONAL CONFERENCE ON MACHINE LEARNING;Blum A, 1995, JOURNAL OF COMPUTER AND SYSTEM SCIENCES;Shen W, 1989, ;Drucker H, 1994, ELSEVIER EBOOKS;Blum A, 2002, ;Sammut C, 1998, ;Kulkarni D, 1990, NASA TECHNICAL REPORTS SERVER (NASA);Singh M, 1995, ELSEVIER EBOOKS;Kubát M, 1993, LECTURE NOTES IN COMPUTER SCIENCE;Stanfill C, 1987, NATIONAL CONFERENCE ON ARTIFICIAL INTELLIGENCE;Catlett J, 1992, ELSEVIER EBOOKS;, 2007, ;Widrow B, 1960, ;Winston P, 1970, DSPACE@MIT (MASSACHUSETTS INSTITUTE OF TECHNOLOGY);Armstrong R, 1995, ;Gil Y, 1993, ELSEVIER EBOOKS;Scott P, 1991, ELSEVIER EBOOKS;Gross K, 1992, ;Garey M, 1979, ;Quinlan J, 1992, ;Cover T, 1967, IEEE TRANSACTIONS ON INFORMATION THEORY;Kohavi R, 1997, ARTIFICIAL INTELLIGENCE;Goodall C, 1988, TECHNOMETRICS;Comon P, 1994, SIGNAL PROCESSING;Schapire R, 1990, MACHINE LEARNING;Johnson D, 1974, JOURNAL OF COMPUTER AND SYSTEM SCIENCES;Littlestone N, 1994, INFORMATION AND COMPUTATION;Clark P, 1989, MACHINE LEARNING;Blumer A, 1989, JOURNAL OF THE ACM;Holte R, 1993, MACHINE LEARNING;Seung H, 1992, ;Lin L, 1992, MACHINE LEARNING;Mitchell T, 1982, ARTIFICIAL INTELLIGENCE;Littlestone N, 1988, MACHINE LEARNING;Quinlan J, 1983, MACHINE LEARNING;Blumer A, 
1987, INFORMATION PROCESSING LETTERS;Sinclair A, 1989, INFORMATION AND COMPUTATION;Dyer M, 1991, JOURNAL OF THE ACM;Lewis D, 1992, ;Ziegel E, 1989, TECHNOMETRICS;Rivest R, 1993, INFORMATION AND COMPUTATION;Pagallo G, 1990, MACHINE LEARNING;Blum A, 1994, ;Cardie C, 1993, ELSEVIER EBOOKS;Kivinen J, 1995, ;Angluin D, 1993, JOURNAL OF THE ACM;Freund Y, 1990, CONFERENCE ON LEARNING THEORY;Dyer M, 1989, ;Caruana R, 1994, ;Michalski R, 1980, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Lund C, 1993, ;Drucker H, 1992, NEURAL INFORMATION PROCESSING SYSTEMS;Cesa‐Bianchi N, 1993, ;Vere S, 1975, INTERNATIONAL JOINT CONFERENCE ON ARTIFICIAL INTELLIGENCE;Bshouty N, 2002, ;Kivinen J, 1997, ARTIFICIAL INTELLIGENCE;Blum A, 1992, MACHINE LEARNING;Lovász L, 1992, ;Littlestone N, 1991, ;Knobe B, 1976, INFORMATION AND CONTROL;Freund Y, 1992, ;Dhagat A, 2002, ;Baluja S, 1997, ARTIFICIAL INTELLIGENCE;Greiner R, 1997, ARTIFICIAL INTELLIGENCE;Jackson J, 2002, ;Pazzani M, 1992, MACHINE LEARNING;Littlestone N, 1996, NEURAL INFORMATION PROCESSING SYSTEMS;Minsky M, 1969, ;Ryzin J, 1986, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Schapire R, 1990, MACHINE LEARNING;Hunt E, 1983, JOURNAL OF MATHEMATICAL PSYCHOLOGY;Cohn D, 1996, JOURNAL OF ARTIFICIAL INTELLIGENCE RESEARCH;Clark P, 1989, MACHINE LEARNING;, 2004, ;Vovk V, 1990, CONFERENCE ON LEARNING THEORY;Brazdil P, 1993, ;Littlestone N, 1988, MACHINE LEARNING;Someren M, 1997, EUROPEAN CONFERENCE ON MACHINE LEARNING;, 1957, PROCEEDINGS OF THE IRE;Blum A, 1995, ELSEVIER EBOOKS;Lavrač N, 1995, ;Pazzani M, 1992, MACHINE LEARNING;Blum A, 1992, MACHINE LEARNING;Rumelhart D, 1985, ;, 2023, ",,,OPENALEX,"Blum A, 1997, ARTIFICIAL INTELLIGENCE","Blum A, 1997, ARTIFICIAL INTELLIGENCE" +https://openalex.org/W2594475271,10.48550/arxiv.1702.08608,Towards A Rigorous Science of Interpretable Machine Learning,2017,en,preprint,3148,ARXIV (CORNELL UNIVERSITY),arXiv (Cornell University),Finale Doshi‐Velez;Been Kim,"Doshi-Velez, Finale;Kim, 
Been",,,"As machine learning systems become ubiquitous, there has been a surge of interest in interpretable machine learning: systems that provide explanation for their outputs. These explanations are often used to qualitatively assess other criteria such as safety or non-discrimination. However, despite the interest in interpretability, there is very little consensus on what interpretable machine learning is and how it should be measured. In this position paper, we first define interpretability and describe when interpretability is needed (and when it is not). Next, we suggest a taxonomy for rigorous evaluation and expose open questions towards a more rigorous science of interpretable machine learning.",,,,,Interpretability;Artificial intelligence;Machine learning;Computer science;Taxonomy (biology);Position paper,,"Silver D, 2016, NATURE;Dwork C, 2012, ;Hempel C, 1948, PHILOSOPHY OF SCIENCE;Goodman B, 2017, AI MAGAZINE;Chang J, 2009, ;A. D, 2016, ARXIV (CORNELL UNIVERSITY);Keil F, 2005, ANNUAL REVIEW OF PSYCHOLOGY;Bechtel W, 2005, STUDIES IN HISTORY AND PHILOSOPHY OF SCIENCE PART C STUDIES IN HISTORY AND PHILOSOPHY OF BIOLOGICAL AND BIOMEDICAL SCIENCES;Vanschoren J, 2014, ACM SIGKDD EXPLORATIONS NEWSLETTER;Lazar J, 2010, ;Glennan S, 2002, PHILOSOPHY OF SCIENCE;Sculley D, 2015, NEURAL INFORMATION PROCESSING SYSTEMS;Lakkaraju H, 2016, ;Williams J, 2016, ;Wang F, 2015, INTERNATIONAL CONFERENCE ON ARTIFICIAL INTELLIGENCE AND STATISTICS;Leí T, 2016, ;Kim B, 2015, NEURAL INFORMATION PROCESSING SYSTEMS;Antunes P, 2008, ACM COMPUTING SURVEYS;Doshi‐Velez F, 2015, PROCEEDINGS OF THE AAAI CONFERENCE ON ARTIFICIAL INTELLIGENCE;Otte C, 2012, STUDIES IN COMPUTATIONAL INTELLIGENCE;Kim B, 2015, DSPACE@MIT (MASSACHUSETTS INSTITUTE OF TECHNOLOGY);Varshney K, 2017, BIG DATA;Suissa‐Peleg A, 2016, MICROSCOPY AND MICROANALYSIS;Mnih V, 2013, ARXIV (CORNELL UNIVERSITY);Ribeiro M, 2016, ARXIV (CORNELL UNIVERSITY);Garg V, 2016, ARXIV (CORNELL UNIVERSITY)",,,OPENALEX,"Doshi‐Velez F, 2017, 
ARXIV (CORNELL UNIVERSITY)","Doshi‐Velez F, 2017, ARXIV (CORNELL UNIVERSITY)" +https://openalex.org/W2007339694,10.1109/msp.2012.2211477,The MNIST Database of Handwritten Digit Images for Machine Learning Research [Best of the Web],2012,en,article,4615,IEEE SIGNAL PROCESSING MAGAZINE,IEEE Signal Processing Magazine,Li Deng,Li Deng,"Microsoft Research, Redmond, WA, USA;[Microsoft Research,Redmond,WA,USA]","Li Deng (corresponding author), Microsoft Research, Redmond, WA, USA; [Microsoft Research,Redmond,WA,USA]","In this issue, “Best of the Web” presents the modified National Institute of Standards and Technology (MNIST) resources, consisting of a collection of handwritten digit images used extensively in optical character recognition and machine learning research.",29,6,141,142,MNIST database;Computer science;Optical character recognition;Artificial intelligence;Handwriting recognition;Numerical digit;Digit recognition;Character (mathematics);Intelligent word recognition;Deep learning;Character recognition;Pattern recognition (psychology);Speech recognition;Intelligent character recognition;Natural language processing;Image (mathematics);Feature extraction;Artificial neural network;Arithmetic;Mathematics,US,"LeCun Y, 1998, PROCEEDINGS OF THE IEEE;Simard P, 2005, ;Jarrett K, 2009, ;Cireşan D, 2011, ;Cireşan D, 2011, ;Deng L, 2012, ;Deng L, 2011, ",,,OPENALEX,"Deng L, 2012, IEEE SIGNAL PROCESSING MAGAZINE","Deng L, 2012, IEEE SIGNAL PROCESSING MAGAZINE" +https://openalex.org/W607505555,10.1017/cbo9781107298019,Understanding Machine Learning: From Theory To Algorithms,2015,en,book,3092,,,Shai Shalev‐Shwartz;Shai Ben-David,Shai Shalev‐Shwartz;Shai Ben-David,"Hebrew University of Jerusalem ,;University of Waterloo, Ontario#TAB#",,"Machine learning is one of the fastest growing areas of computer science, with far-reaching applications. The aim of this textbook is to introduce machine learning, and the algorithmic paradigms it offers, in a principled way. 
The book provides an extensive theoretical account of the fundamental ideas underlying machine learning and the mathematical derivations that transform these principles into practical algorithms. Following a presentation of the basics of the field, the book covers a wide array of central topics that have not been addressed by previous textbooks. These include a discussion of the computational complexity of learning and the concepts of convexity and stability; important algorithmic paradigms including stochastic gradient descent, neural networks, and structured output learning; and emerging theoretical concepts such as the PAC-Bayes approach and compression-based bounds. Designed for an advanced undergraduate or beginning graduate course, the text makes the fundamentals and algorithms of machine learning accessible to students and non-expert readers in statistics, computer science, mathematics, and engineering.",,,,,Computer science;Artificial intelligence;Field (mathematics);Machine learning;Computational learning theory;Stability (learning theory);Algorithmic learning theory;Stochastic gradient descent;Algorithm;Presentation (obstetrics);Convexity;Online machine learning;Artificial neural network;Mathematics,IL;CA,"Hastie T, 2001, SPRINGER SERIES IN STATISTICS;Nasrabadi N, 2007, JOURNAL OF ELECTRONIC IMAGING;Schölkopf B, 2001, THE MIT PRESS EBOOKS;Murphy K, 2012, ;Koller D, 2009, ;LeCun Y, 1998, HAL (LE CENTRE POUR LA COMMUNICATION SCIENTIFIQUE DIRECTE);Cristianini N, 2000, EPRINTS SOTON (UNIVERSITY OF SOUTHAMPTON);Devroye L, 1996, STOCHASTIC MODELLING AND APPLIED PROBABILITY;Cesa‐Bianchi N, 2006, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Dietterich T, 1995, JOURNAL OF ARTIFICIAL INTELLIGENCE RESEARCH;Hiriart‐Urruty J, 1993, ;Vapnik V, 1995, MEDICAL ENTOMOLOGY AND ZOOLOGY;Vapnik V, 2006, INFORMATION SCIENCE AND STATISTICS;García J, 1966, PSYCHONOMIC SCIENCE;Shapiro A, 2009, ;Kearns M, 1994, THE MIT PRESS EBOOKS;Barber D, 2012, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Schölkopf 
B, 2001, LECTURE NOTES IN COMPUTER SCIENCE;Айзерман М, 1964, AUTOMATION AND REMOTE CONTROL;Zaki M, 2020, CAMBRIDGE UNIVERSITY PRESS EBOOKS;, 1984, MATHEMATICS AND COMPUTERS IN SIMULATION;Anthony M, 1999, LONDON SCHOOL OF ECONOMICS AND POLITICAL SCIENCE RESEARCH ONLINE (LONDON SCHOOL OF ECONOMICS AND POLITICAL SCIENCE);Rokach L, 2007, SERIES IN MACHINE PERCEPTION AND ARTIFICIAL INTELLIGENCE;Freund Y, 1998, ;Hyafil L, 1976, INFORMATION PROCESSING LETTERS;Sauer N, 1972, JOURNAL OF COMBINATORIAL THEORY SERIES A;Borwein J, 2006, CMS BOOKS IN MATHEMATICS;Weston J, 1999, THE EUROPEAN SYMPOSIUM ON ARTIFICIAL NEURAL NETWORKS;E S, 2012, CHOICE REVIEWS ONLINE;Shakhnarovich G, 2005, ;Shalev‐Shwartz S, 2010, JOURNAL OF MACHINE LEARNING RESEARCH;Littlestone N, 2003, ;Koltchinskii V, 2000, BIRKHÄUSER BOSTON EBOOKS;Shalev‐Shwartz S, 2009, CONFERENCE ON LEARNING THEORY;McAllester D, 2003, LECTURE NOTES IN COMPUTER SCIENCE;Li L, 2015, SPRINGER OPTIMIZATION AND ITS APPLICATIONS;Ian P, 1995, CHOICE REVIEWS ONLINE;Gentile C, 2003, MACHINE LEARNING;Abernethy J, 2008, SCHOLARLYCOMMONS (UNIVERSITY OF PENNSYLVANIA);Ben-David S, 2009, ;Haussler D, 1995, JOURNAL OF COMBINATORIAL THEORY SERIES A;Bousquet O, 2002, OPENGREY (INSTITUT DE L'INFORMATION SCIENTIFIQUE ET TECHNIQUE);Klivans A, 2006, ;Warmuth M, 2008, LECTURE NOTES IN COMPUTER SCIENCE;Pisier G, 1981, FRENCH DIGITAL MATHEMATICS LIBRARY (NUMDAM);Agarwal S, 2005, LECTURE NOTES IN COMPUTER SCIENCE;Livni R, 2013, CONFERENCE ON LEARNING THEORY;Eugenio P, 2000, ARXIV.ORG;Shamir O, 2012, ARXIV (CORNELL UNIVERSITY);Freund Y, 1997, JOURNAL OF COMPUTER AND SYSTEM SCIENCES;Rosenblatt F, 1958, PSYCHOLOGICAL REVIEW;Boser B, 1992, ;Robbins H, 1951, THE ANNALS OF MATHEMATICAL STATISTICS;Rissanen J, 1978, AUTOMATICA;Bengio Y, 2009, NOW PUBLISHERS, INC. 
EBOOKS;Valiant L, 1984, ;Fisher R, 1922, PHILOSOPHICAL TRANSACTIONS OF THE ROYAL SOCIETY OF LONDON SERIES A CONTAINING PAPERS OF A MATHEMATICAL OR PHYSICAL CHARACTER;Candès E, 2008, COMPTES RENDUS MATHÉMATIQUE;Schapire R, 1990, MACHINE LEARNING;Vapnik V, 2015, ;Sipser M, 1996, ACM SIGACT NEWS;Natarajan B, 1995, SIAM JOURNAL ON COMPUTING;Neumann J, 1928, MATHEMATISCHE ANNALEN;Baraniuk R, 2008, CONSTRUCTIVE APPROXIMATION;Nemirovski A, 2009, SIAM JOURNAL ON OPTIMIZATION;Phillips D, 1962, JOURNAL OF THE ACM;Littlestone N, 1994, INFORMATION AND COMPUTATION;Collins M, 2002, ;Stone C, 1977, THE ANNALS OF STATISTICS;Shalev‐Shwartz S, 2011, NOW PUBLISHERS, INC. EBOOKS;Zhang T, 2004, ;Davis G, 1997, CONSTRUCTIVE APPROXIMATION;Blumer A, 1987, INFORMATION PROCESSING LETTERS;Blum L, 1989, BULLETIN OF THE AMERICAN MATHEMATICAL SOCIETY;Haussler D, 1992, INFORMATION AND COMPUTATION;Joachims T, 2005, ;Shelah S, 1972, PACIFIC JOURNAL OF MATHEMATICS;McAllester D, 1998, ;Boucheron S, 2005, ESAIM PROBABILITY AND STATISTICS;McAllester D, 1999, ;Kearns M, 1992, ;Alon N, 1997, JOURNAL OF THE ACM;Shalev‐Shwartz S, 2008, ;Shalev‐Shwartz S, 2010, SIAM JOURNAL ON OPTIMIZATION;Rogers W, 1978, THE ANNALS OF STATISTICS;Gordon G, 1999, ;Natarajan B, 1989, MACHINE LEARNING;Dudley R, 2010, ;Warmuth M, 2006, ;Bartlett P, 1994, ;Kearns M, 1996, ;Ben-David S, 1995, JOURNAL OF COMPUTER AND SYSTEM SCIENCES;Slud E, 1977, THE ANNALS OF PROBABILITY;Shalev‐Shwartz S, 2010, MACHINE LEARNING;Dudley R, 1991, JOURNAL OF THEORETICAL PROBABILITY;Floyd S, 1989, CONFERENCE ON LEARNING THEORY;Horváth M, 1998, DISCRETE APPLIED MATHEMATICS;Sankaran J, 1993, OPERATIONS RESEARCH LETTERS;Minsky M, 1969, ;Tibshirani R, 1996, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Vapnik V, 1995, ;Cortes C, 1995, MACHINE LEARNING;Wu Y, 1999, TECHNOMETRICS;Quinlan J, 1992, ;Hinton G, 2006, NEURAL COMPUTATION;Cover T, 1967, IEEE TRANSACTIONS ON INFORMATION THEORY;Quinlan J, 1986, MACHINE LEARNING;Wolpert 
D, 1997, IEEE TRANSACTIONS ON EVOLUTIONARY COMPUTATION;Lafferty J, 2001, SCHOLARLY COMMONS (UNIVERSITY OF PENNSYLVANIA);Luxburg U, 2007, STATISTICS AND COMPUTING;Mallat S, 1993, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Schölkopf B, 1998, NEURAL COMPUTATION;Candès E, 2005, IEEE TRANSACTIONS ON INFORMATION THEORY;Collobert R, 2008, ;Hsu C, 2008, ;Georghiades A, 2001, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Rabiner L, 1986, IEEE ASSP MAGAZINE;, 2000, APPLIED PHYSICS LETTERS;Frank M, 1956, NAVAL RESEARCH LOGISTICS QUARTERLY;Rumelhart D, 1988, ELSEVIER EBOOKS;Lee H, 2009, ;Zhao P, 2006, ;Blumer A, 1989, JOURNAL OF THE ACM;Zinkevich M, 2003, ;Rissanen J, 1983, THE ANNALS OF STATISTICS;Littlestone N, 1988, MACHINE LEARNING;Taskar B, 2003, ;Bottou L, 2011, THE MIT PRESS EBOOKS;Ranzato M, 2007, ;Shalev‐Shwartz S, 2007, ;Hazan E, 2007, MACHINE LEARNING;, 2000, APPLIED PHYSICS LETTERS;Vapnik V, 1991, NEURAL INFORMATION PROCESSING SYSTEMS;Kleinberg J, 2002, ;, 2000, APPLIED PHYSICS LETTERS;Kearns M, 1999, NEURAL COMPUTATION;Collins M, 2005, COMPUTATIONAL LINGUISTICS;Bottou L, 2003, ;Shalev‐Shwartz S, 2007, ;Floyd S, 1995, MACHINE LEARNING;Weston J, 2002, ;Langford J, 2002, NEURAL INFORMATION PROCESSING SYSTEMS;Ben-David S, 2008, NEURAL INFORMATION PROCESSING SYSTEMS;Pitt L, 1988, JOURNAL OF THE ACM;Rakhlin A, 2010, SCHOLARLYCOMMONS (UNIVERSITY OF PENNSYLVANIA);Rakhlin A, 2005, ANALYSIS AND APPLICATIONS;Ben-David S, 2000, ;Bartlett P, 2002, THEORETICAL COMPUTER SCIENCE;Rakhlin A, 2011, ARXIV (CORNELL UNIVERSITY);Breiman L, 2001, MACHINE LEARNING;Boyd S, 2004, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Ryzin J, 1986, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Donoho D, 2004, ;Kuhn H, 1955, NAVAL RESEARCH LOGISTICS QUARTERLY;Pearson K, 1901, THE LONDON EDINBURGH AND DUBLIN PHILOSOPHICAL MAGAZINE AND JOURNAL OF SCIENCE;Karp R, 1972, ;Bertsekas D, 1997, JOURNAL OF THE OPERATIONAL RESEARCH SOCIETY;, 2004, CHOICE REVIEWS ONLINE;Нестеров Ю, 2014, MEDICAL ENTOMOLOGY 
AND ZOOLOGY;Bartlett P, 2001, LECTURE NOTES IN COMPUTER SCIENCE;Crammer K, 2002, ;Rarp K, 2012, ;Bruijn d, 1946, DATA ARCHIVING AND NETWORKED SERVICES (DANS);Tsochantaridis I, 2004, ;Bengio Y, 2007, ;Tikhonov A, 1943, PROCEEDINGS OF THE USSR ACADEMY OF SCIENCES;Ranzato M, 2012, INTERNATIONAL CONFERENCE ON MACHINE LEARNING;Bartlett P, 2002, ;Vovk V, 1990, CONFERENCE ON LEARNING THEORY;Agmon S, 1954, CANADIAN JOURNAL OF MATHEMATICS;Neumann J, 1953, PRINCETON UNIVERSITY PRESS EBOOKS;Collins M, 2000, ;Hadamard J, 1902, MEDICAL ENTOMOLOGY AND ZOOLOGY;Donoho D, 2011, IEEE TRANSACTIONS ON INFORMATION THEORY;Langford J, 2005, ;Seeger M, 2003, JOURNAL OF MACHINE LEARNING RESEARCH;Kakade S, 2008, SCHOLARLYCOMMONS (UNIVERSITY OF PENNSYLVANIA);Mukherjee S, 2006, ADVANCES IN COMPUTATIONAL MATHEMATICS;Kearns M, 1988, MEDICAL ENTOMOLOGY AND ZOOLOGY;Cover T, 1966, ;Murata N, 1999, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Livni R, 2013, ARXIV (CORNELL UNIVERSITY);Shalev‐Shwartz S, 2010, ;, 2012, SPRINGERREFERENCE;Poon H, 2012, ARXIV (CORNELL UNIVERSITY);Daniely A, 2012, ARXIV (CORNELL UNIVERSITY);Chang H, 2009, ARXIV.ORG",,,OPENALEX,"Shalev‐Shwartz S, 2015, ","Shalev‐Shwartz S, 2015, " +https://openalex.org/W2115252128,10.5555/1577069.1755843,Dlib-ml: A Machine Learning Toolkit,2009,en,article,2924,,,Davis E. King,Davis E. King,,"Davis E. King (corresponding author), ","There are many excellent toolkits which provide support for developing machine learning software in Python, R, Matlab, and similar environments. Dlib-ml is an open source library, targeted at both engineers and research scientists, which aims to provide a similarly rich environment for developing machine learning software in the C++ language. Towards this end, dlib-ml contains an extensible linear algebra toolkit with built in BLAS support. 
It also houses implementations of algorithms for performing inference in Bayesian networks and kernel-based methods for classification, regression, clustering, anomaly detection, and feature ranking. To enable easy use of these tools, the entire library has been developed with contract programming, which provides complete and precise documentation as well as powerful debugging tools. Keywords: kernel-methods, svm, rvm, kernel clustering, C++, Bayesian networks 1.",10,60,1755,1758,Computer science;Python (programming language);Debugging;Machine learning;Cluster analysis;Implementation;Programming language;Artificial intelligence;Software engineering;Documentation;Software;Data mining,,"Chang C, 2011, ACM TRANSACTIONS ON INTELLIGENT SYSTEMS AND TECHNOLOGY;Platt J, 1999, ;Shalev‐Shwartz S, 2010, MATHEMATICAL PROGRAMMING;Sonnenburg S, 2006, MPG.PURE (MAX PLANCK SOCIETY);Engel Y, 2004, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Shalev‐Shwartz S, 2007, ;Tipping M, 2003, ;Collobert R, 2000, APPLIED PHYSICS LETTERS;Suttorp T, 2007, LECTURE NOTES IN COMPUTER SCIENCE",,,OPENALEX,"King D, 2009, ","King D, 2009, " +https://openalex.org/W4400134761,10.21275/art20203995,Machine Learning Algorithms - A Review,2020,en,review,2459,INTERNATIONAL JOURNAL OF SCIENCE AND RESEARCH (IJSR),International Journal of Science and Research (IJSR),Batta Mahesh,Batta Mahesh,,"Batta Mahesh (corresponding author), ","Machine learning (ML) is the scientific study of algorithms and statistical models that computer systems use to perform a specific task without being explicitly programmed. Learning algorithms in many applications that's we make use of daily. Every time a web search engine like Google is used to search the internet, one of the reasons that work so well is because a learning algorithm thathas learned how to rank web pages.These algorithms are used for various purposes like data mining, image processing, predictive analytics, etc. 
to name a few.The main advantage of using machine learning is that, once an algorithm learns what to do with data, it can do its work automatically.In this paper, a brief review and future prospect of the vast applications of machine learning algorithms has been made.",9,1,381,386,Computer science;Machine learning;Artificial intelligence;Algorithm,,"Keller J, 1985, IEEE TRANSACTIONS ON SYSTEMS MAN AND CYBERNETICS;Marsland S, 2009, ;Harrington P, 2012, ;Bkassiny M, 2012, IEEE COMMUNICATIONS SURVEYS & TUTORIALS;Coelho L, 2013, CERN DOCUMENT SERVER (EUROPEAN ORGANIZATION FOR NUCLEAR RESEARCH);Sutton R, 1992, ",,,OPENALEX,"Mahesh B, 2020, INTERNATIONAL JOURNAL OF SCIENCE AND RESEARCH (IJSR)","Mahesh B, 2020, INTERNATIONAL JOURNAL OF SCIENCE AND RESEARCH (IJSR)" +https://openalex.org/W2973119841,10.1146/annurev-fluid-010719-060214,Machine Learning for Fluid Mechanics,2019,en,article,2608,ANNUAL REVIEW OF FLUID MECHANICS,Annual Review of Fluid Mechanics,Steven L. Brunton;Bernd R. Noack;Petros Koumoutsakos,Steven L. Brunton;Bernd R. Noack;Petros Koumoutsakos,"Department of Mechanical Engineering, University of Washington, Seattle, Washington 98195, USA;Institut für Strömungsmechanik und Technische Akustik, Technische Universität Berlin, D-10634 Berlin, Germany;LIMSI (Laboratoire d'Informatique pour la Mécanique et les Sciences de l'Ingénieur), CNRS UPR 3251, Université Paris-Saclay, F-91403 Orsay, France;Computational Science and Engineering Laboratory, ETH Zurich, CH-8092 Zurich, Switzerland",,"The field of fluid mechanics is rapidly advancing, driven by unprecedented volumes of data from experiments, field measurements, and large-scale simulations at multiple spatiotemporal scales. Machine learning (ML) offers a wealth of techniques to extract information from data that can be translated into knowledge about the underlying fluid mechanics. Moreover, ML algorithms can augment domain knowledge and automate tasks related to flow control and optimization. 
This article presents an overview of past history, current developments, and emerging opportunities of ML for fluid mechanics. We outline fundamental ML methodologies and discuss their uses for understanding, modeling, optimizing, and controlling fluid flows. The strengths and limitations of these methods are addressed from the perspective of scientific inquiry that considers data as an inherent part of modeling, experiments, and simulations. ML provides a powerful information-processing framework that can augment, and possibly even transform, current lines of fluid mechanics research and industrial applications.",52,1,477,508,Fluid mechanics;Current (fluid);Field (mathematics);Perspective (graphical);Fluid dynamics;Domain (mathematical analysis),US;FR;DE;CH,"Hochreiter S, 1997, NEURAL COMPUTATION;Rumelhart D, 1986, NATURE;Rosenblatt F, 1958, PSYCHOLOGICAL REVIEW;Germano M, 1991, PHYSICS OF FLUIDS A FLUID DYNAMICS;Sirovich L, 1987, QUARTERLY OF APPLIED MATHEMATICS;Schmid P, 2010, JOURNAL OF FLUID MECHANICS;Dong C, 2014, LECTURE NOTES IN COMPUTER SCIENCE;Schmidt M, 2009, SCIENCE;Freeman W, 2002, IEEE COMPUTER GRAPHICS AND APPLICATIONS;Willert C, 1991, EXPERIMENTS IN FLUIDS;Grossberg S, 1976, BIOLOGICAL CYBERNETICS;Barber D, 2012, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Grossberg S, 1988, NEURAL NETWORKS;Baldi P, 1989, NEURAL NETWORKS;Gardner E, 1988, JOURNAL OF PHYSICS A MATHEMATICAL AND GENERAL;Bellman R, 1952, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Tesauro G, 1992, MACHINE LEARNING;Schwefel H, 1977, BIRKHÄUSER BASEL EBOOKS;Fleming P, 2002, CONTROL ENGINEERING PRACTICE;Brunton S, 2015, APPLIED MECHANICS REVIEWS;Dissanayake G, 1994, COMMUNICATIONS IN NUMERICAL METHODS IN ENGINEERING;Barber R, 2015, THE ANNALS OF STATISTICS;Amsallem D, 2012, INTERNATIONAL JOURNAL FOR NUMERICAL METHODS IN ENGINEERING;Ouellette N, 2005, EXPERIMENTS IN FLUIDS;Bewley T, 2001, JOURNAL OF FLUID MECHANICS;Rokhlin V, 2009, SIAM JOURNAL ON MATRIX ANALYSIS AND APPLICATIONS;Milano M, 2002, 
JOURNAL OF COMPUTATIONAL PHYSICS;Ling J, 2015, PHYSICS OF FLUIDS;Hansen N, 2008, IEEE TRANSACTIONS ON EVOLUTIONARY COMPUTATION;Lee C, 1997, PHYSICS OF FLUIDS;Graves A, 2007, LECTURE NOTES IN COMPUTER SCIENCE;Kern S, 2004, NATURAL COMPUTING;González‐García R, 1998, COMPUTERS & CHEMICAL ENGINEERING;Bright I, 2013, PHYSICS OF FLUIDS;Jambunathan K, 1996, INTERNATIONAL JOURNAL OF HEAT AND MASS TRANSFER;Rico-Martı́nez R, 1992, CHEMICAL ENGINEERING COMMUNICATIONS;Gazzola M, 2014, SIAM JOURNAL ON SCIENTIFIC COMPUTING;Faller W, 1996, PROGRESS IN AEROSPACE SCIENCES;Nair A, 2015, JOURNAL OF FLUID MECHANICS;Ostermeier A, 1994, LECTURE NOTES IN COMPUTER SCIENCE;Bishop C, 1993, NUCLEAR INSTRUMENTS AND METHODS IN PHYSICS RESEARCH SECTION A ACCELERATORS SPECTROMETERS DETECTORS AND ASSOCIATED EQUIPMENT;Giannakoglou K, 2006, COMPUTER METHODS IN APPLIED MECHANICS AND ENGINEERING;Dracopoulos D, 1997, PERSPECTIVES IN NEURAL COMPUTING;Labonté G, 1999, EXPERIMENTS IN FLUIDS;Liang D, 2003, EXPERIMENTS IN FLUIDS;Grant I, 1995, EXPERIMENTS IN FLUIDS;Bourguignon J, 2014, PHYSICS OF FLUIDS;Hamdaouı M, 2010, JOURNAL OF AIRCRAFT;Pelikán M, 2004, LECTURE NOTES IN COMPUTER SCIENCE;Phan M, 1995, SERIES ON STABILITY, VIBRATION AND CONTROL OF SYSTEMS - SERIES B;Mnih V, 2015, NATURE;Hornik K, 1989, NEURAL NETWORKS;Hopfield J, 1982, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Silver D, 2016, NATURE;Wright J, 2009, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Yang J, 2010, IEEE TRANSACTIONS ON IMAGE PROCESSING;Brunton S, 2016, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Halko N, 2011, SIAM REVIEW;Adrian R, 1991, ANNUAL REVIEW OF FLUID MECHANICS;Hansen N, 2003, EVOLUTIONARY COMPUTATION;Sirovich L, 1987, JOURNAL OF THE OPTICAL SOCIETY OF AMERICA A;Rowley C, 2009, JOURNAL OF FLUID MECHANICS;Lagaris I, 1998, IEEE TRANSACTIONS ON NEURAL NETWORKS;Richter S, 2016, LECTURE NOTES IN COMPUTER SCIENCE;Ling J, 2016, JOURNAL OF FLUID MECHANICS;Rudy S, 2017, SCIENCE ADVANCES;Meneveau C, 
2000, ANNUAL REVIEW OF FLUID MECHANICS;Raissi M, 2017, JOURNAL OF COMPUTATIONAL PHYSICS;Mezić I, 2012, ANNUAL REVIEW OF FLUID MECHANICS;Rowley C, 2016, ANNUAL REVIEW OF FLUID MECHANICS;Meijering E, 2002, PROCEEDINGS OF THE IEEE;Parish E, 2015, JOURNAL OF COMPUTATIONAL PHYSICS;Wang J, 2017, PHYSICAL REVIEW FLUIDS;Manohar K, 2018, IEEE CONTROL SYSTEMS;Schaeffer H, 2017, PROCEEDINGS OF THE ROYAL SOCIETY A MATHEMATICAL PHYSICAL AND ENGINEERING SCIENCES;Li Q, 2017, CHAOS AN INTERDISCIPLINARY JOURNAL OF NONLINEAR SCIENCE;Wu X, 2008, JOURNAL OF FLUID MECHANICS;Ling J, 2016, JOURNAL OF COMPUTATIONAL PHYSICS;Loiseau J, 2018, JOURNAL OF FLUID MECHANICS;Williams M, 2015, JOURNAL OF COMPUTATIONAL DYNAMICS;Xiao H, 2016, JOURNAL OF COMPUTATIONAL PHYSICS;Kaiser E, 2014, JOURNAL OF FLUID MECHANICS;Perlman E, 2007, ;Colabrese S, 2017, PHYSICAL REVIEW LETTERS;Glaz B, 2010, AIAA JOURNAL;Pierret S, 1999, JOURNAL OF TURBOMACHINERY;Reddy G, 2016, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Gazzola M, 2012, JOURNAL OF FLUID MECHANICS;Büche D, 2002, IEEE TRANSACTIONS ON SYSTEMS MAN AND CYBERNETICS PART C (APPLICATIONS AND REVIEWS);Gazzola M, 2016, JOURNAL OF FLUID MECHANICS;Perdikaris P, 2016, SIAM JOURNAL ON SCIENTIFIC COMPUTING;Bai Z, 2014, AIAA JOURNAL;Bénard N, 2016, EXPERIMENTS IN FLUIDS;Nüske F, 2016, THE JOURNAL OF CHEMICAL PHYSICS;Semeraro O, 2017, PHYSICAL REVIEW FLUIDS;Rees W, 2015, JOURNAL OF FLUID MECHANICS;Guéniat F, 2016, THEORETICAL AND COMPUTATIONAL FLUID DYNAMICS;Tsiotras P, 2017, JOURNAL OF GUIDANCE CONTROL AND DYNAMICS;Teo C, 2002, ;Papadimitriou D, 2015, INTERNATIONAL JOURNAL FOR UNCERTAINTY QUANTIFICATION;Breiman L, 2001, MACHINE LEARNING;LeCun Y, 2015, NATURE;Hastie T, 2009, SPRINGER SERIES IN STATISTICS;Donoho D, 2006, IEEE TRANSACTIONS ON INFORMATION THEORY;Raissi M, 2018, JOURNAL OF COMPUTATIONAL PHYSICS;Taira K, 2017, AIAA JOURNAL;Duraisamy K, 2018, ANNUAL REVIEW OF FLUID MECHANICS;Lusch B, 2018, NATURE COMMUNICATIONS;Pathak J, 2018, PHYSICAL REVIEW 
LETTERS;Kutz J, 2016, SOCIETY FOR INDUSTRIAL AND APPLIED MATHEMATICS EBOOKS;Brunton S, 2019, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Fukami K, 2019, JOURNAL OF FLUID MECHANICS;Rabault J, 2019, JOURNAL OF FLUID MECHANICS;Mardt A, 2017, NATURE COMMUNICATIONS;Cherkassky V, 2006, ;Verma S, 2018, ARXIV (CORNELL UNIVERSITY);Wehmeyer C, 2018, THE JOURNAL OF CHEMICAL PHYSICS;Vlachas P, 2018, PROCEEDINGS OF THE ROYAL SOCIETY A MATHEMATICAL PHYSICAL AND ENGINEERING SCIENCES;Maulik R, 2018, JOURNAL OF FLUID MECHANICS;Xie Y, 2018, ACM TRANSACTIONS ON GRAPHICS;Kim B, 2019, COMPUTER GRAPHICS FORUM;Wan Z, 2018, PLOS ONE;Skinner S, 2017, APPLIED SOFT COMPUTING;Reddy G, 2018, NATURE;Novati G, 2017, BIOINSPIRATION & BIOMIMETICS;Lee Y, 2017, EXPERIMENTS IN FLUIDS;Colvert B, 2018, BIOINSPIRATION & BIOMIMETICS;Novati G, 2019, PHYSICAL REVIEW FLUIDS;Colabrese S, 2018, PHYSICAL REVIEW FLUIDS;Meena M, 2018, PHYSICAL REVIEW. E;Alsalman M, 2018, BIOINSPIRATION & BIOMIMETICS;Martin N, 2018, BIOINSPIRATION & BIOMIMETICS;Hou W, 2019, AIAA SCITECH 2019 FORUM",,,OPENALEX,"Brunton S, 2019, ANNUAL REVIEW OF FLUID MECHANICS","Brunton S, 2019, ANNUAL REVIEW OF FLUID MECHANICS" +https://openalex.org/W2975634117,10.1038/s41592-019-0582-9,ilastik: interactive machine learning for (bio)image analysis,2019,en,review,3608,NATURE METHODS,Nature Methods,Stuart Berg;Dominik Kutra;Thorben Kroeger;Christoph Straehle;Bernhard X. Kausler;Carsten Haubold;Martin Schiegg;Janez Aleš;Thorsten Beier;Markus Rudy;Kemal Eren;Jaime I Cervantes;Buote Xu;Fynn Beuttenmueller;Adrian Wolny;Chong Zhang;Ullrich Koethe;Fred A. Hamprecht;Anna Kreshuk,Stuart Berg;Dominik Kutra;Thorben Kroeger;Christoph N. Straehle;Bernhard X. Kausler;Carsten Haubold;Martin Schiegg;Janez Ales;Thorsten Beier;Markus Rudy;Kemal Eren;Jaime I Cervantes;Buote Xu;Fynn Beuttenmueller;Adrian Wolny;Chong Zhang;Ullrich Koethe;Fred A. 
Hamprecht;Anna Kreshuk,"HHMI Janelia Research Campus, Ashburn, Virginia, USA;European Molecular Biology Laboratory, Heidelberg, Germany;HCI/IWR, Heidelberg University, Heidelberg, Germany;HCI/IWR, Heidelberg University, Heidelberg, Germany;HCI/IWR, Heidelberg University, Heidelberg, Germany;HCI/IWR, Heidelberg University, Heidelberg, Germany;HCI/IWR, Heidelberg University, Heidelberg, Germany;HCI/IWR, Heidelberg University, Heidelberg, Germany;HCI/IWR, Heidelberg University, Heidelberg, Germany;HCI/IWR, Heidelberg University, Heidelberg, Germany;HCI/IWR, Heidelberg University, Heidelberg, Germany;HCI/IWR, Heidelberg University, Heidelberg, Germany;HCI/IWR, Heidelberg University, Heidelberg, Germany;HCI/IWR, Heidelberg University, Heidelberg, Germany;European Molecular Biology Laboratory, Heidelberg, Germany;HCI/IWR, Heidelberg University, Heidelberg, Germany;HCI/IWR, Heidelberg University, Heidelberg, Germany;HCI/IWR, Heidelberg University, Heidelberg, Germany;HCI/IWR, Heidelberg University, Heidelberg, Germany;HCI/IWR, Heidelberg University, Heidelberg, Germany. fred.hamprecht@iwr.uni-heidelberg.de;HCI/IWR, Heidelberg University, Heidelberg, Germany;European Molecular Biology Laboratory, Heidelberg, Germany. anna.kreshuk@embl.de;HCI/IWR, Heidelberg University, Heidelberg, Germany. 
anna.kreshuk@embl.de;European Molecular Biology Laboratory, Heidelberg, Germany",,,16,12,1226,1232,Computer science;Workflow;Artificial intelligence;Segmentation;Machine learning;Classifier (UML);Image segmentation;Computer vision;Pattern recognition (psychology);Database,US;DE,"Breiman L, 2001, MACHINE LEARNING;Schindelin J, 2012, NATURE METHODS;Carpenter A, 2006, GENOME BIOLOGY;Arganda‐Carreras I, 2017, BIOINFORMATICS;Loy C, 2012, ;Vilariño D, 2017, JOURNAL OF APPLIED REMOTE SENSING;Erickson B, 2017, RADIOGRAPHICS;Berthold M, 2008, STUDIES IN CLASSIFICATION, DATA ANALYSIS, AND KNOWLEDGE ORGANIZATION;Sommer C, 2011, ;Linkert M, 2010, THE JOURNAL OF CELL BIOLOGY;Neumann B, 2010, NATURE;Tarca A, 2007, PLOS COMPUTATIONAL BIOLOGY;Tu Z, 2009, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Stalling D, 2005, ELSEVIER EBOOKS;Belevich I, 2016, PLOS BIOLOGY;Nixon‐Abell J, 2016, SCIENCE;Korogod N, 2015, ELIFE;Ciliberti S, 2007, PLOS COMPUTATIONAL BIOLOGY;Streichan S, 2014, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Simpson R, 2014, ;Geurts P, 2009, MOLECULAR BIOSYSTEMS;Marée R, 2016, BIOINFORMATICS;Wolff C, 2018, ELIFE;Beier T, 2017, NATURE METHODS;Raote I, 2018, ELIFE;Andres B, 2011, ;Luengo I, 2017, JOURNAL OF STRUCTURAL BIOLOGY;Sommer C, 2017, MOLECULAR BIOLOGY OF THE CELL;Maco B, 2013, PLOS ONE;Schiegg M, 2013, ;Cassani C, 2016, PLOS BIOLOGY;Hughes A, 2018, NATURE METHODS;Hilsenbeck O, 2017, BIOINFORMATICS;González-Tendero A, 2016, EUROPEAN HEART JOURNAL - CARDIOVASCULAR IMAGING;Haubold C, 2016, ADVANCES IN ANATOMY, EMBRYOLOGY AND CELL BIOLOGY;Beier T, 2015, ;Jorstad A, 2018, FRONTIERS IN NEUROANATOMY;Straehle C, 2011, LECTURE NOTES IN COMPUTER SCIENCE;Straehle C, 2012, ;Haubold C, 2016, LECTURE NOTES IN COMPUTER SCIENCE;Pedregosa F, 2012, ARXIV (CORNELL UNIVERSITY);, 2013, ;, 2011, ",,,OPENALEX,"Berg S, 2019, NATURE METHODS","Berg S, 2019, NATURE METHODS" +https://openalex.org/W3045004532,10.1016/j.neucom.2020.07.061,On hyperparameter optimization 
of machine learning algorithms: Theory and practice,2020,en,article,3137,NEUROCOMPUTING,Neurocomputing,Li Yang;Abdallah Shami,Li Yang;Abdallah Shami,"Department of Electrical and Computer Engineering, University of Western Ontario, 1151 Richmond St, London, ON N6A 3K7, Canada;Department of Electrical and Computer Engineering, University of Western Ontario, 1151 Richmond St, London, ON N6A 3K7, Canada","Abdallah Shami (corresponding author), Department of Electrical and Computer Engineering, University of Western Ontario, 1151 Richmond St, London, ON N6A 3K7, Canada",,415,,295,316,Hyperparameter;Computer science;Machine learning;Algorithm;Artificial intelligence;Optimization algorithm;Mathematical optimization;Mathematics,CA,"W H, 1990, CHOICE REVIEWS ONLINE;Nasrabadi N, 2007, JOURNAL OF ELECTRONIC IMAGING;1959- B, 1994, CHOICE REVIEWS ONLINE;Jordan M, 2015, SCIENCE;Bergstra J, 2012, ;Dietterich T, 2000, LECTURE NOTES IN COMPUTER SCIENCE;Kühn M, 2013, ;Bottou L, 2010, ;Halko N, 2011, SIAM REVIEW;Safavian S, 1991, IEEE TRANSACTIONS ON SYSTEMS MAN AND CYBERNETICS;Shi Y, 1998, LECTURE NOTES IN COMPUTER SCIENCE;Bergstra J, 2011, ;Moon T, 1996, IEEE SIGNAL PROCESSING MAGAZINE;Keller J, 1985, IEEE TRANSACTIONS ON SYSTEMS MAN AND CYBERNETICS;Hoerl A, 1970, TECHNOMETRICS;Rish I, 2001, ;Hutter F, 2011, LECTURE NOTES IN COMPUTER SCIENCE;Marshall J, 1995, NEURAL NETWORKS;Fortin F, 2012, ;Seeger M, 2004, INTERNATIONAL JOURNAL OF NEURAL SYSTEMS;Ding C, 2004, ;Bergstra J, 2015, COMPUTATIONAL SCIENCE & DISCOVERY;Bradley S, 1977, ;Zhao Y, 2005, DATA MINING AND KNOWLEDGE DISCOVERY;Rehman S, 2014, ;Kibriya A, 2004, LECTURE NOTES IN COMPUTER SCIENCE;Rahnamayan S, 2007, COMPUTERS & MATHEMATICS WITH APPLICATIONS;Gogna A, 2013, JOURNAL OF EXPERIMENTAL & THEORETICAL ARTIFICIAL INTELLIGENCE;Komer B, 2014, PROCEEDINGS OF THE PYTHON IN SCIENCE CONFERENCES;Kazimipour B, 2014, ;Sparks E, 2015, ;Witt C, 2005, LECTURE NOTES IN COMPUTER SCIENCE;Howland P, 2005, PATTERN RECOGNITION;Lobo F, 2000, 
;Zuo W, 2008, PATTERN ANALYSIS AND APPLICATIONS;Ding C, 2004, LECTURE NOTES IN COMPUTER SCIENCE;Cazzaniga P, 2015, ;Lessmann S, 2005, ;Wang H, 2009, ;Bustamante C, 2006, LECTURE NOTES IN COMPUTER SCIENCE;Yang H, 1998, NEURAL COMPUTATION;Brahim-Belhouari S, 2005, IEEE SENSORS JOURNAL;Sulzmann J, 2007, LECTURE NOTES IN COMPUTER SCIENCE;Soliman O, 2012, INTERNATIONAL CONFERENCE ON INFORMATICS AND SYSTEMS;Chen C, 1992, TECHNOMETRICS;Pedregosa F, 2012, ARXIV (CORNELL UNIVERSITY);Shlens J, 2014, ARXIV (CORNELL UNIVERSITY);Meng X, 2015, ARXIV (CORNELL UNIVERSITY);Maclaurin D, 2015, ARXIV (CORNELL UNIVERSITY);Claesen M, 2014, ARXIV (CORNELL UNIVERSITY);Krizhevsky A, 2017, COMMUNICATIONS OF THE ACM;Tibshirani R, 1996, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Hosmer D, 2013, WILEY SERIES IN PROBABILITY AND STATISTICS;Drucker H, 1996, ;Caruana R, 2006, ;Cawley G, 2010, ;Rennie J, 2003, ;Xia Y, 2017, EXPERT SYSTEMS WITH APPLICATIONS;Han D, 2017, EXPERT SYSTEMS WITH APPLICATIONS;Bengio Y, 2000, NEURAL COMPUTATION;Ogutu J, 2012, BMC PROCEEDINGS;Domhan T, 2015, FREIDOK PLUS (UNIVERSITÄTSBIBLIOTHEK FREIBURG);Luo G, 2016, NETWORK MODELING ANALYSIS IN HEALTH INFORMATICS AND BIOINFORMATICS;Gambella C, 2020, EUROPEAN JOURNAL OF OPERATIONAL RESEARCH;Koutsoukas A, 2017, JOURNAL OF CHEMINFORMATICS;Lorenzo P, 2017, PROCEEDINGS OF THE GENETIC AND EVOLUTIONARY COMPUTATION CONFERENCE;Karnin Z, 2013, ;Böehm M, 2016, PROCEEDINGS OF THE VLDB ENDOWMENT;Melkumova L, 2017, PROCEDIA ENGINEERING;Ilievski I, 2017, PROCEEDINGS OF THE AAAI CONFERENCE ON ARTIFICIAL INTELLIGENCE;Injadat M, 2018, ;Eggensperger K, 2015, PROCEEDINGS OF THE AAAI CONFERENCE ON ARTIFICIAL INTELLIGENCE;Francescomarino C, 2018, INFORMATION SYSTEMS;Soon F, 2018, IET INTELLIGENT TRANSPORT SYSTEMS;DeCastro‐García N, 2019, COMPLEXITY;Ozaki Y, 2017, IPSJ TRANSACTIONS ON COMPUTER VISION AND APPLICATIONS;Itano F, 2018, ;Yan X, 2017, JOURNAL OF COMPUTER SCIENCE AND TECHNOLOGY;Yang L, 2018, IEEE 
ACCESS;Zhang J, 2018, RESEARCH SHOWCASE @ CARNEGIE MELLON UNIVERSITY (CARNEGIE MELLON UNIVERSITY);Wang P, 2012, ;McGibbon R, 2016, THE JOURNAL OF OPEN SOURCE SOFTWARE;Moubayed A, 2018, ;Arjunan K, 2017, ;Hertel L, 2018, ;Loog M, 2006, ;Cheng M, 2018, JOURNAL OF CONSTRUCTION ENGINEERING AND MANAGEMENT;Zhang S, 2016, ;Abadi M, 2016, ARXIV (CORNELL UNIVERSITY);Snoek J, 2012, ARXIV (CORNELL UNIVERSITY);Li L, 2016, ARXIV (CORNELL UNIVERSITY);Yin W, 2017, ARXIV (CORNELL UNIVERSITY);Claesen M, 2015, ARXIV (CORNELL UNIVERSITY);Martínez-Cantín R, 2014, ZAGUAN (UNIVERSITY OF ZARAGOZA REPOSITORY);Yao Q, 2018, ARXIV (CORNELL UNIVERSITY);Wang J, 2018, ARXIV (CORNELL UNIVERSITY);Knudde N, 2017, ARXIV (CORNELL UNIVERSITY);Hazan E, 2017, ARXIV (CORNELL UNIVERSITY);Yang L, 2018, THE ATRIUM (UNIVERSITY OF GUELPH);Braun W, 2021, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Li L, 2017, JOURNAL OF MACHINE LEARNING RESEARCH;Olson R, 2019, ˜THE œSPRINGER SERIES ON CHALLENGES IN MACHINE LEARNING;Yang L, 2019, ;Moubayed A, 2020, AMERICAN JOURNAL OF DISTANCE EDUCATION;Injadat M, 2020, KNOWLEDGE-BASED SYSTEMS;Injadat M, 2020, APPLIED INTELLIGENCE;Salo F, 2019, 2019 INTERNATIONAL CONFERENCE ON COMPUTING, NETWORKING AND COMMUNICATIONS (ICNC);Olof S, 2018, KTH PUBLICATION DATABASE DIVA (KTH ROYAL INSTITUTE OF TECHNOLOGY);Nikolaus H, 2016, HAL (LE CENTRE POUR LA COMMUNICATION SCIENTIFIQUE DIRECTE);Shawi R, 2019, ARXIV (CORNELL UNIVERSITY);Head T, 2018, ZENODO (CERN EUROPEAN ORGANIZATION FOR NUCLEAR RESEARCH);Ilievski I, 2016, ARXIV (CORNELL UNIVERSITY);Sun S, 2019, ARXIV (CORNELL UNIVERSITY);Abreu S, 2019, ARXIV (CORNELL UNIVERSITY)",,,OPENALEX,"Yang L, 2020, NEUROCOMPUTING","Yang L, 2020, NEUROCOMPUTING" +https://openalex.org/W2937307539,10.1038/s41573-019-0024-5,Applications of machine learning in drug discovery and development,2019,en,review,2937,NATURE REVIEWS DRUG DISCOVERY,Nature Reviews Drug Discovery,Jessica Vamathevan;Dominic A. Clark;Paul Czodrowski;Ian Dunham;Edgardo A. 
Ferrán;George Lee;Bin Li;Anant Madabhushi;Parantu K. Shah;Michaela Spitzer;Shanrong Zhao,Jessica Vamathevan;Dominic Clark;Paul Czodrowski;Ian Dunham;Edgardo Ferran;George Lee;Bin Li;Anant Madabhushi;Parantu Shah;Michaela Spitzer;Shanrong Zhao,"European Molecular Biology Laboratory, European Bioinformatics Institute, Cambridge, UK. jessicav@ebi.ac.uk;European Molecular Biology Laboratory, European Bioinformatics Institute, Cambridge, UK;European Molecular Biology Laboratory, European Bioinformatics Institute, Cambridge, UK;Technical University of Dortmund, Dortmund, Germany;Open Targets and European Molecular Biology Laboratory, European Bioinformatics Institute, Cambridge, UK;European Molecular Biology Laboratory, European Bioinformatics Institute, Cambridge, UK;Bristol-Myers Squibb, Princeton, NJ, USA;Takeda Pharmaceuticals International Co., Cambridge, MA, USA;Case Western Reserve University, Cleveland, OH, USA;Louis Stokes Cleveland Veterans Affair Medical Center, Cleveland, OH, USA;EMD Serono R&D Institute, Billerica, MA, USA;Open Targets and European Molecular Biology Laboratory, European Bioinformatics Institute, Cambridge, UK;Pfizer Worldwide Research and Development, Cambridge, MA, USA","Jessica Vamathevan (corresponding author), European Molecular Biology Laboratory, European Bioinformatics Institute, Cambridge, UK. 
jessicav@ebi.ac.uk; European Molecular Biology Laboratory, European Bioinformatics Institute, Cambridge, UK",,18,6,463,477,Computer science;Machine learning;Interpretability;Drug discovery;Artificial intelligence;Pipeline (software);Identification (biology);Context (archaeology);Data science;Field (mathematics);Drug development;Process (computing);Data mining;Drug;Bioinformatics;Medicine,GB;RU;DE;US,"Srivastava N, 2014, ;Hinton G, 2006, SCIENCE;Gaulton A, 2016, NUCLEIC ACIDS RESEARCH;Keiser M, 2007, NATURE BIOTECHNOLOGY;Iorio F, 2016, CELL;Nelson M, 2015, NATURE GENETICS;Angermueller C, 2016, MOLECULAR SYSTEMS BIOLOGY;Janowczyk A, 2016, JOURNAL OF PATHOLOGY INFORMATICS;Xiong H, 2014, SCIENCE;Sotillo E, 2015, CANCER DISCOVERY;Ma J, 2015, JOURNAL OF CHEMICAL INFORMATION AND MODELING;Zhan F, 2006, BLOOD;Shaughnessy J, 2006, BLOOD;Thorn B, 2010, NATURE BIOTECHNOLOGY;Kim E, 2011, CANCER DISCOVERY;Community N, 2014, NATURE BIOTECHNOLOGY;Mohr D, 2017, ANNUAL REVIEW OF CLINICAL PSYCHOLOGY;Beck A, 2011, SCIENCE TRANSLATIONAL MEDICINE;Pierson E, 2015, GENOME BIOLOGY;Vaquero-Garcia J, 2016, ELIFE;Koscielny G, 2016, NUCLEIC ACIDS RESEARCH;Leung M, 2014, BIOINFORMATICS;Mulligan G, 2006, BLOOD;Decaux O, 2008, JOURNAL OF CLINICAL ONCOLOGY;Bakheet T, 2009, BIOINFORMATICS;Jiao Y, 2016, QUANTITATIVE BIOLOGY;Nayal M, 2006, PROTEINS STRUCTURE FUNCTION AND BIOINFORMATICS;Bravo Á, 2015, BMC BIOINFORMATICS;Jeon J, 2014, GENOME MEDICINE;Tan J, 2016, MSYSTEMS;Turkki R, 2016, JOURNAL OF PATHOLOGY INFORMATICS;Koscielny S, 2010, SCIENCE TRANSLATIONAL MEDICINE;Mani N, 2016, BREAST CANCER RESEARCH;Romo‐Bucheli D, 2016, SCIENTIFIC REPORTS;Riniker S, 2014, JOURNAL OF CHEMICAL INFORMATION AND MODELING;Li Q, 2007, BMC BIOINFORMATICS;Costa P, 2010, BMC GENOMICS;Kandoi G, 2015, FRONTIERS IN PHYSIOLOGY;Lee G, 2016, EUROPEAN UROLOGY FOCUS;Zhan F, 2008, BLOOD;Kim J, 2017, SCIENTIFIC REPORTS;Ли Б, 2015, PLOS ONE;Czodrowski P, 2014, JOURNAL OF COMPUTER-AIDED MOLECULAR DESIGN;Bunte K, 2016, 
BIOINFORMATICS;Gütlein M, 2016, JOURNAL OF CHEMINFORMATICS;Kumar V, 2016, SCIENTIFIC REPORTS;Hejase H, 2015, CPT PHARMACOMETRICS & SYSTEMS PHARMACOLOGY;Veltri R, 2000, JOURNAL OF CELLULAR BIOCHEMISTRY;Ramsundar B, 2015, ARXIV (CORNELL UNIVERSITY);Unterthiner T, 2015, ARXIV (CORNELL UNIVERSITY);Tsherniak A, 2017, CELL;Khera A, 2018, NATURE GENETICS;Wu Z, 2017, CHEMICAL SCIENCE;Coudray N, 2018, NATURE MEDICINE;Segler M, 2018, NATURE;Chen H, 2017, IEEE TRANSACTIONS ON MEDICAL IMAGING;Chen H, 2018, DRUG DISCOVERY TODAY;Wong C, 2017, BIOSTATISTICS;Olivecrona M, 2017, JOURNAL OF CHEMINFORMATICS;Saltz J, 2018, CELL REPORTS;Hinton G, 2018, JAMA;Bychkov D, 2018, SCIENTIFIC REPORTS;Kadurin A, 2017, MOLECULAR PHARMACEUTICS;Mayr A, 2018, CHEMICAL SCIENCE;Hutson M, 2018, SCIENCE;Rifaioğlu A, 2018, BRIEFINGS IN BIOINFORMATICS;Cruz-Roa Á, 2017, SCIENTIFIC REPORTS;Morgan P, 2018, NATURE REVIEWS DRUG DISCOVERY;Korbar B, 2017, JOURNAL OF PATHOLOGY INFORMATICS;Preuer K, 2018, JOURNAL OF CHEMICAL INFORMATION AND MODELING;Lenselink E, 2017, JOURNAL OF CHEMINFORMATICS;Sharma H, 2017, COMPUTERIZED MEDICAL IMAGING AND GRAPHICS;Cohen O, 2018, MAGNETIC RESONANCE IN MEDICINE;Ramsundar B, 2017, JOURNAL OF CHEMICAL INFORMATION AND MODELING;Corredor G, 2018, CLINICAL CANCER RESEARCH;Tasaki S, 2018, NATURE COMMUNICATIONS;Gkotsis G, 2017, SCIENTIFIC REPORTS;Mamoshina P, 2018, FRONTIERS IN GENETICS;Kraus V, 2018, NATURE REVIEWS RHEUMATOLOGY;Boyiadzis M, 2018, JOURNAL FOR IMMUNOTHERAPY OF CANCER;Godinez W, 2017, BIOINFORMATICS;Lu C, 2018, LABORATORY INVESTIGATION;McMillan E, 2018, CELL;Nirschl J, 2018, PLOS ONE;Ferrero E, 2017, JOURNAL OF TRANSLATIONAL MEDICINE;Olsen T, 2018, JOURNAL OF PATHOLOGY INFORMATICS;Huang C, 2017, PLOS ONE;Lu C, 2017, MODERN PATHOLOGY;Paré G, 2017, SCIENTIFIC REPORTS;Ament S, 2018, MOLECULAR SYSTEMS BIOLOGY;Rahman R, 2016, BIOINFORMATICS;Romo‐Bucheli D, 2017, CYTOMETRY PART A;Gool A, 2017, NATURE REVIEWS DRUG DISCOVERY;Rohacek A, 2017, DEVELOPMENTAL CELL;Odell S, 2017, 
CURRENT PLANT BIOLOGY;Wang Q, 2017, PLOS ONE;Casanova R, 2017, CANCER RESEARCH;Farimani A, 2018, BIOPHYSICAL JOURNAL;Giraldo N, 2017, CANCER RESEARCH;Rashid S, 2017, BIORXIV (COLD SPRING HARBOR LABORATORY);LeCun Y, 2015, NATURE;Esteva A, 2018, NATURE MEDICINE;Wang B, 2017, NATURE METHODS;Ding J, 2018, NATURE COMMUNICATIONS;Norgeot B, 2018, NATURE MEDICINE;Way G, 2017, ;Steele A, 2018, PLOS ONE;Wang D, 2018, GENOMICS PROTEOMICS & BIOINFORMATICS;Smith J, 2018, ACS MEDICINAL CHEMISTRY LETTERS;Jha A, 2017, BIOINFORMATICS;Rashid S, 2019, BIOINFORMATICS;Finnegan A, 2017, PLOS COMPUTATIONAL BIOLOGY;Rouillard A, 2018, PLOS COMPUTATIONAL BIOLOGY;Korbar B, 2017, ARXIV (CORNELL UNIVERSITY)",,,OPENALEX,"Vamathevan J, 2019, NATURE REVIEWS DRUG DISCOVERY","Vamathevan J, 2019, NATURE REVIEWS DRUG DISCOVERY" +https://openalex.org/W1541288193,10.1023/a:1022602019183,Genetic Algorithms and Machine Learning,1988,en,article,3031,MACHINE LEARNING,Machine Learning,David E. Goldberg;John H. Holland,David E. Goldberg;John H. Holland,"University of Alabama, Tuscaloosa, USA;University of Alabama, Tuscaloosa. DGOLDBER@UA1VM.BITNET#TAB#;University of Michigan, Ann Arbor, USA;University of Michigan, Ann Arbor. 
JHH@UM.CC.UMICH.EDU#TAB#",,,3,2-3,95,99,Mathematics;Artificial intelligence;Computer science,US,", 1989, CHOICE REVIEWS ONLINE;Holland J, 1992, THE MIT PRESS EBOOKS;Goldberg D, 1988, ;Foxman D, 1973, THE WESTERN POLITICAL QUARTERLY;Seel P, 2012, ;Edelman G, 1987, ;Holland J, 1986, THE MIT PRESS EBOOKS;Holland J, 1962, JOURNAL OF THE ACM;Grefenstette J, 2014, PSYCHOLOGY PRESS EBOOKS;, 2013, PSYCHOLOGY PRESS EBOOKS;Fourman M, 1985, INTERNATIONAL CONFERENCE ON GENETIC ALGORITHMS;Grefenstette J, 1987, ;Davis L, 1987, INTERNATIONAL CONFERENCE ON GENETIC ALGORITHMS",,,OPENALEX,"Goldberg D, 1988, MACHINE LEARNING","Goldberg D, 1988, MACHINE LEARNING" +https://openalex.org/W2161336914,10.1145/2347736.2347755,A few useful things to know about machine learning,2012,en,article,3244,COMMUNICATIONS OF THE ACM,Communications of the ACM,Pedro Domingos,Pedro Domingos,"University of Washington, Seattle","Pedro Domingos (corresponding author), University of Washington, Seattle","Tapping into the ""folk knowledge"" needed to advance machine learning applications.",55,10,78,87,Computer science;Need to know;Artificial intelligence;Machine learning;Computer security,US,"Benjamini Y, 1995, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Vapnik V, 1995, ;Witten I, 2011, ELSEVIER EBOOKS;Quinlan J, 1992, ;Tenenbaum J, 2000, SCIENCE;Pearl J, 2009, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Witten I, 2008, ;Bengio Y, 2009, FOUNDATIONS AND TRENDS® IN MACHINE LEARNING;Salzberg S, 1994, ;Manyika J, 2011, ;Bengio Y, 2009, NOW PUBLISHERS, INC. 
EBOOKS;Hitchcock C, 2001, THE PHILOSOPHICAL REVIEW;Domingos P, 1997, MACHINE LEARNING;, 2000, CHOICE REVIEWS ONLINE;Richardson M, 2006, MACHINE LEARNING;Bauer E, 1999, MACHINE LEARNING;Bernardo J, 1994, WILEY SERIES IN PROBABILITY AND STATISTICS;Wolpert D, 1996, NEURAL COMPUTATION;Blumer A, 1987, INFORMATION PROCESSING LETTERS;Klockars A, 1986, ;Kohavi R, 2008, DATA MINING AND KNOWLEDGE DISCOVERY;Domingos P, 1999, DATA MINING AND KNOWLEDGE DISCOVERY;Domingos P, 2000, INTERNATIONAL CONFERENCE ON MACHINE LEARNING;Ng A, 1997, INTERNATIONAL CONFERENCE ON MACHINE LEARNING;Domingos P, 2000, ;Langley P, 1988, MACHINE LEARNING;Pearl J, 1978, INTERNATIONAL JOURNAL OF GENERAL SYSTEMS;Hulten G, 2002, ;Cohen W, 1994, ARTIFICIAL INTELLIGENCE;, 2007, ",,,OPENALEX,"Domingos P, 2012, COMMUNICATIONS OF THE ACM","Domingos P, 2012, COMMUNICATIONS OF THE ACM" +https://openalex.org/W2111547563,10.1016/j.csbj.2014.11.005,Machine learning applications in cancer prognosis and prediction,2014,en,review,3293,COMPUTATIONAL AND STRUCTURAL BIOTECHNOLOGY JOURNAL,Computational and Structural Biotechnology Journal,Κωνσταντίνα Κούρου;Themis P. Exarchos;Konstantinos Exarchos;Michalis V. Karamouzis;Dimitrios I. Fotiadis,Konstantina Kourou;Themis P. Exarchos;Konstantinos P. Exarchos;Michalis V. Karamouzis;Dimitrios I. Fotiadis,"Unit of Medical Technology and Intelligent Information Systems, Dept. of Materials Science and Engineering, University of Ioannina, Ioannina, Greece;IMBB — FORTH, Dept. of Biomedical Research, Ioannina, Greece;Unit of Medical Technology and Intelligent Information Systems, Dept. of Materials Science and Engineering, University of Ioannina, Ioannina, Greece;Unit of Medical Technology and Intelligent Information Systems, Dept. of Materials Science and Engineering, University of Ioannina, Ioannina, Greece;Molecular Oncology Unit, Department of Biological Chemistry, Medical School, University of Athens, Athens, Greece;IMBB — FORTH, Dept. 
of Biomedical Research, Ioannina, Greece;Unit of Medical Technology and Intelligent Information Systems, Dept. of Materials Science and Engineering, University of Ioannina, Ioannina, Greece","Dimitrios I. Fotiadis (corresponding author), IMBB — FORTH, Dept. of Biomedical Research, Ioannina, Greece; Unit of Medical Technology and Intelligent Information Systems, Dept. of Materials Science and Engineering, University of Ioannina, Ioannina, Greece","Cancer has been characterized as a heterogeneous disease consisting of many different subtypes. The early diagnosis and prognosis of a cancer type have become a necessity in cancer research, as it can facilitate the subsequent clinical management of patients. The importance of classifying cancer patients into high or low risk groups has led many research teams, from the biomedical and the bioinformatics field, to study the application of machine learning (ML) methods. Therefore, these techniques have been utilized as an aim to model the progression and treatment of cancerous conditions. In addition, the ability of ML tools to detect key features from complex datasets reveals their importance. A variety of these techniques, including Artificial Neural Networks (ANNs), Bayesian Networks (BNs), Support Vector Machines (SVMs) and Decision Trees (DTs) have been widely applied in cancer research for the development of predictive models, resulting in effective and accurate decision making. Even though it is evident that the use of ML methods can improve our understanding of cancer progression, an appropriate level of validation is needed in order for these methods to be considered in the everyday clinical practice. In this work, we present a review of recent ML approaches employed in the modeling of cancer progression. The predictive models discussed here are based on various supervised ML techniques as well as on different input features and data samples. 
Given the growing trend on the application of ML methods in cancer research, we present here the most recent publications that employ these techniques as an aim to model cancer risk or patient outcomes.",13,,8,17,Machine learning;Artificial intelligence;Computer science;Support vector machine;Cancer;Artificial neural network;Bayesian network;Clinical Practice;Variety (cybernetics);Field (mathematics);Predictive modelling;Medicine;Mathematics,GR,"Hanahan D, 2011, CELL;Witten I, 2011, ELSEVIER EBOOKS;Nasrabadi N, 2007, JOURNAL OF ELECTRONIC IMAGING;Kohavi R, 1997, ARTIFICIAL INTELLIGENCE;, 2008, ;Paik S, 2004, NEW ENGLAND JOURNAL OF MEDICINE;Kononenko I, 2001, ARTIFICIAL INTELLIGENCE IN MEDICINE;Delen D, 2004, ARTIFICIAL INTELLIGENCE IN MEDICINE;Michiels S, 2005, THE LANCET;Cruz J, 2007, PUBMED;Akay M, 2008, EXPERT SYSTEMS WITH APPLICATIONS;Ein‐Dor L, 2006, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Bach P, 2003, JNCI JOURNAL OF THE NATIONAL CANCER INSTITUTE;Dupuy A, 2007, JNCI JOURNAL OF THE NATIONAL CANCER INSTITUTE;Zen K, 2010, MEDICINAL RESEARCH REVIEWS;Heneghan H, 2010, CURRENT OPINION IN PHARMACOLOGY;Listgarten J, 2004, CLINICAL CANCER RESEARCH;Kim W, 2012, JOURNAL OF BREAST CANCER;Chang S, 2013, BMC BIOINFORMATICS;Papadopoulos A, 2004, ARTIFICIAL INTELLIGENCE IN MEDICINE;Chen Y, 2014, COMPUTERS IN BIOLOGY AND MEDICINE;Tseng C, 2013, NEURAL COMPUTING AND APPLICATIONS;Ayer T, 2010, CANCER;Park K, 2013, ENGINEERING APPLICATIONS OF ARTIFICIAL INTELLIGENCE;Papadopoulos A, 2008, COMPUTERS IN BIOLOGY AND MEDICINE;Maclin P, 1991, JOURNAL OF MEDICAL SYSTEMS;Koscielny S, 2010, SCIENCE TRANSLATIONAL MEDICINE;Exarchos K, 2011, IEEE TRANSACTIONS ON INFORMATION TECHNOLOGY IN BIOMEDICINE;Kim J, 2013, JOURNAL OF THE AMERICAN MEDICAL INFORMATICS ASSOCIATION;Domchek S, 2003, JOURNAL OF CLINICAL ONCOLOGY;Xu X, 2012, ;Hall M, 1999, RESEARCH COMMONS (UNIVERSITY OF WAIKATO);Fortunato O, 2014, MOLECULES;Cicchetti D, 1992, CLINICAL CHEMISTRY;Waddell M, 2005, ;Gilmore S, 2010, 
EXPERIMENTAL DERMATOLOGY;Park C, 2014, PLOS ONE;Cochran A, 1997, PIGMENT CELL RESEARCH;Ren X, 2012, NUCLEIC ACIDS RESEARCH;Rosado P, 2013, EXPERT SYSTEMS WITH APPLICATIONS;Drier Y, 2011, PLOS ONE;Wang Y, 2012, BMC SYSTEMS BIOLOGY;Fielding L, 1992, CANCER;Exarchos K, 2012, BMC MEDICAL INFORMATICS AND DECISION MAKING;Stojadinovic A, 2011, THE AMERICAN SURGEON;Ren X, 2013, NUCLEIC ACIDS RESEARCH;́n F, 2004, EUROPEAN JOURNAL OF CANCER PREVENTION;Bochare A, 2014, INTERNATIONAL JOURNAL OF MEDICAL ENGINEERING AND INFORMATICS;Bian X, 2009, NATURE PRECEDINGS;Parthaláin N, 2010, LECTURE NOTES IN COMPUTER SCIENCE;Parker J, 2009, JOURNAL OF CLINICAL ONCOLOGY;Platt J, 1999, EXPLORE BRISTOL RESEARCH;Barrett T, 2006, NUCLEIC ACIDS RESEARCH;Estévez P, 2009, IEEE TRANSACTIONS ON NEURAL NETWORKS;Cruz J, 2006, CANCER INFORMATICS;, 2007, APRESS EBOOKS;Ein‐Dor L, 2004, BIOINFORMATICS;Cuzick J, 2011, JOURNAL OF CLINICAL ONCOLOGY;Gevaert O, 2006, BIOINFORMATICS;LG A, 2013, JOURNAL OF HEALTH & MEDICAL INFORMATICS;Bottaci L, 1997, THE LANCET;Sun Y, 2006, BIOINFORMATICS;Polley M, 2013, JNCI JOURNAL OF THE NATIONAL CANCER INSTITUTE;Madhavan D, 2013, FRONTIERS IN GENETICS;Bilal E, 2013, PLOS COMPUTATIONAL BIOLOGY;Simes R, 1985, JOURNAL OF CHRONIC DISEASES;Niu Y, 2009, BIOINFORMATICS;Urbanowicz R, 2013, JOURNAL OF THE AMERICAN MEDICAL INFORMATICS ASSOCIATION;Chuang L, 2011, ;Niknejad A, 2013, ;Agah A, 2013, ;Bian X, 2009, NATURE PRECEDINGS",,,OPENALEX,"Κούρου Κ, 2014, COMPUTATIONAL AND STRUCTURAL BIOTECHNOLOGY JOURNAL","Κούρου Κ, 2014, COMPUTATIONAL AND STRUCTURAL BIOTECHNOLOGY JOURNAL" +https://openalex.org/W2151591509,10.3389/fninf.2014.00014,Machine learning for neuroimaging with scikit-learn,2014,en,article,2640,FRONTIERS IN NEUROINFORMATICS,Frontiers in Neuroinformatics,Alexandre Abraham;Fabian Pedregosa;Michael Eickenberg;Philippe Gervais;Andreas Mueller;Jean Kossaifi;Alexandre Gramfort;Bertrand Thirion;Gaël Varoquaux,Alexandre Abraham;Fabian Pedregosa;Michael Eickenberg;Philippe 
Gervais;Andreas Mueller;Jean Kossaifi;Alexandre Gramfort;Bertrand Thirion;Gaël Varoquaux,"Parietal Team, INRIA Saclay-Île-de-France Saclay, France  Neurospin, I2 BM, DSV, CEA Gif-Sur-Yvette, France;Parietal Team, INRIA Saclay-Île-de-France, Saclay, France;Neurospin, I2 BM, DSV, CEA, Gif-Sur-Yvette, France;Parietal Team, INRIA Saclay-Île-de-France Saclay, France  Neurospin, I2 BM, DSV, CEA Gif-Sur-Yvette, France;Parietal Team, INRIA Saclay-Île-de-France, Saclay, France;Neurospin, I2 BM, DSV, CEA, Gif-Sur-Yvette, France;Parietal Team, INRIA Saclay-Île-de-France Saclay, France  Neurospin, I2 BM, DSV, CEA Gif-Sur-Yvette, France;Neurospin, I2 BM, DSV, CEA, Gif-Sur-Yvette, France;Parietal Team, INRIA Saclay-Île-de-France, Saclay, France;Parietal Team, INRIA Saclay-Île-de-France Saclay, France  Neurospin, I2 BM, DSV, CEA Gif-Sur-Yvette, France;Parietal Team, INRIA Saclay-Île-de-France, Saclay, France;Neurospin, I2 BM, DSV, CEA, Gif-Sur-Yvette, France;Institute of Computer Science VI, University of Bonn Bonn, Germany;Institute of Computer Science VI, University of Bonn, Bonn, Germany;Department of Computing, Imperial College London London, UK;Department of Computing, Imperial College London, London, UK;Parietal Team, INRIA Saclay-Île-de-France Saclay, France  Neurospin, I2 BM, DSV, CEA Gif-Sur-Yvette, France  Institut Mines-Telecom, Telecom ParisTech, CNRS LTCI Paris, France;Parietal Team, INRIA Saclay-Île-de-France, Saclay, France;Neurospin, I2 BM, DSV, CEA, Gif-Sur-Yvette, France;Institut Mines-Telecom, Telecom ParisTech, CNRS LTCI, Paris, France;Parietal Team, INRIA Saclay-Île-de-France Saclay, France  Neurospin, I2 BM, DSV, CEA Gif-Sur-Yvette, France;Parietal Team, INRIA Saclay-Île-de-France, Saclay, France;Neurospin, I2 BM, DSV, CEA, Gif-Sur-Yvette, France;Parietal Team, INRIA Saclay-Île-de-France Saclay, France  Neurospin, I2 BM, DSV, CEA Gif-Sur-Yvette, France;Neurospin, I2 BM, DSV, CEA, Gif-Sur-Yvette, France;Parietal Team, INRIA Saclay-Île-de-France, 
Saclay, France","Alexandre Abraham (corresponding author), Parietal Team, INRIA Saclay-Île-de-France Saclay, France ; Neurospin, I2 BM, DSV, CEA Gif-Sur-Yvette, France; Parietal Team, INRIA Saclay-Île-de-France, Saclay, France; Neurospin, I2 BM, DSV, CEA, Gif-Sur-Yvette, France","Statistical machine learning methods are increasingly used for neuroimaging data analysis. Their main virtue is their ability to model high-dimensional datasets, e.g., multivariate analysis of activation images or resting-state time series. Supervised learning is typically used in decoding or encoding settings to relate brain images to behavioral or clinical observations, while unsupervised learning can uncover hidden structures in sets of images (e.g., resting state functional MRI) or find sub-populations in large cohorts. By considering different functional neuroimaging applications, we illustrate how scikit-learn, a Python machine learning library, can be used to perform some key analysis steps. Scikit-learn contains a very large set of statistical learning algorithms, both supervised and unsupervised, and its application to neuroimaging data provides a versatile tool to study the brain.",8,,14,14,Neuroimaging;Computer science;Artificial intelligence;Unsupervised learning;Machine learning;Python (programming language);Functional neuroimaging;Pattern recognition (psychology);Psychology;Neuroscience,FR;DE;GB,"Tibshirani R, 1996, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Hunter J, 2007, COMPUTING IN SCIENCE & ENGINEERING;Hastie T, 2001, SPRINGER SERIES IN STATISTICS;Hall M, 2009, ACM SIGKDD EXPLORATIONS NEWSLETTER;Smith S, 2004, NEUROIMAGE;Biswal B, 1995, MAGNETIC RESONANCE IN MEDICINE;Efron B, 2004, THE ANNALS OF STATISTICS;Hyvärinen A, 2000, NEURAL NETWORKS;Aapo H, 2004, THE MIT PRESS EBOOKS;Smith S, 2009, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Haxby J, 2001, SCIENCE;Penny W, 2007, UCL DISCOVERY (UNIVERSITY COLLEGE LONDON);Destrieux C, 2010, 
NEUROIMAGE;Calhoun V, 2001, HUMAN BRAIN MAPPING;Beckmann C, 2004, IEEE TRANSACTIONS ON MEDICAL IMAGING;Gorgolewski K, 2011, FRONTIERS IN NEUROINFORMATICS;Kriegeskorte N, 2006, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Craddock R, 2011, HUMAN BRAIN MAPPING;Turkeltaub P, 2002, NEUROIMAGE;Fagin R, 2004, THEORETICAL COMPUTER SCIENCE;Naselaris T, 2010, NEUROIMAGE;Laird A, 2005, HUMAN BRAIN MAPPING;Miyawaki Y, 2008, NEURON;Hanke M, 2009, NEUROINFORMATICS;Schrouff J, 2013, NEUROINFORMATICS;Calı̀ A, 2012, JOURNAL OF WEB SEMANTICS;Poldrack R, 2011, FRONTIERS IN NEUROINFORMATICS;Poldrack R, 2015, ANNUAL REVIEW OF PSYCHOLOGY;Kiviniemi V, 2003, NEUROIMAGE;Baget J, 2011, ARTIFICIAL INTELLIGENCE;Hanson S, 2004, NEUROIMAGE;Beeri C, 1981, LECTURE NOTES IN COMPUTER SCIENCE;Sören S, 2010, MAX PLANCK INSTITUTE FOR PLASMA PHYSICS;O’Toole A, 2007, JOURNAL OF COGNITIVE NEUROSCIENCE;Varoquaux G, 2013, NEUROIMAGE;Calı̀ A, 2012, ARTIFICIAL INTELLIGENCE;Laird A, 2011, BMC RESEARCH NOTES;Varoquaux G, 2011, LECTURE NOTES IN COMPUTER SCIENCE;Thirion B, 2005, HUMAN BRAIN MAPPING;Varoquaux G, 2010, NEUROIMAGE;Dalvi N, 2012, JOURNAL OF THE ACM;Michel V, 2011, PATTERN RECOGNITION;Vennekens J, 2009, THEORY AND PRACTICE OF LOGIC PROGRAMMING;Gottlob G, 2014, ACM TRANSACTIONS ON DATABASE SYSTEMS;Hanson S, 2007, NEURAL COMPUTATION;Millman K, 2007, COMPUTING IN SCIENCE & ENGINEERING;Gottlob G, 2013, ANNALS OF MATHEMATICS AND ARTIFICIAL INTELLIGENCE;Destrieux C, 2009, NEUROIMAGE;Vlasselaer J, 2014, LIRIAS (KU LEUVEN);Pedregosa F, 2012, ARXIV (CORNELL UNIVERSITY);Walt S, 2011, COMPUTING IN SCIENCE & ENGINEERING;Fischl B, 2003, CEREBRAL CORTEX;Mesulam M, 1998, BRAIN;Abraham A, 2014, FRONTIERS IN NEUROINFORMATICS;Eickhoff S, 2009, HUMAN BRAIN MAPPING;, 1995, CHOICE REVIEWS ONLINE;Pereira F, 2008, NEUROIMAGE;Nieuwenhuys R, 2012, PROGRESS IN BRAIN RESEARCH;Mur M, 2009, SOCIAL COGNITIVE AND AFFECTIVE NEUROSCIENCE;Lieberman M, 2015, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Dantsin E, 2002, 
;Samartsidis P, 2017, STATISTICAL SCIENCE;Jha A, 2012, PROCEEDINGS OF THE VLDB ENDOWMENT;Ceylan İ, 2021, ARTIFICIAL INTELLIGENCE;Riguzzi F, 2007, MACHINE LEARNING;Senellart P, 2017, HAL (LE CENTRE POUR LA COMMUNICATION SCIENTIFIQUE DIRECTE)",,,OPENALEX,"Abraham A, 2014, FRONTIERS IN NEUROINFORMATICS","Abraham A, 2014, FRONTIERS IN NEUROINFORMATICS" +https://openalex.org/W2111072639,10.1016/j.neucom.2005.12.126,Extreme learning machine: Theory and applications,2006,en,article,13171,NEUROCOMPUTING,Neurocomputing,Guang-Bin Huang;Qinyu Zhu;Chee‐Kheong Siew,Guang-Bin Huang;Qin-Yu Zhu;Chee-Kheong Siew,"School of Electrical and Electronic Engineering, NanyangTechnological University, Nanyang Avenue, Singapore 639798, Singapore;School of Electrical and Electronic Engineering, NanyangTechnological University, Nanyang Avenue, Singapore 639798, Singapore;School of Electrical and Electronic Engineering, NanyangTechnological University, Nanyang Avenue, Singapore 639798, Singapore",,,70,1-3,489,501,Extreme learning machine;Computer science;Bottleneck;Generalization;Feedforward neural network;Artificial neural network;Benchmark (surveying);Feed forward;Artificial intelligence;Key (lock);Machine learning;Algorithm;Mathematics;Engineering,SG,"Haykin S, 1998, ;Blake C, 1998, MEDICAL ENTOMOLOGY AND ZOOLOGY;Freund Y, 1996, ;Hornik K, 1991, NEURAL NETWORKS;Huang G, 2006, IEEE TRANSACTIONS ON NEURAL NETWORKS;Merz C, 1996, MEDICAL ENTOMOLOGY AND ZOOLOGY;Banerjee K, 1973, TECHNOMETRICS;Franklin J, 1968, ;Mayne A, 1972, JOURNAL OF THE OPERATIONAL RESEARCH SOCIETY;Bartlett P, 1998, IEEE TRANSACTIONS ON INFORMATION THEORY;Huang G, 2003, IEEE TRANSACTIONS ON NEURAL NETWORKS;, 2003, CHOICE REVIEWS ONLINE;Cambria E, 2013, ;, 2005, PROCEEDINGS. 
2005 IEEE INTERNATIONAL JOINT CONFERENCE ON NEURAL NETWORKS, 2005.;Huang G, 1998, IEEE TRANSACTIONS ON NEURAL NETWORKS;Tamura S, 1997, IEEE TRANSACTIONS ON NEURAL NETWORKS;Collobert R, 2002, NEURAL COMPUTATION;Ferrari S, 2005, IEEE TRANSACTIONS ON NEURAL NETWORKS;Huang G, 2005, ;Huang G, 2006, IEEE TRANSACTIONS ON CIRCUITS AND SYSTEMS II ANALOG AND DIGITAL SIGNAL PROCESSING;Huang G, 2000, IEEE TRANSACTIONS ON NEURAL NETWORKS;Huang G, 2006, IEEE TRANSACTIONS ON NEURAL NETWORKS;Wang D, 2006, PROCEEDINGS. 2005 IEEE INTERNATIONAL JOINT CONFERENCE ON NEURAL NETWORKS, 2005.;Raeetsch G, 1998, INTERNATIONAL CONFERENCE ON NEURAL INFORMATION PROCESSING;Wilson D, 2002, PROCEEDINGS OF INTERNATIONAL CONFERENCE ON NEURAL NETWORKS (ICNN'96);Romero E, 2003, ",,,OPENALEX,"Huang G, 2006, NEUROCOMPUTING","Huang G, 2006, NEUROCOMPUTING" +https://openalex.org/W2135194391,10.1023/a:1020281327116,An Introduction to MCMC for Machine Learning,2003,en,article,2416,MACHINE LEARNING,Machine Learning,Christophe Andrieu;Nando de Freitas;Arnaud Doucet;Michael I. Jordan,Christophe Andrieu;Nando de Freitas;Arnaud Doucet;Michael I. Jordan,"Department of Mathematics, Statistics Group, University of Bristol, University Walk, Bristol, BS8 1TW, UK;Department of Mathematics, Statistics Group, University of Bristol, University Walk, UK;Department of Computer Science, University of British Columbia, 2366 Main Mall, Vancouver, BC, V6T 1Z4, Canada;Department of Computer Science, University of British Columbia, Vancouver, Canada;Department of Electrical and Electronic Engineering, University of Melbourne, Parkville, Victoria, 3052, Australia;[Department of Electrical and Electronic Engineering, University of Melbourne, Parkville, Australia];Departments of Computer Science and Statistics, University of California at Berkeley, 387 Soda Hall, Berkeley, CA, 94720-1776, USA;Departments of Computer Science and Statistics, University of California at Berkeley, Berkeley, USA","Michael I. 
Jordan (corresponding author), Departments of Computer Science and Statistics, University of California at Berkeley, 387 Soda Hall, Berkeley, CA, 94720-1776, USA; Departments of Computer Science and Statistics, University of California at Berkeley, Berkeley, USA",,50,1-2,5,43,Computer science;Markov chain Monte Carlo;Monte Carlo method;Probabilistic logic;Artificial intelligence;Machine learning;Bayesian probability;Mathematics;Statistics,GB;CA;AU;US,"Page L, 1999, ;Doucet A, 2001, ;Baxter R, 1985, SERIES ON ADVANCES IN STATISTICAL MECHANICS;Kim H, 2000, TECHNOMETRICS;Meyn S, 1993, ;Neal R, 1996, LECTURE NOTES IN STATISTICS;Hochba D, 1997, ACM SIGACT NEWS;N. K, 1992, ELSEVIER EBOOKS;Ghahramani Z, 1997, MACHINE LEARNING;Neal R, 2011, ;Carlin B, 1995, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Besag J, 1995, STATISTICAL SCIENCE;Isard M, 1996, LECTURE NOTES IN COMPUTER SCIENCE;Gilks W, 1994, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES D (THE STATISTICIAN);Jong P, 1995, BIOMETRIKA;Mengersen K, 1996, THE ANNALS OF STATISTICS;Bergman N, 1999, ;Jerrum M, 1996, ;Doucet A, 1998, OPENGREY (INSTITUT DE L'INFORMATION SCIENTIFIQUE ET TECHNIQUE);Fox D, 2001, ;Dension D, 1998, BIOMETRIKA;Tierney L, 1999, STATISTICS IN MEDICINE;McCulloch C, 1994, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Browne W, 2000, COMPUTATIONAL STATISTICS;Pasula H, 1999, ;Higdon D, 1998, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;MacEachern S, 1999, CANADIAN JOURNAL OF STATISTICS;Godsill S, 1998, ;Wakefield J, 1991, STATISTICS AND COMPUTING;Chenney S, 2000, ;Bielza C, 1999, MANAGEMENT SCIENCE;Ishwaran H, 1999, JOURNAL OF COMPUTATIONAL AND GRAPHICAL STATISTICS;Bar-Yossef Z, 2000, ;Andrieu C, 2003, ;Propp J, 1998, DIMACS SERIES IN DISCRETE MATHEMATICS AND THEORETICAL COMPUTER SCIENCE;Andrieu C, 2001, ;Fill J, 1997, ;Insua D, 1998, LECTURE NOTES IN STATISTICS;Morris R, 2002, ;Wilkinson D, 2002, STATISTICS AND COMPUTING;Salmond D, 2001, ;Pasula H, 2001, 
INTERNATIONAL JOINT CONFERENCE ON ARTIFICIAL INTELLIGENCE;Bui H, 2000, OWN YOUR POTENTIAL (DEAKIN);Kanazawa K, 2013, ARXIV (CORNELL UNIVERSITY);Ortiz L, 2013, ARXIV (CORNELL UNIVERSITY);Casella G, 1999, ECOMMONS (CORNELL UNIVERSITY);Ormoneit D, 2013, ARXIV (CORNELL UNIVERSITY);Utsugi A, 2001, NEURAL PROCESSING LETTERS;Vermaak J, 1999, CAMBRIDGE UNIVERSITY ENGINEERING DEPARTMENT PUBLICATIONS DATABASE;Dempster A, 1977, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Kirkpatrick S, 1983, SCIENCE;Metropolis N, 1953, THE JOURNAL OF CHEMICAL PHYSICS;Geman S, 1984, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Gordon N, 1993, IEE PROCEEDINGS F RADAR AND SIGNAL PROCESSING;Gelfand A, 1990, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Metropolis N, 1949, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Green P, 1995, BIOMETRIKA;Baum L, 1970, THE ANNALS OF MATHEMATICAL STATISTICS;Duane S, 1987, PHYSICS LETTERS B;Albert J, 1993, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Swendsen R, 1987, PHYSICAL REVIEW LETTERS;Liu J, 2004, SPRINGER SERIES IN STATISTICS;Escobar M, 1995, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Carter C, 1994, BIOMETRIKA;Richardson S, 1997, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Geweke J, 1989, ECONOMETRICA;Hesterberg T, 2002, TECHNOMETRICS;Wei G, 1990, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Richardson S, 1997, ;Chen M, 2000, SPRINGER SERIES IN STATISTICS;Brooks S, 1998, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES D (THE STATISTICIAN);Dyer M, 1991, JOURNAL OF THE ACM;Tu Z, 2002, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Casella G, 1996, BIOMETRIKA;Veach E, 1997, ;Bucher C, 1988, STRUCTURAL SAFETY;Roberts G, 1996, BIOMETRIKA;Pearl J, 1987, ARTIFICIAL INTELLIGENCE;Gilks W, 1998, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Green P, 2001, SCANDINAVIAN JOURNAL OF STATISTICS;Levine R, 2001, JOURNAL OF COMPUTATIONAL AND GRAPHICAL 
STATISTICS;Andrieu C, 1999, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Thrun S, 1999, NEURAL INFORMATION PROCESSING SYSTEMS;Mykland P, 1995, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Ziegel E, 1999, TECHNOMETRICS;Fill J, 1998, THE ANNALS OF APPLIED PROBABILITY;Diaconis P, 1998, JOURNAL OF COMPUTER AND SYSTEM SCIENCES;Applegate D, 1991, ;Jensen C, 1995, INTERNATIONAL JOURNAL OF HUMAN-COMPUTER STUDIES;Celeux G, 1992, STOCHASTICS AND STOCHASTICS REPORTS;Haario H, 1991, ADVANCES IN APPLIED PROBABILITY;Gelfand A, 1994, JOURNAL OF COMPUTATIONAL AND GRAPHICAL STATISTICS;Barber D, 1996, ASTON PUBLICATIONS EXPLORER (ASTON UNIVERSITY);Wood S, 1998, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Sherman R, 1999, ECONOMETRICS JOURNAL;Al-Qaq W, 1995, IEEE TRANSACTIONS ON COMMUNICATIONS;Newton M, 2000, BIOMETRICS;Jaumard B, 2006, ;Schuurmans D, 2013, ARXIV (CORNELL UNIVERSITY);Hastings W, 1970, BIOMETRIKA;Gilks W, 1995, ;Moore M, 1983, PHYSICS BULLETIN;Doucet A, 2000, STATISTICS AND COMPUTING;Tanner M, 1987, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Tierney L, 1994, THE ANNALS OF STATISTICS;Rubinstein R, 2016, WILEY SERIES IN PROBABILITY AND STATISTICS;, 1988, MATHEMATICS AND COMPUTERS IN SIMULATION;Pitt M, 1999, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Merwe R, 2000, ;Neal R, 2000, ARXIV (CORNELL UNIVERSITY);Murphy K, 2001, ;Berners‐Lee T, 1994, COMMUNICATIONS OF THE ACM;Kalos M, 1986, ;Rota G, 1988, ADVANCES IN MATHEMATICS;Peskun P, 1973, BIOMETRIKA;Damlen P, 1999, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Escobar M, 1995, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Smith P, 1997, IEEE JOURNAL ON SELECTED AREAS IN COMMUNICATIONS;Freitas J, 2000, NEURAL COMPUTATION;Cheng J, 2000, ;Cheng J, 2000, JOURNAL OF ARTIFICIAL INTELLIGENCE RESEARCH;Troughton P, 2002, ;, 1995, ;Neuwald A, 1997, NUCLEIC ACIDS RESEARCH;Beichl I, 2000, COMPUTING IN SCIENCE & ENGINEERING;Andrieu C, 2001, NEURAL COMPUTATION;Tu Z, 2002, ;Freitas N, 2001, 
UNCERTAINTY IN ARTIFICIAL INTELLIGENCE;Peskun P, 1973, BIOMETRIKA;Müller P, 1998, NEURAL COMPUTATION;Holmes C, 1998, NEURAL COMPUTATION;Ghahramani Z, 1994, ;Remondo D, 2000, IEEE TRANSACTIONS ON COMMUNICATIONS;Andrieu C, 2002, THE MIT PRESS EBOOKS;Poupart P, 2017, ENCYCLOPEDIA OF MACHINE LEARNING AND DATA MINING;Mykland P, 1995, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Andrieu C, 2001, JOURNAL OF APPLIED PROBABILITY;Aldous D, 1998, DIMACS SERIES IN DISCRETE MATHEMATICS AND THEORETICAL COMPUTER SCIENCE;Kannan R, 2002, ;Andrieu C, 1999, OXFORD UNIVERSITY RESEARCH ARCHIVE (ORA) (UNIVERSITY OF OXFORD);Clark E, 1999, ;Doucet A, 2013, ARXIV (CORNELL UNIVERSITY);Andrieu C, 2013, ARXIV (CORNELL UNIVERSITY);Forsyth D, 2003, ;Ridgeway G, 1999, RESEARCHWORKS AT THE UNIVERSITY OF WASHINGTON (UNIVERSITY OF WASHINGTON);Ortiz L, 2013, ARXIV (CORNELL UNIVERSITY);Rubinstein R, 1981, WILEY SERIES IN PROBABILITY AND STATISTICS;Laarhoven P, 1987, ;Robert C, 1999, SPRINGER TEXTS IN STATISTICS;Gasparini M, 1997, TECHNOMETRICS;Meyn S, 2009, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Hastings W, 1970, BIOMETRIKA;Gelfand A, 1990, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Wei G, 1990, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Pitt M, 1999, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Geman S, 1987, ELSEVIER EBOOKS;Tanner M, 1987, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Green P, 1995, BIOMETRIKA;Albert J, 1993, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Doucet A, 2005, ;Seila A, 1982, TECHNOMETRICS;Carter C, 1994, BIOMETRIKA;McLoughlin I, 2016, CAMBRIDGE UNIVERSITY PRESS EBOOKS;McCulloch C, 1994, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Gilks W, 1998, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Gelfand A, 1994, JOURNAL OF COMPUTATIONAL AND GRAPHICAL STATISTICS;Haario H, 1991, ADVANCES IN APPLIED PROBABILITY;Andrieu C, 2001, JOURNAL OF APPLIED PROBABILITY;Wood S, 1998, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Ghahramani Z, 1996, ;Moreau T, 2023, 
MEDICAL IMAGE ANALYSIS",,,OPENALEX,"Andrieu C, 2003, MACHINE LEARNING","Andrieu C, 2003, MACHINE LEARNING" +https://openalex.org/W2923537029,10.1103/revmodphys.91.045002,Machine learning and the physical sciences,2019,en,article,2429,REVIEWS OF MODERN PHYSICS,Reviews of Modern Physics,Giuseppe Carleo;J. I. Cirac;K. Cranmer;Laurent Daudet;Maria Schuld;Naftali Tishby;Leslie Vogt-Maranto;Lenka Zdeborová,Giuseppe Carleo;Ignacio Cirac;Kyle Cranmer;Laurent Daudet;Maria Schuld;Naftali Tishby;Leslie Vogt-Maranto;Lenka Zdeborová,"Center for Computational Quantum Physics, Flatiron Institute, 162 5th Avenue, New York, New York 10010, USA;Center for Computational Quantum Physics, Flatiron Institute, 162 5th Avenue, New York, New York 10010, USA;Center for Computational Quantum Physics, Flatiron Institute, 162 5th Avenue, New York, New York 10010, USA;Center for Computational Quantum Physics, Flatiron Institute, 162 5th Avenue, New York, New York 10010, USA;Center for Computational Quantum Physics, Flatiron Institute, 162 5th Avenue, New York, New York 10010, USA;Center for Computational Quantum Physics, Flatiron Institute, 162 5th Avenue, New York, New York 10010, USA;Center for Computational Quantum Physics, Flatiron Institute, 162 5th Avenue, New York, New York 10010, USA;Center for Computational Quantum Physics, Flatiron Institute, 162 5th Avenue, New York, New York 10010, USA",,"In October 2018 an APS Physics Next Workshop on Machine Learning was held in Riverhead, NY. 
This article reviews and summarizes the proceedings of this very broad, emerging field.This needs to be a placard in the left-hand column, with a custom tag.",91,4,,,Physics;Field (mathematics);Column (typography);Engineering physics;Library science;Data science;Engineering ethics;Mechanical engineering;Computer science;Engineering;Connection (principal bundle),US,"Gordon A, 1984, BIOMETRICS;Nielsen M, 2002, AMERICAN JOURNAL OF PHYSICS;Freund Y, 1997, JOURNAL OF COMPUTER AND SYSTEM SCIENCES;Johnson J, 2000, NEUROCOMPUTING;Jones D, 1998, JOURNAL OF GLOBAL OPTIMIZATION;Ganin Y, 2017, ADVANCES IN COMPUTER VISION AND PATTERN RECOGNITION;Nielsen M, 2011, ;Амари Ш, 1998, NEURAL COMPUTATION;Oseledets I, 2011, SIAM JOURNAL ON SCIENTIFIC COMPUTING;Smolensky P, 1986, MIT PRESS EBOOKS;Lloyd S, 2014, NATURE PHYSICS;Thouless D, 1977, PHILOSOPHICAL MAGAZINE;Nishimori H, 2001, ;Groß D, 2010, PHYSICAL REVIEW LETTERS;Decelle A, 2011, PHYSICAL REVIEW E;Engel A, 2001, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Dean D, 1996, JOURNAL OF PHYSICS A MATHEMATICAL AND GENERAL;Lakemeyer G, 2003, ;Decelle A, 2011, PHYSICAL REVIEW LETTERS;Gligorov V, 2013, JOURNAL OF INSTRUMENTATION;Collett T, 2015, THE ASTROPHYSICAL JOURNAL;Minitti M, 2015, PHYSICAL REVIEW LETTERS;Kind M, 2013, MONTHLY NOTICES OF THE ROYAL ASTRONOMICAL SOCIETY;Feldmann R, 2006, MONTHLY NOTICES OF THE ROYAL ASTRONOMICAL SOCIETY;Tóth G, 2010, PHYSICAL REVIEW LETTERS;Saad D, 1995, PHYSICAL REVIEW. E, STATISTICAL PHYSICS, PLASMAS, FLUIDS, AND RELATED INTERDISCIPLINARY TOPICS;Robin A, 2014, ASTRONOMY AND ASTROPHYSICS;Hand D, 2015, STATISTICS AND COMPUTING;Bonnett C, 2016, PHYSICAL REVIEW. D/PHYSICAL REVIEW. 
D.;Kabashima Y, 2016, IEEE TRANSACTIONS ON INFORMATION THEORY;Ntampaka M, 2015, THE ASTROPHYSICAL JOURNAL;Sompolinsky H, 1990, PHYSICAL REVIEW LETTERS;Schuch N, 2008, PHYSICAL REVIEW LETTERS;Changlani H, 2009, PHYSICAL REVIEW B;Györgyi G, 1990, PHYSICAL REVIEW A;Mezzacapo F, 2009, NEW JOURNAL OF PHYSICS;University> C, 1984, CONTEMPORARY MATHEMATICS - AMERICAN MATHEMATICAL SOCIETY;Ishida É, 2015, ASTRONOMY AND COMPUTING;Lorenz U, 2010, NEW JOURNAL OF PHYSICS;Estrada J, 2007, THE ASTROPHYSICAL JOURNAL;Lorenz U, 2010, PHYSICAL REVIEW A;Bang J, 2014, NEW JOURNAL OF PHYSICS;Sakata A, 2013, EUROPHYSICS LETTERS (EPL);Hintikka J, 1970, ;Levi I, 1967, SYNTHESE;Gendiar A, 2002, PHYSICAL REVIEW. E, STATISTICAL PHYSICS, PLASMAS, FLUIDS, AND RELATED INTERDISCIPLINARY TOPICS;Mehta P, 2014, ARXIV (CORNELL UNIVERSITY);Cranmer K, 2015, ARXIV (CORNELL UNIVERSITY);Bény C, 2013, ARXIV (CORNELL UNIVERSITY);Hochreiter S, 1997, NEURAL COMPUTATION;Schmidhuber J, 2014, NEURAL NETWORKS;Cybenko G, 1989, MATHEMATICS OF CONTROL SIGNALS AND SYSTEMS;Foreman-Mackey D, 2013, PUBLICATIONS OF THE ASTRONOMICAL SOCIETY OF THE PACIFIC;White S, 1992, PHYSICAL REVIEW LETTERS;Hinton G, 2002, NEURAL COMPUTATION;Behler J, 2007, PHYSICAL REVIEW LETTERS;Jaeger H, 2004, SCIENCE;Bartók A, 2010, PHYSICAL REVIEW LETTERS;Beaumont M, 2002, GENETICS;Bartók A, 2013, PHYSICAL REVIEW B;Rupp M, 2012, PHYSICAL REVIEW LETTERS;Reck M, 1994, PHYSICAL REVIEW LETTERS;Ramakrishnan R, 2014, SCIENTIFIC DATA;Brammer G, 2008, THE ASTROPHYSICAL JOURNAL;Verstraete F, 2008, ADVANCES IN PHYSICS;Seung H, 1992, ;Marjoram P, 2003, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Amit D, 1985, PHYSICAL REVIEW. 
A, GENERAL PHYSICS;Bershady M, 2004, PUBLICATIONS OF THE ASTRONOMICAL SOCIETY OF THE PACIFIC;Benitez N, 2000, THE ASTROPHYSICAL JOURNAL;Gardner E, 1988, JOURNAL OF PHYSICS A MATHEMATICAL AND GENERAL;Johnstone I, 2009, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Anandkumar A, 2014, CALTECHAUTHORS (CALIFORNIA INSTITUTE OF TECHNOLOGY);Snyder J, 2012, PHYSICAL REVIEW LETTERS;Krząkała F, 2013, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Shi Y, 2006, PHYSICAL REVIEW A;Seung H, 1992, PHYSICAL REVIEW A;Hackbusch W, 2009, JOURNAL OF FOURIER ANALYSIS AND APPLICATIONS;Cubuk E, 2015, PHYSICAL REVIEW LETTERS;Gardner E, 1987, EUROPHYSICS LETTERS (EPL);Sorella S, 1998, PHYSICAL REVIEW LETTERS;Carleo G, 2012, SCIENTIFIC REPORTS;Forte S, 2002, JOURNAL OF HIGH ENERGY PHYSICS;Gardner E, 1989, JOURNAL OF PHYSICS A MATHEMATICAL AND GENERAL;Ambs P, 2010, ADVANCES IN OPTICAL TECHNOLOGIES;Cameron E, 2012, MONTHLY NOTICES OF THE ROYAL ASTRONOMICAL SOCIETY;Saad D, 1995, PHYSICAL REVIEW LETTERS;Marshall P, 2009, THE ASTROPHYSICAL JOURNAL;Ben-Nun M, 1997, THE JOURNAL OF PHYSICAL CHEMISTRY A;Watkin T, 1994, JOURNAL OF PHYSICS A MATHEMATICAL AND GENERAL;Shalashilin D, 2011, FARADAY DISCUSSIONS;Schwarze H, 1993, JOURNAL OF PHYSICS A MATHEMATICAL AND GENERAL;Lu T, 1989, APPLIED OPTICS;Lundberg K, 2005, IEEE CONTROL SYSTEMS;Barkai N, 1994, PHYSICAL REVIEW. 
E, STATISTICAL PHYSICS, PLASMAS, FLUIDS, AND RELATED INTERDISCIPLINARY TOPICS;Çakmak B, 2014, ;Biehl M, 1993, EUROPHYSICS LETTERS (EPL);Minsky M, 1969, ;Adachi S, 2015, ARXIV (CORNELL UNIVERSITY);S S, 2005, IEEE TRANSACTIONS ON NEURAL NETWORKS;Hopfield J, 1982, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Fortunato S, 2009, PHYSICS REPORTS;Luxburg U, 2007, STATISTICS AND COMPUTING;Dirac P, 1930, MATHEMATICAL PROCEEDINGS OF THE CAMBRIDGE PHILOSOPHICAL SOCIETY;Ball R, 2015, JOURNAL OF HIGH ENERGY PHYSICS;McClean J, 2016, NEW JOURNAL OF PHYSICS;Carleo G, 2017, SCIENCE;Hofmann T, 2008, THE ANNALS OF STATISTICS;Carrasquilla J, 2017, NATURE PHYSICS;Baldi P, 2014, NATURE COMMUNICATIONS;Sisson S, 2007, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Marin J, 2011, STATISTICS AND COMPUTING;Nishimori H, 2001, ;Wang L, 2016, PHYSICAL REVIEW. B./PHYSICAL REVIEW. B;Brockherde F, 2017, NATURE COMMUNICATIONS;Roe B, 2005, NUCLEAR INSTRUMENTS AND METHODS IN PHYSICS RESEARCH SECTION A ACCELERATORS SPECTROMETERS DETECTORS AND ASSOCIATED EQUIPMENT;Collister A, 2004, PUBLICATIONS OF THE ASTRONOMICAL SOCIETY OF THE PACIFIC;Acar E, 2008, IEEE TRANSACTIONS ON KNOWLEDGE AND DATA ENGINEERING;Zdeborová L, 2016, ADVANCES IN PHYSICS;Broecker P, 2017, SCIENTIFIC REPORTS;Oliveira L, 2016, JOURNAL OF HIGH ENERGY PHYSICS;Krenn M, 2016, PHYSICAL REVIEW LETTERS;Baldi P, 2016, PHYSICAL REVIEW. D/PHYSICAL REVIEW. D.;Aurisano A, 2016, JOURNAL OF INSTRUMENTATION;Baldi P, 2016, THE EUROPEAN PHYSICAL JOURNAL C;Wigley P, 2016, SCIENTIFIC REPORTS;Guest D, 2016, PHYSICAL REVIEW. D/PHYSICAL REVIEW. 
D.;Lookman T, 2015, SPRINGER SERIES IN MATERIALS SCIENCE;Tanaka A, 2017, JOURNAL OF THE PHYSICAL SOCIETY OF JAPAN;Reddy G, 2016, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Riofrío C, 2017, NATURE COMMUNICATIONS;Baldassi C, 2016, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Baldassi C, 2015, PHYSICAL REVIEW LETTERS;Ohtsuki T, 2016, JOURNAL OF THE PHYSICAL SOCIETY OF JAPAN;Schoenholz S, 2016, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Barnes D, 2016, MONTHLY NOTICES OF THE ROYAL ASTRONOMICAL SOCIETY;Mézard M, 2017, PHYSICAL REVIEW. E;Likhomanenko T, 2015, JOURNAL OF PHYSICS CONFERENCE SERIES;Ntampaka M, 2016, THE ASTROPHYSICAL JOURNAL;Firth A, 2003, MONTHLY NOTICES OF THE ROYAL ASTRONOMICAL SOCIETY;Mavadia S, 2017, NATURE COMMUNICATIONS;Kirrander A, 2015, JOURNAL OF CHEMICAL THEORY AND COMPUTATION;Stevens J, 2013, JOURNAL OF INSTRUMENTATION;Li N, 2016, THE ASTROPHYSICAL JOURNAL;Ravanbakhsh S, 2017, PROCEEDINGS OF THE AAAI CONFERENCE ON ARTIFICIAL INTELLIGENCE;Novikov A, 2016, ARXIV (CORNELL UNIVERSITY);Sutherland D, 2012, ARXIV (CORNELL UNIVERSITY);Flach P, 2015, ;Teymur O, 2016, ;Biamonte J, 2017, NATURE;Bronstein M, 2017, IEEE SIGNAL PROCESSING MAGAZINE;, 2007, CHOICE REVIEWS ONLINE;Germain M, 2015, ;Smith J, 2017, CHEMICAL SCIENCE;Mézard M, 2009, ;Behler J, 2016, THE JOURNAL OF CHEMICAL PHYSICS;Nieuwenburg E, 2017, NATURE PHYSICS;Gastegger M, 2017, CHEMICAL SCIENCE;Wetzel S, 2017, PHYSICAL REVIEW. E;Deng D, 2017, PHYSICAL REVIEW X;Kull M, 2017, ;Paganini M, 2018, PHYSICAL REVIEW. D/PHYSICAL REVIEW. D.;Gao X, 2017, NATURE COMMUNICATIONS;Nguyen H, 2017, ADVANCES IN PHYSICS;Lanyon B, 2017, NATURE PHYSICS;Chen J, 2018, PHYSICAL REVIEW. B./PHYSICAL REVIEW. B;Liu J, 2017, PHYSICAL REVIEW. B./PHYSICAL REVIEW. B;Schawinski K, 2017, MONTHLY NOTICES OF THE ROYAL ASTRONOMICAL SOCIETY LETTERS;Huang L, 2017, PHYSICAL REVIEW. B./PHYSICAL REVIEW. B;Cai Z, 2018, PHYSICAL REVIEW. B./PHYSICAL REVIEW. 
B;Koch-Janusz M, 2018, NATURE PHYSICS;Torlai G, 2017, PHYSICAL REVIEW LETTERS;Lanusse F, 2017, MONTHLY NOTICES OF THE ROYAL ASTRONOMICAL SOCIETY;Arunachalam S, 2017, ACM SIGACT NEWS;Schindler F, 2017, PHYSICAL REVIEW. B./PHYSICAL REVIEW. B;Lelarge M, 2018, PROBABILITY THEORY AND RELATED FIELDS;Shimmin C, 2017, PHYSICAL REVIEW. D/PHYSICAL REVIEW. D.;Tubiana J, 2017, PHYSICAL REVIEW LETTERS;Zhang Y, 2017, PHYSICAL REVIEW. B./PHYSICAL REVIEW. B;Acciarri R, 2017, JOURNAL OF INSTRUMENTATION;Varsamopoulos S, 2017, QUANTUM SCIENCE AND TECHNOLOGY;Ballard A, 2017, PHYSICAL CHEMISTRY CHEMICAL PHYSICS;Coja‐Oghlan A, 2018, ADVANCES IN MATHEMATICS;Bradde S, 2017, JOURNAL OF STATISTICAL PHYSICS;Liu J, 2017, PHYSICAL REVIEW. B./PHYSICAL REVIEW. B;Ohtsuki T, 2017, JOURNAL OF THE PHYSICAL SOCIETY OF JAPAN;Nagai Y, 2017, PHYSICAL REVIEW. B./PHYSICAL REVIEW. B;Barra A, 2018, PHYSICAL REVIEW. E;Huang Y, 2021, PHYSICAL REVIEW LETTERS;Ilten P, 2017, JOURNAL OF INSTRUMENTATION;Miller R, 2016, FARADAY DISCUSSIONS;Bellshaw D, 2017, CHEMICAL PHYSICS LETTERS;Tramel E, 2018, PHYSICAL REVIEW X;, 2015, ;Shwartz-Ziv R, 2017, ARXIV (CORNELL UNIVERSITY);Law J, 2001, ACM SIGSOFT SOFTWARE ENGINEERING NOTES;Shen Y, 2017, NATURE PHOTONICS;Zhang L, 2018, PHYSICAL REVIEW LETTERS;Faber F, 2017, JOURNAL OF CHEMICAL THEORY AND COMPUTATION;Pathak J, 2017, CHAOS AN INTERDISCIPLINARY JOURNAL OF NONLINEAR SCIENCE;Becca F, 2017, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Ciliberto C, 2018, PROCEEDINGS OF THE ROYAL SOCIETY A MATHEMATICAL PHYSICAL AND ENGINEERING SCIENCES;Mardt A, 2017, NATURE COMMUNICATIONS;Wehmeyer C, 2018, THE JOURNAL OF CHEMICAL PHYSICS;Hu W, 2017, PHYSICAL REVIEW. E;Nomura Y, 2017, PHYSICAL REVIEW. B./PHYSICAL REVIEW. 
B;Melnikov A, 2018, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Lubbers N, 2018, THE JOURNAL OF CHEMICAL PHYSICS;Metodiev E, 2017, JOURNAL OF HIGH ENERGY PHYSICS;Bukov M, 2018, PHYSICAL REVIEW X;Zhang Y, 2017, PHYSICAL REVIEW LETTERS;Zhang P, 2018, PHYSICAL REVIEW LETTERS;Hezaveh Y, 2017, NATURE;Han Z, 2018, PHYSICAL REVIEW X;Glasser I, 2018, PHYSICAL REVIEW X;Beach M, 2018, PHYSICAL REVIEW. B./PHYSICAL REVIEW. B;Haah J, 2017, IEEE TRANSACTIONS ON INFORMATION THEORY;Bereau T, 2018, THE JOURNAL OF CHEMICAL PHYSICS;Venderley J, 2018, PHYSICAL REVIEW LETTERS;Schneider E, 2017, PHYSICAL REVIEW LETTERS;Advani M, 2020, NEURAL NETWORKS;Wang C, 2017, PHYSICAL REVIEW. B./PHYSICAL REVIEW. B;Baireuther P, 2018, QUANTUM;Saito H, 2017, JOURNAL OF THE PHYSICAL SOCIETY OF JAPAN;Carifio J, 2017, JOURNAL OF HIGH ENERGY PHYSICS;Saito H, 2017, JOURNAL OF THE PHYSICAL SOCIETY OF JAPAN;Benedetti M, 2017, PHYSICAL REVIEW X;Krastanov S, 2017, SCIENTIFIC REPORTS;Kaubruegger R, 2018, PHYSICAL REVIEW. B./PHYSICAL REVIEW. B;Martiniani S, 2019, PHYSICAL REVIEW X;Clark S, 2018, JOURNAL OF PHYSICS A MATHEMATICAL AND THEORETICAL;Rocchetto A, 2018, NPJ QUANTUM INFORMATION;Schmitt M, 2018, SCIPOST PHYSICS;Arsenault L, 2017, INVERSE PROBLEMS;Teng P, 2018, PHYSICAL REVIEW. 
E;Barbier J, 2017, ;Cocco S, 2017, PHYSICA A STATISTICAL MECHANICS AND ITS APPLICATIONS;Apollinari G, 2015, ARXIV (CORNELL UNIVERSITY);Putzky P, 2017, ARXIV (CORNELL UNIVERSITY);Broecker P, 2017, ARXIV (CORNELL UNIVERSITY);Louppe G, 2017, ARXIV (CORNELL UNIVERSITY);Cristoforetti M, 2017, ARXIV (CORNELL UNIVERSITY);Frate M, 2017, ARXIV (CORNELL UNIVERSITY);Pang L, 2016, ARXIV (CORNELL UNIVERSITY);Izmailov P, 2017, ARXIV (CORNELL UNIVERSITY);Schütt K, 2018, THE JOURNAL OF CHEMICAL PHYSICS;Schuld M, 2019, PHYSICAL REVIEW LETTERS;Pathak J, 2018, PHYSICAL REVIEW LETTERS;Dunjko V, 2018, REPORTS ON PROGRESS IN PHYSICS;Smith J, 2018, THE JOURNAL OF CHEMICAL PHYSICS;Chmiela S, 2018, NATURE COMMUNICATIONS;Alet F, 2018, COMPTES RENDUS PHYSIQUE;Yao K, 2018, CHEMICAL SCIENCE;Deng D, 2017, PHYSICAL REVIEW. B./PHYSICAL REVIEW. B;Paganini M, 2018, PHYSICAL REVIEW LETTERS;Deringer V, 2018, THE JOURNAL OF PHYSICAL CHEMISTRY LETTERS;Kamath A, 2018, THE JOURNAL OF CHEMICAL PHYSICS;Fösel T, 2018, PHYSICAL REVIEW X;Pang L, 2018, NATURE COMMUNICATIONS;Nguyen T, 2018, THE JOURNAL OF CHEMICAL PHYSICS;Alsing J, 2018, MONTHLY NOTICES OF THE ROYAL ASTRONOMICAL SOCIETY;Li S, 2018, PHYSICAL REVIEW LETTERS;Carleo G, 2018, NATURE COMMUNICATIONS;Torlai G, 2018, PHYSICAL REVIEW LETTERS;Komiske P, 2018, JOURNAL OF HIGH ENERGY PHYSICS;Chung S, 2018, PHYSICAL REVIEW X;Charnock T, 2018, PHYSICAL REVIEW. D/PHYSICAL REVIEW. D.;Kalantre S, 2019, NPJ QUANTUM INFORMATION;Hashimoto K, 2018, PHYSICAL REVIEW. D/PHYSICAL REVIEW. D.;Stoudenmire E, 2018, QUANTUM SCIENCE AND TECHNOLOGY;Sosso G, 2018, MOLECULAR SIMULATION;Czischek S, 2018, PHYSICAL REVIEW. B./PHYSICAL REVIEW. B;Maskara N, 2019, PHYSICAL REVIEW. A/PHYSICAL REVIEW, A;Sidky H, 2018, THE JOURNAL OF CHEMICAL PHYSICS;Rodríguez A, 2018, COMPUTATIONAL ASTROPHYSICS AND COSMOLOGY;Nagai R, 2018, THE JOURNAL OF CHEMICAL PHYSICS;Shanahan P, 2018, PHYSICAL REVIEW. D/PHYSICAL REVIEW. D.;Nieuwenburg E, 2018, PHYSICAL REVIEW. B./PHYSICAL REVIEW. 
B;Iakovlev I, 2018, PHYSICAL REVIEW. B./PHYSICAL REVIEW. B;Chen C, 2018, PHYSICAL REVIEW. B./PHYSICAL REVIEW. B;Decelle A, 2017, EUROPHYSICS LETTERS (EPL);Wang C, 2018, FRONTIERS OF PHYSICS;Agresti I, 2019, PHYSICAL REVIEW X;Anelli A, 2018, PHYSICAL REVIEW MATERIALS;Tubiana J, 2019, ELIFE;Schmidt E, 2018, COMPUTATIONAL MATERIALS SCIENCE;Charnock T, 2018, ARXIV (CORNELL UNIVERSITY);Aaronson S, 2018, SIAM JOURNAL ON COMPUTING;Lee J, 2017, ARXIV (CORNELL UNIVERSITY);Torlai G, 2017, ARXIV (CORNELL UNIVERSITY);Kondor R, 2018, ARXIV (CORNELL UNIVERSITY);Tanaka A, 2017, ARXIV (CORNELL UNIVERSITY);Casado M, 2017, ARXIV (CORNELL UNIVERSITY);Regier J, 2018, ARXIV (CORNELL UNIVERSITY);Jain A, 2018, ARXIV (CORNELL UNIVERSITY);Butler K, 2018, NATURE;Lin X, 2018, SCIENCE;Havlíček V, 2019, NATURE;Ambrogio S, 2018, NATURE;Schuld M, 2018, QUANTUM SCIENCE AND TECHNOLOGY;Radovic A, 2018, NATURE;Bartók A, 2018, PHYSICAL REVIEW X;Duarte J, 2018, JOURNAL OF INSTRUMENTATION;Guest D, 2018, ANNUAL REVIEW OF NUCLEAR AND PARTICLE SCIENCE;Paruzzo F, 2018, NATURE COMMUNICATIONS;Choo K, 2018, PHYSICAL REVIEW LETTERS;Albertsson K, 2018, JOURNAL OF PHYSICS CONFERENCE SERIES;Wu D, 2019, PHYSICAL REVIEW LETTERS;Reddy G, 2018, NATURE;Brehmer J, 2018, PHYSICAL REVIEW LETTERS;Brehmer J, 2018, PHYSICAL REVIEW. D/PHYSICAL REVIEW. D.;Gabrié M, 2019, JOURNAL OF STATISTICAL MECHANICS THEORY AND EXPERIMENT;Sifain A, 2018, THE JOURNAL OF PHYSICAL CHEMISTRY LETTERS;Huembeli P, 2018, PHYSICAL REVIEW. B./PHYSICAL REVIEW. B;Komiske P, 2018, PHYSICAL REVIEW. D/PHYSICAL REVIEW. D.;Liang X, 2018, PHYSICAL REVIEW. B./PHYSICAL REVIEW. B;Rupp M, 2018, THE JOURNAL OF CHEMICAL PHYSICS;Fournier R, 2020, PHYSICAL REVIEW LETTERS;Hsu Y, 2018, PHYSICAL REVIEW LETTERS;Engel E, 2018, NATURE COMMUNICATIONS;Eickenberg M, 2018, THE JOURNAL OF CHEMICAL PHYSICS;Sun N, 2018, PHYSICAL REVIEW. B./PHYSICAL REVIEW. B;Greitemann J, 2019, PHYSICAL REVIEW. B./PHYSICAL REVIEW. B;Yoon H, 2018, PHYSICAL REVIEW. B./PHYSICAL REVIEW. 
B;Bukov M, 2018, PHYSICAL REVIEW. B./PHYSICAL REVIEW. B;Hashimoto K, 2018, PHYSICAL REVIEW. D/PHYSICAL REVIEW. D.;Lu S, 2019, PHYSICAL REVIEW. B./PHYSICAL REVIEW. B;Huembeli P, 2019, PHYSICAL REVIEW. B./PHYSICAL REVIEW. B;Guo C, 2018, PHYSICAL REVIEW. E;Banchi L, 2018, NEW JOURNAL OF PHYSICS;Seif A, 2018, JOURNAL OF PHYSICS B ATOMIC MOLECULAR AND OPTICAL PHYSICS;Zhang W, 2019, PHYSICAL REVIEW. B./PHYSICAL REVIEW. B;Saito H, 2018, JOURNAL OF THE PHYSICAL SOCIETY OF JAPAN;Leistedt B, 2019, THE ASTROPHYSICAL JOURNAL;Liu Y, 2018, ARXIV (CORNELL UNIVERSITY);Steinbrecher G, 2019, NPJ QUANTUM INFORMATION;Papamakarios G, 2018, ARXIV (CORNELL UNIVERSITY);Jónsson B, 2018, ARXIV (CORNELL UNIVERSITY);Song M, 2018, ARXIV (CORNELL UNIVERSITY);Kondor R, 2018, ARXIV (CORNELL UNIVERSITY);Glasser I, 2018, ARXIV (CORNELL UNIVERSITY);Morningstar W, 2018, ARXIV (CORNELL UNIVERSITY);Wang C, 2018, ARXIV (CORNELL UNIVERSITY);Golkar S, 2018, ARXIV (CORNELL UNIVERSITY);LeCun Y, 2015, NATURE;Cong I, 2019, NATURE PHYSICS;Zhang L, 2019, PHYSICAL REVIEW MATERIALS;Carrasquilla J, 2019, NATURE MACHINE INTELLIGENCE;Hartmann M, 2019, PHYSICAL REVIEW LETTERS;Sharir O, 2020, PHYSICAL REVIEW LETTERS;Rem B, 2019, NATURE PHYSICS;Nagy A, 2019, PHYSICAL REVIEW LETTERS;Vicentini F, 2019, PHYSICAL REVIEW LETTERS;Komiske P, 2019, JOURNAL OF HIGH ENERGY PHYSICS;Albergo M, 2019, PHYSICAL REVIEW. D/PHYSICAL REVIEW. D.;He S, 2019, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Levine Y, 2019, PHYSICAL REVIEW LETTERS;Barbier J, 2019, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Doggen E, 2018, PHYSICAL REVIEW. B./PHYSICAL REVIEW. B;Yoshioka N, 2019, PHYSICAL REVIEW. B./PHYSICAL REVIEW. B;Luo D, 2019, PHYSICAL REVIEW LETTERS;Zhang Y, 2019, NATURE;Torlai G, 2019, PHYSICAL REVIEW LETTERS;Gray J, 2018, PHYSICAL REVIEW LETTERS;Cheng S, 2019, PHYSICAL REVIEW. B./PHYSICAL REVIEW. B;Alsing J, 2019, MONTHLY NOTICES OF THE ROYAL ASTRONOMICAL SOCIETY;Salamani D, 2018, ;Peel A, 2019, PHYSICAL REVIEW. D/PHYSICAL REVIEW. 
D.;Morningstar W, 2019, THE ASTROPHYSICAL JOURNAL;Liu K, 2019, PHYSICAL REVIEW. B./PHYSICAL REVIEW. B;Mills K, 2019, CHEMICAL SCIENCE;Armitage T, 2019, MONTHLY NOTICES OF THE ROYAL ASTRONOMICAL SOCIETY;Stokes J, 2019, ENTROPY;Kashiwa K, 2019, PROGRESS OF THEORETICAL AND EXPERIMENTAL PHYSICS;Borin A, 2020, PHYSICAL REVIEW. B./PHYSICAL REVIEW. B;Tsaris A, 2018, JOURNAL OF PHYSICS CONFERENCE SERIES;Zheng Y, 2019, PHYSICAL REVIEW. B./PHYSICAL REVIEW. B;Quek Y, 2021, NPJ QUANTUM INFORMATION;Cohen T, 2019, ARXIV (CORNELL UNIVERSITY);, 2016, ;Farrell S, 2018, ARXIV (CORNELL UNIVERSITY);Zhang X, 2019, ARXIV (CORNELL UNIVERSITY);Ntampaka M, 2019, ARXIV (CORNELL UNIVERSITY);Kochkov D, 2018, ARXIV (CORNELL UNIVERSITY);Albertsson K, 2018, ARXIV (CORNELL UNIVERSITY);Xu Q, 2018, ARXIV (CORNELL UNIVERSITY);Cranmer K, 2019, ARXIV (CORNELL UNIVERSITY);Choma N, 2018, ARXIV (CORNELL UNIVERSITY);Bozson A, 2018, ARXIV (CORNELL UNIVERSITY);, 2002, THE MIT PRESS EBOOKS;Yu B, 2018, FRONTIERS OF INFORMATION TECHNOLOGY & ELECTRONIC ENGINEERING;Vidal G, 2007, PHYSICAL REVIEW LETTERS;Jaroslav Ř, 2004, LECTURE NOTES IN PHYSICS;Noé F, 2019, SCIENCE;Han J, 2019, JOURNAL OF COMPUTATIONAL PHYSICS;Yang J, 2020, SCIENCE;Brehmer J, 2020, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Louppe G, 2017, ORBI (UNIVERSITY OF LIÈGE);Stankus B, 2019, NATURE CHEMISTRY;Yong H, 2020, NATURE COMMUNICATIONS;Lang D, 2016, ASTROPHYSICS SOURCE CODE LIBRARY;Ronhovde P, 2011, THE EUROPEAN PHYSICAL JOURNAL E;Rocchetto A, 2019, PUBMED;Fabiani G, 2019, SCIPOST PHYSICS;Morningstar A, 2018, ARXIV (CORNELL UNIVERSITY);Schoenholz S, 2019, ARXIV (CORNELL UNIVERSITY);Cranmer K, 2016, ZENODO (CERN EUROPEAN ORGANIZATION FOR NUCLEAR RESEARCH);Mardt A, 2018, REPEC: RESEARCH PAPERS IN ECONOMICS;, , PORTSMOUTH RESEARCH PORTAL (UNIVERSITY OF PORTSMOUTH);A. 
C, 2003, ;Gao X, , REPEC: RESEARCH PAPERS IN ECONOMICS;Ritzmann U, 2018, ARXIV (CORNELL UNIVERSITY);Zhang Y, , REPEC: RESEARCH PAPERS IN ECONOMICS;Justin S, 2013, DSPACE@MIT (MASSACHUSETTS INSTITUTE OF TECHNOLOGY);Aubin B, 2018, HAL (LE CENTRE POUR LA COMMUNICATION SCIENTIFIQUE DIRECTE);Hermans J, 2019, ARXIV (CORNELL UNIVERSITY);Lanusse F, 2019, ARXIV (CORNELL UNIVERSITY);Hofmann T, 2008, ;F B, 2017, MPG.PURE (MAX PLANCK SOCIETY);I. G, 2018, MAX PLANCK DIGITAL LIBRARY;T. F, 2018, MPG.PURE (MAX PLANCK SOCIETY);Nguyen T, 2018, OPEN MIND;Robin A, 2014, SPRINGER LINK (CHIBA INSTITUTE OF TECHNOLOGY);Ntampaka M, 2019, DSPACE@MIT (MASSACHUSETTS INSTITUTE OF TECHNOLOGY);Rocchetto A, 2018, UCL DISCOVERY (UNIVERSITY COLLEGE LONDON);F. M, 2009, MPG.PURE (MAX PLANCK SOCIETY);Sarao M, 2020, HAL (LE CENTRE POUR LA COMMUNICATION SCIENTIFIQUE DIRECTE);vloncar, 2021, ZENODO (CERN EUROPEAN ORGANIZATION FOR NUCLEAR RESEARCH);, , ANET (UNIVERSITY OF ANTWERP);Komiske P, 2019, DSPACE@MIT (MASSACHUSETTS INSTITUTE OF TECHNOLOGY);Komiske P, 2018, OSTI OAI (U.S. DEPARTMENT OF ENERGY OFFICE OF SCIENTIFIC AND TECHNICAL INFORMATION);Donoho D, 2006, IEEE TRANSACTIONS ON INFORMATION THEORY;, 1996, COMPUTERS & MATHEMATICS WITH APPLICATIONS;Valiant L, 1984, COMMUNICATIONS OF THE ACM;Ziegel E, 1989, TECHNOMETRICS;Frenkelʹ I, 1934, CLARENDON PRESS EBOOKS;Wu Z, 2015, ;Dean D, 1996, ;Carleo G, 2018, REPEC: RESEARCH PAPERS IN ECONOMICS;Rodríguez A, 2018, REPOSITORY FOR PUBLICATIONS AND RESEARCH DATA (ETH ZURICH);Wecker D, 2016, PHYSICAL REVIEW. A/PHYSICAL REVIEW, A;L B, 2018, UCL DISCOVERY (UNIVERSITY COLLEGE LONDON);E F, 2002, ;M. S, 2018, MAX PLANCK DIGITAL LIBRARY;Tiersch M, 2015, SCIENTIFIC REPORTS;, 2005, IEEE TRANSACTIONS ON INFORMATION THEORY;, 1992, ;Nathan K, 2018, DSPACE@MIT (MASSACHUSETTS INSTITUTE OF TECHNOLOGY);Oliveira L, 2022, OSTI OAI (U.S. 
DEPARTMENT OF ENERGY OFFICE OF SCIENTIFIC AND TECHNICAL INFORMATION);, 2013, PROBABILITY THEORY AND RELATED FIELDS;, 2018, ;Ilten P, 2015, CERN DOCUMENT SERVER (EUROPEAN ORGANIZATION FOR NUCLEAR RESEARCH);, 1990, ;, 2017, ;, 2008, ADVANCES IN OPTICAL TECHNOLOGIES;Mannelli S, 2019, ARXIV (CORNELL UNIVERSITY);, 2014, ;Kagan M, 2016, OPEN REPOSITORY AND BIBLIOGRAPHY (UNIVERSITY OF LIÈGE);Kondor R, 2018, ARXIV (CORNELL UNIVERSITY);Cohen T, 2018, ARXIV (CORNELL UNIVERSITY);Louppe G, 2020, OPEN REPOSITORY AND BIBLIOGRAPHY (UNIVERSITY OF LIÈGE);Kondor R, 2018, ARXIV (CORNELL UNIVERSITY);, 2018, ;Levine Y, 2017, ARXIV (CORNELL UNIVERSITY);Frate M, 2017, ARXIV (CORNELL UNIVERSITY);Glasser I, 2018, ARXIV (CORNELL UNIVERSITY);Baydin A, 2018, ARXIV (CORNELL UNIVERSITY);Goldt S, 2019, ARXIV (CORNELL UNIVERSITY)",,,OPENALEX,"Carleo G, 2019, REVIEWS OF MODERN PHYSICS","Carleo G, 2019, REVIEWS OF MODERN PHYSICS" +https://openalex.org/W2525984666,10.1056/nejmp1606181,"Predicting the Future — Big Data, Machine Learning, and Clinical Medicine",2016,en,article,3489,NEW ENGLAND JOURNAL OF MEDICINE,New England Journal of Medicine,Ziad Obermeyer;Ezekiel Emanuel,Ziad Obermeyer;Ezekiel J. 
Emanuel,"From the Department of Emergency Medicine, Harvard Medical School and Brigham and Women's Hospital, and the Department of Health Care Policy, Harvard Medical School, Boston (Z.O.) and the Department of Medical Ethics and Health Policy, Perelman School of Medicine, and the Department of Health Care Management, Wharton School, University of Pennsylvania, Philadelphia (E.J.E.);Department of Health Care Policy, Harvard Medical School, Boston, and the Department of Medical Ethics and Health Policy, Perelman School of Medicine;From the Department of Emergency Medicine, Harvard Medical School and Brigham and Women's Hospital, and the Department of Health Care Policy, Harvard Medical School, Boston (Z.O.) and the Department of Medical Ethics and Health Policy, Perelman School of Medicine, and the Department of Health Care Management, Wharton School, University of Pennsylvania, Philadelphia (E.J.E.);Department of Health Care Management, Wharton School, University of Pennsylvania, Philadelphia",,"The algorithms of machine learning, which can sift through vast numbers of variables looking for combinations that reliably predict outcomes, will improve prognosis, displace much of the work of radiologists and anatomical pathologists, and improve diagnostic accuracy.",375,13,1216,1219,Medicine;Machine learning;Artificial intelligence;Big data;Precision medicine;Data science;Scale-invariant feature transform;MEDLINE;Medical physics;Computer science;Pathology;Data mining;Feature extraction,US;TW,"Mullainathan S, 2017, THE JOURNAL OF ECONOMIC PERSPECTIVES;Halevy A, 2009, IEEE INTELLIGENT SYSTEMS;Bouton C, 2016, NATURE;Kleinberg J, 2015, AMERICAN ECONOMIC REVIEW;Gilbert F, 2008, NEW ENGLAND JOURNAL OF MEDICINE",,,OPENALEX,"Obermeyer Z, 2016, NEW ENGLAND JOURNAL OF MEDICINE","Obermeyer Z, 2016, NEW ENGLAND JOURNAL OF MEDICINE" +https://openalex.org/W2603766943,10.1145/3052973.3053009,Practical Black-Box Attacks against Machine Learning,2017,en,article,3482,,,Nicolas 
Papernot;Patrick McDaniel;Ian Goodfellow;Somesh Jha;Z. Berkay Celik;Ananthram Swami,Nicolas Papernot;Patrick McDaniel;Ian Goodfellow;Somesh Jha;Z. Berkay Celik;Ananthram Swami,"Pennsylvania State University, University Park, PA, USA;Pennsylvania State University, University Park, PA, USA;OpenAI, San Francisco, CA, USA;University of Wisconsin, Madison, WI, USA;Pennsylvania State University, University Park, PA, USA;US Army Research Laboratory, Adelphi, MD, USA",,"Machine learning (ML) models, e.g., deep neural networks (DNNs), are vulnerable to adversarial examples: malicious inputs modified to yield erroneous model outputs, while appearing unmodified to human observers. Potential attacks include having malicious content like malware identified as legitimate or controlling vehicle behavior. Yet, all existing adversarial example attacks require knowledge of either the model internals or its training data. We introduce the first practical demonstration of an attacker controlling a remotely hosted DNN with no such knowledge. Indeed, the only capability of our black-box adversary is to observe labels given by the DNN to chosen inputs. Our attack strategy consists in training a local model to substitute for the target DNN, using inputs synthetically generated by an adversary and labeled by the target DNN. We use the local substitute to craft adversarial examples, and find that they are misclassified by the targeted DNN. To perform a real-world and properly-blinded evaluation, we attack a DNN hosted by MetaMind, an online deep learning API. We find that their DNN misclassifies 84.24% of the adversarial examples crafted with our substitute. We demonstrate the general applicability of our strategy to many ML techniques by conducting the same attack against models hosted by Amazon and Google, using logistic regression substitutes. They yield adversarial examples misclassified by Amazon and Google at rates of 96.19% and 88.94%. 
We also find that this black-box attack strategy is capable of evading defense strategies previously found to make adversarial example crafting harder.",,,506,519,Adversarial system;Computer science;Adversary;Black box;Malware;Deep neural networks;Artificial intelligence;Deep learning;Machine learning;Artificial neural network;Adversarial machine learning;Threat model;Computer security,US,"Papernot N, 2016, ;Vitter J, 1985, ACM TRANSACTIONS ON MATHEMATICAL SOFTWARE;Biggio B, 2013, ;Sharif M, 2016, ;Stallkamp J, 2012, NEURAL NETWORKS;Huang L, 2011, ;Barreno M, 2006, ;Šrndić N, 2014, ;Papernot N, 2016, ;Xu W, 2016, ;, 2007, ;Rashidi Y, 2015, ;, 2014, THE MIT PRESS EBOOKS;Calders T, 2014, LECTURE NOTES IN COMPUTER SCIENCE;, 2021, WILEY SERIES IN PROBABILITY AND STATISTICS;Tramèr F, 2016, ARXIV (CORNELL UNIVERSITY)",,,OPENALEX,"Papernot N, 2017, ","Papernot N, 2017, " +https://openalex.org/W1563088657,10.1017/cbo9780511801389,An Introduction to Support Vector Machines and Other Kernel-based Learning Methods,2000,en,book,13880,CAMBRIDGE UNIVERSITY PRESS EBOOKS,Cambridge University Press eBooks,Nello Cristianini;John Shawe‐Taylor,Nello Cristianini;John Shawe-Taylor,"University of Bristol;Royal Holloway, University of London;†Royal Holloway, University of London",,"This is the first comprehensive introduction to Support Vector Machines (SVMs), a generation learning system based on recent advances in statistical learning theory. SVMs deliver state-of-the-art performance in real-world applications such as text categorisation, hand-written character recognition, image classification, biosequences analysis, etc., and are now established as one of the standard tools for machine learning and data mining. Students will find the book both stimulating and accessible, while practitioners will be guided smoothly through the material required for a good grasp of the theory and its applications. 
The concepts are introduced gradually in accessible and self-contained stages, while the presentation is rigorous and thorough. Pointers to relevant literature and web sites containing software ensure that it forms an ideal starting point for further study. Equally, the book and its associated web site will guide practitioners to updated literature, new applications, and on-line software.",,,,,Computer science;GRASP;Support vector machine;Point (geometry);Artificial intelligence;Statistical learning theory;Presentation (obstetrics);Machine learning;Software;Kernel (algebra);Data science;Software engineering;Programming language,GB,,,,OPENALEX,"Cristianini N, 2000, CAMBRIDGE UNIVERSITY PRESS EBOOKS","Cristianini N, 2000, CAMBRIDGE UNIVERSITY PRESS EBOOKS" +https://openalex.org/W2157331557,10.3115/v1/d14-1179,Learning Phrase Representations using RNN Encoder–Decoder for Statistical Machine Translation,2014,en,preprint,24415,,,Kyunghyun Cho;Bart van Merriënboer;Çağlar Gülçehre;Dzmitry Bahdanau;Fethi Bougares;Holger Schwenk;Yoshua Bengio,Kyunghyun Cho;Bart van Merrienboer;Caglar Gulcehre;Dzmitry Bahdanau;Fethi Bougares;Holger Schwenk;Yoshua Bengio,Laboratoire d'Informatique de l'Université du Maine;Laboratoire d'Informatique de l'Université du Maine;Laboratoire d'Informatique de l'Université du Maine;AT&T Alcatel-Lucent Ecole Polytechnique de Montreal,,"Kyunghyun Cho, Bart van Merriënboer, Caglar Gulcehre, Dzmitry Bahdanau, Fethi Bougares, Holger Schwenk, Yoshua Bengio. Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP). 
2014.",,,1724,1734,Machine translation;Computer science;Phrase;Natural language processing;Artificial intelligence;Encoder;Translation (biology);Statistical learning;Speech recognition,US;DE;CA,"Hochreiter S, 1997, NEURAL COMPUTATION;Krizhevsky A, 2017, COMMUNICATIONS OF THE ACM;Glorot X, 2012, ;Koehn P, 2003, ;Koehn P, 2005, ;Graves A, 2012, STUDIES IN COMPUTATIONAL INTELLIGENCE;Dahl G, 2011, IEEE TRANSACTIONS ON AUDIO SPEECH AND LANGUAGE PROCESSING;Kalchbrenner N, 2013, ;Socher R, 2011, ;Pascanu R, 2014, INTERNATIONAL CONFERENCE ON LEARNING REPRESENTATIONS;Zou W, 2013, ;Moore R, 2010, ;Schwenk H, 2006, COMPUTER SPEECH & LANGUAGE;Axelrod A, 2011, ;Devlin J, 2014, ;Bengio Y, 2013, ;Marcu D, 2002, ;Saxe A, 2014, INTERNATIONAL CONFERENCE ON LEARNING REPRESENTATIONS;Auli M, 2013, ;Vaswani A, 2013, ;Le H, 2012, HAL (LE CENTRE POUR LA COMMUNICATION SCIENTIFIQUE DIRECTE);Schwenk H, 2014, ;Mikolov T, 2013, ARXIV (CORNELL UNIVERSITY);Zeiler M, 2012, ARXIV (CORNELL UNIVERSITY);Bastien F, 2012, ARXIV (CORNELL UNIVERSITY);Goodfellow I, 2013, ;Schwenk H, 2025, ;P S, 2014, ARXIV (CORNELL UNIVERSITY);Maaten L, 2013, ARXIV (CORNELL UNIVERSITY);Gao J, 2013, ARXIV (CORNELL UNIVERSITY)",,,OPENALEX,"Cho K, 2014, ","Cho K, 2014, " +https://openalex.org/W3200707343,10.1038/s41580-021-00407-0,A guide to machine learning for biologists,2021,en,review,2060,NATURE REVIEWS MOLECULAR CELL BIOLOGY,Nature Reviews Molecular Cell Biology,Joe G. Greener;Shaun M. Kandathil;Lewis Moffat;David T. Jones,Joe G. Greener;Shaun M. Kandathil;Lewis Moffat;David T. Jones,"Department of Computer Science, University College London, London, UK;Department of Computer Science, University College London, London, UK;Department of Computer Science, University College London, London, UK;Department of Computer Science, University College London, London, UK. 
d.t.jones@ucl.ac.uk;Department of Computer Science, University College London, London, UK",,,23,1,40,55,Machine learning;Artificial intelligence;Computer science;Biological data;Deep learning;Artificial neural network;Bioinformatics;Biology,GB,"Tibshirani R, 1996, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Maaten L, 2008, JOURNAL OF MACHINE LEARNING RESEARCH;Zou H, 2005, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Hastie T, 2013, ;Ester M, 1996, ;Esteva A, 2017, NATURE;Kühn M, 2008, JOURNAL OF STATISTICAL SOFTWARE;Jain A, 2009, PATTERN RECOGNITION LETTERS;Kircher M, 2014, NATURE GENETICS;Abadi M, 2016, OPERATING SYSTEMS DESIGN AND IMPLEMENTATION;Noble W, 2006, NATURE BIOTECHNOLOGY;Alipanahi B, 2015, NATURE BIOTECHNOLOGY;Chothia C, 1986, THE EMBO JOURNAL;Libbrecht M, 2015, NATURE REVIEWS GENETICS;Bengio Y, 2012, LECTURE NOTES IN COMPUTER SCIENCE;Qian N, 1988, JOURNAL OF MOLECULAR BIOLOGY;Mayr A, 2016, FRONTIERS IN ENVIRONMENTAL SCIENCE;Pires D, 2014, NUCLEIC ACIDS RESEARCH;Quang D, 2016, NUCLEIC ACIDS RESEARCH;Hopf T, 2017, NATURE BIOTECHNOLOGY;Ben‐Hur A, 2009, METHODS IN MOLECULAR BIOLOGY;Crick F, 1989, NATURE;Ben‐Hur A, 2008, PLOS COMPUTATIONAL BIOLOGY;Moult J, 2013, PROTEINS STRUCTURE FUNCTION AND BIOINFORMATICS;Tarca A, 2007, PLOS COMPUTATIONAL BIOLOGY;Zeng H, 2016, BIOINFORMATICS;Cheng H, 2014, PLOS COMPUTATIONAL BIOLOGY;Nugent T, 2009, BMC BIOINFORMATICS;Zhang Y, 2004, JOURNAL OF COMPUTATIONAL CHEMISTRY;Wang C, 2016, JOURNAL OF COMPUTATIONAL CHEMISTRY;Wei Q, 2013, PLOS ONE;Kircher M, 2014, ;Bao L, 2005, NUCLEIC ACIDS RESEARCH;S. 
S, 2007, ANU OPEN RESEARCH (AUSTRALIAN NATIONAL UNIVERSITY);Cozzetto D, 2016, SCIENTIFIC REPORTS;Chen L, 2016, BMC BIOINFORMATICS;Wang Y, 2016, SCIENTIFIC REPORTS;Li Y, 2017, JOURNAL OF CHEMICAL INFORMATION AND MODELING;List M, 2017, PLOS COMPUTATIONAL BIOLOGY;Walsh I, 2015, BRIEFINGS IN BIOINFORMATICS;Teodoro M, 2003, JOURNAL OF COMPUTATIONAL BIOLOGY;Zhang Z, 2012, BIOINFORMATICS;Söding J, 2011, CURRENT OPINION IN STRUCTURAL BIOLOGY;Kandoi G, 2015, FRONTIERS IN PHYSIOLOGY;deFigueiredo R, 1995, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Haario H, 1998, CHEMOMETRICS AND INTELLIGENT LABORATORY SYSTEMS;Pedregosa F, 2012, ARXIV (CORNELL UNIVERSITY);Konečný J, 2016, ARXIV (CORNELL UNIVERSITY);Kelley D, 2016, GENOME RESEARCH;Shrikumar A, 2017, BIORXIV (COLD SPRING HARBOR LABORATORY);LeCun Y, 2015, NATURE;Steinegger M, 2017, NATURE BIOTECHNOLOGY;Ching T, 2018, JOURNAL OF THE ROYAL SOCIETY INTERFACE;Dührkop K, 2019, NATURE METHODS;Poplin R, 2018, NATURE BIOTECHNOLOGY;Poplin R, 2018, NATURE BIOMEDICAL ENGINEERING;Zech J, 2018, PLOS MEDICINE;Buchan D, 2019, NUCLEIC ACIDS RESEARCH;Žitnik M, 2018, BIOINFORMATICS;Zou J, 2018, NATURE GENETICS;Pandarinath C, 2018, NATURE METHODS;Marblestone A, 2016, ARXIV (CORNELL UNIVERSITY);Rappoport N, 2018, NUCLEIC ACIDS RESEARCH;Heffernan R, 2017, BIOINFORMATICS;Sun T, 2017, BMC BIOINFORMATICS;AlQuraishi M, 2019, CELL SYSTEMS;Beaulieu‐Jones B, 2019, CIRCULATION CARDIOVASCULAR QUALITY AND OUTCOMES;Bzdok D, 2018, NATURE METHODS;Nguyen L, 2019, PLOS COMPUTATIONAL BIOLOGY;Müller A, 2018, JOURNAL OF CHEMICAL INFORMATION AND MODELING;Choi E, 2016, PUBMED;Gligorijević V, 2018, BIOINFORMATICS;Altman N, 2017, NATURE METHODS;Avsec Ž, 2019, NATURE BIOTECHNOLOGY;Sillitoe I, 2018, NUCLEIC ACIDS RESEARCH;AlQuraishi M, 2019, BMC BIOINFORMATICS;Chen K, 2019, NATURE METHODS;Greener J, 2018, SCIENTIFIC REPORTS;Silva J, 2019, PLANT SCIENCE;Pagès G, 2019, BIOINFORMATICS;Zeng W, 2018, BMC GENOMICS;Ingraham J, 2018, INTERNATIONAL CONFERENCE ON LEARNING 
REPRESENTATIONS;Hie B, 2018, SCIENCE;Veselkov K, 2019, SCIENTIFIC REPORTS;Antczak M, 2019, NATURE COMMUNICATIONS;Pérez A, 2018, CURRENT OPINION IN STRUCTURAL BIOLOGY;Jumper J, 2018, PLOS COMPUTATIONAL BIOLOGY;Paszke A, 2019, ARXIV (CORNELL UNIVERSITY);Abadi M, 2016, ARXIV (CORNELL UNIVERSITY);Gal Y, 2015, ARXIV (CORNELL UNIVERSITY);Fey M, 2019, ARXIV (CORNELL UNIVERSITY);Adebayo J, 2018, ARXIV (CORNELL UNIVERSITY);Pandarinath C, 2017, BIORXIV (COLD SPRING HARBOR LABORATORY);Beaulieu‐Jones B, 2017, BIORXIV (COLD SPRING HARBOR LABORATORY);Steinegger M, 2019, BIORXIV (COLD SPRING HARBOR LABORATORY);Rao R, 2019, PUBMED;Zhou N, 2019, BIORXIV (COLD SPRING HARBOR LABORATORY);Innes M, 2019, ARXIV (CORNELL UNIVERSITY);Isensee F, 2020, NATURE METHODS;Senior A, 2020, NATURE;Stokes J, 2020, CELL;Geirhos R, 2020, NATURE MACHINE INTELLIGENCE;Yang J, 2020, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Zhavoronkov A, 2019, NATURE BIOTECHNOLOGY;Tegunov D, 2019, NATURE METHODS;Steinegger M, 2019, BMC BIOINFORMATICS;Alley E, 2019, NATURE METHODS;Moon K, 2019, NATURE BIOTECHNOLOGY;Kobak D, 2019, NATURE COMMUNICATIONS;Gaínza P, 2019, NATURE METHODS;Noé F, 2019, SCIENCE;Myszczynska M, 2020, NATURE REVIEWS NEUROLOGY;Schmauch B, 2020, NATURE COMMUNICATIONS;Zhou N, 2019, GENOME BIOLOGY;Das P, 2021, NATURE BIOMEDICAL ENGINEERING;Liebal U, 2020, METABOLITES;Li W, 2019, FRONTIERS IN GENETICS;Fudenberg G, 2020, NATURE METHODS;Yuan Y, 2019, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Strokach A, 2020, CELL SYSTEMS;Livesey B, 2020, MOLECULAR SYSTEMS BIOLOGY;Jones D, 2019, NATURE REVIEWS MOLECULAR CELL BIOLOGY;Linder J, 2020, CELL SYSTEMS;Kantz E, 2019, ANALYTICAL CHEMISTRY;Si D, 2020, SCIENTIFIC REPORTS;Driscoll M, 2019, NATURE METHODS;Lopez R, 2020, MOLECULAR SYSTEMS BIOLOGY;Smith A, 2020, BMC BIOINFORMATICS;Gligorijević V, 2021, ;Kopp W, 2020, NATURE COMMUNICATIONS;Blaom A, 2020, THE JOURNAL OF OPEN SOURCE SOFTWARE;Schreiber J, 2020, GENOME BIOLOGY;Yao R, 2019, BIOINFORMATICS;Schoenholz 
S, 2019, ARXIV (CORNELL UNIVERSITY);Elnaggar A, 2020, BIORXIV (COLD SPRING HARBOR LABORATORY);Hopf T, 2015, ARXIV (CORNELL UNIVERSITY);Yang J, 2019, BIORXIV (COLD SPRING HARBOR LABORATORY);Anishchenko I, 2020, BIORXIV (COLD SPRING HARBOR LABORATORY);Hiranuma N, 2020, BIORXIV (COLD SPRING HARBOR LABORATORY);Gligorijevic V, 2019, BIORXIV (COLD SPRING HARBOR LABORATORY);Wang Y, 2020, ARXIV (CORNELL UNIVERSITY);Jumper J, 2021, NATURE;Roberts M, 2020, RESEARCH EXPLORER (THE UNIVERSITY OF MANCHESTER);Roberts M, 2021, NATURE MACHINE INTELLIGENCE;Gligorijević V, 2021, NATURE COMMUNICATIONS;Zhong E, 2021, NATURE METHODS;Kryshtafovych A, 2019, PROTEINS STRUCTURE FUNCTION AND BIOINFORMATICS;Wang J, 2021, NATURE COMMUNICATIONS;Stokes J, 2020, CELL;Hiranuma N, 2021, NATURE COMMUNICATIONS;Xu J, 2021, NATURE MACHINE INTELLIGENCE;Munro D, 2020, BIOINFORMATICS;Rappoport N, 2018, NUCLEIC ACIDS RESEARCH",,,OPENALEX,"Greener J, 2021, NATURE REVIEWS MOLECULAR CELL BIOLOGY","Greener J, 2021, NATURE REVIEWS MOLECULAR CELL BIOLOGY" +https://openalex.org/W2914584698,,Proceedings of the 23rd international conference on Machine learning,2006,en,article,2592,,,William W. Cohen;Andrew Moore,William W. Cohen;Andrew Moore,,,"This volume, which is also available from http://www.machinelearning.org, the home page of the International Machine Learning Society, contains the technical papers accepted for presentation at ICML-2006, the 23rd International Conference on Machine Learning. ICML is an international forum for presentation and discussion of the latest results in the field of machine learning. This year, ICML was held at Carnegie Mellon University, in Pittsburgh, Pennsylvania, and was co-located with COLT-2006, the 19th Annual Conference on Computational Learning Theory.Coincidentally, Carnegie Mellon University was also the venue for the first ICML---the First Machine Learning Workshop, which was held in 1980. 
Instead of proceedings, a book was published (Machine Learning: an Artificial Intelligence Approach, ed. Michalski, Carbonell, and Mitchell, Morgan Kaufman, 1983) containing sixteen research papers, and also a comprehensive of the field of machine learning, as it stood in 1983. This bibliography contained 572 entries.In 2006, no less than 548 papers were submitted to ICML---nearly as many as were in the comprehensive published with the papers from the first ICML. These papers were subjected to a thorough review process. In the first round of reviewing, every paper received three reviews by program committee members. Authors were then given an opportunity to view the first-round reviews and respond to them. Led by a Senior Program Committee member, the reviewers then engaged in a discussion of the paper, leading finally to a decision by the Senior Program Committee member in charge of the paper. Papers could be accepted, rejected, or conditionally accepted; the 36 conditionally accepted papers were subject to an additional final round of review by the Senior Program Committee. Of the 548 submissions, 140 were accepted for publication, an acceptance rate of 25.5%.In addition to the technical talks, ICML-2006 also included seven tutorials and eleven workshops, which were held before and after the conference, respectively. Authors presented their papers both orally and in a poster session, allowing time for detailed discussions with any interested attendees of the conference. Each day of the main conference included an invited talk by a prominent researcher. We were very fortunate to be able to host David Haussler, of the University of California at Santa Cruz; Robert Schapire, of Princeton University; and Mandyam V. 
Srinivasan, of the Australian National University.",,,,,Presentation (obstetrics);Artificial intelligence;Library science;Computer science;Medical education;Medicine,,,,,OPENALEX,"Cohen W, 2006, ","Cohen W, 2006, " +https://openalex.org/W2594639291,10.5860/choice.44-5091,Pattern recognition and machine learning,2007,en,article,2688,CHOICE REVIEWS ONLINE,Choice Reviews Online,,,,,,44,09,44,5091,Artificial intelligence;Computer science;Pattern recognition (psychology);Psychology,,,,,OPENALEX,"NA, 2007, CHOICE REVIEWS ONLINE","NA, 2007, CHOICE REVIEWS ONLINE" +https://openalex.org/W3116286104,10.3390/e23010018,Explainable AI: A Review of Machine Learning Interpretability Methods,2020,en,review,2740,ENTROPY,Entropy,Pantelis Linardatos;Vasilis Papastefanopoulos;Sotiris Kotsiantis,Pantelis Linardatos;Vasilis Papastefanopoulos;Sotiris Kotsiantis,"Department of Mathematics, University of Patras, 26504 Patras, Greece;Department of Mathematics, University of Patras, 26504 Patras, Greece;Department of Mathematics, University of Patras, 26504 Patras, Greece","Pantelis Linardatos (corresponding author), Department of Mathematics, University of Patras, 26504 Patras, Greece","Recent advances in artificial intelligence (AI) have led to its widespread industrial adoption, with machine learning systems demonstrating superhuman performance in a significant number of tasks. However, this surge in performance, has often been achieved through increased model complexity, turning such systems into ""black box"" approaches and causing uncertainty regarding the way they operate and, ultimately, the way that they come to decisions. This ambiguity has made it problematic for machine learning systems to be adopted in sensitive yet critical domains, where their value could be immense, such as healthcare. 
As a result, scientific interest in the field of Explainable Artificial Intelligence (XAI), a field that is concerned with the development of new methods that explain and interpret machine learning models, has been tremendously reignited over recent years. This study focuses on machine learning interpretability methods; more specifically, a literature review and taxonomy of these methods are presented, as well as links to their programming implementations, in the hope that this survey would serve as a reference point for both theorists and practitioners.",23,1,18,18,Interpretability;Artificial intelligence;Computer science;Machine learning;Ambiguity;Field (mathematics);Implementation;Black box;Management science;Data science;Software engineering;Engineering,GR,"Krizhevsky A, 2017, COMMUNICATIONS OF THE ACM;Chen T, 2016, ;Friedman J, 2001, THE ANNALS OF STATISTICS;Liaw A, 2007, ;Zeiler M, 2014, LECTURE NOTES IN COMPUTER SCIENCE;Ribeiro M, 2016, ;Itti L, 1998, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Zhou B, 2016, ;Jordan M, 2015, SCIENCE;Saltelli A, 2007, ;Sobol I, 2001, MATHEMATICS AND COMPUTERS IN SIMULATION;Moosavi-Dezfooli S, 2016, ;Bach S, 2015, PLOS ONE;Papernot N, 2016, ;Morris M, 1991, TECHNOMETRICS;Safavian S, 1991, IEEE TRANSACTIONS ON SYSTEMS MAN AND CYBERNETICS;Dwork C, 2012, ;Saltelli A, 2009, COMPUTER PHYSICS COMMUNICATIONS;Altmann A, 2010, BIOINFORMATICS;Saltelli A, 2002, COMPUTER PHYSICS COMMUNICATIONS;Campolongo F, 2007, ENVIRONMENTAL MODELLING & SOFTWARE;Feldman M, 2015, ;Goldstein A, 2014, JOURNAL OF COMPUTATIONAL AND GRAPHICAL STATISTICS;Caruana R, 2015, ;Montavon G, 2016, PATTERN RECOGNITION;Zeiler M, 2011, ;Kamiran F, 2011, KNOWLEDGE AND INFORMATION SYSTEMS;Zhao R, 2015, ;Cukier R, 1973, THE JOURNAL OF CHEMICAL PHYSICS;Borgonovo E, 2006, RELIABILITY ENGINEERING & SYSTEM SAFETY;Calders T, 2010, DATA MINING AND KNOWLEDGE DISCOVERY;Kamishima T, 2012, LECTURE NOTES IN COMPUTER SCIENCE;Lou Y, 2013, ;Calders T, 2009, ;Sobol I, 
2009, MATHEMATICS AND COMPUTERS IN SIMULATION;Plischke E, 2012, EUROPEAN JOURNAL OF OPERATIONAL RESEARCH;Binder A, 2016, LECTURE NOTES IN COMPUTER SCIENCE;Kamiran F, 2009, ;Ustun B, 2015, MACHINE LEARNING;Tarantola S, 2005, RELIABILITY ENGINEERING & SYSTEM SAFETY;Kamiran F, 2012, ;Leí T, 2016, ;Plischke E, 2009, RELIABILITY ENGINEERING & SYSTEM SAFETY;Dua S, 2013, INTELLIGENT SYSTEMS REFERENCE LIBRARY;Tissot J, 2012, RELIABILITY ENGINEERING & SYSTEM SAFETY;Goodfellow I, 2014, ARXIV (CORNELL UNIVERSITY);Springenberg J, 2014, ARXIV (CORNELL UNIVERSITY);Yosinski J, 2015, ARXIV (CORNELL UNIVERSITY);Bolukbasi T, 2016, ARXIV (CORNELL UNIVERSITY);Joseph M, 2016, ARXIV (CORNELL UNIVERSITY);LeCun Y, 2015, NATURE;Adadi A, 2018, IEEE ACCESS;Esteva A, 2018, NATURE MEDICINE;Papernot N, 2017, ;Chattopadhay A, 2018, ;Dong Y, 2018, ;Moosavi-Dezfooli S, 2017, ;Ribeiro M, 2018, PROCEEDINGS OF THE AAAI CONFERENCE ON ARTIFICIAL INTELLIGENCE;Murdoch W, 2019, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Chen P, 2017, ;Su J, 2019, IEEE TRANSACTIONS ON EVOLUTIONARY COMPUTATION;Sharif M, 2016, ;Weisberg S, 2005, WILEY SERIES IN PROBABILITY AND STATISTICS;Zügner D, 2018, ;Ebrahimi J, 2018, ;Kim B, 2016, NEURAL INFORMATION PROCESSING SYSTEMS;Liang B, 2018, ;Narodytska N, 2017, ;Li J, 2019, ;Gilpin L, 2018, ;Grgić-Hlača N, 2018, PROCEEDINGS OF THE AAAI CONFERENCE ON ARTIFICIAL INTELLIGENCE;Callahan A, 2017, ELSEVIER EBOOKS;Le H, 2017, RESEARCH IN INTERNATIONAL BUSINESS AND FINANCE;Milli S, 2019, ;Hu L, 2019, ;Cissé M, 2017, NEURAL INFORMATION PROCESSING SYSTEMS;Staniak M, 2019, THE R JOURNAL;Kuleshov V, 2018, ;Elzayn H, 2019, ;Samanta S, 2018, LECTURE NOTES IN COMPUTER SCIENCE;Mudrakarta P, 2018, ;Joseph M, 2018, ;Lundberg S, 2017, ARXIV (CORNELL UNIVERSITY);Doshi‐Velez F, 2017, ARXIV (CORNELL UNIVERSITY);Shrikumar A, 2017, ARXIV (CORNELL UNIVERSITY);Koh P, 2017, ARXIV (CORNELL UNIVERSITY);Liu Y, 2016, ARXIV (CORNELL UNIVERSITY);Smilkov D, 2017, ARXIV (CORNELL UNIVERSITY);Kim B, 2017, 
ARXIV (CORNELL UNIVERSITY);Kearns M, 2017, ARXIV (CORNELL UNIVERSITY);Kindermans P, 2017, ARXIV (CORNELL UNIVERSITY);Petsiuk V, 2018, ARXIV (CORNELL UNIVERSITY);Miyato T, 2016, ARXIV (CORNELL UNIVERSITY);Dhurandhar A, 2018, ARXIV (CORNELL UNIVERSITY);Brendel W, 2017, ARXIV (CORNELL UNIVERSITY);Zafar M, 2017, ARXIV (CORNELL UNIVERSITY);Chen J, 2018, ARXIV (CORNELL UNIVERSITY);Schott L, 2018, ARXIV (CORNELL UNIVERSITY);Chen P, 2017, ARXIV (CORNELL UNIVERSITY);Dhurandhar A, 2018, ARXIV (CORNELL UNIVERSITY);Selvaraju R, 2017, ;Arrieta A, 2019, INFORMATION FUSION;Guidotti R, 2019, ISTI OPEN PORTAL;Apley D, 2020, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Jia R, 2017, ;Gunning D, 2019, AI MAGAZINE;Alzantot M, 2018, ;Gao J, 2018, ;Ren S, 2019, ;Chen J, 2020, ;Li L, 2020, ;Zügner D, 2019, ;Feng S, 2018, ;Celis L, 2019, ;Zang Y, 2020, ;Cheng M, 2020, PROCEEDINGS OF THE AAAI CONFERENCE ON ARTIFICIAL INTELLIGENCE;Staniak M, 2018, LINCOLN (UNIVERSITY OF NEBRASKA);Tan S, 2020, ;Zafar M, 2023, ;Wang Y, 2020, JOURNAL OF MACHINE LEARNING RESEARCH;, , VIEW;Kim B, 2017, ARXIV (CORNELL UNIVERSITY);Sundararajan M, 2017, ARXIV (CORNELL UNIVERSITY);Zafar M, 2015, ARXIV (CORNELL UNIVERSITY);Jin D, 2019, ARXIV (CORNELL UNIVERSITY);Yang P, 2018, ARXIV (CORNELL UNIVERSITY);Garreau D, 2020, ARXIV (CORNELL UNIVERSITY);Li Y, 2019, ARXIV (CORNELL UNIVERSITY);Brendel W, 2019, ARXIV (CORNELL UNIVERSITY);Saltelli A, 1999, TECHNOMETRICS;McCullagh P, 2019, ;Hastie T, 1987, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Khandani A, 2010, JOURNAL OF BANKING & FINANCE;Zafar M, 2017, ;Polikar R, 2012, ;1923- S, 1988, CAMBRIDGE UNIVERSITY PRESS EBOOKS",,,OPENALEX,"Linardatos P, 2020, ENTROPY","Linardatos P, 2020, ENTROPY" +https://openalex.org/W2142334564,10.2307/1269742,"Machine Learning, Neural and Statistical Classification",1995,en,article,2192,TECHNOMETRICS,Technometrics,Bill Fulkerson;D. Michie;D. J. Spiegelhalter;C. C. W. Taylor,Bill Fulkerson;D. Michie;D. J. 
Spiegelhalter;C. C. Taylor,"Deere & Company;University of Edinburgh U.K;University of Edinburgh , U.K;Univ. Forvie Site, Cambridge, U.K.#TAB#;University of Leeds U.K",,Survey of previous comparisons and theoretical work descriptions of methods dataset descriptions criteria for comparison and methodology (including validation) empirical results machine learning on machine learning.,37,4,459,459,Machine learning;Artificial intelligence;Computer science;Artificial neural network;Statistical learning,DE;GB,"Weiss S, 1991, ;Murthy S, 1994, ;Farlow S, 1984, MEDICAL ENTOMOLOGY AND ZOOLOGY;Elder J, 1996, ;Mulier F, 1995, NEURAL NETWORKS;Elder J, 2000, INTELLECT BOOKS;Hastie T, 2014, WILEY STATSREF: STATISTICS REFERENCE ONLINE",,,OPENALEX,"Fulkerson B, 1995, TECHNOMETRICS","Fulkerson B, 1995, TECHNOMETRICS" +https://openalex.org/W2962727772,,Automatic differentiation in machine learning: a survey,2015,en,article,2097,MAYNOOTH UNIVERSITY EPRINTS AND ETHESES ARCHIVE (MAYNOOTH UNIVERSITY),Maynooth University ePrints and eTheses Archive (Maynooth University),Atılım Güneş Baydin;Barak A. Pearlmutter;Alexey Radul;Jeffrey Mark Siskind,Atılım Güneş Baydin;Barak A. Pearlmutter;Alexey Radul;Jeffrey Mark Siskind,"Department of Engineering Science University of Oxford Oxford United Kingdom;Department of Computer Science , National University of Ireland Maynooth , Maynooth, Co. Kildare, Ireland;Department of Brain and Cognitive Sciences, Massachusetts Institute of Technology, Cambridge, MA;School of Electrical & Computer Engineering, Purdue University, West Lafayette, IN",,"Derivatives, mostly in the form of gradients and Hessians, are ubiquitous in machine learning. Automatic differentiation (AD) is a technique for calculating derivatives of numeric functions expressed as computer programs efficiently and accurately, used in fields such as computational fluid dynamics, nuclear engineering, and atmospheric sciences. 
Despite its advantages and use in other fields, machine learning practitioners have been little influenced by AD and make scant use of available tools. We survey the intersection of AD and machine learning, cover applications where AD has the potential to make a big impact, and report on some recent developments in the adoption of this technique. We aim to dispel some misconceptions that we contend have impeded the use of AD within the machine learning community.",,,,,Computer science;Artificial intelligence;Relevance (law);Machine learning;Automatic differentiation;Differentiable function;CLARITY;Toolbox;Field (mathematics);Algorithmic learning theory;Active learning (machine learning);Theoretical computer science;Algorithm;Programming language;Mathematics,GB;IE;US,"Rumelhart D, 1986, NATURE;Rasmussen C, 2005, THE MIT PRESS EBOOKS;Bottou L, 2010, ;Griewank A, 1987, ;Girolami M, 2011, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Jones N, 1993, MEDICAL ENTOMOLOGY AND ZOOLOGY;Rump S, 1999, ;Neal R, 2011, ;Dennis J, 1996, ;Jacobson D, 1970, ;Loper M, 2014, LECTURE NOTES IN COMPUTER SCIENCE;Shivers O, 1991, MODERN HEALTHCARE;Kulkarni T, 2015, ;Walther A, 2012, CHAPMAN & HALL/CRC COMPUTATIONAL SCIENCE SERIES/CHAPMAN & HALL/CRC COMPUTATIONAL SCIENCE;Grabmeier J, 2003, ;Jones S, 1993, ;Jones S, 1991, LECTURE NOTES IN COMPUTER SCIENCE;Sussman G, 2001, OAPEN (OAPEN);Bendtsen C, 1996, ;Bell B, 2008, LECTURE NOTES IN COMPUTATIONAL SCIENCE AND ENGINEERING;Jerrell M, 1997, COMPUTATIONAL ECONOMICS;Gimpel K, 2018, RESEARCH SHOWCASE @ CARNEGIE MELLON UNIVERSITY (CARNEGIE MELLON UNIVERSITY);Grabner M, 2008, LECTURE NOTES IN COMPUTATIONAL SCIENCE AND ENGINEERING;Dauvergne B, 2006, LECTURE NOTES IN COMPUTER SCIENCE;Manzyuk O, 2019, JOURNAL OF FUNCTIONAL PROGRAMMING;Naumann U, 2006, LECTURE NOTES IN COMPUTATIONAL SCIENCE AND ENGINEERING;Bischof C, 2002, APPLIED OPTIMIZATION;Pascual V, 2008, LECTURE NOTES IN COMPUTATIONAL SCIENCE AND ENGINEERING;Siskind 
J, 2005, INTERNATIONAL JOURNAL OF HEMATOLOGY;Rall L, 2006, LECTURE NOTES IN COMPUTATIONAL SCIENCE AND ENGINEERING;Forth S, 2002, ;Siskind J, 2008, ;Yang W, 2008, LECTURE NOTES IN COMPUTER SCIENCE;Haase G, 2002, ;Casanova D, 2002, ;Christianson B, 2012, LECTURE NOTES IN COMPUTATIONAL SCIENCE AND ENGINEERING;Kingma D, 2013, UVA-DARE (UNIVERSITY OF AMSTERDAM);Rezende D, 2014, ARXIV (CORNELL UNIVERSITY);Courbariaux M, 2015, POLYPUBLIE (ÉCOLE POLYTECHNIQUE DE MONTRÉAL);Gupta S, 2015, ARXIV (CORNELL UNIVERSITY);Chetlur S, 2014, ARXIV (CORNELL UNIVERSITY);Maclaurin D, 2015, ARXIV (CORNELL UNIVERSITY);Salimans T, 2014, ARXIV (CORNELL UNIVERSITY);Joulin A, 2015, ARXIV (CORNELL UNIVERSITY);Speelpenning B, 1980, ;Grefenstette E, 2015, ARXIV (CORNELL UNIVERSITY);Nolan J, 1953, DSPACE@MIT (MASSACHUSETTS INSTITUTE OF TECHNOLOGY);Hinkins R, 1994, ;Barrett D, 2013, ARXIV (CORNELL UNIVERSITY);LeCun Y, 1998, PROCEEDINGS OF THE IEEE;Schmidhuber J, 2014, NEURAL NETWORKS;Peng D, 1976, INDUSTRIAL & ENGINEERING CHEMISTRY FUNDAMENTALS;Williams R, 1992, MACHINE LEARNING;Triggs B, 2000, LECTURE NOTES IN COMPUTER SCIENCE;Chib S, 1995, THE AMERICAN STATISTICIAN;Zhu C, 1997, ACM TRANSACTIONS ON MATHEMATICAL SOFTWARE;Hecht-Nielsen, 1989, ;Bert C, 1996, APPLIED MECHANICS REVIEWS;Giering R, 1998, ACM TRANSACTIONS ON MATHEMATICAL SOFTWARE;Horn B, 1977, ARTIFICIAL INTELLIGENCE;Pearlmutter B, 1994, NEURAL COMPUTATION;Bryson A, 1962, JOURNAL OF APPLIED MECHANICS;Hascoët L, 2013, ACM TRANSACTIONS ON MATHEMATICAL SOFTWARE;Wimp J, 1993, MATHEMATICS OF COMPUTATION;Wengert R, 1964, COMMUNICATIONS OF THE ACM;Linnainmaa S, 1976, BIT NUMERICAL MATHEMATICS;Vishwanathan S, 2006, ;Griewank A, 2003, PAMM;Gebremedhin A, 2013, ACM TRANSACTIONS ON MATHEMATICAL SOFTWARE;Forth S, 2006, ACM TRANSACTIONS ON MATHEMATICAL SOFTWARE;Stuhlmüller A, 2013, ;Hinton G, 1997, PHILOSOPHICAL TRANSACTIONS OF THE ROYAL SOCIETY B BIOLOGICAL SCIENCES;Bischof C, 1997, SOFTWARE PRACTICE AND EXPERIENCE;Schraudolph N, 1999, ;Christianson 
B, 1994, OPTIMIZATION METHODS & SOFTWARE;Seyab R, 2008, JOURNAL OF PROCESS CONTROL;Sirkes Z, 1997, MONTHLY WEATHER REVIEW;Pearlmutter B, 2008, ACM TRANSACTIONS ON PROGRAMMING LANGUAGES AND SYSTEMS;Müller J, 2005, INTERNATIONAL JOURNAL FOR NUMERICAL METHODS IN FLUIDS;Fornberg B, 1981, ACM TRANSACTIONS ON MATHEMATICAL SOFTWARE;Walther A, 2006, COMPUTATIONAL OPTIMIZATION AND APPLICATIONS;Pfeiffer F, 1987, ACM SIGNUM NEWSLETTER;Griewank A, 2011, COMPUTING;Bauer F, 1974, SIAM JOURNAL ON NUMERICAL ANALYSIS;Carmichael G, 1997, ATMOSPHERIC ENVIRONMENT;Naumann U, 2004, MATHEMATICAL PROGRAMMING;Siskind J, 2008, LISP AND SYMBOLIC COMPUTATION;Bischof C, 2008, LISP AND SYMBOLIC COMPUTATION;Schraudolph N, 2003, ;Apostolopoulou M, 2009, ;Rich L, 1992, APPLIED NUMERICAL MATHEMATICS;Charpentier I, 2000, OPTIMIZATION METHODS & SOFTWARE;Bischof C, 2006, JOURNAL OF FLUIDS ENGINEERING;Meyer R, 2003, ECONOMETRICS JOURNAL;Appel A, 1989, LISP AND SYMBOLIC COMPUTATION;Juedes D, 1991, UNIVERSITY OF NORTH TEXAS DIGITAL LIBRARY (UNIVERSITY OF NORTH TEXAS);Neidinger R, 1989, COLLEGE MATHEMATICS JOURNAL;Erikssont J, 1998, OPTIMIZATION METHODS & SOFTWARE;Schaul T, 2012, ARXIV (CORNELL UNIVERSITY);LeCun Y, 2015, NATURE;Dalal N, 2005, ;Bengio Y, 2013, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Paszke A, 2017, ;Duchi J, 2010, ;Carpenter B, 2017, JOURNAL OF STATISTICAL SOFTWARE;Chen D, 2014, ;Mikolov T, 2011, ;Griewank A, 2008, SOCIETY FOR INDUSTRIAL AND APPLIED MATHEMATICS EBOOKS;Seide F, 2016, ;Ekström U, 2010, JOURNAL OF CHEMICAL THEORY AND COMPUTATION;Gershman S, 2014, ESCHOLARSHIP (CALIFORNIA DIGITAL LIBRARY);Xue Y, 2005, BEIJING GONGYE DAXUE XUEBAO;Finkel J, 2008, ;Yu H, 2013, ;Griewank A, 2003, ACTA NUMERICA;Bischof C, 1996, IEEE COMPUTATIONAL SCIENCE AND ENGINEERING;Shtof A, 2013, COMPUTER GRAPHICS FORUM;Gebremedhin A, 2008, INFORMS JOURNAL ON COMPUTING;Wingate D, 2011, ;Yildirim I, 2015, COGNITIVE SCIENCE;Pock T, 2007, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE 
INTELLIGENCE;Güneş B, 2016, ARXIV (CORNELL UNIVERSITY);Bottou L, 1988, ;Sluşanschi E, 2016, ACM TRANSACTIONS ON MATHEMATICAL SOFTWARE;Thomas J, 2006, 11TH AIAA/ISSMO MULTIDISCIPLINARY ANALYSIS AND OPTIMIZATION CONFERENCE;Hill M, 1996, ;Ostiguy J, 2007, ;Šrajer F, 2016, ;Bahdanau D, 2014, ARXIV (CORNELL UNIVERSITY);Abadi M, 2016, ARXIV (CORNELL UNIVERSITY);Such F, 2017, ARXIV (CORNELL UNIVERSITY);Eslami S, 2016, ARXIV (CORNELL UNIVERSITY);Johnson M, 2016, ARXIV (CORNELL UNIVERSITY);Tran D, 2016, ARXIV (CORNELL UNIVERSITY);Graves A, 2014, ARXIV (CORNELL UNIVERSITY);Revels J, 2016, ENLIGHTEN (JURNAL BIMBINGAN DAN KONSELING ISLAM);Tran D, 2017, ARXIV (CORNELL UNIVERSITY);Baydin A, 2017, ARXIV (CORNELL UNIVERSITY);Boltyanskiĭ V, 1961, TRANSLATIONS - AMERICAN MATHEMATICAL SOCIETY/TRANSLATIONS ;Gruslys A, 2016, ARXIV (CORNELL UNIVERSITY);Agarwal N, 2016, ARXIV (CORNELL UNIVERSITY);Maclaurin D, 2016, DIGITAL ACCESS TO SCHOLARSHIP AT HARVARD (DASH) (HARVARD UNIVERSITY);Merriënboer B, 2017, ARXIV (CORNELL UNIVERSITY);Siskind J, 2016, ARXIV (CORNELL UNIVERSITY);Baydin A, 2016, ARXIV (CORNELL UNIVERSITY);Jayanta M, 2021, DROPS (SCHLOSS DAGSTUHL – LEIBNIZ CENTER FOR INFORMATICS);Press W, 1987, AMERICAN JOURNAL OF PHYSICS;Homan M, 2014, ARXIV (CORNELL UNIVERSITY);Werbos P, 1974, MEDICAL ENTOMOLOGY AND ZOOLOGY;Cohen J, 2010, ;Capriotti L, 2011, THE JOURNAL OF COMPUTATIONAL FINANCE;Siskind J, 2018, MURAL - MAYNOOTH UNIVERSITY RESEARCH ARCHIVE LIBRARY (NATIONAL UNIVERSITY OF IRELAND, MAYNOOTH);Tremaine S, 2002, PHYSICS TODAY;Siddharth N, 2018, APOLLO (UNIVERSITY OF CAMBRIDGE)",,,OPENALEX,"Baydin A, 2015, MAYNOOTH UNIVERSITY EPRINTS AND ETHESES ARCHIVE (MAYNOOTH UNIVERSITY)","Baydin A, 2015, MAYNOOTH UNIVERSITY EPRINTS AND ETHESES ARCHIVE (MAYNOOTH UNIVERSITY)" +https://openalex.org/W2767079719,10.1145/3133956.3133982,Practical Secure Aggregation for Privacy-Preserving Machine Learning,2017,en,article,3441,,,Keith Bonawitz;Vladimir Ivanov;Ben Kreuter;Antonio Marcedone;H. 
Brendan McMahan;Sarvar Patel;Daniel Ramage;Aaron Segal;Karn Seth,Keith Bonawitz;Vladimir Ivanov;Ben Kreuter;Antonio Marcedone;H. Brendan McMahan;Sarvar Patel;Daniel Ramage;Aaron Segal;Karn Seth,"Google Inc., Mountain View, CA, USA;Google Inc., Mountain View, CA, USA;Google Inc., Mountain View, CA, USA;Cornell Tech & Google Inc., New York, NY, USA;Google Inc., Mountain View, CA, USA;Google Inc., Mountain View, CA, USA;Google Inc., Mountain View, CA, USA;Google Inc., Mountain View, CA, USA;Google Inc., Mountain View, CA, USA",,"We design a novel, communication-efficient, failure-robust protocol for secure aggregation of high-dimensional data. Our protocol allows a server to compute the sum of large, user-held data vectors from mobile devices in a secure manner (i.e. without learning each user's individual contribution), and can be used, for example, in a federated learning setting, to aggregate user-provided model updates for a deep neural network. We prove the security of our protocol in the honest-but-curious and active adversary settings, and show that security is maintained even if an arbitrarily chosen subset of users drop out at any time. We evaluate the efficiency of our protocol and show, by complexity analysis and a concrete implementation, that its runtime and communication overhead remain low even on large data sets and client pools. 
For 16-bit input values, our protocol offers $1.73 x communication expansion for 210 users and 220-dimensional vectors, and 1.98 x expansion for 214 users and 224-dimensional vectors over sending data in the clear.",,,1175,1191,Computer science;Overhead (engineering);Protocol (science);Universal composability;Adversary;Computer network;Aggregate (composite);Cryptographic protocol;Distributed computing;Theoretical computer science;Cryptography;Computer security;Operating system,US,"Diffie W, 1976, IEEE TRANSACTIONS ON INFORMATION THEORY;Shamir A, 1979, COMMUNICATIONS OF THE ACM;Goodfellow I, 2016, MIT PRESS EBOOKS;Abadi M, 2016, ;Shokri R, 2017, ;Dwork C, 2013, NOW PUBLISHERS, INC. EBOOKS;Goldreich O, 1987, ;Fredrikson M, 2015, ;Ben-Or M, 1988, ;Narayanan A, 2008, PROCEEDINGS - IEEE SYMPOSIUM ON SECURITY AND PRIVACY/PROCEEDINGS OF THE ... IEEE SYMPOSIUM ON SECURITY AND PRIVACY;Shokri R, 2015, ;Dwork C, 2006, LECTURE NOTES IN COMPUTER SCIENCE;Erlingsson Ú, 2014, ;Chaum D, 1988, JOURNAL OF CRYPTOLOGY;Blum M, 1984, SIAM JOURNAL ON COMPUTING;Damgård I, 2012, LECTURE NOTES IN COMPUTER SCIENCE;Yao A, 1982, ;Duchi J, 2013, ;Bellare M, 2000, LECTURE NOTES IN COMPUTER SCIENCE;Bogetoft P, 2009, LECTURE NOTES IN COMPUTER SCIENCE;Rastogi V, 2010, ;Abdalla⋆ M, 2001, LECTURE NOTES IN COMPUTER SCIENCE;Ács G, 2011, LECTURE NOTES IN COMPUTER SCIENCE;Araki T, 2016, ;Burkhart M, 2010, CANCER CYTOPATHOLOGY;Mironov I, 2009, LECTURE NOTES IN COMPUTER SCIENCE;Chan T, 2012, LECTURE NOTES IN COMPUTER SCIENCE;Golle P, 2004, LECTURE NOTES IN COMPUTER SCIENCE;Goodman J, 2002, ;Lampos V, 2015, SCIENTIFIC REPORTS;Halevi S, 2011, LECTURE NOTES IN COMPUTER SCIENCE;Jawurek M, 2013, ;Paparrizos J, 2016, JOURNAL OF ONCOLOGY PRACTICE;Goryczka S, 2015, IEEE TRANSACTIONS ON DEPENDABLE AND SECURE COMPUTING;Lindell Y, 2011, LECTURE NOTES IN COMPUTER SCIENCE;Jansen R, 2016, ;Elahi T, 2014, ;Boyle E, 2015, LECTURE NOTES IN COMPUTER SCIENCE;Leontiadis I, 2014, LECTURE NOTES IN COMPUTER SCIENCE;Wainwright M, 
2012, ;Leontiadis I, 2015, LECTURE NOTES IN COMPUTER SCIENCE;연구 처, 2015, ;McCabe K, 2013, JOURNAL OF INTELLECTUAL PROPERTY LAW;McMahan H, 2016, ARXIV (CORNELL UNIVERSITY);Xinghao P, 2017, ARXIV (CORNELL UNIVERSITY);Corrigan-Gibbs H, 2017, ARXIV (CORNELL UNIVERSITY);Corrigan-Gibbs H, 2012, ARXIV (CORNELL UNIVERSITY);Goodfellow I, 2012, ARXIV (CORNELL UNIVERSITY);Dwork C, 2006, LECTURE NOTES IN COMPUTER SCIENCE;Yao A, 1982, ;Lindell Y, 2015, LECTURE NOTES IN COMPUTER SCIENCE;Duchi J, 2012, ARXIV (CORNELL UNIVERSITY);Kwon Y, 2015, ",,,OPENALEX,"Bonawitz K, 2017, ","Bonawitz K, 2017, " +https://openalex.org/W2751318774,10.1142/9789811201967_0001,Introduction to Machine Learning,2019,en,book-chapter,1659,SERIES IN MACHINE PERCEPTION AND ARTIFICIAL INTELLIGENCE,Series in machine perception and artificial intelligence,,,,,"The goal of machine learning is to program computers to use example data or past experience to solve a given problem. Many successful applications of machine learning exist already, including systems that analyze past sales data to predict customer behavior, optimize robot behavior so that a task can be completed using minimum resources, and extract knowledge from bioinformatics data. Introduction to Machine Learning is a comprehensive textbook on the subject, covering a broad array of topics not usually included in introductory machine learning texts. In order to present a unified treatment of machine learning problems and solutions, it discusses many methods from different fields, including statistics, pattern recognition, neural networks, artificial intelligence, signal processing, control, and data mining. All learning algorithms are explained so that the student can easily move from the equations in the book to a computer program. 
The text covers such topics as supervised learning, Bayesian decision theory, parametric methods, multivariate methods, multilayer perceptrons, local models, hidden Markov models, assessing and comparing classification algorithms, and reinforcement learning. New to the second edition are chapters on kernel machines, graphical models, and Bayesian estimation; expanded coverage of statistical tests in a chapter on design and analysis of machine learning experiments; case studies available on the Web (with downloadable results for instructors); and many additional exercises. All chapters have been revised and updated. Introduction to Machine Learning can be used by advanced undergraduates and graduate students who have completed courses in computer programming, probability, calculus, and linear algebra. It will also be of interest to engineers in the field who are concerned with the application of machine learning methods. Adaptive Computation and Machine Learning series",,,1,22,Computer science;Artificial intelligence,,,,,OPENALEX,"NA, 2019, SERIES IN MACHINE PERCEPTION AND ARTIFICIAL INTELLIGENCE","NA, 2019, SERIES IN MACHINE PERCEPTION AND ARTIFICIAL INTELLIGENCE" +https://openalex.org/W1993220166,10.1145/1007730.1007735,A study of the behavior of several methods for balancing machine learning training data,2004,en,article,4123,ACM SIGKDD EXPLORATIONS NEWSLETTER,ACM SIGKDD Explorations Newsletter,Gustavo E. A. P. A. Batista;Ronaldo C. Prati;Maria Carolina Monard,Gustavo E. A. P. A. Batista;Ronaldo C. Prati;Maria Carolina Monard,"Instituto de Ciências Matemáticas e de Computação, São Carlos - SP, Brazil;Instituto de Ciências Matemáticas e de Computação, São Carlos - SP, Brazil;Instituto de Ciências Matemáticas e de Computação, São Carlos - SP, Brazil",,"There are several aspects that might influence the performance achieved by existing learning systems. 
It has been reported that one of these aspects is related to class imbalance in which examples in training data belonging to one class heavily outnumber the examples in the other class. In this situation, which is found in real world data describing an infrequent but important event, the learning system may have difficulties to learn the concept related to the minority class. In this work we perform a broad experimental evaluation involving ten methods, three of them proposed by the authors, to deal with the class imbalance problem in thirteen UCI data sets. Our experiments provide evidence that class imbalance does not systematically hinder the performance of learning systems. In fact, the problem seems to be related to learning with too few minority class examples in the presence of other complicating factors, such as class overlapping. Two of our proposed methods deal with these conditions directly, allying a known over-sampling method with data cleaning methods in order to produce better-defined class clusters. Our comparative experiments show that, in general, over-sampling methods provide more accurate results than under-sampling methods considering the area under the ROC curve (AUC). This result seems to contradict results previously published in the literature. Two of our proposed methods, Smote + Tomek and Smote + ENN, presented very good results for data sets with a small number of positive examples. Moreover, Random over-sampling, a very simple over-sampling method, is very competitive to more complex over-sampling methods. Since the over-sampling methods provided very good performance results, we also measured the syntactic complexity of the decision trees induced from over-sampled data. Our results show that these trees are usually more complex then the ones induced from original data. 
Random over-sampling usually produced the smallest increase in the mean number of induced rules and Smote + ENN the smallest increase in the mean number of conditions per rule, when compared among the investigated over-sampling methods.",6,1,20,29,Computer science;Class (philosophy);Machine learning;Artificial intelligence;Sampling (signal processing);Simple random sample;Event (particle physics);Data mining;Simple (philosophy),BR,"Chawla N, 2002, JOURNAL OF ARTIFICIAL INTELLIGENCE RESEARCH;Quinlan J, 1992, ;Blake C, 1998, MEDICAL ENTOMOLOGY AND ZOOLOGY;Salzberg S, 1994, ;Aha D, 1991, MACHINE LEARNING;Japkowicz N, 2002, INTELLIGENT DATA ANALYSIS;Bauer E, 1999, MACHINE LEARNING;Kubát M, 1997, ;Wilson D, 1972, IEEE TRANSACTIONS ON SYSTEMS MAN AND CYBERNETICS;Hart P, 1968, IEEE TRANSACTIONS ON INFORMATION THEORY;Ciaccia P, 1997, ;Domingos P, 1999, ;Stanfill C, 1986, COMMUNICATIONS OF THE ACM;Wilson D, 2000, MACHINE LEARNING;, 1976, IEEE TRANSACTIONS ON SYSTEMS MAN AND CYBERNETICS;Weiss G, 2003, JOURNAL OF ARTIFICIAL INTELLIGENCE RESEARCH;Drummond C, 2003, ;Laurikkala J, 2001, LECTURE NOTES IN COMPUTER SCIENCE;Provost F, 1997, ;Ling C, 1998, ;Wade M, 1999, TECHNOMETRICS;Zadrozny B, 2001, ;Prati R, 2004, LECTURE NOTES IN COMPUTER SCIENCE;Batista G, 2003, ;Ferri C, 2002, ",,,OPENALEX,"Batista G, 2004, ACM SIGKDD EXPLORATIONS NEWSLETTER","Batista G, 2004, ACM SIGKDD EXPLORATIONS NEWSLETTER" +https://openalex.org/W1505191356,10.1038/nrg3920,Machine learning applications in genetics and genomics,2015,en,review,2029,NATURE REVIEWS GENETICS,Nature Reviews Genetics,Maxwell W. Libbrecht;William Stafford Noble,Maxwell W. 
Libbrecht;William Stafford Noble,"Department of Computer Science and Engineering, University of Washington, 185 Stevens Way, Seattle, Washington 98195-2350, USA;Department of Computer Science and Engineering, University of Washington, 185 Stevens Way, Seattle, 98195–2350, Washington, USA;1] Department of Computer Science and Engineering, University of Washington, 185 Stevens Way, Seattle, Washington 98195-2350, USA. [2] Department of Genome Sciences, University of Washington, 3720 15th Ave NE Seattle, Washington 98195-5065, USA;Department of Computer Science and Engineering, University of Washington, 185 Stevens Way, Seattle, 98195–2350, Washington, USA",,,16,6,321,332,Machine learning;Artificial intelligence;Computer science;Genomics;Unsupervised learning;Selection (genetic algorithm);Discriminative model;Epigenomics;Feature selection;Genome;Biology,US,"Tibshirani R, 1996, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Ashburner M, 2000, NATURE GENETICS;Hastie T, 2001, SPRINGER SERIES IN STATISTICS;Cios K, 1997, NEUROCOMPUTING;Hastie T, 2013, ;Boser B, 1992, ;Cohen J, 1968, PSYCHOLOGICAL BULLETIN;Shawe‐Taylor J, 2004, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Chapelle O, 2006, THE MIT PRESS EBOOKS;Noble W, 2006, NATURE BIOTECHNOLOGY;Troyanskaya O, 2001, BIOINFORMATICS;Sammut C, 2010, ;Ramaswamy S, 2001, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Segal E, 2006, NATURE;Sonnhammer E, 1997, PROTEINS STRUCTURE FUNCTION AND BIOINFORMATICS;Friedman N, 2004, SCIENCE;Bücher P, 1990, JOURNAL OF MOLECULAR BIOLOGY;Vert J, 2004, THE MIT PRESS EBOOKS;Leslie C, 2001, ;Karlić R, 2010, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Bailey T, 1995, PUBMED;Beer M, 2004, CELL;Troyanskaya O, 2003, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Ohler U, 2002, GENOME BIOLOGY;Zien A, 2000, BIOINFORMATICS;Ouyang Z, 2009, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Hoffman M, 2013, ;Yip K, 2012, GENOME BIOLOGY;Swan A, 2013, OMICS A JOURNAL OF INTEGRATIVE 
BIOLOGY;Pavlidis P, 2002, JOURNAL OF COMPUTATIONAL BIOLOGY;Hughes R, 2014, ARCADIA PUB. EBOOKS;Cuéllar-Partida G, 2011, BIOINFORMATICS;Glaab E, 2012, PLOS ONE;Upstill‐Goddard R, 2012, BRIEFINGS IN BIOINFORMATICS;Bacardit J, 2013, WILEY INTERDISCIPLINARY REVIEWS DATA MINING AND KNOWLEDGE DISCOVERY;Brown M, 1993, PUBMED;Fraser A, 2004, NATURE GENETICS;Degroeve S, 2002, BIOINFORMATICS;Yip K, 2013, GENOME BIOLOGY;Noble J, 2012, ROCZNIKI POLSKIEGO TOWARZYSTWA MATEMATYCZNEGO. SERIA 3, MATEMATYKA STOSOWANA/MATEMATYKA STOSOWANA/MATHEMATICA APPLICANDA;Qiu J, 2008, PLOS COMPUTATIONAL BIOLOGY;Urbanowicz R, 2012, IEEE COMPUTATIONAL INTELLIGENCE MAGAZINE;Picardi E, 2009, METHODS IN MOLECULAR BIOLOGY;Saigo H, 2006, BMC BIOINFORMATICS;Urbanowicz R, 2012, LECTURE NOTES IN COMPUTER SCIENCE;Stamatoyannopoulos J, 2010, NATURE METHODS;Hamelryck T, 2009, STATISTICAL METHODS IN MEDICAL RESEARCH;Manning C, 1999, ;Wasson T, 2009, GENOME RESEARCH;Foissac S, 2012, NATURE;Pearl J, 1988, ;Wolpert D, 1997, IEEE TRANSACTIONS ON EVOLUTIONARY COMPUTATION;, 2002, THE MIT PRESS EBOOKS;Kircher M, 2014, NATURE GENETICS;Hitchcock C, 2001, THE PHILOSOPHICAL REVIEW;Heintzman N, 2007, NATURE GENETICS;Friedman N, 2000, JOURNAL OF COMPUTATIONAL BIOLOGY;, 2000, CHOICE REVIEWS ONLINE;Apweiler R, 2001, NUCLEIC ACIDS RESEARCH;Kyburg H, 1991, THE JOURNAL OF PHILOSOPHY;Tikhonov A, 1943, PROCEEDINGS OF THE USSR ACADEMY OF SCIENCES;Lanckriet G, 2004, BIOINFORMATICS;Hoffman M, 2012, NATURE METHODS;M. 
H, 2012, THE JOURNAL OF THE AMERICAN MEDICAL ASSOCIATION (JAMA) NETWORK (AMERICAN MEDICAL ASSOCIATION);Peña‐Castillo L, 2008, GENOME BIOLOGY;Luengo J, 2011, KNOWLEDGE AND INFORMATION SYSTEMS;Day N, 2007, BIOINFORMATICS;Rätsch G, 2004, THE MIT PRESS EBOOKS;Brenner S, 1995, NATIONAL CONFERENCE ON ARTIFICIAL INTELLIGENCE;Song L, 2010, COLD SPRING HARBOR PROTOCOLS;Piqué-Regi R, 2010, GENOME RESEARCH",,,OPENALEX,"Libbrecht M, 2015, NATURE REVIEWS GENETICS","Libbrecht M, 2015, NATURE REVIEWS GENETICS" +https://openalex.org/W3122548859,10.1177/2053951715622512,How the machine ‘thinks’: Understanding opacity in machine learning algorithms,2016,en,article,2476,BIG DATA & SOCIETY,Big Data & Society,Jenna Burrell,Jenna Burrell,"School of Information, UC-Berkeley, Berkeley, CA, USA","Jenna Burrell (corresponding author), School of Information, UC-Berkeley, Berkeley, CA, USA","This article considers the issue of opacity as a problem for socially consequential mechanisms of classification and ranking, such as spam filters, credit card fraud detection, search engines, news trends, market segmentation and advertising, insurance or loan qualification, and credit scoring. These mechanisms of classification all frequently rely on computational algorithms, and in many cases on machine learning algorithms to do this work. In this article, I draw a distinction between three forms of opacity: (1) opacity as intentional corporate or state secrecy, (2) opacity as technical illiteracy, and (3) an opacity that arises from the characteristics of machine learning algorithms and the scale required to apply them usefully. The analysis in this article gets inside the algorithms themselves. I cite existing literatures in computer science, known industry practices (as they are publicly presented), and do some testing and manipulation of code as a form of lightweight code audit. 
I argue that recognizing the distinct forms of opacity that may be coming into play in a given application is a key to determining which of a variety of technical and non-technical solutions could help to prevent harm.",3,1,,,Machine learning;Computer science;Artificial intelligence;Credit card fraud;Algorithm;Audit;Opacity;Credit card;Economics;World Wide Web,US,"Wing J, 2006, COMMUNICATIONS OF THE ACM;Bowker G, 1999, THE MIT PRESS EBOOKS;Spiess M, 2000, THE JOURNAL OF ACADEMIC LIBRARIANSHIP;Dwork C, 2012, ;Domingos P, 2012, COMMUNICATIONS OF THE ACM;Pasquale F, 2015, HARVARD UNIVERSITY PRESS EBOOKS;Pasquale F, 2016, CONTEMPORARY SOCIOLOGY A JOURNAL OF REVIEWS;Gillespie T, 2014, THE MIT PRESS EBOOKS;Barocas S, 2016, CALIFORNIA LAW REVIEW;Editors T, 2009, JOURNAL OF HOUSING RESEARCH;Lee I, 2011, ACM INROADS;Datta A, 2015, PROCEEDINGS ON PRIVACY ENHANCING TECHNOLOGIES;Fourcade M, 2013, ACCOUNTING ORGANIZATIONS AND SOCIETY;Söderberg J, 2014, CANADIAN JOURNAL OF COMMUNICATION;Hargittai E, 2018, ;Diakopoulos N, 2014, COLUMBIA ACADEMIC COMMONS (COLUMBIA UNIVERSITY);Burrell J, 2012, THE MIT PRESS EBOOKS;Olazaran M, 1996, SOCIAL STUDIES OF SCIENCE;Burrell J, 2012, ;Brunton F, 2013, THE MIT PRESS EBOOKS;Gandy O, 2009, ETHICS AND INFORMATION TECHNOLOGY;Winograd T, 2006, ARTIFICIAL INTELLIGENCE;, 2011, CHOICE REVIEWS ONLINE;Mateas M, 2005, ;Grudin J, 2006, INTERACTIONS;Ensmenger N, 2003, INTERNATIONAL REVIEW OF SOCIAL HISTORY;Kerbo H, 2007, ;Grudin J, 2006, INTERACTIONS;Shade L, 2011, JOURNAL OF INFORMATION POLICY;Datta A, 2014, ARXIV (CORNELL UNIVERSITY)",,,OPENALEX,"Burrell J, 2016, BIG DATA & SOCIETY","Burrell J, 2016, BIG DATA & SOCIETY" +https://openalex.org/W3145506661,10.1007/978-3-030-10546-4_1,Introduction to Machine Learning,2019,en,book-chapter,1620,SPRINGER BRIEFS IN ELECTRICAL AND COMPUTER ENGINEERING,Springer briefs in electrical and computer engineering,F. Richard Yu;Ying He,F. 
Richard Yu;Ying He,"Carleton University, Ottawa, ON, Canada;Carleton University, Ottawa, ON, Canada",,,,,1,13,Computer science;Artificial intelligence,CA,"Breiman L, 2001, MACHINE LEARNING;Hochreiter S, 1997, NEURAL COMPUTATION;LeCun Y, 2015, NATURE;Krizhevsky A, 2017, COMMUNICATIONS OF THE ACM;Haykin S, 1998, ;Han J, 2012, CHOICE REVIEWS ONLINE;Wu Y, 1999, TECHNOMETRICS;Hastie T, 2001, SPRINGER SERIES IN STATISTICS;Rabiner L, 1989, PROCEEDINGS OF THE IEEE;Ryzin J, 1986, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Schmidhuber J, 2014, NEURAL NETWORKS;Cover T, 1967, IEEE TRANSACTIONS ON INFORMATION THEORY;Quinlan J, 1986, MACHINE LEARNING;Tadeusiewicz R, 1995, CONTROL ENGINEERING PRACTICE;Kanungo T, 2002, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Friedman N, 1997, MACHINE LEARNING;Kotsiantis S, 2007, ;Jensen F, 2007, INFORMATION SCIENCE AND STATISTICS;Kadane J, 1975, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Sak H, 2014, ;Kohonen T, 1998, NEUROCOMPUTING;Heckerman D, 2008, STUDIES IN COMPUTATIONAL INTELLIGENCE;Mikolov T, 2011, ;Zaki M, 2020, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Chapelle O, 2009, IEEE TRANSACTIONS ON NEURAL NETWORKS;Ahmed S, 2008, TECHNOMETRICS;Wu H, 2017, IEEE TRANSACTIONS ON IMAGE PROCESSING;Li X, 2015, ;Patle A, 2013, ;Mucherino A, 2009, SPRINGER OPTIMIZATION AND ITS APPLICATIONS;Lee K, 2005, EXPERT SYSTEMS WITH APPLICATIONS;Li C, 2018, INTERNATIONAL JOURNAL OF COMMUNICATION SYSTEMS;Holgado P, 2017, IEEE TRANSACTIONS ON DEPENDABLE AND SECURE COMPUTING;Timotheou S, 2009, THE COMPUTER JOURNAL;Yekkehkhany B, 2014, ˜THE œINTERNATIONAL ARCHIVES OF THE PHOTOGRAMMETRY, REMOTE SENSING AND SPATIAL INFORMATION SCIENCES/INTERNATIONAL ARCHIVES OF THE PHOTOGRAMMETRY, REMOTE SENSING AND SPATIAL INFORMATION SCIENCES;Zhou X, 2014, ACADEMIC PRESS LIBRARY IN SIGNAL PROCESSING;Burrows W, 1995, JOURNAL OF APPLIED METEOROLOGY;Martínez‐Ramón M, 2006, SYNTHESIS LECTURES ON COMPUTATIONAL ELECTROMAGNETICS;Hu H, 2008, ;Christopoulos C, 2006, 
SYNTHESIS LECTURES ON COMPUTATIONAL ELECTROMAGNETICS;Karatsiolis S, 2012, ;Bakırcıoğlu H, 2000, EUROPEAN JOURNAL OF OPERATIONAL RESEARCH;Hulle M, 2012, ;Geubbelmans M, 2024, AMERICAN JOURNAL OF ORTHODONTICS AND DENTOFACIAL ORTHOPEDICS;Pandey G, 2014, ;, 2019, ;Basterrech S, 2016, ARXIV (CORNELL UNIVERSITY);Bakker J, 2017, ",,,OPENALEX,"Yu F, 2019, SPRINGER BRIEFS IN ELECTRICAL AND COMPUTER ENGINEERING","Yu F, 2019, SPRINGER BRIEFS IN ELECTRICAL AND COMPUTER ENGINEERING" +https://openalex.org/W4205539948,10.1093/rfs/hhaa009,Empirical Asset Pricing via Machine Learning,2020,en,article,2225,REVIEW OF FINANCIAL STUDIES,Review of Financial Studies,Shihao Gu;Bryan Kelly;Dacheng Xiu,Shihao Gu;Bryan Kelly;Dacheng Xiu,"Booth School of Business, University of Chicago;Yale University, AQR Capital Management, and NBER;Booth School of Business, University of Chicago","Shihao Gu (corresponding author), Booth School of Business, University of Chicago","Abstract We perform a comparative analysis of machine learning methods for the canonical problem of empirical asset pricing: measuring asset risk premiums. We demonstrate large economic gains to investors using machine learning forecasts, in some cases doubling the performance of leading regression-based strategies from the literature. We identify the best-performing methods (trees and neural networks) and trace their predictive gains to allowing nonlinear predictor interactions missed by other methods. All methods agree on the same set of dominant predictive signals, a set that includes variations on momentum, liquidity, and volatility. 
Authors have furnished an Internet Appendix, which is available on the Oxford University Press Web site next to the link to the final published paper online.",33,5,2223,2273,Capital asset pricing model;Machine learning;Computer science;Artificial intelligence;Artificial neural network;Volatility (finance);Market liquidity;Econometrics;Asset (computer security);TRACE (psycholinguistics);Set (abstract data type);Economics;Finance,US,"He K, 2016, ;Breiman L, 2001, MACHINE LEARNING;Friedman J, 2001, THE ANNALS OF STATISTICS;Fama E, 1993, JOURNAL OF FINANCIAL ECONOMICS;Gordon A, 1984, BIOMETRICS;Hastie T, 2009, SPRINGER SERIES IN STATISTICS;Hornik K, 1989, NEURAL NETWORKS;Hinton G, 2006, NEURAL COMPUTATION;Cybenko G, 1989, MATHEMATICS OF CONTROL SIGNALS AND SYSTEMS;Dietterich T, 2000, LECTURE NOTES IN COMPUTER SCIENCE;Huber P, 1964, THE ANNALS OF MATHEMATICAL STATISTICS;Friedman J, 2000, THE ANNALS OF STATISTICS;Hansen L, 1990, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Welch I, 2007, REVIEW OF FINANCIAL STUDIES;Campbell J, 2007, REVIEW OF FINANCIAL STUDIES;Schapire R, 1990, MACHINE LEARNING;Jarrett K, 2009, ;Jong S, 1993, CHEMOMETRICS AND INTELLIGENT LABORATORY SYSTEMS;Freund Y, 1995, INFORMATION AND COMPUTATION;Masters T, 1993, ;Cochrane J, 2007, REVIEW OF FINANCIAL STUDIES;, 2013, HANDBOOK OF ECONOMIC FORECASTING;Box G, 1953, BIOMETRIKA;Lo A, 1990, REVIEW OF FINANCIAL STUDIES;Ferson W, 1999, THE JOURNAL OF FINANCE;Kelly B, 2019, JOURNAL OF FINANCIAL ECONOMICS;Khandani A, 2010, JOURNAL OF BANKING & FINANCE;Hutchinson J, 1994, THE JOURNAL OF FINANCE;Rapach D, 2013, THE JOURNAL OF FINANCE;Green J, 2017, REVIEW OF FINANCIAL STUDIES;Gleser L, 1989, ;Freyberger J, 2019, REVIEW OF FINANCIAL STUDIES;Kelly B, 2013, THE JOURNAL OF FINANCE;White H, 1980, INTERNATIONAL ECONOMIC REVIEW;Diebold F, 2015, JOURNAL OF BUSINESS AND ECONOMIC STATISTICS;Wilson D, 2003, NEURAL NETWORKS;Lewellen J, 2015, CRITICAL FINANCE REVIEW;Rosenberg B, 1974, JOURNAL OF FINANCIAL 
AND QUANTITATIVE ANALYSIS;Dimopoulos Y, 1995, NEURAL PROCESSING LETTERS;Fan J, 2016, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Green J, 2013, REVIEW OF ACCOUNTING STUDIES;Yao J, 2000, OMEGA;Koijen R, 2011, ANNUAL REVIEW OF FINANCIAL ECONOMICS;, 2014, ;Kingma D, 2014, UVA-DARE (UNIVERSITY OF AMSTERDAM);, 2008, ;Moritz B, 2016, SSRN ELECTRONIC JOURNAL;Feng G, 2019, NATIONAL BUREAU OF ECONOMIC RESEARCH;Gu S, 2019, SSRN ELECTRONIC JOURNAL;Kozak S, 2019, SSRN ELECTRONIC JOURNAL;Bishop C, 1995, ;Fama E, 2014, JOURNAL OF FINANCIAL ECONOMICS;Harvey C, 2015, REVIEW OF FINANCIAL STUDIES;Fama E, 2008, THE JOURNAL OF FINANCE;Kozak S, 2019, JOURNAL OF FINANCIAL ECONOMICS;Kelly B, 2015, JOURNAL OF ECONOMETRICS;Giglio S, 2021, JOURNAL OF POLITICAL ECONOMY;Diebold F, 1994, NATIONAL BUREAU OF ECONOMIC RESEARCH",,,OPENALEX,"Gu S, 2020, REVIEW OF FINANCIAL STUDIES","Gu S, 2020, REVIEW OF FINANCIAL STUDIES" +https://openalex.org/W1596324102,,Machine Learning: An Artificial Intelligence Approach,2013,en,book,2639,,,Ryszard S. Michalski;Jaime G. Carbonell;Thomas M. Mitchell,Ryszard S. Michalski;Jaime G. Carbonell;Thomas M. Mitchell,,,,,,,,Computer science;Artificial intelligence;Perspective (graphical);Heuristics;Machine learning;Hyper-heuristic;Data science;Cognitive science;Robot learning;Psychology,,,,,OPENALEX,"Michalski R, 2013, ","Michalski R, 2013, " +https://openalex.org/W2133564696,10.48550/arxiv.1409.0473,Neural Machine Translation by Jointly Learning to Align and Translate,2014,en,preprint,14620,ARXIV (CORNELL UNIVERSITY),arXiv (Cornell University),Dzmitry Bahdanau,"Bahdanau, Dzmitry",Jacobs Univ. Bremen (Germany),"Bahdanau, Dzmitry (corresponding author), Jacobs Univ. Bremen (Germany)","Neural machine translation is a recently proposed approach to machine translation. 
Unlike the traditional statistical machine translation, the neural machine translation aims at building a single neural network that can be jointly tuned to maximize the translation performance. The models proposed recently for neural machine translation often belong to a family of encoder-decoders and consists of an encoder that encodes a source sentence into a fixed-length vector from which a decoder generates a translation. In this paper, we conjecture that the use of a fixed-length vector is a bottleneck in improving the performance of this basic encoder-decoder architecture, and propose to extend this by allowing a model to automatically (soft-)search for parts of a source sentence that are relevant to predicting a target word, without having to form these parts as a hard segment explicitly. With this new approach, we achieve a translation performance comparable to the existing state-of-the-art phrase-based system on the task of English-to-French translation. Furthermore, qualitative analysis reveals that the (soft-)alignments found by the model agree well with our intuition.",,,,,Machine translation;Computer science;Transfer-based machine translation;Example-based machine translation;Sentence;Bottleneck;Artificial intelligence;Translation (biology);Artificial neural network;Natural language processing;Encoder;Phrase;Word (group theory);Speech recognition,DE,"Schuster M, 1997, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Koehn P, 2003, ;Elena L, 2013, DROPS (SCHLOSS DAGSTUHL – LEIBNIZ CENTER FOR INFORMATICS);Graves A, 2013, ;, 2000, APPLIED PHYSICS LETTERS;Philipp K, 2010, CHOICE REVIEWS ONLINE;Kalchbrenner N, 2013, ;Pascanu R, 2014, INTERNATIONAL CONFERENCE ON LEARNING REPRESENTATIONS;Axelrod A, 2011, ;Devlin J, 2014, ;Boulanger-Lewandowski N, 2013, ZENODO (CERN EUROPEAN ORGANIZATION FOR NUCLEAR RESEARCH);Schwenk H, 2006, ;Forcada M, 1997, LECTURE NOTES IN COMPUTER SCIENCE;Pouget-Abadie J, 2014, ;Zeiler M, 2012, ARXIV (CORNELL UNIVERSITY);Sutskever I, 2014, ARXIV 
(CORNELL UNIVERSITY);Cho K, 2014, ARXIV (CORNELL UNIVERSITY);Graves A, 2012, ARXIV (CORNELL UNIVERSITY);Cho K, 2014, ARXIV (CORNELL UNIVERSITY);Goodfellow I, 2013, ;Schwenk H, 2025, ",,,OPENALEX,"Bahdanau D, 2014, ARXIV (CORNELL UNIVERSITY)","Bahdanau D, 2014, ARXIV (CORNELL UNIVERSITY)" +https://openalex.org/W164706946,,Gaussian Processes for Machine Learning (Adaptive Computation and Machine Learning),2005,en,book,1866,THE MIT PRESS EBOOKS,The MIT Press eBooks,Carl Edward Rasmussen;Christopher K. I. Williams,Carl Edward Rasmussen;Christopher K. I. Williams,,,,,,,,Computer science;Computation;Artificial intelligence;Machine learning;Computational learning theory;Gaussian process;Gaussian;Active learning (machine learning);Algorithm;Physics,,,,,OPENALEX,"Rasmussen C, 2005, THE MIT PRESS EBOOKS-a","Rasmussen C, 2005, THE MIT PRESS EBOOKS" +https://openalex.org/W2610886376,10.1257/jep.31.2.87,Machine Learning: An Applied Econometric Approach,2017,en,article,1882,THE JOURNAL OF ECONOMIC PERSPECTIVES,The Journal of Economic Perspectives,Sendhil Mullainathan;Jann Spiess,Sendhil Mullainathan;Jann Spiess,"Sendhil Mullainathan is the Robert C. Waggoner Professor of Economics, Harvard University, Cambridge, Massachusetts;Jann Spiess is a PhD candidate in Economics, Harvard University, Cambridge, Massachusetts",,"Machines are increasingly doing “intelligent” things. Face recognition algorithms use a large dataset of photos labeled as having a face or not to estimate a function that predicts the presence y of a face from pixels x. This similarity to econometrics raises questions: How do these new empirical tools fit with what we know? As empirical economists, how can we use them? We present a way of thinking about machine learning that gives it its own place in the econometric toolbox. Machine learning not only provides new tools, it solves a different problem. 
Specifically, machine learning revolves around the problem of prediction, while many economic applications revolve around parameter estimation. So applying machine learning to economics requires finding relevant tasks. Machine learning algorithms are now technically easy to use: you can download convenient packages in R or Python. This also raises the risk that the algorithms are applied naively or their output is misinterpreted. We hope to make them conceptually easier to use by providing a crisper understanding of how these algorithms work, where they excel, and where they can stumble—and thus where they can be most usefully applied.",31,2,87,106,Toolbox;Python (programming language);Machine learning;Computer science;Artificial intelligence;Face (sociological concept);Empirical research;Mathematics,US,"Staiger D, 1997, ECONOMETRICA;Dietvorst B, 2014, JOURNAL OF EXPERIMENTAL PSYCHOLOGY GENERAL;Antweiler W, 2004, THE JOURNAL OF FINANCE;Dawes R, 1989, SCIENCE;Hoberg G, 2016, JOURNAL OF POLITICAL ECONOMY;Zhao P, 2006, ;Jean N, 2016, SCIENCE;Varian H, 2014, THE JOURNAL OF ECONOMIC PERSPECTIVES;Athey S, 2016, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Belloni A, 2013, THE REVIEW OF ECONOMIC STUDIES;Bound J, 1995, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Belloni A, 2012, ECONOMETRICA;Lee B, 2009, STATISTICS IN MEDICINE;Blumenstock J, 2015, SCIENCE;Donaldson D, 2016, THE JOURNAL OF ECONOMIC PERSPECTIVES;Kleinberg J, 2015, AMERICAN ECONOMIC REVIEW;Einav L, 2014, SCIENCE;Bekker P, 1994, ECONOMETRICA;Angrist J, 1995, JOURNAL OF BUSINESS AND ECONOMIC STATISTICS;Lobell D, 2012, FIELD CROPS RESEARCH;Angrist J, 1999, JOURNAL OF APPLIED ECONOMETRICS;Chalfin A, 2016, AMERICAN ECONOMIC REVIEW;Leeb H, 2006, THE ANNALS OF STATISTICS;Vaart A, 2006, STATISTICS & DECISIONS;Carrasco M, 2012, JOURNAL OF ECONOMETRICS;Leeb H, 2007, ECONOMETRIC THEORY;Blumenstock J, 2016, SCIENCE;Imai K, 2010, POLITICAL ANALYSIS;Hansen C, 2014, JOURNAL OF ECONOMETRICS;Athey S, 2015, ;Chandler D, 
2011, AMERICAN ECONOMIC REVIEW;McBride L, 2016, THE WORLD BANK ECONOMIC REVIEW;Abelson B, 2014, ;Bekker P, 1992, REPEC: RESEARCH PAPERS IN ECONOMICS",,,OPENALEX,"Mullainathan S, 2017, THE JOURNAL OF ECONOMIC PERSPECTIVES","Mullainathan S, 2017, THE JOURNAL OF ECONOMIC PERSPECTIVES" +https://openalex.org/W1494192115,10.1038/nature14541,Probabilistic machine learning and artificial intelligence,2015,en,review,1978,NATURE,Nature,Zoubin Ghahramani,Zoubin Ghahramani,"Department of Engineering, University of Cambridge, Trumpington Street, Cambridge CB2 1PZ, UK;Department of Engineering, University of Cambridge, , Trumpington Street, Cambridge CB2 1PZ, UK","Zoubin Ghahramani (corresponding author), Department of Engineering, University of Cambridge, Trumpington Street, Cambridge CB2 1PZ, UK; Department of Engineering, University of Cambridge, , Trumpington Street, Cambridge CB2 1PZ, UK",,521,7553,452,459,Artificial intelligence;Probabilistic logic;Computer science;Machine learning;Field (mathematics);Principal (computer security);Bayesian probability;Statistical model;Cognitive robotics;Robot,GB,"Shannon C, 1948, BELL SYSTEM TECHNICAL JOURNAL;Nasrabadi N, 2007, JOURNAL OF ELECTRONIC IMAGING;Gelman A, 1995, ;Rasmussen C, 2005, THE MIT PRESS EBOOKS;Murphy K, 2012, ;Jones D, 1998, JOURNAL OF GLOBAL OPTIMIZATION;Doucet A, 2001, ;Koller D, 2009, ;Lunn D, 2000, STATISTICS AND COMPUTING;1967-2016 M, 2004, KYBERNETES;Ferguson T, 1973, THE ANNALS OF STATISTICS;Jaynes E, 2003, ;Neal R, 1996, LECTURE NOTES IN STATISTICS;Jordan M, 1999, MACHINE LEARNING;, 2018, THE MIT PRESS EBOOKS;Wolpert D, 1995, SCIENCE;Sammut C, 2010, ;Brooks S, 2011, ;Schmidt M, 2009, SCIENCE;Robbins H, 1952, BULLETIN OF THE AMERICAN MATHEMATICAL SOCIETY;Finetti B, 1937, FRENCH DIGITAL MATHEMATICS LIBRARY (NUMDAM);Girolami M, 2011, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Knill D, 1996, CAMBRIDGE UNIVERSITY PRESS EBOOKS;N. 
K, 1992, ELSEVIER EBOOKS;Marjoram P, 2003, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Kushner H, 1964, JOURNAL OF BASIC ENGINEERING;Neal R, 2011, ;Skilling J, 1996, ;Jefferys W, 1992, AMERICAN SCIENTIST;, 2006, THE MIT PRESS EBOOKS;Gupta S, 1994, ;Hennig P, 2012, JOURNAL OF MACHINE LEARNING RESEARCH;Milch B, 2007, THE MIT PRESS EBOOKS;Mansinghka V, 2014, ARXIV (CORNELL UNIVERSITY);Pfeffer A, 2001, INTERNATIONAL JOINT CONFERENCE ON ARTIFICIAL INTELLIGENCE;Hjort N, 2010, ;Hjort N, 2010, ;Pfeffer A, 2011, LECTURE NOTES IN COMPUTER SCIENCE;Steinruecken C, 2015, ;Russell S, 1991, THE MIT PRESS EBOOKS;Neal R, 1992, ;Horn K, 2003, INTERNATIONAL JOURNAL OF APPROXIMATE REASONING;Wood F, 2011, COMMUNICATIONS OF THE ACM;Hand D, 1986, ADDISON-WESLEY LONGMAN PUBLISHING CO., INC. EBOOKS;Wolstenholme D, 1988, KNOWLEDGE-BASED SYSTEMS;Freer C, 2014, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Woudhuysen H, 1999, TLS, THE TIMES LITERARY SUPPLEMENT/TIMES LITERARY SUPPLEMENT ON CD-ROM/TLS. TIMES LITERARY SUPPLEMENT;Minka T, 2013, ARXIV (CORNELL UNIVERSITY);Sermanet P, 2013, ARXIV (CORNELL UNIVERSITY);Goodman N, 2012, ARXIV (CORNELL UNIVERSITY);Krizhevsky A, 2017, COMMUNICATIONS OF THE ACM;Rabiner L, 1989, PROCEEDINGS OF THE IEEE;Kumar D, 1995, CHOICE REVIEWS ONLINE;Nilsson N, 1996, ARTIFICIAL INTELLIGENCE;Hinton G, 2012, IEEE SIGNAL PROCESSING MAGAZINE;Thrun S, 2002, COMMUNICATIONS OF THE ACM;Teh Y, 2006, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;, 2007, CHOICE REVIEWS ONLINE;Tenenbaum J, 2011, SCIENCE;Hoffman M, 2013, JOURNAL OF MACHINE LEARNING RESEARCH;Thornton C, 2013, ;Deisenroth M, 2011, CAMBRIDGE UNIVERSITY ENGINEERING DEPARTMENT PUBLICATIONS DATABASE;Blumer A, 1987, INFORMATION PROCESSING LETTERS;Griffiths T, 2006, PSYCHOLOGICAL SCIENCE;King R, 2004, NATURE;Kemp C, 2006, ;Barnard G, 1962, ECONOMETRICA;Ziegel E, 1989, TECHNOMETRICS;Miller K, 2009, ;O’Hagan A, 1991, JOURNAL OF STATISTICAL PLANNING AND INFERENCE;Griffiths T, 2011, UWA PROFILES AND RESEARCH REPOSITORY (UNIVERSITY 
OF WESTERN AUSTRALIA);Denève S, 2007, NEURAL COMPUTATION;Lu C, 2015, PROCEEDINGS OF THE AAAI CONFERENCE ON ARTIFICIAL INTELLIGENCE;Medvedovic M, 2002, BIOINFORMATICS;Diaconis P, 1988, ;Bishop C, 2013, PHILOSOPHICAL TRANSACTIONS OF THE ROYAL SOCIETY A MATHEMATICAL PHYSICAL AND ENGINEERING SCIENCES;Marcus G, 2013, PSYCHOLOGICAL SCIENCE;Wingate D, 2011, INTERNATIONAL CONFERENCE ON ARTIFICIAL INTELLIGENCE AND STATISTICS;Jordan M, 2013, BERNOULLI;Houlsby N, 2012, ;Ghahramani Z, 2013, PHILOSOPHICAL TRANSACTIONS OF THE ROYAL SOCIETY A MATHEMATICAL PHYSICAL AND ENGINEERING SCIENCES;Fischer B, 2003, JOURNAL OF FUNCTIONAL PROGRAMMING;Koller D, 1997, ;Rasmussen C, 2009, IEEE/ACM TRANSACTIONS ON COMPUTATIONAL BIOLOGY AND BIOINFORMATICS;Goodman N, 2015, PSYCHOLOGICAL SCIENCE;Sutskever I, 2014, ARXIV (CORNELL UNIVERSITY);Brochu E, 2010, ARXIV (CORNELL UNIVERSITY);Snoek J, 2012, DIGITAL ACCESS TO SCHOLARSHIP AT HARVARD (DASH) (HARVARD UNIVERSITY);Wood F, 2014, OPEN COLLECTIONS;Hernández-Lobato J, 2014, ARXIV (CORNELL UNIVERSITY);Korattikara A, 2013, ARXIV (CORNELL UNIVERSITY);Lloyd J, 2014, ARXIV (CORNELL UNIVERSITY);Adams R, 2009, ARXIV (CORNELL UNIVERSITY);, 2004, CHOICE REVIEWS ONLINE;Jaynes E, 2003, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Ziegel E, 1989, TECHNOMETRICS;Cox R, 2001, JOHNS HOPKINS UNIVERSITY PRESS EBOOKS;Oksanen T, 2009, ;Hjort N, 2010, CAMBRIDGE UNIVERSITY PRESS EBOOKS;, 1992, CHOICE REVIEWS ONLINE;, 2013, ;Rasmussen C, 2022, DIRECTORY OF OPEN ACCESS BOOKS (OAPEN FOUNDATION)",,,OPENALEX,"Ghahramani Z, 2015, NATURE","Ghahramani Z, 2015, NATURE" +https://openalex.org/W2919115771,10.1038/nature14539,Deep learning,2015,en,review,81513,NATURE,Nature,Yann LeCun;Yoshua Bengio;Geoffrey E. Hinton,Yann LeCun;Yoshua Bengio;Geoffrey Hinton,"1] Facebook AI Research, 770 Broadway, New York, New York 10003 USA. 
[2] New York University, 715 Broadway, New York, New York 10003, USA;Facebook AI Research, 770 Broadway, New York, 10003, New York, USA;New York University, 715 Broadway, New York, 10003, New York, USA;Department of Computer Science and Operations Research Université de Montréal, Pavillon André-Aisenstadt, PO Box 6128 Centre-Ville STN Montréal, Quebec H3C 3J7, Canada;Department of Computer Science and Operations Research Université de Montréal, Pavillon André-Aisenstadt, PO Box 6128 Centre-Ville STN, Montréal, H3C 3J7, Quebec, Canada;1] Google, 1600 Amphitheatre Parkway, Mountain View, California 94043, USA. [2] Department of Computer Science, University of Toronto, 6 King's College Road, Toronto, Ontario M5S 3G4, Canada;Google, 1600 Amphitheatre Parkway, Mountain View, 94043, California, USA;Department of Computer Science, University of Toronto, 6 King's College Road, Toronto, M5S 3G4, Ontario, Canada","Yann LeCun (corresponding author), 1] Facebook AI Research, 770 Broadway, New York, New York 10003 USA. 
[2] New York University, 715 Broadway, New York, New York 10003, USA; Facebook AI Research, 770 Broadway, New York, 10003, New York, USA; New York University, 715 Broadway, New York, 10003, New York, USA",,521,7553,436,444,Computer science;Deep learning;Artificial intelligence;Abstraction;Representation (politics);Layer (electronics);Object (grammar);Backpropagation;Convolutional neural network;Feature learning;Pattern recognition (psychology);Speech recognition;Artificial neural network,US;CA,"Hochreiter S, 1997, NEURAL COMPUTATION;LeCun Y, 1998, PROCEEDINGS OF THE IEEE;Rumelhart D, 1986, NATURE;Mnih V, 2015, NATURE;Cho K, 2014, ;Hinton G, 2006, SCIENCE;Hinton G, 2006, NEURAL COMPUTATION;Hubel D, 1962, THE JOURNAL OF PHYSIOLOGY;Bengio Y, 2013, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Hinton G, 2012, IEEE SIGNAL PROCESSING MAGAZINE;Bengio Y, 1994, IEEE TRANSACTIONS ON NEURAL NETWORKS;Felleman D, 1991, CEREBRAL CORTEX;Vincent P, 2008, ;Bengio Y, 2009, NOW PUBLISHERS, INC. 
EBOOKS;, 2018, THE MIT PRESS EBOOKS;Goldman‐Rakic P, 1991, CEREBRAL CORTEX;Clarke M, 1974, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES A (GENERAL);Lawrence S, 1997, IEEE TRANSACTIONS ON NEURAL NETWORKS;Werbos P, 1974, MEDICAL ENTOMOLOGY AND ZOOLOGY;Farabet C, 2012, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Waibel A, 1989, IEEE TRANSACTIONS ON ACOUSTICS SPEECH AND SIGNAL PROCESSING;Mohamed A, 2011, IEEE TRANSACTIONS ON AUDIO SPEECH AND LANGUAGE PROCESSING;Xiong H, 2014, SCIENCE;Ma J, 2015, JOURNAL OF CHEMICAL INFORMATION AND MODELING;Hinton G, 1995, SCIENCE;Helmstaedter M, 2013, NATURE;Cireşan D, 2012, NEURAL NETWORKS;Cadieu C, 2014, PLOS COMPUTATIONAL BIOLOGY;Raina R, 2009, ;Rogers T, 2004, THE MIT PRESS EBOOKS;García C, 2004, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Schwenk H, 2006, COMPUTER SPEECH & LANGUAGE;Kavukcuoglu K, 2010, ;Leung M, 2014, BIOINFORMATICS;Turaga S, 2009, NEURAL COMPUTATION;Hadsell R, 2009, JOURNAL OF FIELD ROBOTICS;Feng N, 2005, IEEE TRANSACTIONS ON IMAGE PROCESSING;Bottou L, 2013, MACHINE LEARNING;Boser B, 1991, IEEE JOURNAL OF SOLID-STATE CIRCUITS;Farabet C, 2011, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Ciodaro T, 2012, JOURNAL OF PHYSICS CONFERENCE SERIES;Neumann B, 2001, K&UUMLNSTLICHE INTELL.;Ranzato M, 2013, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Nasr M, 2011, ;Xu K, 2015, ARXIV (CORNELL UNIVERSITY);Sermanet P, 2013, ARXIV (CORNELL UNIVERSITY);Dauphin Y, 2014, ARXIV (CORNELL UNIVERSITY)",,,OPENALEX,"LeCun Y, 2015, NATURE","LeCun Y, 2015, NATURE" +https://openalex.org/W2998506103,10.1016/j.ymssp.2019.106587,Applications of machine learning to machine fault diagnosis: A review and roadmap,2020,en,review,2687,MECHANICAL SYSTEMS AND SIGNAL PROCESSING,Mechanical Systems and Signal Processing,Yaguo Lei;Bin Yang;Xinwei Jiang;Feng Jia;Naipeng Li;Asoke K. Nandi,Yaguo Lei;Bin Yang;Xinwei Jiang;Feng Jia;Naipeng Li;Asoke K. 
Nandi,"Key Laboratory of Education Ministry for Modern Design and Rotor-Bearing System, Xi’an Jiaotong University, Xi’an 710049, China;Key Laboratory of Education Ministry for Modern Design and Rotor-Bearing System, Xi’an Jiaotong University, Xi’an 710049, China;Key Laboratory of Education Ministry for Modern Design and Rotor-Bearing System, Xi’an Jiaotong University, Xi’an 710049, China;Key Laboratory of Education Ministry for Modern Design and Rotor-Bearing System, Xi’an Jiaotong University, Xi’an 710049, China;Key Laboratory of Education Ministry for Modern Design and Rotor-Bearing System, Xi’an Jiaotong University, Xi’an 710049, China;Department of Electronic and Computer Engineering, Brunel University London, Uxbridge UB8 3PH, United Kingdom","Yaguo Lei (corresponding author), Key Laboratory of Education Ministry for Modern Design and Rotor-Bearing System, Xi’an Jiaotong University, Xi’an 710049, China",,138,,106587,106587,Artificial intelligence;Machine learning;Computer science;Bridge (graph theory);Engineering,CN;GB,"Rumelhart D, 1986, NATURE;Nair V, 2010, INTERNATIONAL CONFERENCE ON MACHINE LEARNING;Koller D, 2009, ;Platt J, 1998, THE MIT PRESS EBOOKS;Gao Z, 2015, IEEE TRANSACTIONS ON INDUSTRIAL ELECTRONICS;Kira K, 1992, ELSEVIER EBOOKS;Kononenko I, 1994, LECTURE NOTES IN COMPUTER SCIENCE;Yin S, 2014, IEEE TRANSACTIONS ON INDUSTRIAL ELECTRONICS;Yang Y, 2005, JOURNAL OF SOUND AND VIBRATION;Chen J, 2015, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Thrun S, 1998, ;Hall M, 1998, RESEARCH COMMONS (UNIVERSITY OF WAIKATO);Wang Y, 2015, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Tran V, 2014, EXPERT SYSTEMS WITH APPLICATIONS;Pandya D, 2013, EXPERT SYSTEMS WITH APPLICATIONS;Kothari S, 1993, ADVANCES IN COMPUTERS;Wu J, 2008, EXPERT SYSTEMS WITH APPLICATIONS;Li Y, 2015, MEASUREMENT;Yang J, 2006, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Lei Y, 2009, EXPERT SYSTEMS WITH APPLICATIONS;Zarei J, 2014, MECHATRONICS;Gryllias K, 2011, ENGINEERING APPLICATIONS OF ARTIFICIAL 
INTELLIGENCE;Yang B, 2004, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Yang B, 2003, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Zhang X, 2015, NEUROCOMPUTING;Kang M, 2014, IEEE TRANSACTIONS ON POWER ELECTRONICS;Boutros T, 2011, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Keskes H, 2015, IEEE TRANSACTIONS ON INDUSTRIAL INFORMATICS;Ziani R, 2014, JOURNAL OF INTELLIGENT MANUFACTURING;Boukra T, 2012, IEEE TRANSACTIONS ON INDUSTRIAL ELECTRONICS;Widodo A, 2009, MECHATRONICS;Liu H, 2022, INDUSTRIAL AND ENGINEERING APPLICATIONS OF ARTIFICIAL INTELLIGENCE AND EXPERT SYSTEMS;Zhang K, 2011, NEUROCOMPUTING;Hang J, 2015, FUZZY SETS AND SYSTEMS;Chen J, 2015, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Kurek J, 2009, NEURAL COMPUTING AND APPLICATIONS;Walker R, 2014, MECHANISM AND MACHINE THEORY;Li F, 2015, NEUROCOMPUTING;Dong S, 2013, MEASUREMENT;Geramifard O, 2013, ENGINEERING APPLICATIONS OF ARTIFICIAL INTELLIGENCE;Zhu K, 2015, PROCEEDINGS OF THE INSTITUTION OF MECHANICAL ENGINEERS PART C JOURNAL OF MECHANICAL ENGINEERING SCIENCE;Wang Z, 2013, JOURNAL OF AEROSPACE ENGINEERING;Gelgele H, 1998, JOURNAL OF INTELLIGENT MANUFACTURING;Arabacı H, 2009, NEURAL COMPUTING AND APPLICATIONS;Qiu J, 2015, JOURNAL OF NATURAL GAS SCIENCE AND ENGINEERING;Yoon J, 2015, IET SCIENCE MEASUREMENT & TECHNOLOGY;Zhong B, 1999, NEURAL COMPUTING AND APPLICATIONS;Xiao Z, 2015, MATHEMATICAL PROBLEMS IN ENGINEERING;Liu Q, 2003, NEURAL PROCESSING LETTERS;Krishnamurthi M, 1992, COMPUTERS & INDUSTRIAL ENGINEERING;Samanta B, 2003, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Chiang L, 2003, COMPUTERS & CHEMICAL ENGINEERING;Jack L, 2002, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Widodo A, 2008, EXPERT SYSTEMS WITH APPLICATIONS;Zhang X, 2013, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Ebrahimi B, 2013, IEEE TRANSACTIONS ON INDUSTRIAL ELECTRONICS;Shen C, 2013, MEASUREMENT;Lei Y, 2009, EXPERT SYSTEMS WITH APPLICATIONS;Saïdi L, 2014, ISA TRANSACTIONS;Ünal M, 2014, MEASUREMENT;Zhu K, 2013, MEASUREMENT;Amarnath M, 2012, 
MEASUREMENT;Jiang H, 2014, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Şahin S, 2011, EXPERT SYSTEMS WITH APPLICATIONS;He D, 2012, IEEE TRANSACTIONS ON INDUSTRIAL ELECTRONICS;Keskes H, 2013, ELECTRIC POWER SYSTEMS RESEARCH;Su Z, 2015, NEUROCOMPUTING;Li X, 2013, MEASUREMENT;Moosavi S, 2015, ELECTRIC POWER SYSTEMS RESEARCH;Wu S, 2013, ENTROPY;Tang X, 2010, KNOWLEDGE-BASED SYSTEMS;Yang D, 2002, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Jack L, 2001, PROCEEDINGS OF THE INSTITUTION OF MECHANICAL ENGINEERS PART C JOURNAL OF MECHANICAL ENGINEERING SCIENCE;Wuxing L, 2003, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Yuwono M, 2015, ENGINEERING APPLICATIONS OF ARTIFICIAL INTELLIGENCE;Li H, 2009, JOURNAL OF MECHANICAL SCIENCE AND TECHNOLOGY;Chen C, 2004, DIGITAL SIGNAL PROCESSING;Castejón C, 2009, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Vong C, 2011, EXPERT SYSTEMS WITH APPLICATIONS;Hao R, 2011, MEASUREMENT SCIENCE AND TECHNOLOGY;Kang M, 2013, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Wu J, 2009, EXPERT SYSTEMS WITH APPLICATIONS;He Y, 2013, INFORMATION SCIENCES;Wang G, 2007, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Gharavian M, 2013, NEUROCOMPUTING;Kuo R, 1995, ENGINEERING APPLICATIONS OF ARTIFICIAL INTELLIGENCE;Wu J, 2006, EXPERT SYSTEMS WITH APPLICATIONS;Cabal‐Yépez E, 2012, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Mohammed A, 2013, MECCANICA;Van M, 2015, PROCEEDINGS OF THE INSTITUTION OF MECHANICAL ENGINEERS PART C JOURNAL OF MECHANICAL ENGINEERING SCIENCE;Lin T, 2014, PROCEEDINGS OF THE INSTITUTION OF MECHANICAL ENGINEERS PART C JOURNAL OF MECHANICAL ENGINEERING SCIENCE;Lu P, 2001, JOURNAL OF ENGINEERING FOR GAS TURBINES AND POWER;Varma A, 1999, ENGINEERING APPLICATIONS OF ARTIFICIAL INTELLIGENCE;Sharkey A, 2000, NEURAL COMPUTING AND APPLICATIONS;Guo Q, 2006, DIGITAL SIGNAL PROCESSING;Vingerhoeds R, 1995, CONTROL ENGINEERING PRACTICE;Hajnayeb A, 2008, INDUSTRIAL LUBRICATION AND TRIBOLOGY;Wu H, 2004, AIRCRAFT ENGINEERING AND AEROSPACE TECHNOLOGY;Zhang C, 2010, ;White M, 1991, 
MEASUREMENT;Schmidhuber J, 2014, NEURAL NETWORKS;Bolón‐Canedo V, 2012, KNOWLEDGE AND INFORMATION SYSTEMS;Dai X, 2013, IEEE TRANSACTIONS ON INDUSTRIAL INFORMATICS;Lei Y, 2013, MEASUREMENT;Tamilselvan P, 2013, RELIABILITY ENGINEERING & SYSTEM SAFETY;Samanta B, 2003, ENGINEERING APPLICATIONS OF ARTIFICIAL INTELLIGENCE;Samanta B, 2003, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Lei Y, 2007, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Sugumaran V, 2006, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Abbasion S, 2007, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Yang Y, 2006, MEASUREMENT;Safizadeh M, 2013, INFORMATION FUSION;Widodo A, 2006, EXPERT SYSTEMS WITH APPLICATIONS;Widodo A, 2006, EXPERT SYSTEMS WITH APPLICATIONS;Wu S, 2012, ENTROPY;Sakthivel N, 2009, EXPERT SYSTEMS WITH APPLICATIONS;Jegadeeshwaran R, 2014, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Sheng-fa Y, 2005, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Wang Y, 2013, APPLIED ACOUSTICS;Hajnayeb A, 2011, EXPERT SYSTEMS WITH APPLICATIONS;Chen F, 2012, MEASUREMENT;Saravanan N, 2007, EXPERT SYSTEMS WITH APPLICATIONS;Yang D, 2015, MECHANISM AND MACHINE THEORY;Yang J, 2001, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Chen F, 2013, MEASUREMENT;Sugumaran V, 2007, EXPERT SYSTEMS WITH APPLICATIONS;Ebersbach S, 2006, EXPERT SYSTEMS WITH APPLICATIONS;Jiang L, 2013, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Wong M, 2005, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Rojas‐Domínguez A, 2005, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Salem S, 2012, ISA TRANSACTIONS;WU J, 2007, EXPERT SYSTEMS WITH APPLICATIONS;Lu W, 2013, JOURNAL OF SOUND AND VIBRATION;Widodo A, 2009, NONDESTRUCTIVE TESTING AND EVALUATION;Hernandez-Vargas M, 2014, COMPUTERS & ELECTRICAL ENGINEERING;Wu J, 2010, EXPERT SYSTEMS WITH APPLICATIONS;Wu J, 2009, EXPERT SYSTEMS WITH APPLICATIONS;Kang M, 2014, IEEE TRANSACTIONS ON MAGNETICS;Ao H, 2013, JOURNAL OF VIBRATION AND CONTROL;Vong C, 2011, ENGINEERING APPLICATIONS OF ARTIFICIAL INTELLIGENCE;Lei Y, 2009, JOURNAL OF VIBRATION AND 
ACOUSTICS;Barakat M, 2013, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Jiang L, 2012, JOURNAL OF VIBRATION AND CONTROL;Jena D, 2013, APPLIED ACOUSTICS;Xiao W, 2011, PROCEEDINGS OF THE INSTITUTION OF MECHANICAL ENGINEERS PART C JOURNAL OF MECHANICAL ENGINEERING SCIENCE;Ilott P, 1997, PROCEEDINGS OF THE INSTITUTION OF MECHANICAL ENGINEERS PART E JOURNAL OF PROCESS MECHANICAL ENGINEERING;Chen X, 2014, APPLIED MATHEMATICS AND COMPUTATION;Wu J, 2007, EXPERT SYSTEMS WITH APPLICATIONS;Yu J, 2015, PROCEEDINGS OF THE INSTITUTION OF MECHANICAL ENGINEERS PART C JOURNAL OF MECHANICAL ENGINEERING SCIENCE;Cortes C, 1995, MACHINE LEARNING;Quinlan J, 1992, ;淳司 柴, 2017, JOURNAL OF JAPAN SOCIETY FOR FUZZY THEORY AND INTELLIGENT INFORMATICS;Hinton G, 2006, SCIENCE;Cover T, 1967, IEEE TRANSACTIONS ON INFORMATION THEORY;Peng H, 2005, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Hinton G, 2002, NEURAL COMPUTATION;Tibshirani R, 2004, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Long M, 2013, ;Тихонов А, 1995, ;Dai W, 2007, ;Widodo A, 2007, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Liao S, 2004, EXPERT SYSTEMS WITH APPLICATIONS;Gao Z, 2015, IEEE TRANSACTIONS ON INDUSTRIAL ELECTRONICS;Naimi A, 2014, AMERICAN JOURNAL OF EPIDEMIOLOGY;Lei Y, 2010, EXPERT SYSTEMS WITH APPLICATIONS;Lei Y, 2009, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Sun W, 2006, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Muruganatham B, 2012, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Ebrahimi B, 2010, IEEE TRANSACTIONS ON POWER ELECTRONICS;Yang B, 2008, JOURNAL OF MECHANICAL SCIENCE AND TECHNOLOGY;Tang B, 2013, RENEWABLE ENERGY;Rafiee J, 2008, EXPERT SYSTEMS WITH APPLICATIONS;Li Z, 2012, MEASUREMENT;Ghate V, 2010, IEEE TRANSACTIONS ON INDUSTRIAL ELECTRONICS;Li N, 2011, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Jayaswal P, 2010, JOURNAL OF VIBRATION AND CONTROL;Yang B, 2005, JOURNAL OF MECHANICAL SCIENCE AND TECHNOLOGY;Tsoumas I, 2008, IEEE TRANSACTIONS ON ENERGY CONVERSION;Cerrada M, 
2015, SENSORS;Lee H, 2000, IEEE TRANSACTIONS ON POWER DELIVERY;Bordoloi D, 2014, MEASUREMENT;Xian G, 2009, EXPERT SYSTEMS WITH APPLICATIONS;Bacha K, 2012, INTERNATIONAL JOURNAL OF ELECTRICAL POWER & ENERGY SYSTEMS;Wang Y, 2011, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Knapp G, 1992, INTERNATIONAL JOURNAL OF PRODUCTION RESEARCH;Namdari M, 2014, ENGINEERING APPLICATIONS OF ARTIFICIAL INTELLIGENCE;Shahriar M, 2013, EURASIP JOURNAL ON IMAGE AND VIDEO PROCESSING;Barakat M, 2012, INTERNATIONAL JOURNAL OF MACHINE LEARNING AND CYBERNETICS;Becerra J, 2011, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;McCormick A, 1997, PROCEEDINGS OF THE INSTITUTION OF MECHANICAL ENGINEERS PART C JOURNAL OF MECHANICAL ENGINEERING SCIENCE;McCormick A, 1997, IEEE TRANSACTIONS ON NEURAL NETWORKS;Liu S, 2003, THE INTERNATIONAL JOURNAL OF ADVANCED MANUFACTURING TECHNOLOGY;Liu T, 2013, JOURNAL OF VIBRATION AND CONTROL;Almeida L, 2014, JOURNAL OF VIBRATION AND CONTROL;S V, 2012, PROCEEDINGS OF THE INSTITUTION OF MECHANICAL ENGINEERS PART J JOURNAL OF ENGINEERING TRIBOLOGY;Angeli C, 1999, EXPERT SYSTEMS;Chen H, 2013, DOAJ (DOAJ: DIRECTORY OF OPEN ACCESS JOURNALS);Shen Y, 2006, ;He K, 2016, ;Pan S, 2009, IEEE TRANSACTIONS ON KNOWLEDGE AND DATA ENGINEERING;, 2013, CHOICE REVIEWS ONLINE;Jia F, 2015, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;İnce T, 2016, IEEE TRANSACTIONS ON INDUSTRIAL ELECTRONICS;Lei Y, 2016, IEEE TRANSACTIONS ON INDUSTRIAL ELECTRONICS;Rai A, 2016, TRIBOLOGY INTERNATIONAL;Lü C, 2016, SIGNAL PROCESSING;Lu W, 2016, IEEE TRANSACTIONS ON INDUSTRIAL ELECTRONICS;Shao H, 2015, MEASUREMENT SCIENCE AND TECHNOLOGY;Li C, 2016, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Zheng J, 2016, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Li C, 2015, NEUROCOMPUTING;Liu H, 2016, SHOCK AND VIBRATION;Long M, 2016, IEEE TRANSACTIONS ON KNOWLEDGE AND DATA ENGINEERING;Yang T, 2016, IEEE TRANSACTIONS ON INSTRUMENTATION AND MEASUREMENT;Ayhan B, 2006, IEEE TRANSACTIONS ON INDUSTRIAL ELECTRONICS;Zhang X, 2015, KNOWLEDGE-BASED 
SYSTEMS;Yin J, 2016, ENGINEERING APPLICATIONS OF ARTIFICIAL INTELLIGENCE;Sadeghian A, 2009, IEEE TRANSACTIONS ON INSTRUMENTATION AND MEASUREMENT;Xie J, 2016, ;Zhou H, 2015, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Głowacz A, 2016, APPLIED ACOUSTICS;Malik H, 2016, IET RENEWABLE POWER GENERATION;Shen F, 2015, ;Asr M, 2016, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Goyal D, 2016, ARCHIVES OF COMPUTATIONAL METHODS IN ENGINEERING;Li Y, 2016, MECHANISM AND MACHINE THEORY;Yang Z, 2016, ENERGIES;Liu H, 2016, JOURNAL OF SOUND AND VIBRATION;Liu Y, 2015, NEUROCOMPUTING;Li Z, 2012, JOURNAL OF MECHANICAL SCIENCE AND TECHNOLOGY;Waqar T, 2016, MEASUREMENT;Abu-Mahfouz I, 2005, INTERNATIONAL JOURNAL OF GENERAL SYSTEMS;Kane P, 2016, JOURNAL OF LOW FREQUENCY NOISE, VIBRATION AND ACTIVE CONTROL;Chen J, 2016, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Dong S, 2015, JOURNAL OF THE BRAZILIAN SOCIETY OF MECHANICAL SCIENCES AND ENGINEERING;Sun J, 2014, TM - TECHNISCHES MESSEN;Palácios R, 2016, JOURNAL OF CONTROL AUTOMATION AND ELECTRICAL SYSTEMS;An X, 2016, TRANSACTIONS OF THE INSTITUTE OF MEASUREMENT AND CONTROL;Dong S, 2015, MEASUREMENT AND CONTROL;Khazaee M, 2016, STRUCTURAL HEALTH MONITORING;Wong P, 2016, PROCEEDINGS OF THE INSTITUTION OF MECHANICAL ENGINEERS PART C JOURNAL OF MECHANICAL ENGINEERING SCIENCE;Lee S, 2009, JOURNAL OF MECHANICAL SCIENCE AND TECHNOLOGY;Jiang X, 2015, JOURNAL OF VIBROENGINEERING;Heidari M, 2016, JOURNAL OF VIBROENGINEERING;Liu Z, 2012, PROCEEDINGS OF THE INSTITUTION OF MECHANICAL ENGINEERS PART C JOURNAL OF MECHANICAL ENGINEERING SCIENCE;Khazaee M, 2016, PROCEEDINGS OF THE INSTITUTION OF MECHANICAL ENGINEERS PART D JOURNAL OF AUTOMOBILE ENGINEERING;Jin X, 2014, JOURNAL OF VIBROENGINEERING;Goodfellow I, 2016, MIT PRESS EBOOKS;Zhāng W, 2017, SENSORS;Zhāng W, 2017, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Chen Z, 2017, IEEE TRANSACTIONS ON INSTRUMENTATION AND MEASUREMENT;Xia M, 2017, IEEE/ASME TRANSACTIONS ON MECHATRONICS;Jing L, 2017, MEASUREMENT;Shao H, 2017, 
MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Jia F, 2017, NEUROCOMPUTING;Ding X, 2017, IEEE TRANSACTIONS ON INSTRUMENTATION AND MEASUREMENT;Lü C, 2017, ADVANCED ENGINEERING INFORMATICS;Zhang R, 2017, IEEE ACCESS;Jing L, 2017, SENSORS;Shao H, 2017, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Wang Z, 2017, IEEE SENSORS JOURNAL;Liu R, 2016, IEEE TRANSACTIONS ON INDUSTRIAL INFORMATICS;Sun W, 2017, IEEE TRANSACTIONS ON INDUSTRIAL INFORMATICS;Shao H, 2017, ISA TRANSACTIONS;Li Y, 2017, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Eren L, 2017, MATHEMATICAL PROBLEMS IN ENGINEERING;Li S, 2017, SENSORS;Janssens O, 2017, IEEE/ASME TRANSACTIONS ON MECHATRONICS;Fuan W, 2017, MEASUREMENT SCIENCE AND TECHNOLOGY;Gangsar P, 2017, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Ahmed H, 2017, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Xia M, 2017, IET SCIENCE MEASUREMENT & TECHNOLOGY;Sun W, 2017, MATERIALS;Li C, 2017, IEEE ACCESS;Guo X, 2016, APPLIED SCIENCES;He J, 2017, SENSORS;Martínez-Morales J, 2016, ELECTRICAL ENGINEERING;Gao Z, 2017, NEUROCOMPUTING;Xing Z, 2017, JOURNAL OF MECHANICAL SCIENCE AND TECHNOLOGY;Cheng F, 2016, IEEE TRANSACTIONS ON INDUSTRY APPLICATIONS;Qu Y, 2017, APPLIED SCIENCES;Merainani B, 2017, JOURNAL OF VIBRATION AND CONTROL;Ma J, 2017, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Cai C, 2017, CLUSTER COMPUTING;Li Z, 2016, JOURNAL OF PROPULSION AND POWER;Yao B, 2017, IEEE ACCESS;Chen C, 2017, ;Islam M, 2017, THE JOURNAL OF THE ACOUSTICAL SOCIETY OF AMERICA;He X, 2016, MATHEMATICAL PROBLEMS IN ENGINEERING;Wu L, 2017, APPLIED SCIENCES;Jiang H, 2017, JOURNAL OF VIBROENGINEERING;Yan J, 2016, JOURNAL OF VIBROENGINEERING;Xu T, 2017, JOURNAL OF INTELLIGENT & FUZZY SYSTEMS;Wang J, 2016, ;Heidari M, 2017, JOURNAL OF VIBROENGINEERING;Gerdes M, 2016, EKSPLOATACJA I NIEZAWODNOSC - MAINTENANCE AND RELIABILITY;Wen L, 2017, IEEE TRANSACTIONS ON INDUSTRIAL ELECTRONICS;Liu R, 2018, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Khan S, 2018, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Wen L, 2017, IEEE 
TRANSACTIONS ON SYSTEMS MAN AND CYBERNETICS SYSTEMS;Li Y, 2018, PATTERN RECOGNITION;Liu H, 2018, ISA TRANSACTIONS;Jia F, 2018, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Shao H, 2017, IEEE TRANSACTIONS ON INDUSTRIAL ELECTRONICS;Cao P, 2018, IEEE ACCESS;Shao H, 2017, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Sun J, 2017, IEEE TRANSACTIONS ON INSTRUMENTATION AND MEASUREMENT;Verstraete D, 2017, SHOCK AND VIBRATION;Zhao M, 2017, IEEE TRANSACTIONS ON INDUSTRIAL ELECTRONICS;Shao H, 2017, KNOWLEDGE-BASED SYSTEMS;Ma M, 2018, IEEE TRANSACTIONS ON INDUSTRIAL INFORMATICS;Wang S, 2017, KNOWLEDGE-BASED SYSTEMS;Guo S, 2018, SENSORS;Zhang Y, 2018, JOURNAL OF MANUFACTURING SYSTEMS;Duan Z, 2018, THE INTERNATIONAL JOURNAL OF ADVANCED MANUFACTURING TECHNOLOGY;Li Y, 2017, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Li Y, 2018, JOURNAL OF SOUND AND VIBRATION;Islam M, 2018, RELIABILITY ENGINEERING & SYSTEM SAFETY;Shao S, 2017, CHINESE JOURNAL OF MECHANICAL ENGINEERING;Park S, 2018, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Venkateswara H, 2017, IEEE SIGNAL PROCESSING MAGAZINE;Berredjem T, 2018, EXPERT SYSTEMS WITH APPLICATIONS;Chen Y, 2018, NEUROCOMPUTING;Shao H, 2018, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Zhang X, 2017, NEUROCOMPUTING;Tang S, 2018, NEUROCOMPUTING;Zhu X, 2018, IEEE ACCESS;Praveenkumar T, 2017, MEASUREMENT;Yu D, 2018, INTERNATIONAL JOURNAL OF ELECTRICAL POWER & ENERGY SYSTEMS;Sohaib M, 2018, SHOCK AND VIBRATION;Gangsar P, 2018, JOURNAL OF DYNAMIC SYSTEMS MEASUREMENT AND CONTROL;Huang D, 2018, MICROELECTRONICS RELIABILITY;Naha A, 2017, IEEE TRANSACTIONS ON INSTRUMENTATION AND MEASUREMENT;Vanraj, 2017, STRUCTURAL HEALTH MONITORING;Zgarni S, 2018, ENGINEERING APPLICATIONS OF ARTIFICIAL INTELLIGENCE;Han D, 2017, ADVANCES IN MECHANICAL ENGINEERING;Dong S, 2017, JOURNAL OF LOW FREQUENCY NOISE, VIBRATION AND ACTIVE CONTROL;Duong B, 2018, SENSORS;Tang T, 2018, JOURNAL OF SOUND AND VIBRATION;Wang X, 2017, JOURNAL OF VIBROENGINEERING;Yu J, 2018, JOURNAL OF MECHANICAL SCIENCE AND 
TECHNOLOGY;Tyagi S, 2017, APPLIED ARTIFICIAL INTELLIGENCE;Yuan H, 2017, JOURNAL OF MECHANICAL SCIENCE AND TECHNOLOGY;Safizadeh M, 2017, INSIGHT - NON-DESTRUCTIVE TESTING AND CONDITION MONITORING;Jiang Q, 2017, JOURNAL OF MECHANICAL SCIENCE AND TECHNOLOGY;Jiang F, 2018, ADVANCES IN MECHANICAL ENGINEERING;Zhao R, 2018, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Shao S, 2018, IEEE TRANSACTIONS ON INDUSTRIAL INFORMATICS;Guo L, 2018, IEEE TRANSACTIONS ON INDUSTRIAL ELECTRONICS;Jiang G, 2018, IEEE TRANSACTIONS ON INDUSTRIAL ELECTRONICS;Hoang D, 2018, NEUROCOMPUTING;Stetco A, 2018, RENEWABLE ENERGY;Eren L, 2018, JOURNAL OF SIGNAL PROCESSING SYSTEMS;Li X, 2018, IEEE TRANSACTIONS ON INDUSTRIAL ELECTRONICS;Zhu Z, 2018, NEUROCOMPUTING;Li X, 2018, NEUROCOMPUTING;, 2017, ELSEVIER EBOOKS;Liu G, 2018, MATHEMATICAL PROBLEMS IN ENGINEERING;Meng Z, 2018, MEASUREMENT;Han T, 2018, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Zhao M, 2018, IEEE TRANSACTIONS ON INDUSTRIAL ELECTRONICS;Shen C, 2018, ENGINEERING APPLICATIONS OF ARTIFICIAL INTELLIGENCE;Guo Y, 2018, APPLIED ENERGY;Zhang B, 2018, IEEE ACCESS;Guo D, 2018, NEUROCOMPUTING;Li Y, 2018, JOURNAL OF SOUND AND VIBRATION;Han Y, 2018, MEASUREMENT;Mao W, 2018, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Jiao J, 2018, KNOWLEDGE-BASED SYSTEMS;Yao Y, 2018, APPLIED SCIENCES;Jafarian K, 2018, MEASUREMENT;Hasan M, 2018, APPLIED SCIENCES;Tong Z, 2018, IEEE ACCESS;Zhao X, 2018, NEUROCOMPUTING;Appana D, 2018, SOFT COMPUTING;Duan L, 2018, JOURNAL OF INTELLIGENT & FUZZY SYSTEMS;Jin Y, 2018, IEEE TRANSACTIONS ON INSTRUMENTATION AND MEASUREMENT;Xie J, 2018, IEEE ACCESS;Wang J, 2018, MEASUREMENT SCIENCE AND TECHNOLOGY;Ahmed H, 2018, IEEE ACCESS;Singh M, 2018, MEASUREMENT;Rapur J, 2018, JOURNAL OF THE BRAZILIAN SOCIETY OF MECHANICAL SCIENCES AND ENGINEERING;Zhuang Y, 2018, MEASUREMENT SCIENCE AND TECHNOLOGY;Rapur J, 2018, JOURNAL OF NONDESTRUCTIVE EVALUATION;Zabihihesari A, 2018, PROCEEDINGS OF THE INSTITUTION OF MECHANICAL ENGINEERS PART C JOURNAL OF 
MECHANICAL ENGINEERING SCIENCE;Yu J, 2018, JOURNAL OF SOUND AND VIBRATION;Zheng Y, 2019, SENSORS;Saufi M, 2018, MEASUREMENT SCIENCE AND TECHNOLOGY;Zhang M, 2018, IEEE TRANSACTIONS ON INSTRUMENTATION AND MEASUREMENT;Jiang F, 2018, APPLIED SCIENCES;Jiang H, 2018, JOURNAL OF INTELLIGENT & FUZZY SYSTEMS;Zhou P, 2018, APPLIED SCIENCES;Yu H, 2018, IEEE SIGNAL PROCESSING LETTERS;Liu L, 2018, JOURNAL OF SOUND AND VIBRATION;Xin Y, 2018, JOURNAL OF VIBROENGINEERING;Chen Z, 2018, JOURNAL OF INTELLIGENT & FUZZY SYSTEMS;Breiman L, 2001, MACHINE LEARNING;LeCun Y, 2015, NATURE;Cortes C, 1995, MACHINE LEARNING;Gu J, 2017, PATTERN RECOGNITION;Yang B, 2019, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Xu Y, 2019, IEEE ACCESS;Han T, 2018, KNOWLEDGE-BASED SYSTEMS;We Z, 2018, ISA TRANSACTIONS;Qiao Z, 2019, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Yang B, 2019, IEEE TRANSACTIONS ON INDUSTRIAL ELECTRONICS;Peng D, 2018, IEEE ACCESS;Zhao D, 2019, COMPUTERS IN INDUSTRY;Huang R, 2018, IEEE ACCESS;Cao X, 2019, COMPUTERS IN INDUSTRY;Islam M, 2019, COMPUTERS IN INDUSTRY;Wang X, 2019, IEEE TRANSACTIONS ON INDUSTRIAL INFORMATICS;Zheng H, 2019, IEEE TRANSACTIONS ON INDUSTRIAL ELECTRONICS;Liu X, 2019, SENSORS;Xu X, 2019, IEEE TRANSACTIONS ON INDUSTRIAL ELECTRONICS;Ma S, 2019, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Han Y, 2019, COMPUTERS IN INDUSTRY;Li X, 2019, APPLIED SCIENCES;Su L, 2019, IEEE TRANSACTIONS ON INDUSTRIAL INFORMATICS;Qian W, 2018, IEEE ACCESS;Guo S, 2018, SENSORS;Suh S, 2019, APPLIED SCIENCES;Tong Z, 2018, SHOCK AND VIBRATION;Chen H, 2013, SHOCK AND VIBRATION;Li J, 2019, SENSORS;Gangsar P, 2019, JOURNAL OF THE BRAZILIAN SOCIETY OF MECHANICAL SCIENCES AND ENGINEERING;Li Y, 2019, JOURNAL OF MECHANICAL SCIENCE AND TECHNOLOGY;Tang G, 2018, APPLIED SCIENCES;Xie Y, 2018, ;Pang B, 2018, ENTROPY;Yang B, 2018, 2018 INTERNATIONAL CONFERENCE ON SENSING,DIAGNOSTICS, PROGNOSTICS, AND CONTROL (SDPC);Qian W, 2018, ;Chen X, 2019, COMPLEXITY;Yu J, 2018, JOURNAL OF MECHANICAL SCIENCE AND TECHNOLOGY;Jia 
Y, 2018, SENSORS;Zhang X, 2019, JOURNAL OF MECHANICAL SCIENCE AND TECHNOLOGY;Chunfeng W, 2018, ",,,OPENALEX,"Lei Y, 2020, MECHANICAL SYSTEMS AND SIGNAL PROCESSING","Lei Y, 2020, MECHANICAL SYSTEMS AND SIGNAL PROCESSING" +https://openalex.org/W2791315675,10.1016/j.neucom.2017.11.077,Feature selection in machine learning: A new perspective,2018,en,article,2055,NEUROCOMPUTING,Neurocomputing,Jie Cai;Jiawei Luo;Shulin Wang;Sheng Yang,Jie Cai;Jiawei Luo;Shulin Wang;Sheng Yang,"College of Computer Science and Electronic Engineering, Hunan University, Changsha, Hunan, China;College of Computer Science and Electronic Engineering, Hunan University, Changsha, Hunan, China;College of Computer Science and Electronic Engineering, Hunan University, Changsha, Hunan, China;College of Computer Science and Electronic Engineering, Hunan University, Changsha, Hunan, China","Sheng Yang (corresponding author), College of Computer Science and Electronic Engineering, Hunan University, Changsha, Hunan, China",,300,,70,79,Feature selection;Machine learning;Computer science;Artificial intelligence;Cluster analysis;Feature (linguistics);Perspective (graphical);Selection (genetic algorithm);Unsupervised learning;Supervised learning;Feature learning;Data mining;Artificial neural network,CN,"1959- B, 1994, CHOICE REVIEWS ONLINE;Kira K, 1992, ELSEVIER EBOOKS;Kononenko I, 1994, LECTURE NOTES IN COMPUTER SCIENCE;Fayyad U, 1993, INTERNATIONAL JOINT CONFERENCE ON ARTIFICIAL INTELLIGENCE;Agrawal R, 1998, ;Liu H, 1998, ;Hall M, 2000, RESEARCH COMMONS (THE UNIVERSITY OF WAIKATO);, 2007, ;Zou Q, 2015, IEEE GEOSCIENCE AND REMOTE SENSING LETTERS;Lazar C, 2012, IEEE/ACM TRANSACTIONS ON COMPUTATIONAL BIOLOGY AND BIOINFORMATICS;Xue B, 2013, APPLIED SOFT COMPUTING;Langley P, 1995, MEDICAL ENTOMOLOGY AND ZOOLOGY;Japkowicz N, 2000, ;Zou Q, 2015, NEUROCOMPUTING;Rauber T, 2014, IEEE TRANSACTIONS ON INDUSTRIAL ELECTRONICS;Bolón‐Canedo V, 2015, KNOWLEDGE-BASED SYSTEMS;Opitz D, 1999, ;Chen Y, 2009, PATTERN RECOGNITION 
LETTERS;Deng J, 2011, ;Perkins S, 2003, ;Akadi A, 2010, KNOWLEDGE AND INFORMATION SYSTEMS;Wu X, 2010, INTERNATIONAL CONFERENCE ON MACHINE LEARNING;Yu K, 2014, ;Zhang K, 2011, NEUROCOMPUTING;Lu J, 2008, KNOWLEDGE-BASED SYSTEMS;Hsu W, 2003, INFORMATION SCIENCES;Li Y, 2015, LECTURE NOTES IN COMPUTER SCIENCE;Novovičová J, 2007, LECTURE NOTES IN COMPUTER SCIENCE;Liu H, 2011, ;Benabdeslem K, 2011, LECTURE NOTES IN COMPUTER SCIENCE;Zhang Y, 2012, NEUROCOMPUTING;Goltsev A, 2011, NEURAL NETWORKS;Shen L, 2006, EURASIP JOURNAL ON ADVANCES IN SIGNAL PROCESSING;Jie B, 2013, LECTURE NOTES IN COMPUTER SCIENCE;Liu Q, 2014, ;Vaithyanathan S, 1999, INTERNATIONAL CONFERENCE ON MACHINE LEARNING;Dash M, 1999, INTERNATIONAL CONFERENCE ON MANAGEMENT OF DATA;Kwak N, 2003, ;Wang L, 2005, LECTURE NOTES IN COMPUTER SCIENCE;Gao Y, 2012, MOLECULAR BIOSYSTEMS;Ienco D, 2008, ;Doquire G, 2011, LECTURE NOTES IN COMPUTER SCIENCE;Deepthi P, 2015, ;Mirkin B, 1999, MACHINE LEARNING;Cheng H, 2011, COMMUNICATIONS IN COMPUTER AND INFORMATION SCIENCE;Liu H, 2004, IN SILICO BIOLOGY;Yang W, 2011, ;, 2007, ;Langley P, 1994, ;Bonev B, 2010, TESIS DOCTORALS EN XARXA (CONSORCI DE SERVEIS UNIVERSITARIS DE CATALUNYA);Efron B, 2004, THE ANNALS OF STATISTICS;Zou H, 2006, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Strawderman R, 2000, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Blum A, 1997, ARTIFICIAL INTELLIGENCE;Liu H, 2005, IEEE TRANSACTIONS ON KNOWLEDGE AND DATA ENGINEERING;Marshall J, 1995, NEURAL NETWORKS;Swets D, 1996, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Mitra P, 2002, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Weinberger K, 2009, ;Dy J, 2004, ;Świniarski R, 2002, PATTERN RECOGNITION LETTERS;Witten D, 2010, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Zhang J, 2006, APPLIED MATHEMATICS AND COMPUTATION;Song Q, 2011, IEEE TRANSACTIONS ON KNOWLEDGE AND DATA ENGINEERING;Chuang L, 2007, COMPUTATIONAL BIOLOGY AND CHEMISTRY;Amiri F, 2011, JOURNAL OF NETWORK 
AND COMPUTER APPLICATIONS;Huang D, 1999, INTERNATIONAL JOURNAL OF PATTERN RECOGNITION AND ARTIFICIAL INTELLIGENCE;Stein G, 2005, ;Li B, 2008, PATTERN RECOGNITION;Zhang D, 2007, PATTERN RECOGNITION;Sotoca J, 2009, PATTERN RECOGNITION;Zhao J, 2008, NEUROCOMPUTING;Bolón‐Canedo V, 2011, PATTERN RECOGNITION;Zhang L, 2014, PATTERN RECOGNITION;Huang D, 2013, IEEE/ACM TRANSACTIONS ON COMPUTATIONAL BIOLOGY AND BIOINFORMATICS;Figueredo J, 2013, EXPERT SYSTEMS WITH APPLICATIONS;Rashedi E, 2012, KNOWLEDGE-BASED SYSTEMS;Alazab A, 2012, ;Shu W, 2014, PATTERN RECOGNITION;Liu Y, 2012, NEUROCOMPUTING;Sun Z, 2005, DIGITAL SIGNAL PROCESSING;Zhao X, 2013, PROCEDIA COMPUTER SCIENCE;Khotanzad A, 1990, PATTERN RECOGNITION;Chiang L, 2003, JOURNAL OF PROCESS CONTROL;Ahn H, 2007, COMPUTATIONAL STATISTICS & DATA ANALYSIS;Liu H, 2010, PATTERN RECOGNITION;Kalakech M, 2010, PATTERN RECOGNITION LETTERS;Bock K, 2010, COMPUTATIONAL STATISTICS & DATA ANALYSIS;Herman G, 2013, PATTERN RECOGNITION;Li H, 2014, APPLIED SOFT COMPUTING;Li G, 2008, ;Zhang Y, 2011, EXPERT SYSTEMS WITH APPLICATIONS;Khoshgoftaar T, 2012, ;Álvarez-Estévez D, 2010, EXPERT SYSTEMS WITH APPLICATIONS;Vasconcelos M, 2009, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Wang J, 2010, PROCEDIA COMPUTER SCIENCE;Chen L, 2010, ;Gibert J, 2012, PATTERN RECOGNITION LETTERS;Lv S, 2013, ;Furlanello C, 2005, IEEE/ACM TRANSACTIONS ON COMPUTATIONAL BIOLOGY AND BIOINFORMATICS;Tibshirani R, 1996, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Han J, 2012, CHOICE REVIEWS ONLINE;Quinlan J, 1992, ;Zou H, 2005, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Peng H, 2005, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Guyon I, 2002, MACHINE LEARNING;Ho T, 1998, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Rodríguez Á, 2014, SCIENCE;Lewis D, 2004, GOLDSMITHS (UNIVERSITY OF LONDON);Battiti R, 1994, IEEE TRANSACTIONS ON NEURAL NETWORKS;Yu L, 2004, 
;He X, 2005, ;Vergara J, 2013, NEURAL COMPUTING AND APPLICATIONS;Fleuret F, 2004, ;Saeys Y, 2008, LECTURE NOTES IN COMPUTER SCIENCE;Abeel T, 2009, BIOINFORMATICS;Dhillon I, 2003, ;Liu H, 2002, PUBMED;Huang D, 2008, IEEE TRANSACTIONS ON NEURAL NETWORKS;Lee C, 2005, INFORMATION PROCESSING & MANAGEMENT;Yang H, 1999, ;Lin W, 2012, BRIEFINGS IN BIOINFORMATICS;Wu X, 2012, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Huang D, 2006, BIOINFORMATICS;Jing L, 2003, ;Chun-Hou Z, 2009, IEEE TRANSACTIONS ON INFORMATION TECHNOLOGY IN BIOMEDICINE;Au W, 2005, IEEE/ACM TRANSACTIONS ON COMPUTATIONAL BIOLOGY AND BIOINFORMATICS;Tan M, 2014, DR-NTU (NANYANG TECHNOLOGICAL UNIVERSITY);Zhou J, 2006, SCHOLARLYCOMMONS (UNIVERSITY OF PENNSYLVANIA);Derrac J, 2011, INFORMATION SCIENCES;Javed K, 2011, IEEE TRANSACTIONS ON KNOWLEDGE AND DATA ENGINEERING;Sun Z, 2005, IEEE GEOSCIENCE AND REMOTE SENSING LETTERS;Crowley J, 1984, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Devaney M, 1997, ;Novovičová J, 1996, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Hastie T, 2004, BIOSTATISTICS;Benabdeslem K, 2013, IEEE TRANSACTIONS ON KNOWLEDGE AND DATA ENGINEERING;Doquire G, 2013, NEUROCOMPUTING;Choi J, 2010, IEEE TRANSACTIONS ON IMAGE PROCESSING;Lane M, 2014, LECTURE NOTES IN COMPUTER SCIENCE;Swets D, 2002, ;Landeghem S, 2010, BIOINFORMATICS;Vasconcelos N, 2003, ;Michalak K, 2006, DIGITAL LIBRARY OF ZIELONA GORA (UNIVERSITY OF ZIELONA GÓRA);Saxena A, 2010, FUZZY INFORMATION AND ENGINEERING;Cheng H, 2008, ;Yang M, 2010, ;Padungweang P, 2009, ;Vandenbroucke N, 2000, ;Breiman L, 2001, MACHINE LEARNING;GuyonIsabelle, 2003, JOURNAL OF MACHINE LEARNING RESEARCH;Pujol J, 2005, INTERNATIONAL JOINT CONFERENCE ON ARTIFICIAL INTELLIGENCE;Beier P, 2001, JOURNAL OF WILDLIFE MANAGEMENT;Tang J, 2014, ;, 2002, ;Agrawal R, 1998, ACM SIGMOD RECORD;Ang J, 2015, IEEE/ACM TRANSACTIONS ON COMPUTATIONAL BIOLOGY AND BIOINFORMATICS;FleuretFrançois, 2004, JOURNAL OF MACHINE 
LEARNING RESEARCH;, 2005, PROCEEDINGS. 2005 IEEE INTERNATIONAL JOINT CONFERENCE ON NEURAL NETWORKS, 2005.;Brazdil P, 1993, ;Sheikhpour R, 2016, PATTERN RECOGNITION;, 2004, ;Alelyani S, 2018, ;Wang L, 2016, METHODS;, 2001, ;Sleeman D, 1992, ;Tsai T, 2011, BIOINFORMATICS AND BIOMEDICINE;Hastie T, 2004, BIOSTATISTICS;Wang Y, 2016, PATTERN RECOGNITION;Ruangkanokmas P, 2016, ;Bakhtiari A, 2011, ;Zhong J, 2015, TSINGHUA SCIENCE & TECHNOLOGY;Martín-Smith P, 2017, NEUROCOMPUTING;Singh V, 2016, ;Wang S, 2012, BMC BIOINFORMATICS;Huang J, 2007, ;Liu Y, 2010, ;Mirzaei A, 2017, NEUROCOMPUTING;Yang X, 2016, MULTIMEDIA TOOLS AND APPLICATIONS;Zhou P, 2015, ;Oberlander J, 1991, CONFERENCE COGNITIVE SCIENCE;Vasconcelos N, 2004, ;Alibeigi M, 2011, ;Antoniades A, 2016, ;Yu Y, 2011, COMMUNICATIONS IN COMPUTER AND INFORMATION SCIENCE;, 2008, ;, 2010, ;, 2013, MULTIMEDIA TOOLS AND APPLICATIONS;, 2004, ;, 2009, ;Chid A, 2007, ;, 2002, ;Song L, 2007, ARXIV.ORG;, 2015, ",,,OPENALEX,"Cai J, 2018, NEUROCOMPUTING","Cai J, 2018, NEUROCOMPUTING" +https://openalex.org/W2910705748,10.1073/pnas.1900654116,"Definitions, methods, and applications in interpretable machine learning",2019,en,article,2070,PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES,Proceedings of the National Academy of Sciences,William J. Murdoch;Chandan Singh;Karl Kumbier;Reza Abbasi-Asl;Bin Yu,W. 
James Murdoch;Chandan Singh;Karl Kumbier;Reza Abbasi-Asl;Bin Yu,"Statistics Department, University of California, Berkeley, CA 94720;Electrical Engineering and Computer Science Department, University of California, Berkeley, CA 94720;Statistics Department, University of California, Berkeley, CA 94720;Allen Institute for Brain Science, Seattle, WA 98109;Department of Neurology, University of California, San Francisco, CA 94158;Electrical Engineering and Computer Science Department, University of California, Berkeley, CA 94720;Department of Neurology, University of California, San Francisco, CA 94158 and;Electrical Engineering and Computer Science Department, University of California, Berkeley, CA 94720;Statistics Department, University of California, Berkeley, CA 94720","Bin Yu (corresponding author), Electrical Engineering and Computer Science Department, University of California, Berkeley, CA 94720;; Statistics Department, University of California, Berkeley, CA 94720;","Machine-learning models have demonstrated great success in learning complex patterns that enable them to make predictions about unobserved data. In addition to using models for prediction, the ability to interpret what a model has learned is receiving an increasing amount of attention. However, this increased focus has led to considerable confusion about the notion of interpretability. In particular, it is unclear how the wide array of proposed interpretation methods are related and what common concepts can be used to evaluate them. We aim to address these concerns by defining interpretability in the context of machine learning and introducing the predictive, descriptive, relevant (PDR) framework for discussing interpretations. The PDR framework provides 3 overarching desiderata for evaluation: predictive accuracy, descriptive accuracy, and relevancy, with relevancy judged relative to a human audience. 
Moreover, to help manage the deluge of interpretation methods, we introduce a categorization of existing techniques into model-based and post hoc categories, with subgroups including sparsity, modularity, and simulatability. To demonstrate how practitioners can use the PDR framework to evaluate and understand interpretations, we provide numerous real-world examples. These examples highlight the often underappreciated role played by human audiences in discussions of interpretability. Finally, based on our framework, we discuss limitations of existing methods and directions for future work. We hope that this work will provide a common vocabulary that will make it easier for both practitioners and researchers to discuss and choose from the full range of interpretation methods.",116,44,22071,22080,Interpretability;Computer science;Artificial intelligence;Categorization;Machine learning;Context (archaeology);Interpretation (philosophy);Modularity (biology);Vocabulary;Data science;Focus (optics),US,"Tibshirani R, 1996, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Dalal N, 2005, ;Gordon A, 1984, BIOMETRICS;Wickham H, 2009, ;Zeiler M, 2014, LECTURE NOTES IN COMPUTER SCIENCE;Ribeiro M, 2016, ;Ginestet C, 2011, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES A (STATISTICS IN SOCIETY);Burnham K, 2004, SOCIOLOGICAL METHODS & RESEARCH;McKinney W, 2010, PROCEEDINGS OF THE PYTHON IN SCIENCE CONFERENCES;Bell A, 1995, NEURAL COMPUTATION;Goodall C, 1988, TECHNOMETRICS;Koller D, 2009, ;Hotelling H, 1936, BIOMETRIKA;boyd d, 2012, INFORMATION COMMUNICATION & SOCIETY;Pati Y, 2002, ;Breiman L, 2001, STATISTICAL SCIENCE;Pérez F, 2007, COMPUTING IN SCIENCE & ENGINEERING;Akaike H, 1987, SPRINGER SERIES IN STATISTICS;Ruppert D, 1987, TECHNOMETRICS;Olshausen B, 1997, VISION RESEARCH;Hastie T, 1986, STATISTICAL SCIENCE;Dwork C, 2012, ;Strobl C, 2008, BMC BIOINFORMATICS;Altmann A, 2010, BIOINFORMATICS;Box G, 1976, JOURNAL OF THE AMERICAN STATISTICAL 
ASSOCIATION;Caruana R, 2015, ;Keil F, 2005, ANNUAL REVIEW OF PSYCHOLOGY;Hastie T, 1995, STATISTICAL METHODS IN MEDICAL RESEARCH;Friedman J, 2008, THE ANNALS OF APPLIED STATISTICS;Olden J, 2004, ECOLOGICAL MODELLING;Huang C, 2006, EXPERT SYSTEMS WITH APPLICATIONS;Letham B, 2015, THE ANNALS OF APPLIED STATISTICS;Lombrozo T, 2006, TRENDS IN COGNITIVE SCIENCES;Schulz S, 2005, ;Craven M, 1995, NEURAL INFORMATION PROCESSING SYSTEMS;Roe A, 2012, NEURON;Zaidan O, 2007, ;Freedman D, 1989, MATHEMATICAL SOCIAL SCIENCES;Amaratunga D, 2008, BIOINFORMATICS;Hooker G, 2007, JOURNAL OF COMPUTATIONAL AND GRAPHICAL STATISTICS;Wu S, 2016, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Lim C, 2015, JOURNAL OF COMPUTATIONAL AND GRAPHICAL STATISTICS;Caruana R, 1999, PUBMED;Brennan T, 2013, CRIMINOLOGY & PUBLIC POLICY;Shi T, 2008, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Springenberg J, 2014, ARXIV (CORNELL UNIVERSITY);Karpathy A, 2015, ARXIV (CORNELL UNIVERSITY);Baehrens D, 2009, ARXIV.ORG;Wei D, 2015, ARXIV (CORNELL UNIVERSITY);Barter R, 2015, ARXIV (CORNELL UNIVERSITY);Breiman L, 2001, MACHINE LEARNING;Flach P, 2015, ;Litjens G, 2017, MEDICAL IMAGE ANALYSIS;Teymur O, 2016, ;Charles-Maxime G, 2024, DAGSTUHL RESEARCH ONLINE PUBLICATION SERVER;Rudin C, 2019, NATURE MACHINE INTELLIGENCE;Ribeiro M, 2016, ;Guidotti R, 2019, ISTI OPEN PORTAL;Kluyver T, 2016, IOS PRESS EBOOKS;Goodman B, 2017, AI MAGAZINE;Jennison C, 1987, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES A (GENERAL);Angermueller C, 2016, MOLECULAR SYSTEMS BIOLOGY;Datta A, 2016, ;Chakraborty S, 2017, ;Ancona M, 2018, REPOSITORY FOR PUBLICATIONS AND RESEARCH DATA (ETH ZURICH);Strobelt H, 2017, IEEE TRANSACTIONS ON VISUALIZATION AND COMPUTER GRAPHICS;Kim J, 2017, ;Basu S, 2018, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Zhang Q, 2018, PROCEEDINGS OF THE AAAI CONFERENCE ON ARTIFICIAL INTELLIGENCE;Guidotti R, 2018, ACM COMPUTING SURVEYS;Vu M, 2018, JOURNAL OF NEUROSCIENCE;Rudin C, 2018, ARXIV (CORNELL UNIVERSITY);Gilpin 
L, 2018, ARXIV (CORNELL UNIVERSITY);Doshi‐Velez F, 2017, ARXIV (CORNELL UNIVERSITY);Selvaraju R, 2016, ARXIV (CORNELL UNIVERSITY);Strobelt H, 2016, REPEC: RESEARCH PAPERS IN ECONOMICS;Abbasi-Asl R, 2017, ARXIV (CORNELL UNIVERSITY);Pimentel H, 2018, QUANTITATIVE BIOLOGY;Lundberg S, 2017, ARXIV (CORNELL UNIVERSITY);Sundararajan M, 2017, ARXIV (CORNELL UNIVERSITY);Hardt M, 2016, ARXIV (CORNELL UNIVERSITY);Koh P, 2017, ARXIV (CORNELL UNIVERSITY);Adebayo J, 2018, ARXIV (CORNELL UNIVERSITY);Shrikumar A, 2016, ARXIV (CORNELL UNIVERSITY);Lundberg S, 2018, ARXIV (CORNELL UNIVERSITY);Zintgraf L, 2017, ARXIV (CORNELL UNIVERSITY);Papernot N, 2018, ARXIV (CORNELL UNIVERSITY);Frosst N, 2017, ARXIV (CORNELL UNIVERSITY);Murdoch W, 2018, ARXIV (CORNELL UNIVERSITY);Dabkowski P, 2017, ARXIV (CORNELL UNIVERSITY);Singh C, 2018, ARXIV (CORNELL UNIVERSITY);Murdoch W, 2017, ARXIV (CORNELL UNIVERSITY);Nie W, 2018, ARXIV (CORNELL UNIVERSITY);Kim J, 2017, ARXIV (CORNELL UNIVERSITY);Abbasi-Asl R, 2018, BIORXIV (COLD SPRING HARBOR LABORATORY);Zhang Q, 2017, ARXIV (CORNELL UNIVERSITY);Kumbier K, 2018, BIORXIV (COLD SPRING HARBOR LABORATORY);Tsang M, 2018, ARXIV (CORNELL UNIVERSITY);Kumbier K, 2018, ARXIV (CORNELL UNIVERSITY);Devlin S, 2019, ARXIV (CORNELL UNIVERSITY);Wickham H, 2016, USE R!;Ryzin J, 1986, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;, 1996, COMPUTERS & MATHEMATICS WITH APPLICATIONS;Jolliffe I, 1986, SPRINGER SERIES IN STATISTICS;Hotelling H, 1936, BIOMETRIKA;Olah C, 2017, DISTILL;A. 
R, 2016, MAX PLANCK DIGITAL LIBRARY;Box G, 1976, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Freedman D, 1991, SOCIOLOGICAL METHODOLOGY;Bell A, 1999, THE MIT PRESS EBOOKS;Yu B, 2013, BERNOULLI;Murdoch W, 2018, ARXIV (CORNELL UNIVERSITY);Singh C, 2018, ARXIV (CORNELL UNIVERSITY);Olden J, 2004, ECOLOGICAL MODELLING;, 1993, ;Wickham H, 2016, ;Varoquaux G, 2010, PROCEEDINGS OF THE PYTHON IN SCIENCE CONFERENCES",,,OPENALEX,"Murdoch W, 2019, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES","Murdoch W, 2019, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES" +https://openalex.org/W2342408547,10.1109/comst.2015.2494502,A Survey of Data Mining and Machine Learning Methods for Cyber Security Intrusion Detection,2015,en,article,3068,IEEE COMMUNICATIONS SURVEYS & TUTORIALS,IEEE Communications Surveys & Tutorials,Anna L. Buczak;Erhan Guven,Anna L. Buczak;Erhan Guven,"The Johns Hopkins University Applied Physics Laboratory, Laurel, MD, USA;The Johns Hopkins University Applied Physics Laboratory, Laurel, MD, USA",,"This survey paper describes a focused literature survey of machine learning (ML) and data mining (DM) methods for cyber analytics in support of intrusion detection. Short tutorial descriptions of each ML/DM method are provided. Based on the number of citations or the relevance of an emerging method, papers representing each method were identified, read, and summarized. Because data are so important in ML/DM approaches, some well-known cyber data sets used in ML/DM are described. 
The complexity of ML/DM algorithms is addressed, discussion of challenges for using ML/DM for cyber security is presented, and some recommendations on when to use a given method are provided.",18,2,1153,1176,Computer science;Intrusion detection system;Data mining;Relevance (law);Analytics;Intrusion;Data science;Machine learning,US,"Dempster A, 1977, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Witten I, 2011, ELSEVIER EBOOKS;Ester M, 1996, ;Koza J, 1992, MEDICAL ENTOMOLOGY AND ZOOLOGY;Rosenblatt F, 1958, PSYCHOLOGICAL REVIEW;Sarle W, 1990, TECHNOMETRICS;Salzberg S, 1994, ;Jain A, 1996, COMPUTER;Jensen F, 2007, INFORMATION SCIENCE AND STATISTICS;Cohen W, 1995, ELSEVIER EBOOKS;Goldberg D, 1988, MACHINE LEARNING;Beyer H, 2002, NATURAL COMPUTING;Agrawal R, 1996, KNOWLEDGE DISCOVERY AND DATA MINING;Fayyad U, 1996, COMMUNICATIONS OF THE ACM;Huang G, 2011, INTERNATIONAL JOURNAL OF MACHINE LEARNING AND CYBERNETICS;Heckerman D, 2008, STUDIES IN COMPUTATIONAL INTELLIGENCE;Bhuyan M, 2013, IEEE COMMUNICATIONS SURVEYS & TUTORIALS;Lee W, 2003, ;Moiseev S, 1981, PHYSICA D NONLINEAR PHENOMENA;Baum L, 1967, BULLETIN OF THE AMERICAN MATHEMATICAL SOCIETY;Lippmann R, 2002, ;Lippmann R, 2000, COMPUTER NETWORKS;Zhang Y, 2003, WIRELESS NETWORKS;Kuok C, 1998, ACM SIGMOD RECORD;Bilge L, 2011, ;Amiri F, 2011, JOURNAL OF NETWORK AND COMPUTER APPLICATIONS;Shon T, 2007, INFORMATION SCIENCES;Lyon G, 2009, MEDICAL ENTOMOLOGY AND ZOOLOGY;Cannady J, 1998, NSUWORKS (NOVA SOUTHEASTERN UNIVERSITY);Li Y, 2011, EXPERT SYSTEMS WITH APPLICATIONS;Leung K, 2005, ;Livadas C, 2006, ;Lippmann R, 2000, COMPUTER NETWORKS;Kruegel C, 2003, LECTURE NOTES IN COMPUTER SCIENCE;Ahsan K, 2002, ;Sequeira K, 2002, ;Oliveto P, 2007, INTERNATIONAL JOURNAL OF AUTOMATION AND COMPUTING;Hu W, 2003, INTERNATIONAL CONFERENCE ON MACHINE LEARNING AND APPLICATIONS;Hu Y, 2004, ;Ariu D, 2011, COMPUTERS & SECURITY;Joshi S, 2005, ;Wagner C, 2011, LECTURE NOTES IN COMPUTER SCIENCE;Jemili F, 2007, ;Årnes A, 
2006, LECTURE NOTES IN COMPUTER SCIENCE;Blowers M, 2013, ADVANCES IN INFORMATION SECURITY;Brahmi H, 2012, LECTURE NOTES IN COMPUTER SCIENCE;Graczyk M, 2009, LECTURE NOTES IN COMPUTER SCIENCE;Apiletti D, 2008, COMPUTER NETWORKS;Hendry G, 2008, PROCEEDINGS OF SPIE, THE INTERNATIONAL SOCIETY FOR OPTICAL ENGINEERING/PROCEEDINGS OF SPIE;Li Z, 2007, ;Breiman L, 2001, MACHINE LEARNING;Kennedy J, 2002, ;Watts D, 1998, NATURE;Vapnik V, 1995, ;Quinlan J, 1992, ;Hornik K, 1989, NEURAL NETWORKS;Dean J, 2008, COMMUNICATIONS OF THE ACM;Hall M, 2009, ACM SIGKDD EXPLORATIONS NEWSLETTER;Burges C, 1998, DATA MINING AND KNOWLEDGE DISCOVERY;Quinlan J, 1986, MACHINE LEARNING;Agrawal R, 1993, ;Friedman J, 1991, THE ANNALS OF STATISTICS;Dorigo M, 1997, IEEE TRANSACTIONS ON EVOLUTIONARY COMPUTATION;Freund Y, 1996, ;Flury B, 1989, JOURNAL OF STATISTICAL PLANNING AND INFERENCE;Forney G, 1973, PROCEEDINGS OF THE IEEE;Agrawal R, 2002, ;Tavallaee M, 2009, ;Polikar R, 2006, IEEE CIRCUITS AND SYSTEMS MAGAZINE;Caruana R, 2006, ;García‐Teodoro P, 2008, COMPUTERS & SECURITY;Thu T, 2008, IEEE COMMUNICATIONS SURVEYS & TUTORIALS;Wu S, 2009, APPLIED SOFT COMPUTING;Holland P, 1971, COMPARATIVE GROUP STUDIES;Zhang J, 2008, IEEE TRANSACTIONS ON SYSTEMS MAN AND CYBERNETICS PART C (APPLICATIONS AND REVIEWS);Amor N, 2004, ;Sperotto A, 2010, IEEE COMMUNICATIONS SURVEYS & TUTORIALS;Mukkamala S, 2004, JOURNAL OF NETWORK AND COMPUTER APPLICATIONS;Kruegel C, 2004, ;Bilge L, 2014, ACM TRANSACTIONS ON INFORMATION AND SYSTEM SECURITY;Li W, 2004, NSUWORKS (NOVA SOUTHEASTERN UNIVERSITY);Bilge L, 2012, ;Tajbakhsh A, 2008, APPLIED SOFT COMPUTING;Panda M, 2007, ;Fan W, 2004, KNOWLEDGE AND INFORMATION SYSTEMS;Bivens A, 2002, ;Guazzelli A, 2009, THE R JOURNAL;Lu W, 2004, COMPUTATIONAL INTELLIGENCE;Luo J, 2000, INTERNATIONAL JOURNAL OF INTELLIGENT SYSTEMS;Abraham A, 2007, ;Hansen J, 2006, DECISION SUPPORT SYSTEMS;Gharibian F, 2007, ;Morel B, 2011, ;Benferhat S, 2008, ;Khan M, 2011, INTERNATIONAL JOURNAL OF COMPUTER 
APPLICATIONS;Mukkamala S, 2004, ;Long P, 2007, ;Han H, 2003, ;Hussain O, 2007, ;Zadeh L, 1965, INFORMATION AND CONTROL;Zadeh L, 1996, ;Quinlan J, 1986, MACHINE LEARNING;HornikK., 1989, NEURAL NETWORKS;Chiang L, 2001, ADVANCED TEXTBOOKS IN CONTROL AND SIGNAL PROCESSING;Jensen F, 2001, ;Farmer J, 1986, PHYSICA D NONLINEAR PHENOMENA;Rahbar, 2008, IEEE COMMUNICATIONS SURVEYS & TUTORIALS;Ye N, 2013, ;Michalski R, 1983, MACHINE LEARNING;Michalski R, 1983, MACHINE LEARNING;Beyer H, 2007, SCHOLARPEDIA;Hu Z, 2008, ;Jolliffe I, 2005, ENCYCLOPEDIA OF STATISTICS IN BEHAVIORAL SCIENCE",,,OPENALEX,"Buczak A, 2015, IEEE COMMUNICATIONS SURVEYS & TUTORIALS","Buczak A, 2015, IEEE COMMUNICATIONS SURVEYS & TUTORIALS" +https://openalex.org/W2982720039,,UCI Repository of Machine Learning Databases,1996,en,article,2343,MEDICAL ENTOMOLOGY AND ZOOLOGY,Medical Entomology and Zoology,Christopher J. Merz,Christopher J. Merz,,"Christopher J. Merz (corresponding author), ",,,,,,Database;Computer science;Artificial intelligence,,,,,OPENALEX,"Merz C, 1996, MEDICAL ENTOMOLOGY AND ZOOLOGY","Merz C, 1996, MEDICAL ENTOMOLOGY AND ZOOLOGY" +https://openalex.org/W2968923792,10.1038/s41524-019-0221-0,Recent advances and applications of machine learning in solid-state materials science,2019,en,article,2394,NPJ COMPUTATIONAL MATERIALS,npj Computational Materials,Jonathan Schmidt;Mário R. G. Marques;Silvana Botti;Miguel A. L. Marques,Jonathan Schmidt;Mário R. G. Marques;Silvana Botti;Miguel A. L. 
Marques,"Institut für Physik, Martin-Luther-Universität, Halle-Wittenberg, Germany;Institut für Physik, Martin-Luther-Universität, 06120, Halle-Wittenberg, Halle (Saale), Germany;Institut für Physik, Martin-Luther-Universität, Halle-Wittenberg, Germany;Institut für Physik, Martin-Luther-Universität, 06120, Halle-Wittenberg, Halle (Saale), Germany;Institut für Festkörpertheorie und -optik, Friedrich-Schiller-Universität Jena, Jena, Germany;Institut für Festkörpertheorie und -optik, Friedrich-Schiller-Universität Jena, Max-Wien-Platz 1, 07743, Jena, Germany;Institut für Physik, Martin-Luther-Universität, Halle-Wittenberg, Germany;Institut für Physik, Martin-Luther-Universität, 06120, Halle-Wittenberg, Halle (Saale), Germany",,"Abstract One of the most exciting tools that have entered the material science toolbox in recent years is machine learning. This collection of statistical methods has already proved to be capable of considerably speeding up both fundamental and applied research. At present, we are witnessing an explosion of works that develop and apply machine learning to solid-state systems. We provide a comprehensive overview and analysis of the most recent research in this topic. As a starting point, we introduce machine learning principles, algorithms, descriptors, and databases in materials science. We continue with the description of different machine learning approaches for the discovery of stable materials and the prediction of their crystal structure. Then we discuss research in numerous quantitative structure–property relationships and various approaches for the replacement of first-principle methods by machine learning. We review how active learning and surrogate-based optimization can be applied to improve the rational design process and related examples of applications. Two major questions are always the interpretability of and the physical understanding gained from machine learning models. 
We consider therefore the different facets of interpretability and their importance in materials science. Finally, we propose solutions and future research paths for various challenges in computational materials science.",5,1,,,Interpretability;Toolbox;Machine learning;Artificial intelligence;Computer science;Process (computing);Property (philosophy);Point (geometry);Mathematics,DE,"Friedman J, 2001, THE ANNALS OF STATISTICS;He K, 2015, ;Zeiler M, 2014, LECTURE NOTES IN COMPUTER SCIENCE;Bednorz J, 1986, THE EUROPEAN PHYSICAL JOURNAL B;Murphy K, 2012, ;Jones D, 1998, JOURNAL OF GLOBAL OPTIMIZATION;Jensen F, 1998, ;Bach S, 2015, PLOS ONE;Strobl C, 2007, BMC BIOINFORMATICS;Devroye L, 1996, STOCHASTIC MODELLING AND APPLIED PROBABILITY;Martin R, 2004, ;Gori M, 2006, PROCEEDINGS. 2005 IEEE INTERNATIONAL JOINT CONFERENCE ON NEURAL NETWORKS, 2005.;Thompson A, 2014, JOURNAL OF COMPUTATIONAL PHYSICS;Vert J, 2004, THE MIT PRESS EBOOKS;Jong M, 2015, SCIENTIFIC DATA;Braams B, 2009, INTERNATIONAL REVIEWS IN PHYSICAL CHEMISTRY;, 2005, CHOICE REVIEWS ONLINE;Kaner R, 2005, SCIENCE;Drucker H, 1997, INTERNATIONAL CONFERENCE ON MACHINE LEARNING;Guzella T, 2009, EXPERT SYSTEMS WITH APPLICATIONS;, 2008, CHOICE REVIEWS ONLINE;Seko A, 2015, PHYSICAL REVIEW LETTERS;Maddox J, 1988, NATURE;Chan P, 1998, ;Faber F, 2016, PHYSICAL REVIEW LETTERS;Kolen J, 2009, ;Lee J, 2016, PHYSICAL REVIEW. B./PHYSICAL REVIEW. 
B;Ghasemi S, 2015, PHYSICAL REVIEW B;Isayev O, 2014, CHEMISTRY OF MATERIALS;Ramakrishnan R, 2015, THE JOURNAL OF CHEMICAL PHYSICS;Vellido A, 2012, ;Gaultois M, 2016, APL MATERIALS;Seko A, 2015, PHYSICAL REVIEW B;Pettifor D, 1988, MATERIALS SCIENCE AND TECHNOLOGY;Seko A, 2014, PHYSICAL REVIEW B;Sumpter B, 1992, CHEMICAL PHYSICS LETTERS;Yoon B, 2013, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Zakutayev A, 2014, CHEMISTRY OF MATERIALS;Broderick S, 2011, JOURNAL OF THE AMERICAN CERAMIC SOCIETY;Devroye L, 1996, STOCHASTIC MODELLING AND APPLIED PROBABILITY;Siddorn M, 2015, PHYSICAL CHEMISTRY CHEMICAL PHYSICS;Wang Y, 2015, SIAM JOURNAL ON SCIENTIFIC COMPUTING;Liu S, 2010, LECTURE NOTES IN COMPUTER SCIENCE;Meremianin A, 2006, JOURNAL OF PHYSICS A MATHEMATICAL AND GENERAL;Owolabi T, 2014, JOURNALS & BOOKS HOSTING (INTERNATIONAL KNOWLEDGE SHARING PLATFORM);Johnson D, 2013, ELSEVIER EBOOKS;Lindström D, 2015, ZENODO (CERN EUROPEAN ORGANIZATION FOR NUCLEAR RESEARCH);Bruna J, 2013, ARXIV (CORNELL UNIVERSITY);Weyl H, , ;Baldi P, 1998, ;Perdew J, 1996, PHYSICAL REVIEW LETTERS;Freund Y, 1997, JOURNAL OF COMPUTER AND SYSTEM SCIENCES;McCulloch W, 1943, BULLETIN OF MATHEMATICAL BIOLOGY;Jain A, 2013, APL MATERIALS;Hill R, 1952, PROCEEDINGS OF THE PHYSICAL SOCIETY SECTION A;Tran F, 2009, PHYSICAL REVIEW LETTERS;Ong S, 2012, COMPUTATIONAL MATERIALS SCIENCE;Schmidt M, 2009, SCIENCE;Wang Y, 2012, COMPUTER PHYSICS COMMUNICATIONS;Saal J, 2013, JOM;Behler J, 2011, THE JOURNAL OF CHEMICAL PHYSICS;Kitchin J, 2004, PHYSICAL REVIEW LETTERS;Loader C, 1999, STATISCTICS AND COMPUTING/STATISTICS AND COMPUTING;Martoňák R, 2003, PHYSICAL REVIEW LETTERS;Leininger T, 1997, CHEMICAL PHYSICS LETTERS;Kohn W, 1965, PHYSICAL REVIEW LETTERS;Pickard C, 2006, PHYSICAL REVIEW LETTERS;Tsuneyuki S, 1988, PHYSICAL REVIEW LETTERS;Hautier G, 2010, CHEMISTRY OF MATERIALS;Fischer C, 2006, NATURE MATERIALS;Koskinen P, 2009, COMPUTATIONAL MATERIALS SCIENCE;Lüders M, 2005, PHYSICAL REVIEW B;Éliashberg G, 1960, JOURNAL OF 
EXPERIMENTAL AND THEORETICAL PHYSICS;Matthias B, 1955, PHYSICAL REVIEW;Szlachta W, 2014, PHYSICAL REVIEW B;Cśanyi G, 2004, PHYSICAL REVIEW LETTERS;Schön J, 1996, ANGEWANDTE CHEMIE INTERNATIONAL EDITION IN ENGLISH;Baskes M, 1997, MATERIALS CHEMISTRY AND PHYSICS;Pannetier J, 1990, NATURE;Becker C, 2013, CURRENT OPINION IN SOLID STATE AND MATERIALS SCIENCE;Sosso G, 2012, PHYSICAL REVIEW B;Harper P, 1955, PROCEEDINGS OF THE PHYSICAL SOCIETY SECTION A;Johnson S, 2007, JOURNAL OF CHEMICAL INFORMATION AND MODELING;Bajorath J, 2009, DRUG DISCOVERY TODAY;Kuz’min V, 2008, JOURNAL OF COMPUTER-AIDED MOLECULAR DESIGN;Ruggiu F, 2010, MOLECULAR INFORMATICS;Pettifor D, 1986, JOURNAL OF PHYSICS C SOLID STATE PHYSICS;Lam P, 1987, PHYSICAL REVIEW. B, CONDENSED MATTER;Schneider G, 2008, TRENDS IN BIOTECHNOLOGY;Owolabi T, 2014, JOURNAL OF SUPERCONDUCTIVITY AND NOVEL MAGNETISM;Moussa J, 2012, PHYSICAL REVIEW LETTERS;Xu B, 2013, SCIENTIFIC REPORTS;Sanville E, 2008, JOURNAL OF PHYSICS CONDENSED MATTER;Kadantsev E, 2004, PHYSICAL REVIEW A;Abdellahi M, 2014, CERAMICS INTERNATIONAL;Villars P, 1988, PHYSICAL REVIEW. B, CONDENSED MATTER;Sinkov N, 2010, TALANTA;Matos C, 2007, ANALYTICAL CHEMISTRY;Cohen M, 1988, MATERIALS SCIENCE AND ENGINEERING A;Doll K, 2008, JOURNAL OF PHYSICS CONFERENCE SERIES;Hochreiter S, 1997, NEURAL COMPUTATION;MacKerell A, 1998, THE JOURNAL OF PHYSICAL CHEMISTRY B;Rosenblatt F, 1958, PSYCHOLOGICAL REVIEW;Geurts P, 2006, MACHINE LEARNING;Daw M, 1984, PHYSICAL REVIEW. B, CONDENSED MATTER;Stillinger F, 1985, PHYSICAL REVIEW. B, CONDENSED MATTER;Behler J, 2007, PHYSICAL REVIEW LETTERS;Goldschmidt V, 1926, DIE NATURWISSENSCHAFTEN;Ackley D, 1985, COGNITIVE SCIENCE;Bartók A, 2013, PHYSICAL REVIEW B;Oganov A, 2006, THE JOURNAL OF CHEMICAL PHYSICS;Porezag D, 1995, PHYSICAL REVIEW. B, CONDENSED MATTER;Werner P, 1985, JOURNAL OF APPLIED CRYSTALLOGRAPHY;Tersoff J, 1986, PHYSICAL REVIEW LETTERS;Cohen M, 1985, PHYSICAL REVIEW. 
B, CONDENSED MATTER;Waag W, 2014, JOURNAL OF POWER SOURCES;Woodley S, 2008, NATURE MATERIALS;Maggiora G, 2006, JOURNAL OF CHEMICAL INFORMATION AND MODELING;Snyder J, 2012, PHYSICAL REVIEW LETTERS;Li Z, 2015, PHYSICAL REVIEW LETTERS;Montavon G, 2013, NEW JOURNAL OF PHYSICS;Sanders M, 1984, JOURNAL OF THE CHEMICAL SOCIETY CHEMICAL COMMUNICATIONS;Blank T, 1995, THE JOURNAL OF CHEMICAL PHYSICS;Schütt K, 2014, PHYSICAL REVIEW B;Koinuma H, 2004, NATURE MATERIALS;Dalsin J, 2005, MATERIALS TODAY;Khaliullin R, 2011, NATURE MATERIALS;Olson G, 2000, SCIENCE;Handley C, 2010, THE JOURNAL OF PHYSICAL CHEMISTRY A;Marques M, 2005, PHYSICAL REVIEW B;Yan J, 2014, ENERGY & ENVIRONMENTAL SCIENCE;Pettifor D, 1984, SOLID STATE COMMUNICATIONS;Seifert G, 2012, WILEY INTERDISCIPLINARY REVIEWS COMPUTATIONAL MOLECULAR SCIENCE;Tozer D, 1996, THE JOURNAL OF CHEMICAL PHYSICS;Dey P, 2013, COMPUTATIONAL MATERIALS SCIENCE;Snyder J, 2013, THE JOURNAL OF CHEMICAL PHYSICS;Shoemaker D, 2014, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Zhang X, 2012, ADVANCED FUNCTIONAL MATERIALS;Eshet H, 2010, PHYSICAL REVIEW B;Gilman J, 2001, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Kuz’min V, 2005, JOURNAL OF MOLECULAR MODELING;Gu T, 2005, SOLID STATE SCIENCES;Walsh A, 2015, NATURE CHEMISTRY;Suram S, 2015, ACS COMBINATORIAL SCIENCE;Tatlıer M, 2010, NEURAL COMPUTING AND APPLICATIONS;Plaut D, 1987, COMPUTER SPEECH & LANGUAGE;Rabe K, 1992, PHYSICAL REVIEW. B, CONDENSED MATTER;Freeman C, 1992, JOURNAL OF THE CHEMICAL SOCIETY CHEMICAL COMMUNICATIONS;Morita T, 1957, JOURNAL OF THE PHYSICAL SOCIETY OF JAPAN;Becke A, 1988, PHYSICAL REVIEW. 
A, GENERAL PHYSICS;Srivastava N, 2014, ;淳司 柴, 2017, JOURNAL OF JAPAN SOCIETY FOR FUZZY THEORY AND INTELLIGENT INFORMATICS;Hinton G, 2006, SCIENCE;Hasan M, 2010, REVIEWS OF MODERN PHYSICS;Heyd J, 2003, THE JOURNAL OF CHEMICAL PHYSICS;Boser B, 1992, ;Murnaghan F, 1944, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Birch F, 1947, PHYSICAL REVIEW;Golbraikh A, 2002, JOURNAL OF MOLECULAR GRAPHICS AND MODELLING;Geman S, 1992, NEURAL COMPUTATION;Schapire R, 1990, MACHINE LEARNING;Stanley K, 2002, EVOLUTIONARY COMPUTATION;Moore J, 2010, NATURE;Bartók A, 2010, PHYSICAL REVIEW LETTERS;Wang Y, 2010, PHYSICAL REVIEW B;Liu A, 1989, SCIENCE;Cahill D, 1992, PHYSICAL REVIEW. B, CONDENSED MATTER;Rupp M, 2012, PHYSICAL REVIEW LETTERS;Forrester A, 2009, PROGRESS IN AEROSPACE SCIENCES;Tropsha A, 2003, QSAR & COMBINATORIAL SCIENCE;Ramakrishnan R, 2014, SCIENTIFIC DATA;Daw M, 1993, MATERIALS SCIENCE REPORTS;Beest B, 1990, PHYSICAL REVIEW LETTERS;Picard R, 1984, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Nguyen A, 2013, APPLIED ENERGY;Gražulis S, 2011, NUCLEIC ACIDS RESEARCH;Chapelle O, 2011, ;Goedecker S, 2004, THE JOURNAL OF CHEMICAL PHYSICS;Huang C, 2006, EXPERT SYSTEMS WITH APPLICATIONS;Santosa F, 1986, SIAM JOURNAL ON SCIENTIFIC AND STATISTICAL COMPUTING;Foulkes W, 1989, PHYSICAL REVIEW. B, CONDENSED MATTER;Potyrailo R, 2011, ACS COMBINATORIAL SCIENCE;Artrith N, 2011, PHYSICAL REVIEW B;Blatov V, 2004, CRYSTALLOGRAPHY REVIEWS;Seko A, 2014, PHYSICAL REVIEW B;Oganov A, 2009, THE JOURNAL OF CHEMICAL PHYSICS;Paszkowicz W, 2013, MATERIALS AND MANUFACTURING PROCESSES;Pukrittayakamee A, 2009, THE JOURNAL OF CHEMICAL PHYSICS;Park W, 2012, ADVANCED FUNCTIONAL MATERIALS;Kramer G, 1991, PHYSICAL REVIEW. 
B, CONDENSED MATTER;Bush T, 1995, JOURNAL OF MATERIALS CHEMISTRY;Eijck B, 2000, ACTA CRYSTALLOGRAPHICA SECTION B STRUCTURAL SCIENCE;Villars P, 2003, JOURNAL OF ALLOYS AND COMPOUNDS;Gale J, 1998, THE JOURNAL OF PHYSICAL CHEMISTRY B;Witkoskie J, 2004, JOURNAL OF CHEMICAL THEORY AND COMPUTATION;Gottwald D, 2005, THE JOURNAL OF CHEMICAL PHYSICS;Sastre G, 2003, CHEMISTRY OF MATERIALS;Chen Z, 2005, ANALYTICAL CHEMISTRY;Agatonović-Kuštrin S, 2000, JOURNAL OF PHARMACEUTICAL AND BIOMEDICAL ANALYSIS;He K, 2016, ;Krizhevsky A, 2017, COMMUNICATIONS OF THE ACM;LeCun Y, 1998, PROCEEDINGS OF THE IEEE;Tibshirani R, 1996, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Zou H, 2005, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Wang J, 2004, JOURNAL OF COMPUTATIONAL CHEMISTRY;Quinlan J, 1986, MACHINE LEARNING;Bengio Y, 2013, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;LeCun Y, 1989, NEURAL COMPUTATION;Snyder G, 2008, NATURE MATERIALS;Tropp J, 2007, IEEE TRANSACTIONS ON INFORMATION THEORY;Scarselli F, 2008, IEEE TRANSACTIONS ON NEURAL NETWORKS;Candès E, 2006, COMMUNICATIONS ON PURE AND APPLIED MATHEMATICS;Duin A, 2001, THE JOURNAL OF PHYSICAL CHEMISTRY A;Glorot X, 2012, ;Pati Y, 2002, ;Cordero B, 2008, DALTON TRANSACTIONS;LeCun Y, 1989, NEURAL INFORMATION PROCESSING SYSTEMS;Browne C, 2012, IEEE TRANSACTIONS ON COMPUTATIONAL INTELLIGENCE AND AI IN GAMES;Fan J, 2008, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Quinlan J, 1987, INTERNATIONAL JOURNAL OF MAN-MACHINE STUDIES;Curtarolo S, 2013, NATURE MATERIALS;Ruddigkeit L, 2012, JOURNAL OF CHEMICAL INFORMATION AND MODELING;Glass C, 2006, COMPUTER PHYSICS COMMUNICATIONS;Pickard C, 2011, JOURNAL OF PHYSICS CONDENSED MATTER;Pazzani M, 1997, MACHINE LEARNING;Vítek V, 1968, PHILOSOPHICAL MAGAZINE;Morgenstern D, 1962, ECONOMETRICA;Blum L, 2009, JOURNAL OF THE AMERICAN CHEMICAL SOCIETY;Ghiringhelli L, 2015, PHYSICAL REVIEW 
LETTERS;Evgeniou T, 2005, ;Alexander J, 1928, TRANSACTIONS OF THE AMERICAN MATHEMATICAL SOCIETY;Hachmann J, 2011, THE JOURNAL OF PHYSICAL CHEMISTRY LETTERS;Shan S, 2009, STRUCTURAL AND MULTIDISCIPLINARY OPTIMIZATION;Faber F, 2015, INTERNATIONAL JOURNAL OF QUANTUM CHEMISTRY;Frazier P, 2009, INFORMS JOURNAL ON COMPUTING;Murray C, 1993, MOLECULAR PHYSICS;Álvarez M, 2012, FOUNDATIONS AND TRENDS® IN MACHINE LEARNING;Rajan K, 2015, ANNUAL REVIEW OF MATERIALS RESEARCH;Emmerich M, 2011, ;Swamidass S, 2005, COMPUTER APPLICATIONS IN THE BIOSCIENCES;Gorai P, 2015, COMPUTATIONAL MATERIALS SCIENCE;Wagner T, 2010, ;Pilania G, 2015, ACTA CRYSTALLOGRAPHICA SECTION B STRUCTURAL SCIENCE CRYSTAL ENGINEERING AND MATERIALS;Vita A, 1997, MRS PROCEEDINGS;Klintenberg M, 2012, COMPUTATIONAL MATERIALS SCIENCE;Obeidat S, 2011, ;Clevert D, 2015, ARXIV (CORNELL UNIVERSITY);Duvenaud D, 2015, ARXIV (CORNELL UNIVERSITY);Jain P, 2014, ARXIV (CORNELL UNIVERSITY);Kohn W, 1965, PHYSICAL REVIEW;Wilkinson M, 2016, SCIENTIFIC DATA;Silver D, 2016, NATURE;Pearson K, 1901, THE LONDON EDINBURGH AND DUBLIN PHILOSOPHICAL MAGAZINE AND JOURNAL OF SCIENCE;Groom C, 2016, ACTA CRYSTALLOGRAPHICA SECTION B STRUCTURAL SCIENCE CRYSTAL ENGINEERING AND MATERIALS;Weiss K, 2016, JOURNAL OF BIG DATA;Kirklin S, 2015, NPJ COMPUTATIONAL MATERIALS;Carleo G, 2017, SCIENCE;Raccuglia P, 2016, NATURE;Kearnes S, 2016, JOURNAL OF COMPUTER-AIDED MOLECULAR DESIGN;Ward L, 2016, NPJ COMPUTATIONAL MATERIALS;Carrasquilla J, 2017, NATURE PHYSICS;Montavon G, 2016, PATTERN RECOGNITION;De S, 2016, PHYSICAL CHEMISTRY CHEMICAL PHYSICS;Xue D, 2016, NATURE COMMUNICATIONS;Isayev O, 2017, NATURE COMMUNICATIONS;Reilly A, 2016, ACTA CRYSTALLOGRAPHICA SECTION B STRUCTURAL SCIENCE CRYSTAL ENGINEERING AND MATERIALS;Artrith N, 2016, COMPUTATIONAL MATERIALS SCIENCE;Ma X, 2015, THE JOURNAL OF PHYSICAL CHEMISTRY LETTERS;Mannodi‐Kanakkithodi A, 2016, SCIENTIFIC REPORTS;, 2010, ;Kalinin S, 2015, NATURE MATERIALS;Artrith N, 2012, PHYSICAL REVIEW B;Ueno T, 
2016, MATERIALS DISCOVERY;Chern S, 1946, ANNALS OF MATHEMATICS;Carlucci L, 2014, CHEMICAL REVIEWS;Setyawan W, 2011, ACS COMBINATORIAL SCIENCE;Kim C, 2016, CHEMISTRY OF MATERIALS;Balachandran P, 2016, SCIENTIFIC REPORTS;Ward L, 2016, CURRENT OPINION IN SOLID STATE AND MATERIALS SCIENCE;Kim C, 2016, THE JOURNAL OF PHYSICAL CHEMISTRY C;Park W, 2014, JOURNAL OF THE AMERICAN CHEMICAL SOCIETY;Powell W, 2012, WILEY SERIES IN PROBABILITY AND STATISTICS;Shandiz M, 2016, COMPUTATIONAL MATERIALS SCIENCE;Butler K, 2016, CHEMICAL SOCIETY REVIEWS;Oliynyk A, 2016, CHEMISTRY OF MATERIALS;Puchala B, 2016, JOM;Glawe H, 2016, NEW JOURNAL OF PHYSICS;Boes J, 2016, INTERNATIONAL JOURNAL OF QUANTUM CHEMISTRY;Norman M, 2016, REPORTS ON PROGRESS IN PHYSICS;Kiyohara S, 2016, JAPANESE JOURNAL OF APPLIED PHYSICS;Pilania G, 2015, PHYSICAL REVIEW B;Rouet‐Leduc B, 2016, SCIENTIFIC REPORTS;Balachandran P, 2016, PHYSICAL REVIEW. B./PHYSICAL REVIEW. B;Meredig B, 2014, CHEMISTRY OF MATERIALS;Ziatdinov M, 2016, NANOTECHNOLOGY;Mitsui T, 1998, JOURNAL OF CHEMICAL SOFTWARE;Defferrard M, 2016, ARXIV (CORNELL UNIVERSITY);Lipton Z, 2016, ARXIV (CORNELL UNIVERSITY);Zhang L, 2018, PHYSICAL REVIEW LETTERS;Schütt K, 2017, NATURE COMMUNICATIONS;Behler J, 2016, THE JOURNAL OF CHEMICAL PHYSICS;Liu Y, 2017, JOURNAL OF MATERIOMICS;Behler J, 2017, ANGEWANDTE CHEMIE INTERNATIONAL EDITION;Deringer V, 2017, PHYSICAL REVIEW. B./PHYSICAL REVIEW. B;Brockherde F, 2017, NATURE COMMUNICATIONS;Shi S, 2016, CHINESE PHYSICS B;Ward L, 2017, PHYSICAL REVIEW. B./PHYSICAL REVIEW. B;Oliynyk A, 2016, CHEMISTRY OF MATERIALS;Paganini M, 2018, PHYSICAL REVIEW. D/PHYSICAL REVIEW. D.;Ma J, 2017, PHYSICAL REVIEW. B./PHYSICAL REVIEW. B;Artrith N, 2017, PHYSICAL REVIEW. B./PHYSICAL REVIEW. B;Schmidt J, 2017, CHEMISTRY OF MATERIALS;Pilania G, 2016, COMPUTATIONAL MATERIALS SCIENCE;Seko A, 2017, PHYSICAL REVIEW. B./PHYSICAL REVIEW. 
B;Green M, 2017, APPLIED PHYSICS REVIEWS;Dragoni D, 2018, PHYSICAL REVIEW MATERIALS;Sanvito S, 2017, SCIENCE ADVANCES;Zhang Y, 2017, PHYSICAL REVIEW LETTERS;Zhang P, 2018, PHYSICAL REVIEW LETTERS;Jong M, 2016, SCIENTIFIC REPORTS;Han J, 2018, COMMUNICATIONS IN COMPUTATIONAL PHYSICS;Schawinski K, 2017, MONTHLY NOTICES OF THE ROYAL ASTRONOMICAL SOCIETY LETTERS;Park W, 2017, IUCRJ;Glielmo A, 2017, PHYSICAL REVIEW. B./PHYSICAL REVIEW. B;Evans J, 2017, CHEMISTRY OF MATERIALS;Ling J, 2017, INTEGRATING MATERIALS AND MANUFACTURING INNOVATION;Zhu Q, 2018, NATURE COMMUNICATIONS;Chen C, 2017, PHYSICAL REVIEW MATERIALS;Sánchez-Lengeling B, 2017, CHEMRXIV;Xue D, 2016, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Legrain F, 2017, CHEMISTRY OF MATERIALS;Ghiringhelli L, 2017, NEW JOURNAL OF PHYSICS;DeCost B, 2016, JOM;Faraji S, 2017, PHYSICAL REVIEW. B./PHYSICAL REVIEW. B;Rosenbrock C, 2017, NPJ COMPUTATIONAL MATERIALS;Zhan T, 2017, SCIENTIFIC REPORTS;Pankajakshan P, 2017, CHEMISTRY OF MATERIALS;Kiyohara S, 2016, SCIENCE ADVANCES;Hajinazar S, 2017, PHYSICAL REVIEW. B./PHYSICAL REVIEW. B;Furmanchuk A, 2016, RSC ADVANCES;Kumar D, 2017, ;Kruglov I, 2017, SCIENTIFIC REPORTS;Dieb S, 2017, SCIENCE AND TECHNOLOGY OF ADVANCED MATERIALS;Dehghannasiri R, 2017, COMPUTATIONAL MATERIALS SCIENCE;Kikuchi S, 2017, PHYSICA B CONDENSED MATTER;Liu R, 2016, ;Okamoto Y, 2017, THE JOURNAL OF PHYSICAL CHEMISTRY A;Wang B, 2016, ;Hastie T, 2009, SPRINGER SERIES IN STATISTICS;Xie T, 2018, PHYSICAL REVIEW LETTERS;Schütt K, 2018, THE JOURNAL OF CHEMICAL PHYSICS;Bartel C, 2019, SCIENCE ADVANCES;Ward L, 2018, COMPUTATIONAL MATERIALS SCIENCE;Zhang Y, 2018, NPJ COMPUTATIONAL MATERIALS;Yao K, 2018, CHEMICAL SCIENCE;Zhuo Y, 2018, THE JOURNAL OF PHYSICAL CHEMISTRY LETTERS;Stanev V, 2018, NPJ COMPUTATIONAL MATERIALS;Ziletti A, 2018, NATURE COMMUNICATIONS;Correa-Baena J, 2018, JOULE;Deng D, 2017, PHYSICAL REVIEW. B./PHYSICAL REVIEW. 
B;Li W, 2018, COMPUTATIONAL MATERIALS SCIENCE;Kamath A, 2018, THE JOURNAL OF CHEMICAL PHYSICS;Ye W, 2018, NATURE COMMUNICATIONS;Wang J, 2017, NATURE MATERIALS;Deringer V, 2018, PHYSICAL REVIEW LETTERS;Graser J, 2018, CHEMISTRY OF MATERIALS;Rowe P, 2018, PHYSICAL REVIEW. B./PHYSICAL REVIEW. B;Wood M, 2018, THE JOURNAL OF CHEMICAL PHYSICS;Zakutayev A, 2018, SCIENTIFIC DATA;Ren Z, 2018, ;Jacobs R, 2018, ADVANCED ENERGY MATERIALS;Toyao T, 2018, THE JOURNAL OF PHYSICAL CHEMISTRY C;Beach M, 2018, PHYSICAL REVIEW. B./PHYSICAL REVIEW. B;Yamashita T, 2018, PHYSICAL REVIEW MATERIALS;Gopakumar A, 2018, SCIENTIFIC REPORTS;Artrith N, 2018, THE JOURNAL OF CHEMICAL PHYSICS;Glielmo A, 2018, PHYSICAL REVIEW. B./PHYSICAL REVIEW. B;Pham T, 2017, SCIENCE AND TECHNOLOGY OF ADVANCED MATERIALS;Jacobsen T, 2018, PHYSICAL REVIEW LETTERS;Furmanchuk A, 2017, JOURNAL OF COMPUTATIONAL CHEMISTRY;Oliynyk A, 2017, JOURNAL OF THE AMERICAN CHEMICAL SOCIETY;Sosso G, 2018, MOLECULAR SIMULATION;Kobayashi R, 2017, PHYSICAL REVIEW MATERIALS;Sun N, 2018, PHYSICAL REVIEW. B./PHYSICAL REVIEW. 
B;Zhai X, 2018, COMPUTATIONAL MATERIALS SCIENCE;Schmitz G, 2018, THE JOURNAL OF CHEMICAL PHYSICS;Liu Y, 2017, COMPUTATIONAL MATERIALS SCIENCE;Weston L, 2018, PHYSICAL REVIEW MATERIALS;Nagai R, 2018, THE JOURNAL OF CHEMICAL PHYSICS;Dieb S, 2018, THE JOURNAL OF CHEMICAL PHYSICS;Liu Q, 2017, THE JOURNAL OF PHYSICAL CHEMISTRY A;Schmidt J, 2018, THE JOURNAL OF CHEMICAL PHYSICS;Pham T, 2018, THE JOURNAL OF CHEMICAL PHYSICS;Yuan F, 2017, SCIENTIFIC REPORTS;Dieb S, 2018, ;Pilania G, 2018, JOURNAL OF MATERIALS SCIENCE;Ghahramani A, 2018, BIORXIV (COLD SPRING HARBOR LABORATORY);Breiman L, 2001, MACHINE LEARNING;LeCun Y, 2015, NATURE;Paszke A, 2017, ;Butler K, 2018, NATURE;Chen C, 2019, CHEMISTRY OF MATERIALS;Haastrup S, 2018, 2D MATERIALS;Ouyang R, 2018, PHYSICAL REVIEW MATERIALS;Lu S, 2018, NATURE COMMUNICATIONS;Schütt K, 2018, JOURNAL OF CHEMICAL THEORY AND COMPUTATION;Rajan A, 2018, CHEMISTRY OF MATERIALS;Ryan K, 2018, JOURNAL OF THE AMERICAN CHEMICAL SOCIETY;Tehrani A, 2018, JOURNAL OF THE AMERICAN CHEMICAL SOCIETY;Dimiduk D, 2018, INTEGRATING MATERIALS AND MANUFACTURING INNOVATION;Sendek A, 2018, CHEMISTRY OF MATERIALS;Wei H, 2018, INTERNATIONAL JOURNAL OF HEAT AND MASS TRANSFER;Balachandran P, 2018, NATURE COMMUNICATIONS;Jäger M, 2018, NPJ COMPUTATIONAL MATERIALS;Meredig B, 2018, MOLECULAR SYSTEMS DESIGN & ENGINEERING;Rajan K, 2005, STATISTICAL ANALYSIS AND DATA MINING THE ASA DATA SCIENCE JOURNAL;Balachandran P, 2018, PHYSICAL REVIEW MATERIALS;Cassar D, 2018, ACTA MATERIALIA;Oftelie L, 2018, NPJ COMPUTATIONAL MATERIALS;Jes\'us C, 2014, ARXIV (CORNELL UNIVERSITY);Yamawaki M, 2018, SCIENCE ADVANCES;Li X, 2018, PHYSICAL REVIEW. B./PHYSICAL REVIEW. 
B;Σολωμού Α, 2018, MATERIALS & DESIGN;Jalem R, 2018, SCIENTIFIC REPORTS;Kim K, 2018, PHYSICAL REVIEW MATERIALS;Kauwe S, 2018, INTEGRATING MATERIALS AND MANUFACTURING INNOVATION;Zheng X, 2018, CHEMICAL SCIENCE;Yeo B, 2019, SCIENTIFIC REPORTS;Li X, 2018, ;Xie T, 2018, THE JOURNAL OF CHEMICAL PHYSICS;Ward L, 2018, MRS BULLETIN;Wu Y, 2018, ACS APPLIED NANO MATERIALS;Dam H, 2018, JOURNAL OF THE PHYSICAL SOCIETY OF JAPAN;Balachandran P, 2018, SPRINGER SERIES IN MATERIALS SCIENCE;Zhang B, 2018, CHINESE PHYSICS B;Kiyohara S, 2018, THE JOURNAL OF CHEMICAL PHYSICS;Liu Z, 2019, ARXIV (CORNELL UNIVERSITY);Ioffe S, 2024, ARXIV (CORNELL UNIVERSITY);Gilmer J, 2017, ARXIV (CORNELL UNIVERSITY);Sutton C, 2018, ARXIV (CORNELL UNIVERSITY);Ledig C, 2017, ;Sun J, 2015, PHYSICAL REVIEW LETTERS;Yu F, 2019, SPRINGER BRIEFS IN ELECTRICAL AND COMPUTER ENGINEERING;Himanen L, 2019, COMPUTER PHYSICS COMMUNICATIONS;F B, 2017, MPG.PURE (MAX PLANCK SOCIETY);Montavon G, 2013, MPG.PURE (MAX PLANCK SOCIETY);Carrete J, 2014, ZENODO (CERN EUROPEAN ORGANIZATION FOR NUCLEAR RESEARCH);R. O, 2018, MPG.PURE (MAX PLANCK SOCIETY);Li X, 2018, SCIENTIFIC REPORTS;Ye W, 2018, ;A. Z, 2018, MPG.PURE (MAX PLANCK SOCIETY);Liu Z, 2020, JOURNAL OF THE OPTICAL SOCIETY OF AMERICA A;L. G, 2017, MAX PLANCK DIGITAL LIBRARY;Smith J, 2018, CHEMRXIV;A.V. 
M, 2006, MPG.PURE (MAX PLANCK SOCIETY);Gaultois M, 2015, ARXIV (CORNELL UNIVERSITY);Nouira A, 2018, ARXIV (CORNELL UNIVERSITY);Eldar Y, 2012, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Webb G, 2017, ;Snyder G, 2010, CO-PUBLISHED WITH MACMILLAN PUBLISHERS LTD, UK EBOOKS;Paszkowicz W, 2009, MATERIALS AND MANUFACTURING PROCESSES;Newnham R, 2004, OXFORD UNIVERSITY PRESS EBOOKS;Sánchez-Lengeling B, 2017, CHEMRXIV;Lookman T, 2018, SPRINGER SERIES IN MATERIALS SCIENCE;Obeidat S, 2011, SPECTROSCOPY AN INTERNATIONAL JOURNAL;Sparks T, 2018, CHEMRXIV;Rasmussen C, 2022, DIRECTORY OF OPEN ACCESS BOOKS (OAPEN FOUNDATION);Wager S, 2013, ARXIV (CORNELL UNIVERSITY)",,,OPENALEX,"Schmidt J, 2019, NPJ COMPUTATIONAL MATERIALS","Schmidt J, 2019, NPJ COMPUTATIONAL MATERIALS" +https://openalex.org/W2168029744,10.1214/009053607000000677,Kernel methods in machine learning,2008,en,article,1594,THE ANNALS OF STATISTICS,The Annals of Statistics,Thomas Hofmann;Bernhard Schölkopf;Alexander J. Smola,Thomas Hofmann;Bernhard Schölkopf;Alexander J. Smola,"Darmstadt University of Technology, Max Planck Institute for Biological Cybernetics and National ICT Australia;Darmstadt University of Technology, Max Planck Institute for Biological Cybernetics and National ICT Australia;MAX PLANCK INSTITUTE FOR BIOLOGICAL CYBERNETICS TBINGEN GERMANY",,"We review machine learning methods employing positive definite kernels. These methods formulate learning and estimation problems in a reproducing kernel Hilbert space (RKHS) of functions defined on the data domain, expanded in terms of a kernel. Working in linear spaces of function has the benefit of facilitating the construction and analysis of learning algorithms while at the same time allowing large classes of functions. The latter include nonlinear functions as well as functions defined on nonvectorial data. 
We cover a wide range of methods, ranging from binary classifiers to sophisticated methods for estimation with structured data.",36,3,,,Reproducing kernel Hilbert space;Kernel (algebra);Kernel method;Hilbert space;Binary classification;Range (aeronautics);Kernel embedding of distributions;Representer theorem;Pattern recognition (psychology);Radial basis function kernel,AU;DE,"Chen S, 1998, SIAM JOURNAL ON SCIENTIFIC COMPUTING;Shawe‐Taylor J, 2004, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Platt J, 1998, THE MIT PRESS EBOOKS;Hotelling H, 1936, BIOMETRIKA;Fiedler M, 1973, CZECHOSLOVAK MATHEMATICAL JOURNAL;Тихонов А, 1963, MEDICAL ENTOMOLOGY AND ZOOLOGY;, 2000, THE MIT PRESS EBOOKS;Gretton A, 2005, LECTURE NOTES IN COMPUTER SCIENCE;Айзерман М, 1964, AUTOMATION AND REMOTE CONTROL;Bamber D, 1975, JOURNAL OF MATHEMATICAL PSYCHOLOGY;Baum L, 1972, MEDICAL ENTOMOLOGY AND ZOOLOGY;Morozov V, 1984, ;Kimeldorf G, 1971, JOURNAL OF MATHEMATICAL ANALYSIS AND APPLICATIONS;Darroch J, 1972, THE ANNALS OF MATHEMATICAL STATISTICS;Vapnik V, 1963, AUTOMATION AND REMOTE CONTROL;Berg C, 1984, GRADUATE TEXTS IN MATHEMATICS;Bartlett P, 2006, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Vert J, 2004, THE MIT PRESS EBOOKS;Leslie C, 2001, ;Smola A, 2003, LECTURE NOTES IN COMPUTER SCIENCE;Kettenring J, 1971, BIOMETRIKA;Bennett K, 1992, OPTIMIZATION METHODS & SOFTWARE;Rényi A, 1959, ACTA MATHEMATICA ACADEMIAE SCIENTIARUM HUNGARICAE;Kashima H, 2003, ;Smola A, 1998, NEURAL NETWORKS;Ham J, 2004, ;Hilbert D, 1989, TEUBNER-ARCHIV ZUR MATHEMATIK;Kondor R, 2002, INTERNATIONAL CONFERENCE ON MACHINE LEARNING;Boucheron S, 2005, ESAIM PROBABILITY AND STATISTICS;Mangasarian O, 1965, OPERATIONS RESEARCH;Bochner S, 1933, MATHEMATISCHE ANNALEN;Smola A, 1998, ALGORITHMICA;Wahba G, 1995, THE ANNALS OF STATISTICS;, 2000, APPLIED PHYSICS LETTERS;Stitson M, 1998, THE MIT PRESS EBOOKS;Vishwanathan S, 2006, INTERNATIONAL JOURNAL OF COMPUTER VISION;Stewart J, 1976, ROCKY MOUNTAIN JOURNAL OF MATHEMATICS;Steinwart I, 2002, 
JOURNAL OF COMPLEXITY;Jebara T, 2003, LECTURE NOTES IN COMPUTER SCIENCE;Green P, 1985, LECTURE NOTES IN STATISTICS;Lafferty J, 2004, ;Altün Y, 2004, ;Gretton A, 2005, MAX PLANCK INSTITUTE FOR PLASMA PHYSICS;Dauxois J, 1998, THE ANNALS OF STATISTICS;Culotta A, 2005, SCHOLARWORKS@UMASSAMHERST (UNIVERSITY OF MASSACHUSETTS AMHERST);Magerman D, 1996, LECTURE NOTES IN COMPUTER SCIENCE;Bennet K, 2000, INTERNATIONAL CONFERENCE ON MACHINE LEARNING;Zettlemoyer L, 2012, ARXIV (CORNELL UNIVERSITY);McCallum A, 2005, ;Tibshirani R, 1996, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Vapnik V, 1995, ;Boser B, 1992, ;Schölkopf B, 1998, NEURAL COMPUTATION;Freund Y, 1996, ;Schölkopf B, 2001, NEURAL COMPUTATION;Poggio T, 1990, PROCEEDINGS OF THE IEEE;Wainwright M, 2007, NOW PUBLISHERS, INC. EBOOKS;Schölkopf B, 2000, NEURAL COMPUTATION;Vapnik V, 1996, ;Tsochantaridis I, 2005, MPG.PURE (MAX PLANCK SOCIETY);Cardoso J, 1998, PROCEEDINGS OF THE IEEE;Crammer K, 2002, ;Borgwardt K, 2006, BIOINFORMATICS;Friedman J, 1974, IEEE TRANSACTIONS ON COMPUTERS;Sha F, 2003, ;Hettich R, 1993, SIAM REVIEW;Schoenberg I, 1938, ANNALS OF MATHEMATICS;Collins M, 2002, THE MIT PRESS EBOOKS;Joachims T, 2005, ;, 2000, APPLIED PHYSICS LETTERS;DeCoste D, 2002, MACHINE LEARNING;, 2000, APPLIED PHYSICS LETTERS;Mercer J, 1909, PROCEEDINGS OF THE ROYAL SOCIETY OF LONDON SERIES A CONTAINING PAPERS OF A MATHEMATICAL AND PHYSICAL CHARACTER;Altün Y, 2003, ;Gärtner T, 2003, ACM SIGKDD EXPLORATIONS NEWSLETTER;Zien A, 2000, BIOINFORMATICS;Collins M, 2005, COMPUTATIONAL LINGUISTICS;Koltchinskii V, 2001, IEEE TRANSACTIONS ON INFORMATION THEORY;Basilico J, 2004, ;Kwa, 2005, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Vishwanathan S, 2004, THE MIT PRESS EBOOKS;Dawid A, 1992, STATISTICS AND COMPUTING;Jaakkola T, 1999, ;Mendelson S, 2003, LECTURE NOTES IN COMPUTER SCIENCE;Yang H, 1997, NEURAL COMPUTATION;Watkins C, 2000, THE MIT PRESS EBOOKS;Parzen E, 1970, ;Cortes C, 2005, ;Rätsch 
G, 2007, PLOS COMPUTATIONAL BIOLOGY;Chen A, 2005, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Chapelle O, 2004, ;Poggio T, 1975, BIOLOGICAL CYBERNETICS;Einmahl J, 1992, THE ANNALS OF STATISTICS;Fitzgerald C, 1995, LINEAR ALGEBRA AND ITS APPLICATIONS;Das S, 1994, LINEAR ALGEBRA AND ITS APPLICATIONS;Altün Y, 2004, UNCERTAINTY IN ARTIFICIAL INTELLIGENCE;Crammer K, 2005, LECTURE NOTES IN COMPUTER SCIENCE;Alon N, 2002, ;Cortes C, 1995, MACHINE LEARNING;Hoerl A, 1970, TECHNOMETRICS;Wahba G, 1990, SOCIETY FOR INDUSTRIAL AND APPLIED MATHEMATICS EBOOKS;Vapnik V, 1971, THEORY OF PROBABILITY AND ITS APPLICATIONS;Kuhn H, 1951, ;Huber P, 1985, THE ANNALS OF STATISTICS;Herbrich R, 2000, THE MIT PRESS EBOOKS;Hammersley J, 1971, ;Aronszajn N, 1950, TRANSACTIONS OF THE AMERICAN MATHEMATICAL SOCIETY;Cristianini N, 2002, THE MIT PRESS EBOOKS;Friedman J, 1987, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Murray M, 1993, ;Cook D, 1993, JOURNAL OF COMPUTATIONAL AND GRAPHICAL STATISTICS;, 2000, APPLIED PHYSICS LETTERS;O’Sullivan F, 1986, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION",,,OPENALEX,"Hofmann T, 2008, THE ANNALS OF STATISTICS","Hofmann T, 2008, THE ANNALS OF STATISTICS" +https://openalex.org/W4213308398,10.1007/978-3-030-05318-5,Automated Machine Learning,2019,en,book,1392,˜THE œSPRINGER SERIES ON CHALLENGES IN MACHINE LEARNING,˜The œSpringer series on challenges in machine learning,Frank Hutter;Lars Kotthoff;Joaquin Vanschoren,Frank Hutter;Lars Kotthoff;Joaquin Vanschoren,"Department of Computer Science, University of Freiburg, Freiburg, Germany;University of Wyoming, Laramie, USA;Eindhoven University of Technology, Eindhoven, The Netherlands",,"This open access book gives the first comprehensive overview of general methods in Automatic Machine Learning, AutoML, collects descriptions of existing AutoML systems based on these methods, and discusses the first international challenge of AutoML systems.",,,,,Computer science;Artificial intelligence;Machine 
learning,DE;US;NL,"Dietterich T, 2000, LECTURE NOTES IN COMPUTER SCIENCE;Wolpert D, 1992, NEURAL NETWORKS;Hutter F, 2011, LECTURE NOTES IN COMPUTER SCIENCE;Ghahramani Z, 2015, NATURE;Ripley B, 1993, ;Thrun S, 1998, ;Brazdil P, 2009, ;Ginsbourger D, 2010, ADAPTATION, LEARNING, AND OPTIMIZATION;Wang Z, 2016, JOURNAL OF ARTIFICIAL INTELLIGENCE RESEARCH;Gardner J, 2014, POLYPUBLIE (ÉCOLE POLYTECHNIQUE DE MONTRÉAL);Hutter F, 2014, ;Pfahringer B, 2000, ;Bardenet R, 2013, HAL (LE CENTRE POUR LA COMMUNICATION SCIENTIFIQUE DIRECTE);Simon P, 2013, ;Bischl B, 2016, ARTIFICIAL INTELLIGENCE;Kohavi R, 1995, ELSEVIER EBOOKS;Yogatama D, 2014, INTERNATIONAL CONFERENCE ON ARTIFICIAL INTELLIGENCE AND STATISTICS;Gramacy R, 2011, OXFORD UNIVERSITY PRESS EBOOKS;Almeida L, 1999, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Schmidt M, 2009, GENETIC AND EVOLUTIONARY COMPUTATION;Bensusan H, 2001, LECTURE NOTES IN COMPUTER SCIENCE;Castiello C, 2005, LECTURE NOTES IN COMPUTER SCIENCE;Mackay D, 1996, ;Bensusan H, 2000, BRISTOL RESEARCH (UNIVERSITY OF BRISTOL);Bensusan H, 2000, LECTURE NOTES IN COMPUTER SCIENCE;Hutter F, 2012, LECTURE NOTES IN COMPUTER SCIENCE;Niculescu-Mizil A, 2009, KNOWLEDGE DISCOVERY AND DATA MINING;Fürnkranz J, 2001, ;Rijn J, 2014, LECTURE NOTES IN COMPUTER SCIENCE;Stern D, 2010, PROCEEDINGS OF THE AAAI CONFERENCE ON ARTIFICIAL INTELLIGENCE;Murray I, 2005, CAMBRIDGE UNIVERSITY ENGINEERING DEPARTMENT PUBLICATIONS DATABASE;Soares R, 2009, LECTURE NOTES IN COMPUTER SCIENCE;Soares C, 2001, LECTURE NOTES IN COMPUTER SCIENCE;Greene C, 2007, LECTURE NOTES IN COMPUTER SCIENCE;Leite R, 2010, FRONTIERS IN ARTIFICIAL INTELLIGENCE AND APPLICATIONS;Zabinsky Z, 2003, NONCONVEX OPTIMIZATION AND ITS APPLICATIONS;Hilario M, 2001, LECTURE NOTES IN COMPUTER SCIENCE;John G, 1994, ;Santos P, 2005, ;Vilalta R, 2002, ;Hand D, 1986, ADDISON-WESLEY LONGMAN PUBLISHING CO., INC. 
EBOOKS;Opper M, 2000, ASTON PUBLICATIONS EXPLORER (ASTON UNIVERSITY);Dietterich T, 2002, ;Koch P, 2010, ;Kingma D, 2014, UVA-DARE (UNIVERSITY OF AMSTERDAM);Zeiler M, 2012, ARXIV (CORNELL UNIVERSITY);Wolpert D, 1995, REPEC: RESEARCH PAPERS IN ECONOMICS;Dieleman S, 2015, ZENODO (CERN EUROPEAN ORGANIZATION FOR NUCLEAR RESEARCH);Snoek J, 2014, ARXIV (CORNELL UNIVERSITY);Friedman J, 2001, THE ANNALS OF STATISTICS;Kullback S, 1951, THE ANNALS OF MATHEMATICAL STATISTICS;Demšar J, 2006, ;, 2007, KYBERNETES;Bozdogan H, 1987, PSYCHOMETRIKA;Ricci⋆ F, 2010, ;Nadaraya É, 1964, THEORY OF PROBABILITY AND ITS APPLICATIONS;Schmidt M, 2009, SCIENCE;Goldberg D, 1991, FOUNDATIONS OF GENETIC ALGORITHMS;Yu L, 2003, ;Guyon I, 2006, ;Eberhart R, 1998, LECTURE NOTES IN COMPUTER SCIENCE;Dempe S, 2002, KLUWER ACADEMIC PUBLISHERS EBOOKS;Banzhaf W, 1997, ;Caruana R, 2004, ;Miller G, 1989, INTERNATIONAL CONFERENCE ON GENETIC ALGORITHMS;Friedrichs F, 2005, NEUROCOMPUTING;Schonlau M, 1998, LECTURE NOTES-MONOGRAPH SERIES;King R, 1995, APPLIED ARTIFICIAL INTELLIGENCE;Lin S, 2010, WILEY INTERDISCIPLINARY REVIEWS COMPUTATIONAL STATISTICS;Hengst B, 2002, ;Momma M, 2002, ;Guyon I, 2010, ;Ali S, 2006, NEUROCOMPUTING;Guyon I, 2015, ;Leite R, 2012, LECTURE NOTES IN COMPUTER SCIENCE;Hutter F, 2010, LECTURE NOTES IN COMPUTER SCIENCE;Chalmers D, 1991, ELSEVIER EBOOKS;Peng Y, 2002, LECTURE NOTES IN COMPUTER SCIENCE;Fröhlich H, 2006, PROCEEDINGS. 
2005 IEEE INTERNATIONAL JOINT CONFERENCE ON NEURAL NETWORKS, 2005.;Lindner G, 1999, LECTURE NOTES IN COMPUTER SCIENCE;Igel C, 2005, LECTURE NOTES IN COMPUTER SCIENCE;Mantovani R, 2015, ;Todorovski L, 2002, LECTURE NOTES IN COMPUTER SCIENCE;Rúnarsson T, 2002, ;Wistuba M, 2015, LECTURE NOTES IN COMPUTER SCIENCE;Schilling N, 2015, LECTURE NOTES IN COMPUTER SCIENCE;Escalante H, 2010, ;Escalante H, 2009, LECTURE NOTES IN COMPUTER SCIENCE;Andrieu C, 2003, ;Arinze B, 1994, OMEGA;Todorovski L, 1999, LECTURE NOTES IN COMPUTER SCIENCE;Guerra S, 2008, LECTURE NOTES IN COMPUTER SCIENCE;Bhagat P, 2005, ELSEVIER EBOOKS;Kalousis A, 2003, ;Kohavi R, 1995, ;Maclaurin D, 2015, ARXIV (CORNELL UNIVERSITY);Gelbart M, 2014, ARXIV (CORNELL UNIVERSITY);Jamieson K, 2015, ARXIV (CORNELL UNIVERSITY);Dwork C, 2015, ARXIV (CORNELL UNIVERSITY);Srivastava N, 2014, ;Bergstra J, 2012, ;Stekhoven D, 2011, BIOINFORMATICS;Razavian A, 2014, ;Troyanskaya O, 2001, BIOINFORMATICS;Blum A, 1997, ARTIFICIAL INTELLIGENCE;Stanley K, 2002, EVOLUTIONARY COMPUTATION;Bergstra J, 2011, ;Fei-Fei L, 2006, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Fortin F, 2012, ;Bergstra J, 2013, DIGITAL ACCESS TO SCHOLARSHIP AT HARVARD (DASH) (HARVARD UNIVERSITY);Dahl G, 2013, ;Larochelle H, 2007, ;Rousseeuw P, 2011, WILEY INTERDISCIPLINARY REVIEWS DATA MINING AND KNOWLEDGE DISCOVERY;Vapnik V, 2000, NEURAL COMPUTATION;Smith‐Miles K, 2009, ACM COMPUTING SURVEYS;Samanta B, 2003, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Swersky K, 2013, DIGITAL ACCESS TO SCHOLARSHIP AT HARVARD (DASH) (HARVARD UNIVERSITY);Velez D, 2007, GENETIC EPIDEMIOLOGY;Schmidhuber J, 1992, NEURAL COMPUTATION;Lemke C, 2013, ARTIFICIAL INTELLIGENCE REVIEW;Provost F, 1999, ;Martinsson P, 2010, APPLIED AND COMPUTATIONAL HARMONIC ANALYSIS;Komer B, 2014, PROCEEDINGS OF THE PYTHON IN SCIENCE CONFERENCES;Bhowan U, 2013, IEEE TRANSACTIONS ON EVOLUTIONARY COMPUTATION;Adankon M, 2008, PATTERN RECOGNITION;Bischl B, 2012, EVOLUTIONARY COMPUTATION;Soares 
C, 2004, MACHINE LEARNING;Escalante H, 2009, ;Sparks E, 2015, ;Gomes T, 2011, NEUROCOMPUTING;Reif M, 2012, PATTERN ANALYSIS AND APPLICATIONS;Cheng W, 2009, ;Jordan M, 2013, BERNOULLI;Cook W, 1997, EUROPEAN JOURNAL OF OPERATIONAL RESEARCH;Biem A, 2004, ;Leite R, 2005, ;Moore G, 2011, MACHINE LEARNING;Bengio S, 1995, NEURAL PROCESSING LETTERS;Strijov V, 2010, COMPUTERS & MATHEMATICS WITH APPLICATIONS;Nguyen P, 2014, JOURNAL OF ARTIFICIAL INTELLIGENCE RESEARCH;Moore G, 2009, ;Zutty J, 2015, ;Sun Q, 2012, ;Salama M, 2013, MEMETIC COMPUTING;Miranda P, 2013, ;Wolstenholme D, 1988, KNOWLEDGE-BASED SYSTEMS;SAHNI P, 2012, INTERNATIONAL JOURNAL OF SCIENTIFIC RESEARCH;Pedregosa F, 2012, ARXIV (CORNELL UNIVERSITY);Brochu E, 2010, ARXIV (CORNELL UNIVERSITY);Russakovsky O, 2015, INTERNATIONAL JOURNAL OF COMPUTER VISION;Mnih V, 2015, NATURE;Ioannidis J, 2005, PLOS MEDICINE;Hyvärinen A, 2000, NEURAL NETWORKS;Duchi J, 2010, ;Taigman Y, 2014, ;Cireşan D, 2012, ;Yao X, 1999, PROCEEDINGS OF THE IEEE;Fulkerson B, 1995, TECHNOMETRICS;Cawley G, 2010, ;Evgeniou T, 2004, ;Angeline P, 1994, IEEE TRANSACTIONS ON NEURAL NETWORKS;Evgeniou T, 2005, ;Stanley K, 2009, ARTIFICIAL LIFE;Ho T, 2002, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Statnikov A, 2008, BMC BIOINFORMATICS;King R, 2004, NATURE;Hastie T, 2004, ;Hochreiter S, 2001, LECTURE NOTES IN COMPUTER SCIENCE;Rahimi A, 2008, ;Thrun S, 1998, ;Ali S, 2006, APPLIED SOFT COMPUTING;Karnin Z, 2013, ;Cawley G, 2007, UEA DIGITAL REPOSITORY (UNIVERSITY OF EAST ANGLIA);Urbanowicz R, 2012, BIODATA MINING;Bart E, 2005, ;Guo X, 2008, NEUROCOMPUTING;Fink M, 2004, ;Prudêncio R, 2004, NEUROCOMPUTING;Guan H, 2009, ;Serban F, 2013, ACM COMPUTING SURVEYS;Caruana R, 2006, PROCEEDINGS;Keerthi S, 2007, THE MIT PRESS EBOOKS;Sun Q, 2013, MACHINE LEARNING;Schmidhuber J, 2002, IEEE INTERNATIONAL CONFERENCE ON NEURAL NETWORKS;Kalousis A, 2002, ;Bennett K, 2008, LECTURE NOTES IN COMPUTER SCIENCE;Chen P, 2005, ;Bosch A, 2005, DATA ARCHIVING AND 
NETWORKED SERVICES (DANS);Lacoste A, 2014, ;Städler N, 2014, JOURNAL OF MACHINE LEARNING RESEARCH;Sun Q, 2013, LECTURE NOTES IN COMPUTER SCIENCE;Guyon I, 2007, NEURAL NETWORKS;Sutskever I, 2014, ARXIV (CORNELL UNIVERSITY);Snoek J, 2012, ARXIV (CORNELL UNIVERSITY);Wilson A, 2013, ARXIV (CORNELL UNIVERSITY);Schaul T, 2012, ARXIV (CORNELL UNIVERSITY);Duvenaud D, 2013, ARXIV (CORNELL UNIVERSITY);Kandasamy K, 2015, ARXIV (CORNELL UNIVERSITY);Murray I, 2010, ARXIV (CORNELL UNIVERSITY);He K, 2016, ;Krizhevsky A, 2017, COMMUNICATIONS OF THE ACM;Chen T, 2016, ;Szegedy C, 2016, ;Pan S, 2009, IEEE TRANSACTIONS ON KNOWLEDGE AND DATA ENGINEERING;Silver D, 2016, NATURE;Bengio Y, 2013, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Lu Z, 2010, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES A (STATISTICS IN SOCIETY);Shahriari B, 2015, PROCEEDINGS OF THE IEEE;Rendle S, 2010, ;Team T, 2016, ARXIV (CORNELL UNIVERSITY);Feurer M, 2015, NEURAL INFORMATION PROCESSING SYSTEMS;Bergstra J, 2013, PROCEEDINGS OF THE PYTHON IN SCIENCE CONFERENCES;Floreano D, 2008, EVOLUTIONARY INTELLIGENCE;Olson R, 2016, ;Bengio Y, 2000, NEURAL COMPUTATION;Domhan T, 2015, FREIDOK PLUS (UNIVERSITÄTSBIBLIOTHEK FREIBURG);Feurer M, 2015, PROCEEDINGS OF THE AAAI CONFERENCE ON ARTIFICIAL INTELLIGENCE;Luo G, 2016, NETWORK MODELING ANALYSIS IN HEALTH INFORMATICS AND BIOINFORMATICS;Maron O, 2018, RESEARCH SHOWCASE @ CARNEGIE MELLON UNIVERSITY (CARNEGIE MELLON UNIVERSITY);Caruana R, 1994, ;Hutter F, 2015, KI - KÜNSTLICHE INTELLIGENZ;Reif M, 2012, MACHINE LEARNING;Hernández-Lobato J, 2016, APOLLO (UNIVERSITY OF CAMBRIDGE);Wistuba M, 2015, ;Hutter F, 2009, CIRCLE (UNIVERSITY OF BRITISH COLUMBIA);Rijn J, 2015, LECTURE NOTES IN COMPUTER SCIENCE;Lloyd J, 2015, NEURAL INFORMATION PROCESSING SYSTEMS;Jing Y, 2008, JOURNAL OF JIAMUSI UNIVERSITY;Pinto F, 2016, LECTURE NOTES IN COMPUTER SCIENCE;Thornton C, 2012, ARXIV (CORNELL UNIVERSITY);Schulz E, 2016, DSPACE@MIT (MASSACHUSETTS INSTITUTE OF TECHNOLOGY);Reif M, 
2011, LECTURE NOTES IN COMPUTER SCIENCE;Wang L, 2015, ;Reif M, 2012, ;Domhan T, 2014, ;Mantovani R, 2015, UNESP INSTITUTIONAL REPOSITORY (SÃO PAULO STATE UNIVERSITY);Priya R, 2012, LECTURE NOTES IN COMPUTER SCIENCE;Ridd P, 2014, ;Bürger F, 2015, LECTURE NOTES IN COMPUTER SCIENCE;Schilling N, 2015, ;Olier I, 2015, ;Loshchilov I, 2016, ARXIV (CORNELL UNIVERSITY);Pedregosa F, 2016, ARXIV (CORNELL UNIVERSITY);Thrun S, 1994, ;Swersky K, 2014, ARXIV (CORNELL UNIVERSITY);Lévesque J, 2016, ARXIV (CORNELL UNIVERSITY);Dewancker I, 2016, ARXIV (CORNELL UNIVERSITY);Mısır M, 2013, HAL (LE CENTRE POUR LA COMMUNICATION SCIENTIFIQUE DIRECTE);Chollet F, 2017, ;, 2012, WILEY SERIES IN PROBABILITY AND STATISTICS;Santoro A, 2016, INTERNATIONAL CONFERENCE ON MACHINE LEARNING;, 2009, ;Kotthoff L, 2019, ˜THE œSPRINGER SERIES ON CHALLENGES IN MACHINE LEARNING;Olson R, 2017, BIODATA MINING;Springenberg J, 2016, NEURAL INFORMATION PROCESSING SYSTEMS;Mendoza H, 2016, INTERNATIONAL CONFERENCE ON MACHINE LEARNING;Ilievski I, 2017, PROCEEDINGS OF THE AAAI CONFERENCE ON ARTIFICIAL INTELLIGENCE;Sá A, 2017, LECTURE NOTES IN COMPUTER SCIENCE;Mantovani R, 2016, ;Kandasamy K, 2016, NEURAL INFORMATION PROCESSING SYSTEMS;Mısır M, 2016, ARTIFICIAL INTELLIGENCE;Daniel C, 2016, PROCEEDINGS OF THE AAAI CONFERENCE ON ARTIFICIAL INTELLIGENCE;Gardner J, 2017, INTERNATIONAL CONFERENCE ON ARTIFICIAL INTELLIGENCE AND STATISTICS;Bilalli B, 2017, COMPUTER STANDARDS & INTERFACES;McIntire M, 2016, UNCERTAINTY IN ARTIFICIAL INTELLIGENCE;Horn D, 2016, ;Lévesque J, 2017, ;Jenatton R, 2017, ;Hwang Y, 2016, SCHOLARWORKS@UNIST (ULSAN NATIONAL INSTITUTE OF SCIENCE AND TECHNOLOGY);Shah A, 2016, CAMBRIDGE UNIVERSITY ENGINEERING DEPARTMENT PUBLICATIONS DATABASE;Wistuba M, 2017, SOCIETY FOR INDUSTRIAL AND APPLIED MATHEMATICS EBOOKS;Post M, 2016, LECTURE NOTES IN COMPUTER SCIENCE;Kuba P, 2002, ;, 2008, ;Finn C, 2017, ARXIV (CORNELL UNIVERSITY);Snell J, 2017, ARXIV (CORNELL UNIVERSITY);Zoph B, 2016, ARXIV (CORNELL 
UNIVERSITY);Doshi‐Velez F, 2017, ARXIV (CORNELL UNIVERSITY);Li L, 2016, ARXIV (CORNELL UNIVERSITY);Salimans T, 2017, ARXIV (CORNELL UNIVERSITY);, 2006, ;Duan Y, 2016, ARXIV (CORNELL UNIVERSITY);Gastaldi X, 2017, ARXIV (CORNELL UNIVERSITY);Chen Y, 2016, ARXIV (CORNELL UNIVERSITY);Negrinho R, 2017, ARXIV (CORNELL UNIVERSITY);Kandasamy K, 2017, ARXIV (CORNELL UNIVERSITY);Li K, 2017, ARXIV (CORNELL UNIVERSITY);Saxena S, 2016, ARXIV (CORNELL UNIVERSITY);Snoek J, 2014, TSPACE (UNIVERSITY OF TORONTO);Valera I, 2017, APOLLO (UNIVERSITY OF CAMBRIDGE);Kim H, 2017, ARXIV (CORNELL UNIVERSITY);Pinto F, 2017, ;Janz D, 2016, ARXIV (CORNELL UNIVERSITY);Ravi S, 2017, INTERNATIONAL CONFERENCE ON LEARNING REPRESENTATIONS;Zhong Z, 2018, ;Cai H, 2018, PROCEEDINGS OF THE AAAI CONFERENCE ON ARTIFICIAL INTELLIGENCE;Dong J, 2018, LECTURE NOTES IN COMPUTER SCIENCE;Klein A, 2017, INTERNATIONAL CONFERENCE ON LEARNING REPRESENTATIONS;Mohr F, 2018, MACHINE LEARNING;Vartak M, 2017, NEURAL INFORMATION PROCESSING SYSTEMS;Rijn J, 2017, MACHINE LEARNING;Suganuma M, 2018, ;Wistuba M, 2017, MACHINE LEARNING;Li L, 2017, INTERNATIONAL CONFERENCE ON LEARNING REPRESENTATIONS;Jin H, 2018, ARXIV (CORNELL UNIVERSITY);Yamada Y, 2018, ARXIV (CORNELL UNIVERSITY);Lorena A, 2017, MACHINE LEARNING;Klein A, 2017, ELECTRONIC JOURNAL OF STATISTICS;Bilalli B, 2017, INTERNATIONAL JOURNAL OF APPLIED MATHEMATICS AND COMPUTER SCIENCE;Abdulrahman S, 2017, MACHINE LEARNING;Lu X, 2018, INTERNATIONAL CONFERENCE ON MACHINE LEARNING;Ramachandran A, 2018, LECTURE NOTES IN COMPUTER SCIENCE;Nguyen V, 2018, KNOWLEDGE AND INFORMATION SYSTEMS;Chandrashekaran A, 2017, LECTURE NOTES IN COMPUTER SCIENCE;Kietz J, 2012, ZURICH OPEN REPOSITORY AND ARCHIVE (UNIVERSITY OF ZURICH);McInerney J, 2017, NEURAL INFORMATION PROCESSING SYSTEMS;Zhang H, 2017, ARXIV (CORNELL UNIVERSITY);DeVries T, 2017, ARXIV (CORNELL UNIVERSITY);Mishra N, 2017, ARXIV (CORNELL UNIVERSITY);Ren M, 2018, ARXIV (CORNELL UNIVERSITY);Probst P, 2018, ARXIV (CORNELL 
UNIVERSITY);Such F, 2017, ARXIV (CORNELL UNIVERSITY);Brock A, 2017, ARXIV (CORNELL UNIVERSITY);Chrabąszcz P, 2017, ARXIV (CORNELL UNIVERSITY);Kandasamy K, 2018, ARXIV (CORNELL UNIVERSITY);Elsken T, 2017, ARXIV (CORNELL UNIVERSITY);Cai H, 2018, ARXIV (CORNELL UNIVERSITY);Ashok A, 2017, ARXIV (CORNELL UNIVERSITY);Kalousis A, 2002, ARCHIVE OUVERTE UNIGE (UNIVERSITY OF GENEVA);Wang Z, 2017, ARXIV (CORNELL UNIVERSITY);Wang J, 2018, ARXIV (CORNELL UNIVERSITY);Pang K, 2018, ARXIV (CORNELL UNIVERSITY);Alaa A, 2018, ARXIV (CORNELL UNIVERSITY);Oh C, 2018, ARXIV (CORNELL UNIVERSITY);Suganuma M, 2018, ARXIV (CORNELL UNIVERSITY);Feurer M, 2018, ARXIV (CORNELL UNIVERSITY);Chen B, 2018, ARXIV (CORNELL UNIVERSITY);Kim J, 2017, ARXIV (CORNELL UNIVERSITY);Wistuba M, 2017, ARXIV (CORNELL UNIVERSITY);Meyerson E, 2018, ARXIV (CORNELL UNIVERSITY);Perrone V, 2017, ARXIV (CORNELL UNIVERSITY);Huang G, 2017, ;Paszke A, 2017, ;Zagoruyko S, 2016, ;Cohen W, 2008, ;Real E, 2019, ;Liu C, 2018, LECTURE NOTES IN COMPUTER SCIENCE;ZhaoPeng, 2006, JOURNAL OF MACHINE LEARNING RESEARCH;Li L, 2017, JOURNAL OF MACHINE LEARNING RESEARCH;Hutter F, 2009, JOURNAL OF ARTIFICIAL INTELLIGENCE RESEARCH;Simon D, 2013, ;Olson R, 2019, ˜THE œSPRINGER SERIES ON CHALLENGES IN MACHINE LEARNING;Bender G, 2018, INTERNATIONAL CONFERENCE ON MACHINE LEARNING;Sculley D, 2018, INTERNATIONAL CONFERENCE ON LEARNING REPRESENTATIONS;Liang J, 2018, PROCEEDINGS OF THE GENETIC AND EVOLUTIONARY COMPUTATION CONFERENCE;Shin R, 2018, INTERNATIONAL CONFERENCE ON LEARNING REPRESENTATIONS;Luketina J, 2016, INTERNATIONAL CONFERENCE ON MACHINE LEARNING;Ahmed K, 2018, LECTURE NOTES IN COMPUTER SCIENCE;WainerJacques, 2017, AMERICANAE (AECID LIBRARY);Yang C, 2018, ARXIV (CORNELL UNIVERSITY);Klein A, 2018, ;BoulléMarc, 2007, JOURNAL OF MACHINE LEARNING RESEARCH;Rívolli A, 2018, ARXIV (CORNELL UNIVERSITY);Lévesque J, 2018, ;Ramachandran A, 2019, LECTURE NOTES IN COMPUTER SCIENCE;Wever M, 2018, INTERNATIONAL CONFERENCE ON MACHINE 
LEARNING;KruegerTammo, 2015, JOURNAL OF MACHINE LEARNING RESEARCH;Abadi M, 2016, ARXIV (CORNELL UNIVERSITY);Elsken T, 2018, ARXIV (CORNELL UNIVERSITY);Snoek J, 2015, ARXIV (CORNELL UNIVERSITY);Klein A, 2016, ARXIV (CORNELL UNIVERSITY);Lloyd J, 2014, ARXIV (CORNELL UNIVERSITY);Sabharwal A, 2016, ;Zhong Z, 2018, ARXIV (CORNELL UNIVERSITY);Zhang Y, 2016, ARXIV (CORNELL UNIVERSITY);Eggensperger K, 2017, ARXIV (CORNELL UNIVERSITY);Asuncion A, 2007, MEDICAL ENTOMOLOGY AND ZOOLOGY;Russell E, 2000, ADVANCES IN INDUSTRIAL CONTROL;Buuren S, 2010, ;Kendall M, 1938, BIOMETRIKA;, 2005, IEEE TRANSACTIONS ON NEURAL NETWORKS;Deb K, 2002, ZENODO (CERN EUROPEAN ORGANIZATION FOR NUCLEAR RESEARCH);Thompson W, 1933, BIOMETRIKA;Efron B, 1983, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Rosenstein M, 2005, ;Mackay D, 1992, ;Robbins H, 1985, ;Baxter J, 1995, ;Daniel H, 2015, ARXIV (CORNELL UNIVERSITY);Maron O, 1997, ;Nguyen T, 2018, INTERNATIONAL JOURNAL OF DATA SCIENCE AND ANALYTICS;Leite R, 2007, LECTURE NOTES IN COMPUTER SCIENCE;Guyon I, 2006, THE 2006 IEEE INTERNATIONAL JOINT CONFERENCE ON NEURAL NETWORK PROCEEDINGS;Krizhevsky A, 2024, ;Rasmussen C, 2022, DIRECTORY OF OPEN ACCESS BOOKS (OAPEN FOUNDATION);Donahue J, 2013, ARXIV (CORNELL UNIVERSITY);, 2005, ;Elsken T, 2018, ARXIV (CORNELL UNIVERSITY);Thornton C, 2012, ARXIV (CORNELL UNIVERSITY);Weerts H, 2020, ARXIV (CORNELL UNIVERSITY);Zela A, 2018, ARXIV (CORNELL UNIVERSITY);Finn C, 2017, ARXIV (CORNELL UNIVERSITY);Hernández-Lobato J, 2017, ARXIV (CORNELL UNIVERSITY);, 2003, TECHNOMETRICS;McQuarrie A, 1998, WORLD SCIENTIFIC EBOOKS;Miikkulainen R, 2023, ELSEVIER EBOOKS;Hansen N, 2016, ARXIV (CORNELL UNIVERSITY);Zhou Y, 2018, ARXIV (CORNELL UNIVERSITY);Rawal A, 2018, ARXIV (CORNELL UNIVERSITY);Reed S, 2017, ARXIV (CORNELL UNIVERSITY)",,,OPENALEX,"Hutter F, 2019, ˜THE œSPRINGER SERIES ON CHALLENGES IN MACHINE LEARNING","Hutter F, 2019, ˜THE œSPRINGER SERIES ON CHALLENGES IN MACHINE LEARNING" 
+https://openalex.org/W3123436326,10.1111/ectj.12097,Double/debiased machine learning for treatment and structural parameters,2017,en,article,2454,ECONOMETRICS JOURNAL,Econometrics Journal,Victor Chernozhukov;Denis Chetverikov;Mert Demirer;Esther Duflo;Christian Hansen;Whitney K. Newey;James M. Robins,Victor Chernozhukov;Denis Chetverikov;Mert Demirer;Esther Duflo;Christian Hansen;Whitney Newey;James Robins,"Massachusetts Institute of Technology, 50 Memorial Drive, Cambridge, MA 02139, USA;University of California Los Angeles, 315 Portola Plaza, Los Angeles, CA 90095, USA;Massachusetts Institute of Technology, 50 Memorial Drive, Cambridge, MA 02139, USA;Massachusetts Institute of Technology, 50 Memorial Drive, Cambridge, MA 02139, USA;University of Chicago, 5807 S. Woodlawn Ave., Chicago, IL 60637, USA;Massachusetts Institute of Technology, 50 Memorial Drive, Cambridge, MA 02139, USA;Harvard University, 677 Huntington Avenue, Boston, MA 02115, USA",,"We revisit the classic semi‐parametric problem of inference on a low‐dimensional parameter θ0 in the presence of high‐dimensional nuisance parameters η0. We depart from the classical setting by allowing for η0 to be so high‐dimensional that the traditional assumptions (e.g. Donsker properties) that limit complexity of the parameter space for this object break down. To estimate η0, we consider the use of statistical or machine learning (ML) methods, which are particularly well suited to estimation in modern, very high‐dimensional cases. ML methods perform well by employing regularization to reduce variance and trading off regularization bias with overfitting in practice. However, both regularization bias and overfitting in estimating η0 cause a heavy bias in estimators of θ0 that are obtained by naively plugging ML estimators of η0 into estimating equations for θ0. This bias results in the naive estimator failing to be N−1/2 consistent, where N is the sample size. 
We show that the impact of regularization bias and overfitting on estimation of the parameter of interest θ0 can be removed by using two simple, yet critical, ingredients: (1) using Neyman‐orthogonal moments/scores that have reduced sensitivity with respect to nuisance parameters to estimate θ0; (2) making use of cross‐fitting, which provides an efficient form of data‐splitting. We call the resulting set of methods double or debiased ML (DML). We verify that DML delivers point estimators that concentrate in an N−1/2‐neighbourhood of the true parameter values and are approximately unbiased and normally distributed, which allows construction of valid confidence statements. The generic statistical theory of DML is elementary and simultaneously relies on only weak theoretical requirements, which will admit the use of a broad array of modern ML methods for estimating the nuisance parameters, such as random forests, lasso, ridge, deep neural nets, boosted trees, and various hybrids and ensembles of these methods. We illustrate the general theory by applying it to provide theoretical properties of the following: DML applied to learn the main regression parameter in a partially linear regression model; DML applied to learn the coefficient on an endogenous variable in a partially linear instrumental variables model; DML applied to learn the average treatment effect and the average treatment effect on the treated under unconfoundedness; DML applied to learn the local average treatment effect in an instrumental variables setting. 
In addition to these theoretical applications, we also illustrate the use of DML in three empirical examples.",21,1,C1,C68,Overfitting;Estimator;Nuisance parameter;Regularization (linguistics);Mathematics;Statistics;Algorithm;Artificial intelligence;Applied mathematics;Computer science;Artificial neural network,US,"Rosenbaum P, 1983, BIOMETRIKA;Hansen L, 1982, ECONOMETRICA;Imbens G, 1994, ECONOMETRICA;Vaart A, 1998, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Abadie A, 2005, ECONOMETRICA;Bickel P, 2009, THE ANNALS OF STATISTICS;Robinson P, 1988, ECONOMETRICA;Belloni A, 2013, THE REVIEW OF ECONOMIC STUDIES;Schick A, 1994, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Zhang C, 2013, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Hahn J, 1998, ECONOMETRICA;Belloni A, 2012, ECONOMETRICA;Robins J, 1995, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Laan M, 2006, THE INTERNATIONAL JOURNAL OF BIOSTATISTICS;Geer S, 2014, THE ANNALS OF STATISTICS;Chamberlain G, 1987, JOURNAL OF ECONOMETRICS;Newey W, 1994, ECONOMETRICA;Dasgupta A, 2008, SPRINGER TEXTS IN STATISTICS;Bickel P, 1982, THE ANNALS OF STATISTICS;Newey W, 1990, JOURNAL OF APPLIED ECONOMETRICS;Imai K, 2013, THE ANNALS OF APPLIED STATISTICS;Chen X, 2003, ECONOMETRICA;Angrist J, 1995, JOURNAL OF BUSINESS AND ECONOMIC STATISTICS;Farrell M, 2015, JOURNAL OF ECONOMETRICS;Chamberlain G, 1992, ECONOMETRICA;Severini T, 1992, THE ANNALS OF STATISTICS;Andrews D, 1994, ECONOMETRICA;Bartoo J, 1967, THE ANNALS OF MATHEMATICAL STATISTICS;Fan J, 2011, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Frölich M, 2006, JOURNAL OF ECONOMETRICS;Chernozhukov V, 2015, AMERICAN ECONOMIC REVIEW;Schick A, 1986, THE ANNALS OF STATISTICS;Chernozhukov V, 2004, THE REVIEW OF ECONOMICS AND STATISTICS;Vaart A, 1991, THE ANNALS OF STATISTICS;Belloni A, 2014, BIOMETRIKA;Chen X, 1999, IEEE TRANSACTIONS ON INFORMATION THEORY;Zheng W, 2011, SPRINGER SERIES IN STATISTICS;Robins J, 2008, INSTITUTE OF 
MATHEMATICAL STATISTICS EBOOKS;Newey W, 2004, ECONOMETRICA;Levit B, 1976, THEORY OF PROBABILITY AND ITS APPLICATIONS;Ai C, 2012, JOURNAL OF ECONOMETRICS;Hubbard A, 2016, THE INTERNATIONAL JOURNAL OF BIOSTATISTICS;Linton O, 1996, ECONOMETRIC THEORY;Bera A, 2010, ECONOMETRIC THEORY;Wooldridge J, 1991, JOURNAL OF ECONOMETRICS;Bilias Y, 2000, JOURNAL OF APPLIED ECONOMETRICS;Javanmard A, 2013, ARXIV (CORNELL UNIVERSITY);Gautier É, 2011, ARXIV (CORNELL UNIVERSITY);Belloni A, 2011, SSRN ELECTRONIC JOURNAL;Newey W, 1998, DSPACE@MIT (MASSACHUSETTS INSTITUTE OF TECHNOLOGY);Acemoğlu D, 2001, AMERICAN ECONOMIC REVIEW;Hirano K, 2003, ECONOMETRICA;Laan M, 2007, STATISTICAL APPLICATIONS IN GENETICS AND MOLECULAR BIOLOGY;Bühlmann P, 2011, SPRINGER SERIES IN STATISTICS;Ибрагимов И, 1981, ;Laan M, 2011, SPRINGER SERIES IN STATISTICS;Rogers G, 1968, TECHNOMETRICS;Belloni A, 2011, BIOMETRIKA;Belloni A, 2013, BERNOULLI;Belloni A, 2010, THE ANNALS OF STATISTICS;Belloni A, 2017, ECONOMETRICA;Poterba J, 1995, JOURNAL OF PUBLIC ECONOMICS;Chernozhukov V, 2014, THE ANNALS OF STATISTICS;Andrews D, 1994, HANDBOOK OF ECONOMETRICS;Belloni A, 2016, JOURNAL OF BUSINESS AND ECONOMIC STATISTICS;Chernozhukov V, 2015, ANNUAL REVIEW OF ECONOMICS;Belloni A, 2014, THE ANNALS OF STATISTICS;J. 
R, 2008, ;Robins J, 2017, THE ANNALS OF STATISTICS;Belloni A, 2013, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Luedtke A, 2016, THE INTERNATIONAL JOURNAL OF BIOSTATISTICS;Jm R, 2013, PUBMED;Laan v, 2015, ;Walter R, 1959, AMERICAN MATHEMATICAL MONTHLY;, 1995, CHOICE REVIEWS ONLINE;Toth B, 2016, COLLECTION OF BIOSTATISTICS RESEARCH ARCHIVE;Bickel P, 1988, UC BERKELEY;Zheng W, 2018, THE INTERNATIONAL JOURNAL OF BIOSTATISTICS;Feder P, 1980, TECHNOMETRICS;Wager S, 2015, ARXIV (CORNELL UNIVERSITY);Chernozhukov V, 2017, ;Ichimura H, 2015, ;Luo Y, 2016, ARXIV (CORNELL UNIVERSITY);Athey S, 2016, ARXIV (CORNELL UNIVERSITY)",,,OPENALEX,"Chernozhukov V, 2017, ECONOMETRICS JOURNAL","Chernozhukov V, 2017, ECONOMETRICS JOURNAL" +https://openalex.org/W398859631,,International Conference on Machine Learning (ICML)-2005,2006,en,article,2486,人工知能学会誌 = JOURNAL OF JAPANESE SOCIETY FOR ARTIFICIAL INTELLIGENCE,人工知能学会誌 = Journal of Japanese Society for Artificial Intelligence,宏治 津田,宏治 津田,,"宏治 津田 (corresponding author), ",,21,2,262,,Computer science;Geography;Artificial intelligence;Political science,,,,,OPENALEX,"津田 宏, 2006, 人工知能学会誌 = JOURNAL OF JAPANESE SOCIETY FOR ARTIFICIAL INTELLIGENCE","津田 宏, 2006, 人工知能学会誌 = JOURNAL OF JAPANESE SOCIETY FOR ARTIFICIAL INTELLIGENCE" +https://openalex.org/W2104489082,10.1103/physrevlett.108.058301,Fast and Accurate Modeling of Molecular Atomization Energies with Machine Learning,2012,en,article,2382,PHYSICAL REVIEW LETTERS,Physical Review Letters,Matthias Rupp;Alexandre Tkatchenko;Klaus‐Robert Müller;O. Anatole von Lilienfeld,Matthias Rupp;Alexandre Tkatchenko;Klaus-Robert Müller;O. 
Anatole von Lilienfeld,"Machine Learning Group, Technical University of Berlin, Franklinstr 28/29, 10587 Berlin, Germany;Machine Learning Group, Technical University of Berlin;Institute of Pure and Applied Mathematics, University of California Los Angeles;Fritz-Haber-Institut der Max-Planck-Gesellschaft, 14195 Berlin, Germany;Institute of Pure and Applied Mathematics, University of California Los Angeles, Los Angeles, California 90095, USA;Institute of Pure and Applied Mathematics, University of California Los Angeles;Theory, Fritz Haber Institute, Max Planck Society;Institute of Pure and Applied Mathematics, University of California Los Angeles, Los Angeles, California 90095, USA;Machine Learning Group, Technical University of Berlin, Franklinstr 28/29, 10587 Berlin, Germany;Machine Learning Group, Technical University of Berlin;Institute of Pure and Applied Mathematics, University of California Los Angeles;Argonne Leadership Computing Facility, Argonne National Laboratory, Argonne, Illinois 60439, USA;Institute of Pure and Applied Mathematics, University of California Los Angeles, Los Angeles, California 90095, USA;Argonne Leadership Computing Facility, Argonne National Laboratory;Institute of Pure and Applied Mathematics, University of California Los Angeles",,"We introduce a machine learning model to predict atomization energies of a diverse set of organic molecules, based on nuclear charges and atomic positions only. The problem of solving the molecular Schrödinger equation is mapped onto a nonlinear statistical regression problem of reduced complexity. Regression models are trained on and compared to atomization energies computed with hybrid density-functional theory. Cross validation over more than seven thousand organic molecules yields a mean absolute error of ∼10 kcal/mol. 
Applicability is demonstrated for the prediction of molecular atomization potential energy curves.",108,5,058301,058301,Computer science;Statistical physics;Physics,US;DE,"Becke A, 1993, THE JOURNAL OF CHEMICAL PHYSICS;Kohn W, 1965, PHYSICAL REVIEW;Hastie T, 2001, SPRINGER SERIES IN STATISTICS;Hastie T, 2009, SPRINGER SERIES IN STATISTICS;Hastie T, 2013, ;Lu Z, 2010, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES A (STATISTICS IN SOCIETY);Perdew J, 1996, THE JOURNAL OF CHEMICAL PHYSICS;Ernzerhof M, 1999, THE JOURNAL OF CHEMICAL PHYSICS;Behler J, 2007, PHYSICAL REVIEW LETTERS;Todeschini R, 2000, METHODS AND PRINCIPLES IN MEDICINAL CHEMISTRY;Stewart J, 2007, JOURNAL OF MOLECULAR MODELING;Müller K, 2001, IEEE TRANSACTIONS ON NEURAL NETWORKS;Bartók A, 2010, PHYSICAL REVIEW LETTERS;Blüm V, 2009, COMPUTER PHYSICS COMMUNICATIONS;Behler J, 2011, THE JOURNAL OF CHEMICAL PHYSICS;, 1999, CHOICE REVIEWS ONLINE;Tuckerman M, 2023, ;Cottrell T, 1958, ;Blum L, 2009, JOURNAL OF THE AMERICAN CHEMICAL SOCIETY;Benson S, 1965, JOURNAL OF CHEMICAL EDUCATION;Hautier G, 2010, CHEMISTRY OF MATERIALS;Schneider G, 2010, NATURE REVIEWS DRUG DISCOVERY;Fink T, 2007, JOURNAL OF CHEMICAL INFORMATION AND MODELING;Lorenz S, 2004, CHEMICAL PHYSICS LETTERS;Guha R, 2006, JOURNAL OF CHEMICAL INFORMATION AND MODELING;Fink T, 2005, ANGEWANDTE CHEMIE INTERNATIONAL EDITION;Kirkpatrick P, 2004, NATURE;Cśanyi G, 2004, PHYSICAL REVIEW LETTERS;Coifman R, 2008, MULTISCALE MODELING AND SIMULATION;Lynch B, 2003, THE JOURNAL OF PHYSICAL CHEMISTRY A;Behler J, 2008, PHYSICAL REVIEW LETTERS;Brown A, 2003, THE JOURNAL OF CHEMICAL PHYSICS;Lilienfeld O, 2005, PHYSICAL REVIEW LETTERS;Lilienfeld O, 2006, THE JOURNAL OF CHEMICAL PHYSICS;Handley C, 2009, JOURNAL OF CHEMICAL THEORY AND COMPUTATION;Müller K, 1996, NEURAL COMPUTATION;Sheppard D, 2010, THE JOURNAL OF CHEMICAL PHYSICS;Lilienfeld O, 2009, THE JOURNAL OF CHEMICAL PHYSICS;Lim T, 2010, MOLECULAR PHYSICS",,,OPENALEX,"Rupp M, 2012, PHYSICAL REVIEW LETTERS","Rupp M, 
2012, PHYSICAL REVIEW LETTERS" +https://openalex.org/W2951278869,10.48550/arxiv.cs/0205070,Thumbs up? Sentiment Classification using Machine Learning Techniques,2002,en,preprint,2215,ARXIV.ORG,ArXiv.org,Bo Pang;Lillian Lee;Shivakumar Vaithyanathan,"Pang, Bo;Lee, Lillian;Vaithyanathan, Shivakumar","[Cornell Univ., Ithaca, NY];[Cornell Univ., Ithaca, NY];IBM Almaden Research Center, San Jose´, CA#TAB#",,"We consider the problem of classifying documents not by topic, but by overall sentiment, e.g., determining whether a review is positive or negative. Using movie reviews as data, we find that standard machine learning techniques definitively outperform human-produced baselines. However, the three machine learning methods we employed (Naive Bayes, maximum entropy classification, and support vector machines) do not perform as well on sentiment classification as on traditional topic-based categorization. We conclude by examining factors that make the sentiment classification problem more challenging.",,,,,Naive Bayes classifier;Computer science;Categorization;Artificial intelligence;Sentiment analysis;Support vector machine;Machine learning;Principle of maximum entropy;Natural language processing,US,"Joachims T, 1998, LECTURE NOTES IN COMPUTER SCIENCE;Biber D, 1988, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Joachims T, 2006, TECHNICAL REPORTS;Peter P, 2002, MEETING OF THE ASSOCIATION FOR COMPUTATIONAL LINGUISTICS;McCallum A, 1998, ;Berger A, 1996, ;Lewis D, 1998, LECTURE NOTES IN COMPUTER SCIENCE;Hatzivassiloglou V, 1997, ;Hatzivassiloglou V, 2000, ;Terveen L, 1997, COMMUNICATIONS OF THE ACM;Mosteller F, 2012, ;Karlgren J, 1994, ;Ellen, 1997, ;Chen S, 2000, IEEE TRANSACTIONS ON SPEECH AND AUDIO PROCESSING;Wiebe J, 2001, ;Pedersen T, 2001, ;Avneri G, 1998, ;Huettner A, 2000, ;Tatemura J, 2000, ;Sack W, 1994, NATIONAL CONFERENCE ON ARTIFICIAL INTELLIGENCE;Kessler B, 1997, ARXIV.ORG",,,OPENALEX,"Pang B, 2002, ARXIV ORG","Pang B, 2002, ARXIV ORG" 
+https://openalex.org/W1590183771,10.1017/cbo9780511804779,Bayesian Reasoning and Machine Learning,2012,en,book,1653,CAMBRIDGE UNIVERSITY PRESS EBOOKS,Cambridge University Press eBooks,David Barber,David Barber,University College London;University college London,"David Barber (corresponding author), University College London; University college London","Machine learning methods extract value from vast data sets quickly and with modest resources. They are established tools in a wide range of industrial applications, including search engines, DNA sequencing, stock market analysis, and robot locomotion, and their use is spreading rapidly. People who know the methods have their choice of rewarding jobs. This hands-on text opens these opportunities to computer science students with modest mathematical backgrounds. It is designed for final-year undergraduates and master's students with limited background in linear algebra and calculus. Comprehensive and coherent, it develops everything from basic reasoning to advanced techniques within the framework of graphical models. Students learn more than a menu of techniques, they develop analytical and problem-solving skills that equip them for the real world. Numerous examples and exercises, both computer based and theoretical, are included in every chapter. 
Resources for students and instructors, including a MATLAB toolbox, are available online.",,,,,Toolbox;Computer science;Artificial intelligence;Machine learning;Data science;Human–computer interaction;Programming language,AU;GB,,,,OPENALEX,"Barber D, 2012, CAMBRIDGE UNIVERSITY PRESS EBOOKS","Barber D, 2012, CAMBRIDGE UNIVERSITY PRESS EBOOKS" +https://openalex.org/W2792919287,10.1038/nmeth.4642,Statistics versus machine learning,2018,en,article,1488,NATURE METHODS,Nature Methods,Danilo Bzdok;Naomi Altman;Martin Krzywinski,Danilo Bzdok;Naomi Altman;Martin Krzywinski,"Department of Psychiatry, RWTH Aachen University, Germany, and a Visiting Professor at INRIA/Neurospin Saclay in France;Department of Psychiatry, RWTH Aachen University, Germany;Statistics at The Pennsylvania State University;Professor of Statistics at The Pennsylvania State University,;Canada's Michael Smith Genome Sciences Centre;staff scientist at Canada's Michael Smith Genome Sciences Centre,",,,15,4,233,234,Computer science;Statistics;Computational biology;Artificial intelligence;Machine learning;Biology;Mathematics,FR;DE;US;CA,"Lever J, 2016, NATURE METHODS;Altman N, 2017, NATURE METHODS;Bzdok D, 2017, NATURE METHODS;Bzdok D, 2017, HAL (LE CENTRE POUR LA COMMUNICATION SCIENTIFIQUE DIRECTE);Kulesa A, 2015, NATURE METHODS;Altman N, 2016, NATURE METHODS;Krzywinski M, 2014, NATURE METHODS;Krzywinski M, 2014, NATURE METHODS",,,OPENALEX,"Bzdok D, 2018, NATURE METHODS","Bzdok D, 2018, NATURE METHODS" +https://openalex.org/W2337082154,10.1038/nphys4035,Machine learning phases of matter,2017,en,article,1474,NATURE PHYSICS,Nature Physics,Juan Carrasquilla;Roger G. Melko,Juan Carrasquilla;Roger G. 
Melko,"Perimeter Institute for Theoretical Physics, Waterloo, Ontario N2L 2Y5, Canada;Department of Physics and Astronomy, University of Waterloo, Ontario N2L 3G1, Canada;Perimeter Institute for Theoretical Physics, Waterloo, Ontario N2L 2Y5, Canada","Juan Carrasquilla (corresponding author), Perimeter Institute for Theoretical Physics, Waterloo, Ontario N2L 2Y5, Canada",,13,5,431,434,Physics;Artificial neural network;Locality;Artificial intelligence;Convolutional neural network;Monte Carlo method;Statistical physics;Coulomb;Machine learning;Computer science;Quantum mechanics,CA,"LeCun Y, 1998, PROCEEDINGS OF THE IEEE;Bellman R, 1966, SCIENCE;Goodfellow I, 2016, MIT PRESS EBOOKS;Kitaev A, 2003, ANNALS OF PHYSICS;Onsager L, 1944, PHYSICAL REVIEW;Kitaev A, 2006, PHYSICAL REVIEW LETTERS;Levin M, 2006, PHYSICAL REVIEW LETTERS;Kogut J, 1979, REVIEWS OF MODERN PHYSICS;Montaño J, 2011, CONFERENCE;Ghiringhelli L, 2015, PHYSICAL REVIEW LETTERS;Nieuwenburg E, 2017, NATURE PHYSICS;Wen X, 2007, ;Sandvik A, 2016, ;Sandvik A, 2010, AIP CONFERENCE PROCEEDINGS;Schoenholz S, 2016, NATURE PHYSICS;Kalinin S, 2015, NATURE MATERIALS;Kusne A, 2014, SCIENTIFIC REPORTS;Castelnovo C, 2007, PHYSICAL REVIEW B;Avella A, 2013, ;Arsenault L, 2014, PHYSICAL REVIEW B;Avella A, 2013, SPRINGER SERIES IN SOLID-STATE SCIENCES;Castelnovo C, 2007, PHYSICAL REVIEW B;Landon-Cardinal O, 2012, NEW JOURNAL OF PHYSICS;Newell G, 1950, PHYSICAL REVIEW;Prelovšek P, 2011, ARXIV (CORNELL UNIVERSITY)",,,OPENALEX,"Carrasquilla J, 2017, NATURE PHYSICS","Carrasquilla J, 2017, NATURE PHYSICS" +https://openalex.org/W2048266589,10.1109/micro.2014.58,DaDianNao: A Machine-Learning Supercomputer,2014,en,article,1506,,,Yunji Chen;Tao Luo;Shaoli Liu;Shijin Zhang;Liqiang He;Jia Wang;Ling Li;Tianshi Chen;Zhiwei Xu;Ninghui Sun;Olivier Temam,Yunji Chen;Tao Luo;Shaoli Liu;Shijin Zhang;Liqiang He;Jia Wang;Ling Li;Tianshi Chen;Zhiwei Xu;Ninghui Sun;Olivier Temam,"SKL of Computer Architecture, ICT, CAS, China;SKL of Computer 
Architecture, ICT, CAS, China;University of CAS, China;SKL of Computer Architecture, ICT, CAS, China;SKL of Computer Architecture, ICT, CAS, China;Inner Mongolia University, China;Inria, Scalay, France;SKL of Computer Architecture, ICT, CAS, China;SKL of Computer Architecture, ICT, CAS, China;SKL of Computer Architecture, ICT, CAS, China;SKL of Computer Architecture, ICT, CAS, China;SKL of Computer Architecture, ICT, CAS, China;Inria, Scalay, France",,"Many companies are deploying services, either for consumers or industry, which are largely based on machine-learning algorithms for sophisticated processing of large amounts of data. The state-of-the-art and most popular such machine-learning algorithms are Convolutional and Deep Neural Networks (CNNs and DNNs), which are known to be both computationally and memory intensive. A number of neural network accelerators have been recently proposed which can offer high computational capacity/area ratio, but which remain hampered by memory accesses. However, unlike the memory wall faced by processors on general-purpose workloads, the CNNs and DNNs memory footprint, while large, is not beyond the capability of the on chip storage of a multi-chip system. This property, combined with the CNN/DNN algorithmic characteristics, can lead to high internal bandwidth and low external communications, which can in turn enable high-degree parallelism at a reasonable area cost. In this article, we introduce a custom multi-chip machine-learning architecture along those lines. We show that, on a subset of the largest known neural network layers, it is possible to achieve a speedup of 450.65x over a GPU, and reduce the energy by 150.31x on average for a 64-chip system. 
We implement the node down to the place and route at 28nm, containing a combination of custom storage and computational units, with industry-grade interconnects.",,,609,622,Computer science;Supercomputer;Memory footprint;Convolutional neural network;Speedup;Parallel computing;Artificial neural network;Deep learning;Memory bandwidth;Bandwidth (computing);Auxiliary memory;Computer architecture;Artificial intelligence;Distributed computing;Computer engineering;Computer hardware;Operating system;Computer network,FR;CN,"Krizhevsky A, 2017, COMMUNICATIONS OF THE ACM;Deng J, 2009, 2009 IEEE CONFERENCE ON COMPUTER VISION AND PATTERN RECOGNITION;LeCun Y, 1998, PROCEEDINGS OF THE IEEE;Cireşan D, 2012, ;Bienia C, 2008, ;Dally W, 2004, ;Deng J, 2009, 2009 IEEE CONFERENCE ON COMPUTER VISION AND PATTERN RECOGNITION;Dean J, 2012, ;Jarrett K, 2009, ;Huang P, 2013, ;Le Q, 2013, ;Esmaeilzadeh H, 2011, ;Chen T, 2014, ;Dahl G, 2013, ;Cireşan D, 2011, ;, 2009, ;Larochelle H, 2007, ;Vanhoucke V, 2011, ;Ranzato M, 2012, INTERNATIONAL CONFERENCE ON MACHINE LEARNING;Esmaeilzadeh H, 2012, ;Coates A, 2013, ;Esmaeilzadeh H, 2012, IEEE MICRO;Salakhutdinov R, 2012, NEURAL COMPUTATION;Ferrucci D, 2012, IBM JOURNAL OF RESEARCH AND DEVELOPMENT;Merolla P, 2011, ;Hameed R, 2010, ;Farabet C, 2011, ;Mnih V, 2012, ;Hadsell R, 2009, JOURNAL OF FIELD ROBOTICS;Schemmel J, 2008, ;Khan M, 2008, ;Kahng A, 2011, IEEE TRANSACTIONS ON VERY LARGE SCALE INTEGRATION (VLSI) SYSTEMS;Qadeer W, 2013, ;Hong S, 2013, IEEE TRANSACTIONS ON INFORMATION THEORY;Temam O, 2012, ACM SIGARCH COMPUTER ARCHITECTURE NEWS;Venkatesh G, 2011, ;Chen D, 2011, IEEE MICRO;Temam O, 2012, ;Matick R, 2005, IBM JOURNAL OF RESEARCH AND DEVELOPMENT;Fan K, 2009, ;Belhadj B, 2013, ;Hashmi A, 2011, ;Wang G, 2009, ;Majumdar A, 2012, ACM TRANSACTIONS ON ARCHITECTURE AND CODE OPTIMIZATION;Hashmi A, 2011, ;Huang L, 2011, IEEE TRANSACTIONS ON COMPUTERS;Huang K, 2011, ;Maeda N, 2012, ;Deneroff M, 2008, ;, 2009, JOURNAL OF FIELD 
ROBOTICS",,,OPENALEX,"Chen Y, 2014, ","Chen Y, 2014, " +https://openalex.org/W2964303497,10.3390/electronics8080832,Machine Learning Interpretability: A Survey on Methods and Metrics,2019,en,article,1759,ELECTRONICS,Electronics,Diogo V. Carvalho;Eduardo M. Pereira;Jaime S. Cardoso,Diogo V. Carvalho;Eduardo M. Pereira;Jaime S. Cardoso,"Deloitte Portugal, Manuel Bandeira Street, 43, 4150-479 Porto, Portugal;Faculty of Engineering, University of Porto, Dr. Roberto Frias Street, 4200-465 Porto, Portugal;Deloitte Portugal, Manuel Bandeira Street, 43, 4150-479 Porto, Portugal;Faculty of Engineering, University of Porto, Dr. Roberto Frias Street, 4200-465 Porto, Portugal;INESC TEC, Dr. Roberto Frias Street, 4200-465 Porto, Portugal","Diogo V. Carvalho (corresponding author), Deloitte Portugal, Manuel Bandeira Street, 43, 4150-479 Porto, Portugal; Faculty of Engineering, University of Porto, Dr. Roberto Frias Street, 4200-465 Porto, Portugal","Machine learning systems are becoming increasingly ubiquitous. These systems’s adoption has been expanding, accelerating the shift towards a more algorithmic society, meaning that algorithmically informed decisions have greater potential for significant social impact. However, most of these accurate decision support systems remain complex black boxes, meaning their internal logic and inner workings are hidden to the user and even experts cannot fully understand the rationale behind their predictions. Moreover, new regulations and highly regulated domains have made the audit and verifiability of decisions mandatory, increasing the demand for the ability to question, understand, and trust machine learning systems, for which interpretability is indispensable. The research community has recognized this interpretability problem and focused on developing both interpretable models and explanation methods over the past few years. However, the emergence of these methods shows there is no consensus on how to assess the explanation quality. 
Which are the most suitable metrics to assess the quality of an explanation? The aim of this article is to provide a review of the current state of the research field on machine learning interpretability while focusing on the societal impact and on the developed methods and metrics. Furthermore, a complete literature review is presented in order to identify future directions of work on this field.",8,8,832,832,Interpretability;Computer science;Meaning (existential);Artificial intelligence;Field (mathematics);Audit;Quality (philosophy);Data science;Machine learning;Management science;Knowledge management;Risk analysis (engineering);Engineering;Psychology;Epistemology,PT,"He K, 2016, ;Maaten L, 2008, JOURNAL OF MACHINE LEARNING RESEARCH;Friedman J, 2001, THE ANNALS OF STATISTICS;Ribeiro M, 2016, ;Hartigan J, 1979, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES C (APPLIED STATISTICS);David F, 1977, BIOMETRICS;Nickerson R, 1998, REVIEW OF GENERAL PSYCHOLOGY;Breiman L, 2001, STATISTICAL SCIENCE;Heider F, 1944, THE AMERICAN JOURNAL OF PSYCHOLOGY;Goldstein A, 2014, JOURNAL OF COMPUTATIONAL AND GRAPHICAL STATISTICS;Herlocker J, 2000, ;Kahneman D, 1982, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Montavon G, 2016, PATTERN RECOGNITION;Friedman J, 2008, THE ANNALS OF APPLIED STATISTICS;Andrews R, 1995, KNOWLEDGE-BASED SYSTEMS;Cowan N, 2010, CURRENT DIRECTIONS IN PSYCHOLOGICAL SCIENCE;Lakkaraju H, 2016, ;Golovin D, 2017, ;Freitas A, 2014, ACM SIGKDD EXPLORATIONS NEWSLETTER;Kim B, 2016, NEURAL INFORMATION PROCESSING SYSTEMS;Lou Y, 2012, ;Cramer H, 2008, USER MODELING AND USER-ADAPTED INTERACTION;Kahng M, 2017, IEEE TRANSACTIONS ON VISUALIZATION AND COMPUTER GRAPHICS;Lent M, 2004, ;Wu M, 2018, PROCEEDINGS OF THE AAAI CONFERENCE ON ARTIFICIAL INTELLIGENCE;Wang T, 2017, ;Donnelly C, 2010, ASTIN BULLETIN;Goodman B, 2016, ARXIV (CORNELL UNIVERSITY);Temizer S, 2010, AIAA GUIDANCE, NAVIGATION, AND CONTROL CONFERENCE;Fisher A, 2018, ARXIV (CORNELL UNIVERSITY);Angelino E, 2017, ;Weller A, 2017, 
ARXIV (CORNELL UNIVERSITY);Rudziński F, 2015, APPLIED SOFT COMPUTING;Rüping S, 2006, ;Melle W, 2005, ;Keil F, 2004, ;Swartout W, 1983, COMPUTER COMPACTS;Hinton G, 2015, ARXIV (CORNELL UNIVERSITY);Lundberg S, 2017, ARXIV (CORNELL UNIVERSITY);Doshi‐Velez F, 2017, ARXIV (CORNELL UNIVERSITY);Springenberg J, 2014, ARXIV (CORNELL UNIVERSITY);Smilkov D, 2017, ARXIV (CORNELL UNIVERSITY);Doran D, 2017, ARXIV (CORNELL UNIVERSITY);Frosst N, 2017, ARXIV (CORNELL UNIVERSITY);Polino A, 2018, ARXIV (CORNELL UNIVERSITY);Apley D, 2016, ARXIV (CORNELL UNIVERSITY);Bastani O, 2017, ARXIV (CORNELL UNIVERSITY);Kim B, 2015, ARXIV (CORNELL UNIVERSITY);Murdoch W, 2017, ARXIV (CORNELL UNIVERSITY);Tan S, 2017, ARXIV (CORNELL UNIVERSITY);Selvaraju R, 2017, ;Adadi A, 2018, IEEE ACCESS;Ribeiro M, 2018, PROCEEDINGS OF THE AAAI CONFERENCE ON ARTIFICIAL INTELLIGENCE;Holzinger A, 2019, WILEY INTERDISCIPLINARY REVIEWS DATA MINING AND KNOWLEDGE DISCOVERY;Wachter S, 2017, INTERNATIONAL DATA PRIVACY LAW;Thelisson E, 2018, ;Abdul A, 2018, ;Gunning D, 2019, ;Pope P, 2019, ;Molnar C, 2020, ;Zhang Q, 2019, ;Zhang J, 2018, IEEE TRANSACTIONS ON VISUALIZATION AND COMPUTER GRAPHICS;Robnik‐Šikonja M, 2018, HUMAN-COMPUTER INTERACTION SERIES;Doshi‐Velez F, 2018, ˜THE œSPRINGER SERIES ON CHALLENGES IN MACHINE LEARNING;Rudin C, 2018, ARXIV (CORNELL UNIVERSITY);Staniak M, 2018, LINCOLN (UNIVERSITY OF NEBRASKA);Gilpin L, 2018, ARXIV (CORNELL UNIVERSITY);Rudin C, 2018, INFORMS JOURNAL ON APPLIED ANALYTICS;Staniak M, 2019, THE R JOURNAL;Stoyanov D, 2018, LECTURE NOTES IN COMPUTER SCIENCE;Bibal A, 2016, REPOSITORY OF THE UNIVERSITY OF NAMUR;Silva W, 2018, LECTURE NOTES IN COMPUTER SCIENCE;Case N, 2018, JOURNAL OF DESIGN AND SCIENCE;Palangi H, 2018, PROCEEDINGS OF THE AAAI CONFERENCE ON ARTIFICIAL INTELLIGENCE;Müller H, 2021, ARTIFICIAL INTELLIGENCE;Fahner G, 2018, ;Shah N, 2020, INTERNATIONAL JOURNAL OF INNOVATIVE TECHNOLOGY AND EXPLORING ENGINEERING;Wagner J, 2019, ;Lim B, 2019, ;, , VIEW;Jolliffe I, 2014, WILEY 
STATSREF: STATISTICS REFERENCE ONLINE;Kim B, 2017, ARXIV (CORNELL UNIVERSITY);Kim B, 2017, ARXIV (CORNELL UNIVERSITY);Sundararajan M, 2017, ARXIV (CORNELL UNIVERSITY);Hardt M, 2016, ARXIV (CORNELL UNIVERSITY);Wu M, 2017, ARXIV (CORNELL UNIVERSITY);Kindermans P, 2017, ARXIV (CORNELL UNIVERSITY);Dash S, 2018, ARXIV (CORNELL UNIVERSITY);Lage I, 2018, ARXIV (CORNELL UNIVERSITY);Honegger M, 2018, ARXIV (CORNELL UNIVERSITY);Kirsch A, 2017, HAL (LE CENTRE POUR LA COMMUNICATION SCIENTIFIQUE DIRECTE);Varshney K, 2018, ARXIV (CORNELL UNIVERSITY);Jolliffe I, 2011, INTERNATIONAL ENCYCLOPEDIA OF STATISTICAL SCIENCE;Lipton P, 1990, ROYAL INSTITUTE OF PHILOSOPHY SUPPLEMENT;Hossain M, 2021, ZENODO (CERN EUROPEAN ORGANIZATION FOR NUCLEAR RESEARCH);Yang H, 2016, ",,,OPENALEX,"Carvalho D, 2019, ELECTRONICS","Carvalho D, 2019, ELECTRONICS" +https://openalex.org/W2795867722,10.1016/j.cell.2018.03.034,Machine Learning Identifies Stemness Features Associated with Oncogenic Dedifferentiation,2018,en,article,2345,CELL,Cell,Tathiane M. Malta;Artem Sokolov;Andrew J. Gentles;Tomasz Burzykowski;Laila Poisson;John N. Weinstein;Bożena Kamińska;Joerg Huelsken;Larsson Omberg;Olivier Gevaert;Antonio Colaprico;Patrycja Czerwińska;Sylwia Mazurek;Lopa Mishra;Holger Heyn;A. Krasnitz;Andrew K. Godwin;Alexander J. Lazar;Rory Johnson;John A. Demchok;Ina Felau;Melpomeni Kasapi;Martin L. Ferguson;Carolyn M. Hutter;Heidi J. Sofia;Roy Tarnuzzer;Zhining Wang;Liming Yang;Jean C. Zenklusen;Jiashan Zhang;Sudha Chudamani;Jia Liu;Laxmi Lolla;Rashi Naresh;Todd Pihl;Qiang Sun;Yunhu Wan;Ye Wu;Juok Cho;Timothy Defreitas;Scott Frazer;Nils Gehlenborg;Gad Getz;David I. Heiman;Jaegil Kim;Michael S. Lawrence;Pei Lin;Sam Meier;Michael S. Noble;Gordon Saksena;Doug Voet;Hailei Zhang;Brady Bernard;Nyasha Chambwe;Varsha Dhankani;Theo Knijnenburg;Roger Kramer;Kalle Leinonen;Yuexin Liu;Michael B. Miller;Sheila M. Reynolds;Ilya Shmulevich;Vésteinn Thórsson;Wei Zhang;Rehan Akbani;Bradley M. Broom;Apurva M. Hegde;Zhenlin Ju;Rupa S. 
Kanchi;Anil Korkut;Jun Li;Han Liang;Shiyun Ling;Wenbin Liu;Yiling Lu;Gordon B. Mills;Kwok-Shing Ng;Arvind Rao;Michael T. Ryan;Jing Wang;John N. Weinstein;Jiexin Zhang;Adam Abeshouse;Joshua Armenia;Debyani Chakravarty;Walid K. Chatila;Ino de Bruijn;Galen F. Gao;Benjamin Groß;Zachary Heins;Ritika Kundra;Konnor La;Marc Ladanyi;Augustin Luna;Moriah G. Nissan;Angelica Ochoa;Sarah Phillips;Ed Reznik;Francisco Sánchez-Vega;Chris Sander,Tathiane M. Malta;Artem Sokolov;Andrew J. Gentles;Tomasz Burzykowski;Laila Poisson;John N. Weinstein;Bożena Kamińska;Joerg Huelsken;Larsson Omberg;Olivier Gevaert;Antonio Colaprico;Patrycja Czerwińska;Sylwia Mazurek;Lopa Mishra;Holger Heyn;Alex Krasnitz;Andrew K. Godwin;Alexander J. Lazar;Samantha J. Caesar-Johnson;John A. Demchok;Ina Felau;Melpomeni Kasapi;Martin L. Ferguson;Carolyn M. Hutter;Heidi J. Sofia;Roy Tarnuzzer;Zhining Wang;Liming Yang;Jean C. Zenklusen;Jiashan (Julia) Zhang;Sudha Chudamani;Jia Liu;Laxmi Lolla;Rashi Naresh;Todd Pihl;Qiang Sun;Yunhu Wan;Ye Wu;Juok Cho;Timothy DeFreitas;Scott Frazer;Nils Gehlenborg;Gad Getz;David I. Heiman;Jaegil Kim;Michael S. Lawrence;Pei Lin;Sam Meier;Michael S. Noble;Gordon Saksena;Doug Voet;Hailei Zhang;Brady Bernard;Nyasha Chambwe;Varsha Dhankani;Theo Knijnenburg;Roger Kramer;Kalle Leinonen;Yuexin Liu;Michael Miller;Sheila Reynolds;Ilya Shmulevich;Vesteinn Thorsson;Wei Zhang;Rehan Akbani;Bradley M. Broom;Apurva M. Hegde;Zhenlin Ju;Rupa S. Kanchi;Anil Korkut;Jun Li;Han Liang;Shiyun Ling;Wenbin Liu;Yiling Lu;Gordon B. Mills;Kwok-Shing Ng;Arvind Rao;Michael Ryan;Jing Wang;John N. Weinstein;Jiexin Zhang;Adam Abeshouse;Joshua Armenia;Debyani Chakravarty;Walid K. Chatila;Ino de Bruijn;Jianjiong Gao;Benjamin E. Gross;Zachary J. Heins;Ritika Kundra;Konnor La;Marc Ladanyi;Augustin Luna;Moriah G. Nissan;Angelica Ochoa;Sarah M. 
Phillips;Ed Reznik;Francisco Sanchez-Vega;Chris Sander,"Henry Ford Health System, Detroit, MI 48202, USA University of São Paulo, Ribeirão Preto-SP 14049, Brazil;Henry Ford Health System, Detroit, MI 48202, USA;University of São Paulo, Ribeirão Preto-SP 14049, Brazil;Harvard Medical School, Boston, MA 02115, USA;Stanford University, Palo Alto, CA 94305, USA;Hasselt University, 3590 Diepenbeek, Belgium;Henry Ford Health System, Detroit, MI 48202, USA;The University of Texas MD Anderson Cancer Center, Houston, TX 77030, USA;Nencki Institute of Experimental Biology of PAS, 02093 Warsaw, Poland;Swiss Federal Institute of Technology Lausanne (EPFL), CH-1015 Lausanne Switzerland;Swiss Federal Institute of Technology Lausanne (EPFL), CH-1015 Lausanne;Switzerland;Sage Bionetworks, Seattle, WA 98109 USA;Stanford University, Palo Alto, CA 94305, USA;Université Libre de Bruxelles, 1050 Bruxelles, Belgium Interuniversity Institute of Bioinformatics in Brussels (IB)(2), 1050 Bruxelles Belgium;Belgium;Interuniversity Institute of Bioinformatics in Brussels (IB)2, 1050 Bruxelles;Université Libre de Bruxelles, 1050 Bruxelles, Belgium;Poznań University of Medical Sciences, 61701 Poznań, Poland;Poznań University of Medical Sciences, 61701 Poznań, Poland Postgraduate School of Molecular Medicine, Medical University of Warsaw, 02109 Warsaw, Poland;Postgraduate School of Molecular Medicine, Medical University of Warsaw, 02109 Warsaw, Poland;Poznań University of Medical Sciences, 61701 Poznań, Poland;George Washington University, Washington, D.C. 
20052, USA;Centre for Genomic Regulation (CNAG-CRG), 08003 Barcelona, Spain;Cold Spring Harbor Laboratory, Cold Spring Harbor, NY 11724, USA;University of Kansas Medical Center, Kansas City, KS 66160, USA;The University of Texas MD Anderson Cancer Center, Houston, TX 77030, USA;The University of Texas MD Anderson Cancer Center, Houston, TX 77030, USA","Antonio Colaprico (corresponding author), Université Libre de Bruxelles, 1050 Bruxelles, Belgium; Interuniversity Institute of Bioinformatics in Brussels (IB)(2), 1050 Bruxelles; Belgium; Belgium; Interuniversity Institute of Bioinformatics in Brussels (IB)2, 1050 Bruxelles; Université Libre de Bruxelles, 1050 Bruxelles, Belgium",,173,2,338,354.e15,Biology;Phenotype;Tumor microenvironment;Induced pluripotent stem cell;Epigenetics;Cancer stem cell;Transcriptome;Progenitor cell;Cancer research;Stem cell;Cancer;Computational biology;Genetics;Embryonic stem cell;Gene;Gene expression,US;BR;BE;PL;CH;IN;ES,"Benjamini Y, 1995, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Hanahan D, 2011, CELL;Subramanian A, 2005, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Therneau T, 2000, STATISTICS IN THE HEALTH SCIENCES;Kundaje A, 2015, NATURE;Chen D, 2013, IMMUNITY;Lamb J, 2006, SCIENCE;Colaprico A, 2015, NUCLEIC ACIDS RESEARCH;Gregory P, 2008, NATURE CELL BIOLOGY;Gentles A, 2015, NATURE MEDICINE;Ben‐Porath I, 2008, NATURE GENETICS;Noushmehr H, 2010, CANCER CELL;Turcan Ş, 2012, NATURE;Lü C, 2012, NATURE;Sturm D, 2012, CANCER CELL;Gonzalez D, 2014, SCIENCE SIGNALING;Hoadley K, 2014, CELL;Visvader J, 2012, CELL STEM CELL;Young R, 2011, CELL;Eppert K, 2011, NATURE MEDICINE;Ivanova N, 2006, NATURE;Kim J, 2010, CELL;Mak M, 2015, CLINICAL CANCER RESEARCH;Friedmann‐Morvinski D, 2014, EMBO REPORTS;Sato N, 2003, DEVELOPMENTAL BIOLOGY;Nazor K, 2012, CELL STEM CELL;Gentles A, 2010, JAMA;Venezia T, 2004, PLOS BIOLOGY;Tellez C, 2011, CANCER RESEARCH;Agarwal S, 2005, ILLINOIS DIGITAL ENVIRONMENT FOR ACCESS TO 
LEARNING AND SCHOLARSHIP (UNIVERSITY OF ILLINOIS AT URBANA-CHAMPAIGN);Yao L, 2015, GENOME BIOLOGY;Bao B, 2011, JOURNAL OF CELLULAR BIOCHEMISTRY;Bai X, 2004, WORLD JOURNAL OF GASTROENTEROLOGY;Cheng H, 2015, CELL DEATH AND DISEASE;Pinto J, 2015, NUCLEIC ACIDS RESEARCH;Chiao M, 2013, AUTOPHAGY;Yan X, 2011, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Kim J, 2011, GENOME MEDICINE;Xue Y, 2012, JOURNAL OF TRANSLATIONAL MEDICINE;Wen N, 2014, JOURNAL OF TRANSLATIONAL MEDICINE;Wong C, 2015, ONCOTARGET;Bertucci F, 2015, ONCOIMMUNOLOGY;Volate S, 2010, MOLECULAR CANCER THERAPEUTICS;Palmer N, 2012, GENOME BIOLOGY;Li Z, 2015, ONCOTARGETS AND THERAPY;Mathur D, 2008, GENOME BIOLOGY;Gevaert O, 2013, INTERFACE FOCUS;Reyngold M, 2014, PLOS ONE;Therneau T, 2000, STATISTICS IN THE HEALTH SCIENCES;Kang W, 2012, THE KAOHSIUNG JOURNAL OF MEDICAL SCIENCES;Subramanian A, 2017, CELL;Tomczak K, 2015, WSPÓŁCZESNA ONKOLOGIA;Zaretsky J, 2016, NEW ENGLAND JOURNAL OF MEDICINE;Shibue T, 2017, NATURE REVIEWS CLINICAL ONCOLOGY;Ceccarelli M, 2016, CELL;Tirosh I, 2016, NATURE;Bradner J, 2017, CELL;Chung W, 2017, NATURE COMMUNICATIONS;Spitzer M, 2017, CELL;Robinson D, 2017, NATURE;Ng S, 2016, NATURE;Lyssiotis C, 2017, TRENDS IN CELL BIOLOGY;Zheng S, 2016, CANCER CELL;Ge Y, 2017, CELL;Papillon‐Cavanagh S, 2017, NATURE GENETICS;Dolma S, 2016, CANCER CELL;Souza C, 2018, CELL REPORTS;Kooreman N, 2018, CELL STEM CELL;Wang M, 2015, HEPATOLOGY;Fabregat I, 2016, JOURNAL OF CLINICAL MEDICINE;Salomonis N, 2016, STEM CELL REPORTS;Onuchic V, 2016, CELL REPORTS;Xu L, 2016, INTERNATIONAL JOURNAL OF ONCOLOGY;Sokolov A, 2015, ;Economopoulou P, 2016, ANNALS OF TRANSLATIONAL MEDICINE;Daily K, 2017, SCIENTIFIC DATA;Polónia A, 2017, JOURNAL OF CLINICAL PATHOLOGY;Tsai S, 2017, MOLECULAR CARCINOGENESIS;Yue H, 2016, BIOMED RESEARCH INTERNATIONAL;Gingold J, 2016, TRENDS IN CANCER;Fuereder T, 2016, MEMO - MAGAZINE OF EUROPEAN MEDICAL ONCOLOGY;Team R, 2000, ;Colaprico A, 2018, BIORXIV (COLD SPRING HARBOR LABORATORY);Silva T, 
2017, BIORXIV (COLD SPRING HARBOR LABORATORY)",,,OPENALEX,"Malta T, 2018, CELL","Malta T, 2018, CELL" +https://openalex.org/W2601810315,10.1093/bioinformatics/btx180,Trainable Weka Segmentation: a machine learning tool for microscopy pixel classification,2017,en,article,2478,BIOINFORMATICS,Bioinformatics,Ignacio Arganda‐Carreras;Verena Kaynig;Curtis Rueden;Kevin W. Eliceiri;Johannes Schindelin;Albert Cardona;H. Sebastian Seung,Ignacio Arganda-Carreras;Verena Kaynig;Curtis Rueden;Kevin W Eliceiri;Johannes Schindelin;Albert Cardona;H Sebastian Seung,"Computer Science and Artificial Intelligence Department, Basque Country University, San Sebastian, Spain;Donostia International Physics Center, San Sebastian, Spain;Ikerbasque, Basque Foundation for Science, Bilbao, Spain;Harvard John A. Paulson School of Engineering and Applied Sciences, Harvard University, Cambridge, MA, USA;Laboratory for Optical and Computational Instrumentation, University of Wisconsin, Madison, WI, USA;Laboratory for Optical and Computational Instrumentation, University of Wisconsin, Madison, WI, USA;Laboratory for Optical and Computational Instrumentation, University of Wisconsin, Madison, WI, USA;Howard Hughes Medical Institute, Janelia Research Campus, Ashburn, VA, USA;Neuroscience Institute and Computer Science Department, Princeton University, NJ, USA","Ignacio Arganda-Carreras (corresponding author), Computer Science and Artificial Intelligence Department, Basque Country University, San Sebastian, Spain; Donostia International Physics Center, San Sebastian, Spain; Ikerbasque, Basque Foundation for Science, Bilbao, Spain","SUMMARY: State-of-the-art light and electron microscopes are capable of acquiring large image datasets, but quantitatively evaluating the data often involves manually annotating structures of interest. This process is time-consuming and often a major bottleneck in the evaluation pipeline. 
To overcome this problem, we have introduced the Trainable Weka Segmentation (TWS), a machine learning tool that leverages a limited number of manual annotations in order to train a classifier and segment the remaining data automatically. In addition, TWS can provide unsupervised segmentation learning schemes (clustering) and can be customized to employ user-designed image features or classifiers. AVAILABILITY AND IMPLEMENTATION: TWS is distributed as open-source software as part of the Fiji image processing distribution of ImageJ at http://imagej.net/Trainable_Weka_Segmentation . CONTACT: ignacio.arganda@ehu.eus. SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.",33,15,2424,2426,Computer science;Segmentation;Artificial intelligence;Pipeline (software);Bottleneck;Cluster analysis;Classifier (UML);Software;Pixel;Pattern recognition (psychology);Image segmentation;Process (computing);Machine learning;Data mining,ES;US,"Schindelin J, 2012, NATURE METHODS;Hall M, 2009, ACM SIGKDD EXPLORATIONS NEWSLETTER;Kamentsky L, 2011, BIOINFORMATICS;Marée R, 2016, BIOINFORMATICS;Dietz C, 2016, ADVANCES IN ANATOMY, EMBRYOLOGY AND CELL BIOLOGY;Vos W, 2016, ADVANCES IN ANATOMY, EMBRYOLOGY AND CELL BIOLOGY",,,OPENALEX,"Arganda‐Carreras I, 2017, BIOINFORMATICS","Arganda‐Carreras I, 2017, BIOINFORMATICS" +https://openalex.org/W3011460294,,"Genetic Algorithms in Search, Optimization & Machine Learning",1989,en,article,2309,MEDICAL ENTOMOLOGY AND ZOOLOGY,Medical Entomology and Zoology,D. E. Goldberg,D. E. Goldberg,,"D. E. 
Goldberg (corresponding author), ",,,,,,Computer science;Artificial intelligence;Machine learning;Quality control and genetic algorithms;Algorithm;Genetic algorithm;Meta-optimization,,,,,OPENALEX,"Goldberg D, 1989, MEDICAL ENTOMOLOGY AND ZOOLOGY","Goldberg D, 1989, MEDICAL ENTOMOLOGY AND ZOOLOGY" +https://openalex.org/W2798701005,10.1126/science.aat8084,All-optical machine learning using diffractive deep neural networks,2018,en,article,2423,SCIENCE,Science,Xing Lin;Yair Rivenson;Nezih Tolga Yardimci;Muhammed Veli;Yi Luo;Mona Jarrahi;Aydogan Özcan,Xing Lin;Yair Rivenson;Nezih T. Yardimci;Muhammed Veli;Yi Luo;Mona Jarrahi;Aydogan Ozcan,"California NanoSystems Institute (CNSI), University of California, Los Angeles, CA 90095, USA;Department of Bioengineering, University of California, Los Angeles, CA 90095, USA;Department of Electrical and Computer Engineering, University of California, Los Angeles, CA 90095, USA;California NanoSystems Institute (CNSI), University of California, Los Angeles, CA 90095, USA;Department of Bioengineering, University of California, Los Angeles, CA 90095, USA;Department of Electrical and Computer Engineering, University of California, Los Angeles, CA 90095, USA;California NanoSystems Institute (CNSI), University of California, Los Angeles, CA 90095, USA;Department of Electrical and Computer Engineering, University of California, Los Angeles, CA 90095, USA;California NanoSystems Institute (CNSI), University of California, Los Angeles, CA 90095, USA;Department of Bioengineering, University of California, Los Angeles, CA 90095, USA;Department of Electrical and Computer Engineering, University of California, Los Angeles, CA 90095, USA;California NanoSystems Institute (CNSI), University of California, Los Angeles, CA 90095, USA;Department of Bioengineering, University of California, Los Angeles, CA 90095, USA;Department of Electrical and Computer Engineering, University of California, Los Angeles, CA 90095, USA;California NanoSystems Institute 
(CNSI), University of California, Los Angeles, CA 90095, USA;Department of Electrical and Computer Engineering, University of California, Los Angeles, CA 90095, USA;California NanoSystems Institute (CNSI), University of California, Los Angeles, CA 90095, USA;Department of Bioengineering, University of California, Los Angeles, CA 90095, USA;Department of Electrical and Computer Engineering, University of California, Los Angeles, CA 90095, USA;Department of Surgery, David Geffen School of Medicine, University of California, Los Angeles, CA 90095, USA","Aydogan Ozcan (corresponding author), California NanoSystems Institute (CNSI), University of California, Los Angeles, CA 90095, USA; Department of Bioengineering, University of California, Los Angeles, CA 90095, USA; Department of Electrical and Computer Engineering, University of California, Los Angeles, CA 90095, USA; Department of Surgery, David Geffen School of Medicine, University of California, Los Angeles, CA 90095, USA","All-optical deep learning Deep learning uses multilayered artificial neural networks to learn digitally from large datasets. It then performs advanced identification and classification tasks. To date, these multilayered neural networks have been implemented on a computer. Lin et al. demonstrate all-optical machine learning that uses passive optical components that can be patterned and fabricated with 3D-printing. Their hardware approach comprises stacked layers of diffractive optical elements analogous to an artificial neural network that can be trained to execute complex functions at the speed of light. Science , this issue p. 
1004",361,6406,1004,1008,Computer science;Deep learning;Artificial intelligence;Artificial neural network;Feature (linguistics);Lithography;Deep neural networks;Pattern recognition (psychology);Computer architecture;Computer vision;Materials science;Optoelectronics,US,"LeCun Y, 2015, NATURE;LeCun Y, 1998, PROCEEDINGS OF THE IEEE;Wang Z, 2004, IEEE TRANSACTIONS ON IMAGE PROCESSING;Srivastava N, 2014, ;Cho K, 2014, ;Silver D, 2016, NATURE;Litjens G, 2017, MEDICAL IMAGE ANALYSIS;Graves A, 2013, ;Yu N, 2014, NATURE MATERIALS;Sutton P, 1996, QUANTUM AND SEMICLASSICAL OPTICS JOURNAL OF THE EUROPEAN OPTICAL SOCIETY PART B;Cireşan D, 2012, ;Khorasaninejad M, 2016, SCIENCE;Shen Y, 2017, NATURE PHOTONICS;Kildishev A, 2013, SCIENCE;Chen Y, 2014, IEEE JOURNAL OF SELECTED TOPICS IN APPLIED EARTH OBSERVATIONS AND REMOTE SENSING;Jin K, 2017, IEEE TRANSACTIONS ON IMAGE PROCESSING;Grischkowsky D, 1990, JOURNAL OF THE OPTICAL SOCIETY OF AMERICA B;Kazhdan M, 2013, ACM TRANSACTIONS ON GRAPHICS;Hammernik K, 2017, MAGNETIC RESONANCE IN MEDICINE;Ranzato M, 2007, THE MIT PRESS EBOOKS;Rivenson Y, 2017, LIGHT SCIENCE & APPLICATIONS;Brunner D, 2013, NATURE COMMUNICATIONS;Sinha A, 2017, OPTICA;Rivenson Y, 2017, OPTICA;Greenbaum A, 2012, NATURE METHODS;Kamilov U, 2015, OPTICA;Özcan A, 2016, ANNUAL REVIEW OF BIOMEDICAL ENGINEERING;Greenbaum A, 2014, SCIENCE TRANSLATIONAL MEDICINE;Psaltis D, 1990, NATURE;Riedhammer K, 2012, INTERNATIONAL CONFERENCE ON ACOUSTICS, SPEECH, AND SIGNAL PROCESSING;Reinert B, 2013, ACM TRANSACTIONS ON GRAPHICS;Shen Y, 2017, ;Marini A, 2017, PHYSICAL REVIEW. B./PHYSICAL REVIEW. 
B;Rivenson Y, 2018, ACS PHOTONICS;Greenbaum A, 2013, SCIENTIFIC REPORTS;Emons M, 2012, OPTICAL MATERIALS EXPRESS;Bianchi V, 2017, NATURE COMMUNICATIONS;Shastri B, 2018, ;Hermans M, 2015, NATURE COMMUNICATIONS;Xiao Y, 2018, ACS PHOTONICS;Yin X, 2012, OPTICS EXPRESS;Wagner K, 2017, ;, 1993, ;Cignoni P, 2008, ISTI OPEN PORTAL",,,OPENALEX,"Lin X, 2018, SCIENCE","Lin X, 2018, SCIENCE" +https://openalex.org/W2162651021,10.7551/mitpress/9780262170055.001.0001,Dataset Shift in Machine Learning,2008,en,book,1516,THE MIT PRESS EBOOKS,The MIT Press eBooks,,,,,"An overview of recent efforts in the machine learning community to deal with dataset and covariate shift, which occurs when test and training inputs and outputs have different distributions. Dataset shift is a common problem in predictive modeling that occurs when the joint distribution of inputs and outputs differs between training and test stages. Covariate shift, a particular case of dataset shift, occurs when only the input distribution changes. Dataset shift is present in most practical applications, for reasons ranging from the bias introduced by experimental design to the irreproducibility of the testing conditions at training time. (An example is -email spam filtering, which may fail to recognize spam that differs in form from the spam the automatic filter has been built on.) Despite this, and despite the attention given to the apparently similar problems of semi-supervised learning and active learning, dataset shift has received relatively little attention in the machine learning community until recently. This volume offers an overview of current efforts to deal with dataset and covariate shift. 
The chapters offer a mathematical and philosophical introduction to the problem, place dataset shift in relationship to transfer learning, transduction, local learning, active learning, and semi-supervised learning, provide theoretical views of dataset and covariate shift (including decision theoretic and Bayesian perspectives), and present algorithms for covariate shift. Contributors Shai Ben-David, Steffen Bickel, Karsten Borgwardt, Michael Brückner, David Corfield, Amir Globerson, Arthur Gretton, Lars Kai Hansen, Matthias Hein, Jiayuan Huang, Choon Hui Teo, Takafumi Kanamori, Klaus-Robert Müller, Sam Roweis, Neil Rubens, Tobias Scheffer, Marcel Schmittfull, Bernhard Schölkopf Hidetoshi Shimodaira, Alex Smola, Amos Storkey, Masashi Sugiyama",,,,,Artificial intelligence;Machine learning;Covariate;Computer science;Bayesian probability;Paradigm shift;Filter (signal processing),,"Gordon A, 1984, BIOMETRICS;Rissanen J, 1978, AUTOMATICA;Platt J, 1998, THE MIT PRESS EBOOKS;Madden E, 1955, PHILOSOPHY AND PHENOMENOLOGICAL RESEARCH;Ruppert D, 1987, TECHNOMETRICS;Devroye L, 1996, STOCHASTIC MODELLING AND APPLIED PROBABILITY;Japkowicz N, 2002, INTELLIGENT DATA ANALYSIS;Heckman J, 1974, ECONOMETRICA;Elkan C, 2001, ;Dhanasekaran S, 2001, NATURE;Амари Ш, 1985, LECTURE NOTES IN STATISTICS;Shimodaira H, 2000, JOURNAL OF STATISTICAL PLANNING AND INFERENCE;Manski C, 1977, ECONOMETRICA;Akbani R, 2004, LECTURE NOTES IN COMPUTER SCIENCE;Vella F, 1998, THE JOURNAL OF HUMAN RESOURCES;Kimeldorf G, 1970, THE ANNALS OF MATHEMATICAL STATISTICS;Welsh J, 2001, PUBMED;Wells P, 2001, PHYSIOLOGICAL MEASUREMENT;Zadrozny B, 2004, ;Sugiyama M, 2007, ;Veropoulos K, 1999, ;LeCun Y, 1995, ;Manski C, 1989, THE JOURNAL OF HUMAN RESOURCES;Lee L, 1982, THE REVIEW OF ECONOMIC STUDIES;Zadrozny B, 2001, ;Ben-David S, 2003, LECTURE NOTES IN COMPUTER SCIENCE;Gilad-Bachrach R, 2004, ;Abe N, 1998, INTERNATIONAL CONFERENCE ON MACHINE LEARNING;Lowd D, 2005, CONFERENCE ON EMAIL AND ANTI-SPAM;Warnat P, 2005, BMC 
BIOINFORMATICS;Teo C, 2007, ;Wittel G, 2004, ;Dai Y, 2005, MATHEMATICAL PROGRAMMING;Gibbs M, 1997, ;Cohen S, 2005, EDINBURGH RESEARCH EXPLORER (UNIVERSITY OF EDINBURGH);Steinwart I, 2002, JOURNAL OF COMPLEXITY;Basu A, 1994, ANNALS OF THE INSTITUTE OF STATISTICAL MATHEMATICS;Haussler D, 1997, THE ANNALS OF STATISTICS;Field C, 1994, INTERNATIONAL STATISTICAL REVIEW;Bonilla E, 2007, EDINBURGH RESEARCH EXPLORER (UNIVERSITY OF EDINBURGH);Hein M, 2006, LECTURE NOTES IN COMPUTER SCIENCE;Shimodaira H, 2007, JOURNAL OF STATISTICAL PLANNING AND INFERENCE;Horowitz J, 2005, JOURNAL OF ECONOMETRICS;Sugiyama M, 2006, LECTURE NOTES IN COMPUTER SCIENCE;Kanamori T, 2003, JOURNAL OF STATISTICAL PLANNING AND INFERENCE;Lawrence N, 2005, LECTURE NOTES IN COMPUTER SCIENCE;Sugiyama M, 2005, LECTURE NOTES IN COMPUTER SCIENCE;Smith A, 2004, ;Warmuth M, 2003, MPG.PURE (MAX PLANCK SOCIETY);Sugiyama M, 2003, ;Chawla N, 2002, JOURNAL OF ARTIFICIAL INTELLIGENCE RESEARCH;Heckman J, 1979, ECONOMETRICA;Wu Y, 1999, TECHNOMETRICS;Stone M, 1974, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Schölkopf B, 2001, NEURAL COMPUTATION;Jacobs R, 1991, NEURAL COMPUTATION;Zhu X, 2003, ;Ben-David S, 2007, THE MIT PRESS EBOOKS;Tsochantaridis I, 2005, MPG.PURE (MAX PLANCK SOCIETY);Gretton A, 2007, THE MIT PRESS EBOOKS;Huang J, 2007, THE MIT PRESS EBOOKS;Shalev‐Shwartz S, 2010, MATHEMATICAL PROGRAMMING;Taskar B, 2003, ;Mackay D, 1992, NEURAL COMPUTATION;Weston J, 2000, ;Williams C, 1998, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Gruvberger S, 2001, PUBMED;Sugiyama M, 2007, ;Nedić A, 2001, SIAM JOURNAL ON OPTIMIZATION;Fine S, 2002, ;Xue Y, 2007, ;Zhu X, 2003, ;Pfeffermann D, 1998, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Fukumizu K, 2007, ;Lindsay B, 1994, THE ANNALS OF STATISTICS;Rasmussen C, 2001, ;Batu T, 2002, ;Watanabe S, 2001, NEURAL COMPUTATION;Tresp V, 2000, ;Mendelson S, 2003, LECTURE NOTES IN COMPUTER SCIENCE;Kim S, 
2005, ;Sugiyama M, 2006, JOURNAL OF MACHINE LEARNING RESEARCH;Bousquet O, 2003, ;Teo C, 2007, ;Fukumizu K, 2000, IEEE TRANSACTIONS ON NEURAL NETWORKS;Sugiyama M, 2001, NEURAL COMPUTATION;Wiens D, 2000, JOURNAL OF STATISTICAL PLANNING AND INFERENCE;Graepel T, 2003, ;Bach F, 2007, THE MIT PRESS EBOOKS;Rosset S, 2004, ;Kanamori T, 2007, NEUROCOMPUTING;Sugiyama M, 2000, NEURAL COMPUTATION;Sugiyama M, 2004, NEURAL COMPUTATION;He T, 2006, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Hansen L, 1999, ;Krupka E, 2005, ;Osuna E, 1997, DSPACE@MIT (MASSACHUSETTS INSTITUTE OF TECHNOLOGY);Sung H, 2004, RICE DIGITAL SCHOLARSHIP ARCHIVE (RICE UNIVERSITY);Sugiyama M, 2007, IEICE TRANSACTIONS ON FUNDAMENTALS OF ELECTRONICS COMMUNICATIONS AND COMPUTER SCIENCES;Lu Z, 2010, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES A (STATISTICS IN SOCIETY);Yang Y, 1997, ;Hoeffding W, 1963, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Dehnad K, 1987, TECHNOMETRICS;Zhou D, 2003, MPG.PURE (MAX PLANCK SOCIETY);Abe S, 2001, ;Rosenbaum P, 1983, BIOMETRIKA;, 2000, CHOICE REVIEWS ONLINE;Schölkopf B, 2000, NEURAL COMPUTATION;Singh D, 2002, CANCER CELL;Herzberg A, 1972, BIOMETRIKA;Lunceford J, 2004, STATISTICS IN MEDICINE;Blitzer J, 2006, ;Drucker H, 1999, IEEE TRANSACTIONS ON NEURAL NETWORKS;McDiarmid C, 1989, CAMBRIDGE UNIVERSITY PRESS EBOOKS;West M, 2001, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Dudley R, 2002, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Heckman J, 1990, AMERICAN ECONOMIC REVIEW;Dalvi N, 2004, ;Ziegel E, 1994, TECHNOMETRICS;Baxter J, 2000, ;Green P, 1984, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Skinner C, 1992, BIOMETRICS;Dubin J, 1989, SOCIOLOGICAL METHODS & RESEARCH;Smola A, 2005, ;Mackay D, 1992, ;Bush R, 1956, THE AMERICAN JOURNAL OF PSYCHOLOGY;Dudı́k M, 2005, ;R.Black T, 2002, ;Jordan M, 2005, ;Chawla N, 2005, ;Sugiyama M, 2005, STATISTICS & DECISIONS;Storkey A, 2007, THE MIT PRESS EBOOKS;Corfield D, 2003, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Schmidt M, 
2002, ;KIFER D, 2004, ELSEVIER EBOOKS;Windham M, 1995, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Bickel S, 2007, THE MIT PRESS EBOOKS;Winship C, 1992, ANNUAL REVIEW OF SOCIOLOGY;Chapelle O, 2007, THE MIT PRESS EBOOKS;Bickel S, 2006, ;Sollich P, 1999, ;Sugiyama M, 2002, NEURAL NETWORKS;Leskovec J, 2003, ;Fukumizu K, 1995, ;Lawrence N, 2006, THE MIT PRESS EBOOKS;Rasmussen C, 2022, DIRECTORY OF OPEN ACCESS BOOKS (OAPEN FOUNDATION);Bishop C, 1995, ;Grimshaw S, 1995, TECHNOMETRICS;Cohn D, 1996, ARXIV (CORNELL UNIVERSITY)",,,OPENALEX,"NA, 2008, THE MIT PRESS EBOOKS","NA, 2008, THE MIT PRESS EBOOKS" +https://openalex.org/W2498672755,10.1007/978-1-4419-9326-7,Ensemble Machine Learning,2012,en,book,1627,,,,,,,,,,,,Variety (cybernetics);Ensemble learning;Computer science;Process (computing);Artificial intelligence;Machine learning;Data science,,"Vapnik V, 1995, ;1959- B, 1994, CHOICE REVIEWS ONLINE;Ho T, 1998, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Ripley B, 1996, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Mackay D, 1992, NEURAL COMPUTATION;Neal R, 1996, LECTURE NOTES IN STATISTICS;Møller M, 1993, NEURAL NETWORKS;Krogh A, 1994, ;Krogh A, 1991, ;Liu Y, 1999, NEURAL NETWORKS;Wang S, 2009, ;Minku L, 2009, IEEE TRANSACTIONS ON KNOWLEDGE AND DATA ENGINEERING;Brown G, 2005, ;Islam M, 2003, IEEE TRANSACTIONS ON NEURAL NETWORKS;Gestel T, 2002, NEURAL COMPUTATION;Liu Y, 1999, IEEE TRANSACTIONS ON SYSTEMS MAN AND CYBERNETICS PART B (CYBERNETICS);Chandra A, 2006, JOURNAL OF MATHEMATICAL MODELLING AND ALGORITHMS;Chen H, 2009, IEEE TRANSACTIONS ON KNOWLEDGE AND DATA ENGINEERING;Jin Y, 2004, ;Wang S, 2009, ;Chen H, 2012, ",,,OPENALEX,"NA, 2012, ","NA, 2012, " +https://openalex.org/W1994005439,10.1142/s0129065704001899,GAUSSIAN PROCESSES FOR MACHINE LEARNING,2004,en,review,1416,INTERNATIONAL JOURNAL OF NEURAL SYSTEMS,International Journal of Neural Systems,Matthias Seeger,MATTHIAS SEEGER,"Department of EECS, University of California at 
Berkeley, 485 Soda Hall, Berkeley CA 94720-1776, USA;Previously at: Institute for Adaptive and Neural Computation, +University of Edinburgh, UK","MATTHIAS SEEGER (corresponding author), Department of EECS, University of California at Berkeley, 485 Soda Hall, Berkeley CA 94720-1776, USA; Previously at: Institute for Adaptive and Neural Computation, +University of Edinburgh, UK","Gaussian processes (GPs) are natural generalisations of multivariate Gaussian random variables to infinite (countably or continuous) index sets. GPs have been applied in a large number of fields to a diverse range of ends, and very many deep theoretical analyses of various properties are available. This paper gives an introduction to Gaussian processes on a fairly elementary level with special emphasis on characteristics relevant in machine learning. It draws explicit connections to branches such as spline smoothing models and support vector machines in which similar ideas have been investigated. Gaussian process models are routinely used to solve hard machine learning problems. They are attractive because of their flexible non-parametric nature and computational simplicity. Treated within a Bayesian framework, very powerful statistical methods can be implemented which offer valid estimates of uncertainties in our predictions and generic model selection procedures cast as nonlinear optimization problems. Their main drawback of heavy computational scaling has recently been alleviated by the introduction of generic sparse approximations.13,78,31 The mathematical literature on GPs is large and often uses deep concepts which are not required to fully understand most machine learning applications. In this tutorial paper, we aim to present characteristics of GPs relevant to machine learning and to show up precise connections to other ""kernel machines"" popular in the community. 
Our focus is on a simple presentation, but references to more detailed sources are provided.",14,02,69,106,Computer science;Gaussian process;Machine learning;Artificial intelligence;Range (aeronautics);Smoothing;Gaussian;Support vector machine;Kriging;Algorithm,US;GB,", 2005, STATISTICS FOR BIOLOGY AND HEALTH;Burges C, 1998, DATA MINING AND KNOWLEDGE DISCOVERY;Roweis S, 2000, SCIENCE;Tenenbaum J, 2000, SCIENCE;Rosenblatt F, 1958, PSYCHOLOGICAL REVIEW;PE, 1996, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Schölkopf B, 1999, ;Platt J, 1998, THE MIT PRESS EBOOKS;Aronszajn N, 1950, TRANSACTIONS OF THE AMERICAN MATHEMATICAL SOCIETY;Platt J, 1999, ;Matheron G, 1963, ECONOMIC GEOLOGY;Neal R, 1996, LECTURE NOTES IN STATISTICS;, 1974, ELSEVIER EBOOKS;1950- G, 2004, ELSEVIER EBOOKS;Jordan M, 1998, ;Stein M, 1999, SPRINGER SERIES IN STATISTICS;Green P, 1994, ;, 2000, THE MIT PRESS EBOOKS;Brookes B, 1951, THE MATHEMATICAL GAZETTE;Matheron G, 1973, ADVANCES IN APPLIED PROBABILITY;Haussler D, 1999, ;Schoenberg I, 1938, ANNALS OF MATHEMATICS;Kimeldorf G, 1970, THE ANNALS OF MATHEMATICAL STATISTICS;Williams C, 1998, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Csató L, 2002, NEURAL COMPUTATION;Kondor R, 2002, ;Williams C, 1998, ;Smola A, 1998, NEURAL NETWORKS;Halmos P, 1998, ;Matthias S, 2000, APPLIED PHYSICS LETTERS;Herbrich R, 2002, NEURAL INFORMATION PROCESSING SYSTEMS;Schoenberg I, 1964, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Girard A, 2002, ;Ihara S, 1993, WORLD SCIENTIFIC EBOOKS;Solak E, 2002, ;Williams C, 1998, NEURAL COMPUTATION;Smola A, 2000, ;Zhu H, 1997, ASTON PUBLICATIONS EXPLORER (ASTON UNIVERSITY);Seeger M, 1999, INFOSCIENCE (ECOLE POLYTECHNIQUE FÉDÉRALE DE LAUSANNE);Sollich P, 1999, ;Wright S, 1999, SIAM JOURNAL ON OPTIMIZATION;Seeger M, 2001, INFOSCIENCE (ECOLE POLYTECHNIQUE FÉDÉRALE DE LAUSANNE);Minka T, 1999, ;Seeger M, 1999, INFOSCIENCE (ECOLE POLYTECHNIQUE FÉDÉRALE DE LAUSANNE);Cybenko G, 1990, SIAM JOURNAL ON MATRIX ANALYSIS AND 
APPLICATIONS;Ruján P, 1993, JOURNAL DE PHYSIQUE I;Minka T, 2001, DSPACE@MIT (MASSACHUSETTS INSTITUTE OF TECHNOLOGY);Mackay D, 1992, ;Neal R, 1997, ARXIV.ORG;Seeger M, 2003, ERA;Boyd S, 2004, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Press W, 1994, ;, 2002, THE MIT PRESS EBOOKS;Cressie N, 1992, TERRA NOVA;Nelder J, 1972, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES A (GENERAL);Cressie N, 1993, WILEY SERIES IN PROBABILITY AND STATISTICS;Müller H, 1991, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Wahba G, 1990, SOCIETY FOR INDUSTRIAL AND APPLIED MATHEMATICS EBOOKS;Poggio T, 1990, PROCEEDINGS OF THE IEEE;Sollich P, 1999, MIT PRESS EBOOKS;Schölkopf B, 2001, MPG.PURE (MAX PLANCK SOCIETY);Krige D, 1951, JOURNAL OF THE SOUTHERN AFRICAN INSTITUTE OF MINING AND METALLURGY;Rathbun S, 2000, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;McCullagh P, 1983, ;Grimmett G, 2001, ;Hawkes J, 1987, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES A (GENERAL);Smith J, 1993, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES A (STATISTICS IN SOCIETY);Kendall W, 1982, BULLETIN OF THE LONDON MATHEMATICAL SOCIETY;Iaglom A, 1987, SPRINGER EBOOKS;O’Hagan A, 1978, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Weiss Y, 2001, NEURAL COMPUTATION;David F, 1952, BIOMETRIKA;Yaglom A, 1987, SPRINGER SERIES IN STATISTICS;Jebara T, 2004, MACHINE LEARNING;Seeger M, 2003, JOURNAL OF MACHINE LEARNING RESEARCH;Opper M, 2000, NEURAL COMPUTATION;Jaakkola T, 1999, ;Ihara S, 1993, WORLD SCIENTIFIC PUBLISHING CO. PTE. LTD. EBOOKS;Platt J, 2001, ;Barber D, 1997, ;Cortes C, 2002, NEURAL INFORMATION PROCESSING SYSTEMS;Wainwright M, 2000, ",,,OPENALEX,"Seeger M, 2004, INTERNATIONAL JOURNAL OF NEURAL SYSTEMS","Seeger M, 2004, INTERNATIONAL JOURNAL OF NEURAL SYSTEMS" +https://openalex.org/W2168020168,10.1007/978-0-387-21579-2_9,The Boosting Approach to Machine Learning: An Overview,2003,en,book-chapter,2010,LECTURE NOTES IN STATISTICS,Lecture notes in statistics,Robert E. Schapire,Robert E. 
Schapire,"AT&T Labs — Research, Shannon Laboratory, 180 Park Avenue, Florham Park, NJ, 07932, USA","Robert E. Schapire (corresponding author), AT&T Labs — Research, Shannon Laboratory, 180 Park Avenue, Florham Park, NJ, 07932, USA",,,,149,171,Boosting (machine learning);Computer science;Machine learning;Artificial intelligence,US,"Friedman J, 2001, THE ANNALS OF STATISTICS;Freund Y, 1997, JOURNAL OF COMPUTER AND SYSTEM SCIENCES;Boser B, 1992, ;Blake C, 1998, MEDICAL ENTOMOLOGY AND ZOOLOGY;Friedman J, 2000, THE ANNALS OF STATISTICS;Salzberg S, 1994, ;Valiant L, 1984, ;Cohen W, 1995, ELSEVIER EBOOKS;Schapire R, 1990, MACHINE LEARNING;Vapnik V, 2015, ;Dietterich T, 2000, MACHINE LEARNING;Dietterich T, 1995, JOURNAL OF ARTIFICIAL INTELLIGENCE RESEARCH;Schapire R, 1998, ;Bartlett P, 1998, THE ANNALS OF STATISTICS;Vapnik V, 1995, MEDICAL ENTOMOLOGY AND ZOOLOGY;Schapire R, 2000, MACHINE LEARNING;Freund Y, 1995, INFORMATION AND COMPUTATION;Rätsch G, 2001, MACHINE LEARNING;Quinlan J, 1996, NATIONAL CONFERENCE ON ARTIFICIAL INTELLIGENCE;Darroch J, 1972, THE ANNALS OF MATHEMATICAL STATISTICS;Bartlett P, 1998, IEEE TRANSACTIONS ON INFORMATION THEORY;Breiman L, 1998, THE ANNALS OF STATISTICS;Allwein E, 2000, ;Freund Y, 1998, ;Freund Y, 1999, ;Collins M, 2002, MACHINE LEARNING;Drucker H, 1997, INTERNATIONAL CONFERENCE ON MACHINE LEARNING;Koltchinskii V, 2002, THE ANNALS OF STATISTICS;Hofmann T, 1997, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Demiriz A, 2002, MACHINE LEARNING;Freund Y, 1996, ;Cohen W, 1999, ;Mason L, 2000, THE MIT PRESS EBOOKS;Freund Y, 2001, MACHINE LEARNING;Schapire R, 1997, ;Maclin R, 1997, ;Grove A, 1998, ;Drucker H, 1995, NEURAL INFORMATION PROCESSING SYSTEMS;Schapire R, 1998, ;Kivinen J, 1999, ;Walker M, 2001, ;Mason L, 1998, ;Abney S, 1999, ;Ridgeway G, 1999, ;Lafferty J, 1999, ;Iyer R, 2000, ;Merler S, 2001, LECTURE NOTES IN COMPUTER SCIENCE;Haruno M, 1999, MACHINE LEARNING;Koltchinskii V, 2001, LECTURE NOTES IN COMPUTER 
SCIENCE;Rochery M, 2007, ;Vapnik V, 1995, ;Cortes C, 1995, MACHINE LEARNING;Quinlan J, 1992, ;Freund Y, 1996, ;Valiant L, 1984, COMMUNICATIONS OF THE ACM;Bauer E, 1999, MACHINE LEARNING;Vapnik V, 1971, THEORY OF PROBABILITY AND ITS APPLICATIONS;Merz C, 1996, MEDICAL ENTOMOLOGY AND ZOOLOGY;Schapire R, 1999, MACHINE LEARNING;Blumer A, 1989, JOURNAL OF THE ACM;Baum E, 1989, NEURAL COMPUTATION;, 1978, IEEE TRANSACTIONS ON AEROSPACE AND ELECTRONIC SYSTEMS;Pietra S, 1997, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Kearns M, 1994, JOURNAL OF THE ACM;Mason L, 1999, ;Breiman L, 1999, NEURAL COMPUTATION;Freund Y, 1999, GAMES AND ECONOMIC BEHAVIOR;Collins M, 2000, ;Tieu K, 2002, ;Fürnkranz J, 1994, ELSEVIER EBOOKS;Mozer M, 2000, IEEE TRANSACTIONS ON NEURAL NETWORKS;Duffy N, 2002, MACHINE LEARNING;Drucker H, 1993, INTERNATIONAL JOURNAL OF PATTERN RECOGNITION AND ARTIFICIAL INTELLIGENCE;Lebanon G, 2002, THE MIT PRESS EBOOKS;Escudero G, 2000, LECTURE NOTES IN COMPUTER SCIENCE;Kearns M, 1988, MEDICAL ENTOMOLOGY AND ZOOLOGY;Schwenk H, 1997, NEURAL INFORMATION PROCESSING SYSTEMS;Schapire R, 2001, MACHINE LEARNING;Duffy N, 1999, ;Koltchinskii V, 2000, ;Moreno P, 2001, ;Jackson J, 1995, ;Rätsch G, 2000, CONFERENCE ON LEARNING THEORY;Rochery M, 2002, IEEE INTERNATIONAL CONFERENCE ON ACOUSTICS SPEECH AND SIGNAL PROCESSING",,,OPENALEX,"Schapire R, 2003, LECTURE NOTES IN STATISTICS","Schapire R, 2003, LECTURE NOTES IN STATISTICS" +https://openalex.org/W2095577883,10.1145/2046684.2046692,Adversarial machine learning,2011,en,article,1196,,,Ling Huang;Anthony D. Joseph;Blaine Nelson;Benjamin I. P. Rubinstein;J. D. Tygar,Ling Huang;Anthony D. Joseph;Blaine Nelson;Benjamin I.P. Rubinstein;J. D. Tygar,"Intel Labs Berkeley, Berkeley, CA, USA;Intel Labs. 
Berkeley, Berkeley, CA, USA;UC Berkeley, Berkeley, CA, USA;University of Tubingen, Tubingen, Germany;Microsoft, Mountain View, CA, USA;UC Berkeley, Berkeley, CA, USA",,"In this paper (expanded from an invited talk at AISEC 2010), we discuss an emerging field of study: adversarial machine learning---the study of effective machine learning techniques against an adversarial opponent. In this paper, we: give a taxonomy for classifying attacks against online machine learning algorithms; discuss application-specific factors that limit an adversary's capabilities; introduce two models for modeling an adversary's capabilities; explore the limits of an adversary's knowledge about the algorithm, feature space, training, and input data; explore vulnerabilities in machine learning algorithms; discuss countermeasures against attacks; introduce the evasion challenge; and discuss privacy-preserving learning techniques.",,,43,58,Adversarial system;Adversary;Computer science;Adversarial machine learning;Artificial intelligence;Machine learning;Evasion (ethics);Field (mathematics);Computer security,US;DE,"Nasrabadi N, 2007, JOURNAL OF ELECTRONIC IMAGING;Hastie T, 2013, ;Cristianini N, 2000, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Schölkopf B, 2001, THE MIT PRESS EBOOKS;Bishop C, 2006, SPRINGER EBOOKS;Dwork C, 2006, LECTURE NOTES IN COMPUTER SCIENCE;Shawe‐Taylor J, 2004, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Jurafsky D, 2000, ;Cristianini N, 2000, EPRINTS SOTON (UNIVERSITY OF SOUTHAMPTON);Devroye L, 1996, STOCHASTIC MODELLING AND APPLIED PROBABILITY;Cesa‐Bianchi N, 2006, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Paxson V, 1999, COMPUTER NETWORKS;Flum J, 2006, ;Maronna R, 2006, ;Hofmeyr S, 1998, JOURNAL OF COMPUTER SECURITY;Schohn G, 2000, ;Rieck K, 2011, JOURNAL OF COMPUTER SECURITY;Klimt B, 2004, CONFERENCE ON EMAIL AND ANTI-SPAM;Mahoney M, 2003, LECTURE NOTES IN COMPUTER SCIENCE;Lowd D, 2005, CONFERENCE ON EMAIL AND ANTI-SPAM;Newsome J, 2006, LECTURE NOTES IN COMPUTER SCIENCE;Wittel G, 2004, ;Kloft 
M, 2010, ;Hall R, 2011, ;Beimel A, 2010, LECTURE NOTES IN COMPUTER SCIENCE;Tan K, 2002, LECTURE NOTES IN COMPUTER SCIENCE;Chung S, 2006, LECTURE NOTES IN COMPUTER SCIENCE;Biggio B, 2010, LECTURE NOTES IN COMPUTER SCIENCE;, 2004, ;Tan K, 2002, LECTURE NOTES IN COMPUTER SCIENCE;Dredze M, 2007, CONFERENCE ON EMAIL AND ANTI-SPAM;Meyer T, 2004, CONFERENCE ON EMAIL AND ANTI-SPAM;Robinson G, 2003, LINUX JOURNAL;Wang Z, 2007, ;Venkataraman S, 2018, FIGSHARE;Sculley D, 2006, ;Nelson B, 2011, LECTURE NOTES IN COMPUTER SCIENCE;Cormack G, 2005, ;Segal R, 2004, CONFERENCE ON EMAIL AND ANTI-SPAM;Chung S, 2007, LECTURE NOTES IN COMPUTER SCIENCE;Rubinstein B, 2008, UC BERKELEY;Xu W, 2004, ;Saini U, 2008, UC BERKELEY;Hohm T, 2008, LECTURE NOTES IN COMPUTER SCIENCE;Rubinstein B, 2010, ESCHOLARSHIP (CALIFORNIA DIGITAL LIBRARY);Stolfo S, 2003, ;Joseph A, 2013, DROPS (SCHLOSS DAGSTUHL – LEIBNIZ CENTER FOR INFORMATICS);Goyal N, 2009, ARXIV.ORG;Nelson B, 2010, ARXIV (CORNELL UNIVERSITY);Rivest R, 1983, COMMUNICATIONS OF THE ACM;PE, 1996, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Zhao W, 2003, ACM COMPUTING SURVEYS;Valiant L, 1984, ;Ruppert D, 1987, TECHNOMETRICS;Samuel G, 1974, MATHEMATICS OF COMPUTATION;Littlestone N, 1994, INFORMATION AND COMPUTATION;Sommer R, 2010, ;Walter R, 2006, ;Austrin P, 2007, ;Homer N, 2008, PLOS GENETICS;Kimeldorf G, 1971, JOURNAL OF MATHEMATICAL ANALYSIS AND APPLICATIONS;Jackson J, 1979, TECHNOMETRICS;Dwork C, 2010, COMMUNICATIONS OF THE ACM;Blum A, 2005, ;Shannon C, 1959, BELL SYSTEM TECHNICAL JOURNAL;Moore D, 2006, ACM TRANSACTIONS ON COMPUTER SYSTEMS;McSherry F, 2009, ;Devlin S, 1981, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Holz T, 2008, ;Kearns M, 1993, SIAM JOURNAL ON COMPUTING;Rubinstein B, 2009, ;Kasiviswanathan S, 2008, ;Croux C, 2004, JOURNAL OF MULTIVARIATE ANALYSIS;Li G, 1985, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Goldman S, 1995, JOURNAL OF COMPUTER AND SYSTEM SCIENCES;Smith A, 2011, ;Soule A, 2005, ;Mutz D, 2006, ACM 
TRANSACTIONS ON INFORMATION AND SYSTEM SECURITY;Rubinstein B, 2012, JOURNAL OF PRIVACY AND CONFIDENTIALITY;Bodík P, 2010, ;Bodík P, 2009, EDINBURGH RESEARCH EXPLORER (UNIVERSITY OF EDINBURGH);Maronna R, 2005, TECHNOMETRICS;Fogla P, 2006, ;Laskov P, 2010, MACHINE LEARNING;Wyner A, 1965, BELL SYSTEM TECHNICAL JOURNAL;Rieck K, 2006, JOURNAL IN COMPUTER VIROLOGY;Hössjer O, 1995, JOURNAL OF NONPARAMETRIC STATISTICS;Sculley D, 2011, ;Rubinstein B, 2009, ACM SIGMETRICS PERFORMANCE EVALUATION REVIEW;Liu C, 2007, ;Lovász L, 2004, ;Barth A, 2011, IEEE TRANSACTIONS ON DEPENDABLE AND SECURE COMPUTING;Hall J, 2005, AMERICAN JOURNAL OF PHYSICS;Johnson P, 1955, AMERICAN JOURNAL OF PHYSICS;Feldman V, 2008, ;Kalai A, 2002, ;Duan Y, 2010, ;Brückner M, 2009, ;Burges C, 1998, DATA MINING AND KNOWLEDGE DISCOVERY;Shannon C, 1949, BELL SYSTEM TECHNICAL JOURNAL;Machanavajjhala A, 2007, ACM TRANSACTIONS ON KNOWLEDGE DISCOVERY FROM DATA;Li N, 2007, ;Rahimi A, 2007, ;Plamondon R, 2000, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Narayanan A, 2008, PROCEEDINGS - IEEE SYMPOSIUM ON SECURITY AND PRIVACY/PROCEEDINGS OF THE ... 
IEEE SYMPOSIUM ON SECURITY AND PRIVACY;Jones D, 2001, JOURNAL OF GLOBAL OPTIMIZATION;Jones D, 1993, JOURNAL OF OPTIMIZATION THEORY AND APPLICATIONS;Angluin D, 1988, MACHINE LEARNING;McSherry F, 2007, ;Cauwenberghs G, 2000, ;Warrender C, 2003, ;Dinur I, 2003, ;Chaudhuri K, 2011, PUBMED;Barreno M, 2010, MACHINE LEARNING;Mukkamala S, 2003, ;Wagner D, 2002, ;Newsome J, 2005, ;Dwork C, 2009, ;Chaudhuri K, 2008, ;Barak B, 2007, ;Kearns M, 1999, NEURAL COMPUTATION;Lakhina A, 2004, ;Cormode G, 2012, ;Wagner D, 2004, ;Dwork C, 2009, ;Bahl P, 2007, ;Ringberg H, 2007, ;Hardt M, 2010, ;Mahoney M, 2002, ;Globerson A, 2006, ;Ramachandran A, 2007, ;Bertsimas D, 2004, JOURNAL OF THE ACM;Lovász L, 2005, JOURNAL OF COMPUTER AND SYSTEM SCIENCES;Somayaji A, 2000, ;Devroye L, 1979, IEEE TRANSACTIONS ON INFORMATION THEORY;Dasgupta S, 2005, LECTURE NOTES IN COMPUTER SCIENCE;Dwork C, 2007, ;Narayanan A, 2011, ;Christmann A, 2003, TECHNISCHE UNIVERSITÄT DORTMUND ELDORADO (TECHNISCHE UNIVERSITÄT DORTMUND);Cheng Y, 2007, ;Dwork C, 2008, LECTURE NOTES IN COMPUTER SCIENCE;De A, 2012, LECTURE NOTES IN COMPUTER SCIENCE;Smith R, 1996, ;Kantchelian A, 2012, ;Blocki J, 2011, ;Kandula S, 2008, ACM SIGCOMM COMPUTER COMMUNICATION REVIEW;Kantarcıoğlu M, 2010, DATA MINING AND KNOWLEDGE DISCOVERY;Kloft M, 2010, ARXIV (CORNELL UNIVERSITY);Vapnik V, 1995, ;Boyd S, 2004, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Nelder J, 1965, THE COMPUTER JOURNAL;S A, 2005, ;Diffie W, 1976, IEEE TRANSACTIONS ON INFORMATION THEORY;Pearson K, 1901, THE LONDON EDINBURGH AND DUBLIN PHILOSOPHICAL MAGAZINE AND JOURNAL OF SCIENCE;, 2006, ;Sweeney L, 2002, INTERNATIONAL JOURNAL OF UNCERTAINTY FUZZINESS AND KNOWLEDGE-BASED SYSTEMS;Huber P, 1981, WILEY SERIES IN PROBABILITY AND STATISTICS;Faires J, 1978, ;Dwork C, 2006, LECTURE NOTES IN COMPUTER SCIENCE;Kolda T, 2003, SIAM REVIEW;Narayanan A, 2009, ;Bennett J, 2007, ;Lakhina A, 2005, ;Lakhina A, 2004, ;Dalvi N, 2004, ;Barreno M, 2006, ;Forrest S, 2002, ;Lowd D, 2005, ;Blum A, 2008, 
;Peressini A, 1988, UNDERGRADUATE TEXTS IN MATHEMATICS;Zhang J, 2012, PROCEEDINGS OF THE VLDB ENDOWMENT;Nelson B, 2008, EDINBURGH RESEARCH EXPLORER (UNIVERSITY OF EDINBURGH);Croux C, 2007, CHEMOMETRICS AND INTELLIGENT LABORATORY SYSTEMS;Li X, 2006, ;Zhang⋆ Y, 2005, ;Huang L, 2007, THE MIT PRESS EBOOKS;Denning D, 1979, ACM COMPUTING SURVEYS;Brauckhoff D, 2009, ;Nelson B, 2009, ;Stolfo S, 2006, ACM TRANSACTIONS ON INTERNET TECHNOLOGY;Nelson B, 2012, ;Hall R, 2012, ARXIV (CORNELL UNIVERSITY);Laskov P, 2009, ;Korolova A, 2011, JOURNAL OF PRIVACY AND CONFIDENTIALITY;RobinsonGary, 2003, LINUX JOURNAL;Biggio B, 2012, ARXIV (CORNELL UNIVERSITY);Hardt M, 2010, ARXIV (CORNELL UNIVERSITY);Rubinstein B, 2009, ARXIV.ORG;Gottlieb L, 2010, ARXIV (CORNELL UNIVERSITY)",,,OPENALEX,"Huang L, 2011, ","Huang L, 2011, " +https://openalex.org/W2793927960,10.1080/01431161.2018.1433343,Implementation of machine-learning classification in remote sensing: an applied review,2018,en,article,1936,INTERNATIONAL JOURNAL OF REMOTE SENSING,International Journal of Remote Sensing,Aaron E. Maxwell;Timothy A. Warner;Fang Fang,Aaron E. Maxwell;Timothy A. Warner;Fang Fang,"Department of Geology and Geography, West Virginia University, Morgantown, WV, USA;Department of Geology and Geography, West Virginia University, Morgantown, WV, USA;Department of Geology and Geography, West Virginia University, Morgantown, WV, USA","Aaron E. Maxwell (corresponding author), Department of Geology and Geography, West Virginia University, Morgantown, WV, USA","Machine learning offers the potential for effective and efficient classification of remotely sensed imagery. The strengths of machine learning include the capacity to handle data of high dimensionality and to map classes with very complex characteristics. Nevertheless, implementing a machine-learning classification is not straightforward, and the literature provides conflicting advice regarding many key issues. 
This article therefore provides an overview of machine learning from an applied perspective. We focus on the relatively mature methods of support vector machines, single decision trees (DTs), Random Forests, boosted DTs, artificial neural networks, and k-nearest neighbours (k-NN). Issues considered include the choice of algorithm, training data requirements, user-defined parameter selection and optimization, feature space impacts and reduction, and computational costs. We illustrate these issues through applying machine-learning classification to two publically available remotely sensed data sets.",39,9,2784,2817,Machine learning;Computer science;Artificial intelligence;Random forest;Dimensionality reduction;Decision tree;Artificial neural network;Support vector machine;Curse of dimensionality;Key (lock);Focus (optics);Perspective (graphical);Data mining,US,"Freund Y, 1997, JOURNAL OF COMPUTER AND SYSTEM SCIENCES;Altman N, 1992, THE AMERICAN STATISTICIAN;Mountrakis G, 2010, ISPRS JOURNAL OF PHOTOGRAMMETRY AND REMOTE SENSING;Hughes G, 1968, IEEE TRANSACTIONS ON INFORMATION THEORY;Chen Y, 2014, IEEE JOURNAL OF SELECTED TOPICS IN APPLIED EARTH OBSERVATIONS AND REMOTE SENSING;Gislason P, 2005, PATTERN RECOGNITION LETTERS;Huang C, 2002, INTERNATIONAL JOURNAL OF REMOTE SENSING;Homer C, 2004, PHOTOGRAMMETRIC ENGINEERING & REMOTE SENSING;Friedl M, 1997, REMOTE SENSING OF ENVIRONMENT;Khalid S, 2014, ;Pal M, 2003, REMOTE SENSING OF ENVIRONMENT;Pal M, 2005, INTERNATIONAL JOURNAL OF REMOTE SENSING;Duro D, 2011, REMOTE SENSING OF ENVIRONMENT;Stumpf A, 2011, REMOTE SENSING OF ENVIRONMENT;MTW, 1999, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Kavzoğlu T, 2009, INTERNATIONAL JOURNAL OF APPLIED EARTH OBSERVATION AND GEOINFORMATION;Yue J, 2015, REMOTE SENSING LETTERS;Guo L, 2010, ISPRS JOURNAL OF PHOTOGRAMMETRY AND REMOTE SENSING;Adam E, 2014, INTERNATIONAL JOURNAL OF REMOTE SENSING;Murphy M, 2010, ECOLOGY;Hansen M, 1996, INTERNATIONAL JOURNAL OF REMOTE SENSING;Li C, 2014, 
REMOTE SENSING;Ghosh A, 2013, INTERNATIONAL JOURNAL OF APPLIED EARTH OBSERVATION AND GEOINFORMATION;Waske B, 2009, ISPRS JOURNAL OF PHOTOGRAMMETRY AND REMOTE SENSING;Mallinis G, 2007, ISPRS JOURNAL OF PHOTOGRAMMETRY AND REMOTE SENSING;Rogan J, 2008, REMOTE SENSING OF ENVIRONMENT;Archibald R, 2007, IEEE GEOSCIENCE AND REMOTE SENSING LETTERS;Coulston J, 2012, PHOTOGRAMMETRIC ENGINEERING & REMOTE SENSING;Wright C, 2007, REMOTE SENSING OF ENVIRONMENT;Petropoulos G, 2011, COMPUTERS & GEOSCIENCES;Ghimire B, 2012, GISCIENCE & REMOTE SENSING;Pal M, 2013, REMOTE SENSING LETTERS;Duro D, 2012, INTERNATIONAL JOURNAL OF REMOTE SENSING;Yu L, 2014, INTERNATIONAL JOURNAL OF REMOTE SENSING;Johnson B, 2013, ISPRS JOURNAL OF PHOTOGRAMMETRY AND REMOTE SENSING;Laliberte A, 2011, INTERNATIONAL JOURNAL OF APPLIED EARTH OBSERVATION AND GEOINFORMATION;Pal M, 2012, IEEE JOURNAL OF SELECTED TOPICS IN APPLIED EARTH OBSERVATIONS AND REMOTE SENSING;Strobl C, 2009, ;Johnson B, 2012, REMOTE SENSING LETTERS;Pal M, 2006, INTERNATIONAL JOURNAL OF REMOTE SENSING;Maselli F, 2005, INTERNATIONAL JOURNAL OF REMOTE SENSING;Maxwell A, 2015, INTERNATIONAL JOURNAL OF REMOTE SENSING;Zhang R, 2009, INTERNATIONAL JOURNAL OF REMOTE SENSING;Pal M, 2008, INTERNATIONAL JOURNAL OF REMOTE SENSING;Zhang C, 2013, WETLANDS;Lawrence R, 2015, REMOTE SENSING OF ENVIRONMENT;Maxwell A, 2014, GISCIENCE & REMOTE SENSING;Wang X, 2011, INTERNATIONAL JOURNAL OF REMOTE SENSING;Su L, 2009, ISPRS JOURNAL OF PHOTOGRAMMETRY AND REMOTE SENSING;Waske B, 2009, LECTURE NOTES IN COMPUTER SCIENCE;Breiman L, 2001, MACHINE LEARNING;Vapnik V, 1995, ;Chawla N, 2002, JOURNAL OF ARTIFICIAL INTELLIGENCE RESEARCH;Cover T, 1967, IEEE TRANSACTIONS ON INFORMATION THEORY;Quinlan J, 1986, MACHINE LEARNING;He H, 2009, IEEE TRANSACTIONS ON KNOWLEDGE AND DATA ENGINEERING;Freund Y, 1996, ;Belgiu M, 2016, ISPRS JOURNAL OF PHOTOGRAMMETRY AND REMOTE SENSING;Kursa M, 2010, JOURNAL OF STATISTICAL SOFTWARE;Cutler D, 2007, ECOLOGY;Melgani F, 2004, IEEE 
TRANSACTIONS ON GEOSCIENCE AND REMOTE SENSING;, 2000, APPLIED PHYSICS LETTERS;Lu D, 2007, INTERNATIONAL JOURNAL OF REMOTE SENSING;Pal M, 2005, INTERNATIONAL JOURNAL OF REMOTE SENSING;Strobl C, 2008, BMC BIOINFORMATICS;Rodríguez‐Galiano V, 2011, ISPRS JOURNAL OF PHOTOGRAMMETRY AND REMOTE SENSING;Zhang L, 2016, IEEE GEOSCIENCE AND REMOTE SENSING MAGAZINE;Dudani S, 1976, IEEE TRANSACTIONS ON SYSTEMS MAN AND CYBERNETICS;Davis S, 1978, INFOSCIENCE (ECOLE POLYTECHNIQUE FÉDÉRALE DE LAUSANNE);Atkinson P, 1997, INTERNATIONAL JOURNAL OF REMOTE SENSING;Foody G, 2004, IEEE TRANSACTIONS ON GEOSCIENCE AND REMOTE SENSING;Chan J, 2008, REMOTE SENSING OF ENVIRONMENT;Pal M, 2010, IEEE TRANSACTIONS ON GEOSCIENCE AND REMOTE SENSING;Yu Q, 2006, PHOTOGRAMMETRIC ENGINEERING & REMOTE SENSING;Lawrence R, 2005, REMOTE SENSING OF ENVIRONMENT;Ghamisi P, 2017, IEEE GEOSCIENCE AND REMOTE SENSING MAGAZINE;Liu H, 1998, ;Kavzoğlu T, 2003, INTERNATIONAL JOURNAL OF REMOTE SENSING;Bazi Y, 2006, IEEE TRANSACTIONS ON GEOSCIENCE AND REMOTE SENSING;Hansen M, 2000, INTERNATIONAL JOURNAL OF REMOTE SENSING;Foody G, 1997, INTERNATIONAL JOURNAL OF REMOTE SENSING;Tuia D, 2011, IEEE GEOSCIENCE AND REMOTE SENSING LETTERS;Blagus R, 2010, BMC BIOINFORMATICS;Rogan J, 2003, PHOTOGRAMMETRIC ENGINEERING & REMOTE SENSING;Bert R, 2004, CIVIL ENGINEERING;Camps‐Valls G, 2006, IEEE GEOSCIENCE AND REMOTE SENSING LETTERS;He Y, 2017, REMOTE SENSING OF ENVIRONMENT;Hayes M, 2014, REMOTE SENSING LETTERS;Yang X, 2011, PHOTOGRAMMETRIC ENGINEERING & REMOTE SENSING;Foody G, 2016, ISPRS INTERNATIONAL JOURNAL OF GEO-INFORMATION;García F, 2017, COMPUTERS & GEOSCIENCES;Townshend J, 1988, GEOGRAPHICAL JOURNAL;Chan J, 2001, IEEE TRANSACTIONS ON GEOSCIENCE AND REMOTE SENSING;Maxwell A, 2016, PHOTOGRAMMETRIC ENGINEERING & REMOTE SENSING;Samadzadegan F, 2012, CANADIAN JOURNAL OF REMOTE SENSING;Shi D, 2016, PHOTOGRAMMETRIC ENGINEERING & REMOTE SENSING;Lippitt C, 2008, PHOTOGRAMMETRIC ENGINEERING & REMOTE SENSING;Maxwell A, 2014, 
PHOTOGRAMMETRIC ENGINEERING & REMOTE SENSING;Team R, 2000, ;Baumgardner M, 2015, ;Cortes C, 1995, MACHINE LEARNING;Rumelhart D, 1986, THE MIT PRESS EBOOKS;Quinlan J, 1986, MACHINE LEARNING;, 1978, IEEE TRANSACTIONS ON SYSTEMS MAN AND CYBERNETICS;Stehman S, 2013, ;Karatzoglou A, 2019, ;Therneau T, 1999, ;Strobl C, 2009, THE R JOURNAL;Ripley B, 2009, ;Wright M, 2015, ",,,OPENALEX,"Maxwell A, 2018, INTERNATIONAL JOURNAL OF REMOTE SENSING","Maxwell A, 2018, INTERNATIONAL JOURNAL OF REMOTE SENSING" +https://openalex.org/W3101749733,,KERNEL METHODS IN MACHINE LEARNING,2008,en,article,1406,,,Thomas Hofmann;Bernhard Schölkopf;Alexander J. Smola,Thomas Hofmann;Bernhard Schölkopf;Alexander J. Smola,"Darmstadt University of Technology, Max Planck Institute for Biological Cybernetics and National ICT Australia;DARMSTADT UNIVERSITY OF TECHNOLOGY DEPARTMENT OF COMPUTER SCIENCE DARMSTADT GERMANY;MAX PLANCK INSTITUTE FOR BIOLOGICAL CYBERNETICS TÜBINGEN GERMANY;Darmstadt University of Technology, Max Planck Institute for Biological Cybernetics and National ICT Australia",,"We review machine learning methods employing positive definite kernels. These methods formulate learning and estimation problems in a reproducing kernel Hilbert space (RKHS) of functions defined on the data domain, expanded in terms of a kernel. Working in linear spaces of function has the benefit of facilitating the construction and analysis of learning algorithms while at the same time allowing large classes of functions. The latter include nonlinear functions as well as functions defined on nonvectorial data. 
We cover a wide range of methods, ranging from binary classifiers to sophisticated methods for estimation with structured data.",,,,,Reproducing kernel Hilbert space;Kernel embedding of distributions;Kernel method;Kernel (algebra);Computer science;Tree kernel;Artificial intelligence;Machine learning;Hilbert space;Binary classification;Radial basis function kernel;Representer theorem;Domain (mathematical analysis);Range (aeronautics);Polynomial kernel;Mathematics;Support vector machine;Discrete mathematics,,", 2005, STATISTICS FOR BIOLOGY AND HEALTH;Rasmussen C, 2005, THE MIT PRESS EBOOKS;Aapo H, 2004, THE MIT PRESS EBOOKS;Chen S, 1998, SIAM JOURNAL ON SCIENTIFIC COMPUTING;Shawe‐Taylor J, 2004, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Platt J, 1998, THE MIT PRESS EBOOKS;Aronszajn N, 1950, TRANSACTIONS OF THE AMERICAN MATHEMATICAL SOCIETY;Cristianini N, 2000, EPRINTS SOTON (UNIVERSITY OF SOUTHAMPTON);Fiedler M, 1973, CZECHOSLOVAK MATHEMATICAL JOURNAL;Wendland H, 2004, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Vapnik V, 2006, INFORMATION SCIENCE AND STATISTICS;, 2000, THE MIT PRESS EBOOKS;Gretton A, 2005, LECTURE NOTES IN COMPUTER SCIENCE;Айзерман М, 1964, AUTOMATION AND REMOTE CONTROL;Bamber D, 1975, JOURNAL OF MATHEMATICAL PSYCHOLOGY;Baum L, 1972, MEDICAL ENTOMOLOGY AND ZOOLOGY;Morozov V, 1984, ;Vapnik V, 1963, AUTOMATION AND REMOTE CONTROL;Berg C, 1984, GRADUATE TEXTS IN MATHEMATICS;Bartlett P, 2006, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Vert J, 2004, THE MIT PRESS EBOOKS;Joachims T, 2002, ;Allwein E, 2000, ;Karush W, 2013, ;Smola A, 2003, LECTURE NOTES IN COMPUTER SCIENCE;Friedman J, 1987, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Kettenring J, 1971, BIOMETRIKA;Kashima H, 2003, ;Jensen F, 1990, VBN FORSKNINGSPORTAL (AALBORG UNIVERSITET);Bach F, 2004, 2003 IEEE INTERNATIONAL CONFERENCE ON ACOUSTICS, SPEECH, AND SIGNAL PROCESSING, 2003. PROCEEDINGS. 
(ICASSP '03).;Schölkopf B, 1997, ;Ham J, 2004, ;, 2007, THE MIT PRESS EBOOKS;Kondor R, 2002, INTERNATIONAL CONFERENCE ON MACHINE LEARNING;Bochner S, 1933, MATHEMATISCHE ANNALEN;, 2000, APPLIED PHYSICS LETTERS;Taskar B, 2004, EMPIRICAL METHODS IN NATURAL LANGUAGE PROCESSING;Stitson M, 1998, THE MIT PRESS EBOOKS;Stewart J, 1976, ROCKY MOUNTAIN JOURNAL OF MATHEMATICS;Jebara T, 2003, LECTURE NOTES IN COMPUTER SCIENCE;Green P, 1985, LECTURE NOTES IN STATISTICS;Gretton A, 2005, MAX PLANCK INSTITUTE FOR PLASMA PHYSICS;Culotta A, 2005, SCHOLARWORKS@UMASSAMHERST (UNIVERSITY OF MASSACHUSETTS AMHERST);Magerman D, 1996, LECTURE NOTES IN COMPUTER SCIENCE;Bennet K, 2000, INTERNATIONAL CONFERENCE ON MACHINE LEARNING;Oliver N, 2015, ;Zettlemoyer L, 2012, ARXIV (CORNELL UNIVERSITY);Bloomfield P, 1983, ;McCallum A, 2005, ;Boser B, 1992, ;Freund Y, 1996, ;Hoerl A, 2000, TECHNOMETRICS;Fletcher R, 2000, ;Hotelling H, 1936, BIOMETRIKA;Vapnik V, 2015, ;Тихонов А, 1963, MEDICAL ENTOMOLOGY AND ZOOLOGY;Mercer J, 1909, PHILOSOPHICAL TRANSACTIONS OF THE ROYAL SOCIETY OF LONDON SERIES A CONTAINING PAPERS OF A MATHEMATICAL OR PHYSICAL CHARACTER;Tsochantaridis I, 2005, MPG.PURE (MAX PLANCK SOCIETY);Friedman J, 1974, IEEE TRANSACTIONS ON COMPUTERS;Kimeldorf G, 1971, JOURNAL OF MATHEMATICAL ANALYSIS AND APPLICATIONS;Taskar B, 2003, ;Darroch J, 1972, THE ANNALS OF MATHEMATICAL STATISTICS;Hettich R, 1993, SIAM REVIEW;Schoenberg I, 1938, ANNALS OF MATHEMATICS;Leslie C, 2001, ;Joachims T, 2005, ;Bennett K, 1992, OPTIMIZATION METHODS & SOFTWARE;Rényi A, 1959, ACTA MATHEMATICA ACADEMIAE SCIENTIARUM HUNGARICAE;Smola A, 1998, NEURAL NETWORKS;, 2000, APPLIED PHYSICS LETTERS;Mercer J, 1909, PROCEEDINGS OF THE ROYAL SOCIETY OF LONDON SERIES A CONTAINING PAPERS OF A MATHEMATICAL AND PHYSICAL CHARACTER;Hilbert D, 1989, TEUBNER-ARCHIV ZUR MATHEMATIK;Breneman J, 2005, TECHNOMETRICS;Boucheron S, 2005, ESAIM PROBABILITY AND STATISTICS;Zien A, 2000, BIOINFORMATICS;Koltchinskii V, 2001, IEEE TRANSACTIONS ON 
INFORMATION THEORY;Lee T, 2000, COMPUTERS & MATHEMATICS WITH APPLICATIONS;Alon N, 1997, JOURNAL OF THE ACM;Basilico J, 2004, ;Mangasarian O, 1965, OPERATIONS RESEARCH;Smola A, 1998, ALGORITHMICA;O’Sullivan F, 1986, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Wahba G, 1995, THE ANNALS OF STATISTICS;Weston J, 2002, ;Vishwanathan S, 2006, INTERNATIONAL JOURNAL OF COMPUTER VISION;Yang H, 1997, NEURAL COMPUTATION;Murray M, 2017, ;Steinwart I, 2002, JOURNAL OF COMPLEXITY;Lafferty J, 2004, ;Chapelle O, 2004, ;Einmahl J, 1992, THE ANNALS OF STATISTICS;Cook D, 1993, VIEW;Altün Y, 2004, ;Dauxois J, 1998, THE ANNALS OF STATISTICS;Das S, 1994, LINEAR ALGEBRA AND ITS APPLICATIONS;Crammer K, 2005, LECTURE NOTES IN COMPUTER SCIENCE;Minsky M, 1969, ;, 2018, ;Tibshirani R, 1996, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Vapnik V, 1995, ;Cortes C, 1995, MACHINE LEARNING;Wu Y, 1999, TECHNOMETRICS;Burges C, 1998, DATA MINING AND KNOWLEDGE DISCOVERY;Lafferty J, 2001, SCHOLARLY COMMONS (UNIVERSITY OF PENNSYLVANIA);Schölkopf B, 1998, NEURAL COMPUTATION;Schölkopf B, 2001, NEURAL COMPUTATION;Müller H, 1991, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Poggio T, 1990, PROCEEDINGS OF THE IEEE;Wainwright M, 2007, NOW PUBLISHERS, INC. 
EBOOKS;Schölkopf B, 2000, NEURAL COMPUTATION;Vapnik V, 1996, ;Bartlett P, 2001, LECTURE NOTES IN COMPUTER SCIENCE;Cardoso J, 1998, PROCEEDINGS OF THE IEEE;Goodall C, 1991, TECHNOMETRICS;Crammer K, 2002, ;Borgwardt K, 2006, BIOINFORMATICS;Elisseeff A, 2002, THE MIT PRESS EBOOKS;Sha F, 2003, ;Pietra S, 1997, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Cristianini N, 2006, STUDIES IN FUZZINESS AND SOFT COMPUTING;Collins M, 2002, THE MIT PRESS EBOOKS;, 2000, APPLIED PHYSICS LETTERS;DeCoste D, 2002, MACHINE LEARNING;Mathai A, 1980, TECHNOMETRICS;Altün Y, 2003, ;Herbrich R, 2001, THE MIT PRESS EBOOKS;Gärtner T, 2003, ACM SIGKDD EXPLORATIONS NEWSLETTER;Kwa, 2005, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Vishwanathan S, 2004, THE MIT PRESS EBOOKS;Dawid A, 1992, STATISTICS AND COMPUTING;Jaakkola T, 1999, ;Mika S, 2003, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Fortet R, 1953, ANNALES SCIENTIFIQUES DE L ÉCOLE NORMALE SUPÉRIEURE;Mendelson S, 2003, LECTURE NOTES IN COMPUTER SCIENCE;Dekel O, 2003, ;Watkins C, 2000, THE MIT PRESS EBOOKS;Orey S, 1979, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Parzen E, 1970, ;Cortes C, 2005, ;Rätsch G, 2007, PLOS COMPUTATIONAL BIOLOGY;Chen A, 2005, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Poggio T, 1975, BIOLOGICAL CYBERNETICS;Fitzgerald C, 1995, LINEAR ALGEBRA AND ITS APPLICATIONS;Panik M, 1996, APPLIED OPTIMIZATION;Altün Y, 2004, UNCERTAINTY IN ARTIFICIAL INTELLIGENCE;Searle S, 1971, ;Alon N, 2002, ;Cortes C, 1995, MACHINE LEARNING;Bertsekas D, 1997, JOURNAL OF THE OPERATIONAL RESEARCH SOCIETY;Hoerl A, 1970, TECHNOMETRICS;Wahba G, 1990, SOCIETY FOR INDUSTRIAL AND APPLIED MATHEMATICS EBOOKS;Witzgall C, 1989, MATHEMATICS OF COMPUTATION;Schölkopf B, 2001, MPG.PURE (MAX PLANCK SOCIETY);Vapnik V, 1971, THEORY OF PROBABILITY AND ITS APPLICATIONS;, 2001, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Hoerl A, 1970, TECHNOMETRICS;Kuhn H, 1951, ;Wainwright M, 2008, FOUNDATIONS AND TRENDS® IN MACHINE 
LEARNING;Hotelling H, 1936, BIOMETRIKA;Kingmán J, 1979, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES A (GENERAL);Huber P, 1985, THE ANNALS OF STATISTICS;Herbrich R, 2000, THE MIT PRESS EBOOKS;CrammerKoby, 2002, JOURNAL OF MACHINE LEARNING RESEARCH;Shoesmith E, 1984, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES D (THE STATISTICIAN);Hammersley J, 1971, ;Aronszajn N, 1950, TRANSACTIONS OF THE AMERICAN MATHEMATICAL SOCIETY;Freund Y, 1996, MEDICAL ENTOMOLOGY AND ZOOLOGY;Cristianini N, 2002, THE MIT PRESS EBOOKS;Collins M, 2000, ;Kettenring J, 1971, BIOMETRIKA;Friedman J, 1987, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Williams D, 1984, BIOMETRICS;Murray M, 1993, ;Cook D, 1993, JOURNAL OF COMPUTATIONAL AND GRAPHICAL STATISTICS;, 2000, APPLIED PHYSICS LETTERS;Murray M, 1995, BIOMETRICS;Amati G, 2018, ENCYCLOPEDIA OF DATABASE SYSTEMS;O’Sullivan F, 1986, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Rasmussen C, 2022, DIRECTORY OF OPEN ACCESS BOOKS (OAPEN FOUNDATION)",,,OPENALEX,"Hofmann T, 2008, ","Hofmann T, 2008, " +https://openalex.org/W2789894922,10.1001/jama.2017.18391,Big Data and Machine Learning in Health Care,2018,en,article,2147,JAMA,JAMA,Andrew L. Beam;Isaac S. Kohane,Andrew L. Beam;Isaac S. Kohane,"Department of Biomedical Informatics, Harvard Medical School, Boston, Massachusetts;Department of Biomedical Informatics, Harvard Medical School, Boston, Massachusetts",,"Our website uses cookies to enhance your experience. 
By continuing to use our site, or clicking ""Continue,"" you are agreeing to our Cookie Policy | Continue JAMA HomeNew OnlineCurrent IssueFor Authors Publications JAMA JAMA Network Open JAMA Cardiology JAMA Dermatology JAMA Health Forum JAMA Internal Medicine JAMA Neurology JAMA Oncology JAMA Ophthalmology JAMA Otolaryngology–Head & Neck Surgery JAMA Pediatrics JAMA Psychiatry JAMA Surgery Archives of Neurology & Psychiatry (1919-1959) Podcasts Clinical Reviews Editors' Summary Medical News Author Interviews More JN Learning / CMESubscribeJobsInstitutions / LibrariansReprints & Permissions Terms of Use | Privacy Policy | Accessibility Statement 2023 American Medical Association. All Rights Reserved Search All JAMA JAMA Network Open JAMA Cardiology JAMA Dermatology JAMA Forum Archive JAMA Health Forum JAMA Internal Medicine JAMA Neurology JAMA Oncology JAMA Ophthalmology JAMA Otolaryngology–Head & Neck Surgery JAMA Pediatrics JAMA Psychiatry JAMA Surgery Archives of Neurology & Psychiatry Input Search Term Sign In Individual Sign In Sign inCreate an Account Access through your institution Sign In Purchase Options: Buy this article Rent this article Subscribe to the JAMA journal",319,13,1317,1317,Medicine;Otorhinolaryngology;Family medicine;Sign (mathematics);Psychiatry,US,"Silver D, 2016, NATURE;Silver D, 2017, NATURE;Gulshan V, 2016, JAMA;Weber G, 2014, JAMA;Brand R, 1976, CIRCULATION",,,OPENALEX,"Beam A, 2018, JAMA","Beam A, 2018, JAMA" +https://openalex.org/W2792946961,10.1103/physrevlett.122.040504,Quantum Machine Learning in Feature Hilbert Spaces,2019,en,article,1649,PHYSICAL REVIEW LETTERS,Physical Review Letters,Maria Schuld;Nathan Killoran,Maria Schuld;Nathan Killoran,"Xanadu, 372 Richmond Street West, Toronto M5V 2L7, Canada;Xanadu, 372 Richmond Street West, Toronto M5V 2L7, Canada",,"A basic idea of quantum computing is surprisingly similar to that of kernel methods in machine learning, namely, to efficiently perform computations in an intractably large 
Hilbert space. In this Letter we explore some theoretical foundations of this link and show how it opens up a new avenue for the design of quantum machine learning algorithms. We interpret the process of encoding inputs in a quantum state as a nonlinear feature map that maps data to quantum Hilbert space. A quantum computer can now analyze the input data in this feature space. Based on this link, we discuss two approaches for building a quantum model for classification. In the first approach, the quantum device estimates inner products of quantum states to compute a classically intractable kernel. The kernel can be fed into any classical kernel method such as a support vector machine. In the second approach, we use a variational quantum circuit as a linear model that classifies data explicitly in Hilbert space. We illustrate these ideas with a feature map based on squeezing in a continuous-variable system, and visualize the working principle with two-dimensional minibenchmark datasets.",122,4,040504,040504,Hilbert space;Kernel (algebra);Computer science;Quantum computer;Reproducing kernel Hilbert space;Quantum machine learning;Quantum state;Feature vector;Kernel method;Feature (linguistics);Quantum algorithm;Quantum information;POVM;Support vector machine;Quantum process;Algorithm;Quantum;Theoretical computer science;Artificial intelligence;Quantum operation;Open quantum system;Mathematics;Quantum mechanics;Physics;Quantum dynamics;Pure mathematics,CA,"Preskill J, 2018, QUANTUM;Aronszajn N, 1950, TRANSACTIONS OF THE AMERICAN MATHEMATICAL SOCIETY;Williams C, 2003, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Braunstein S, 2005, REVIEWS OF MODERN PHYSICS;Weedbrook C, 2012, REVIEWS OF MODERN PHYSICS;McClean J, 2016, NEW JOURNAL OF PHYSICS;Mercer J, 1909, PHILOSOPHICAL TRANSACTIONS OF THE ROYAL SOCIETY OF LONDON SERIES A CONTAINING PAPERS OF A MATHEMATICAL OR PHYSICAL CHARACTER;Rebentrost P, 2014, PHYSICAL REVIEW LETTERS;Berlinet A, 2004, ;Hofmann T, 2008, THE 
ANNALS OF STATISTICS;Gottesman D, 2001, PHYSICAL REVIEW A;Hofmann T, 2008, ;Barnett S, 2002, ;Klauder J, 1985, WORLD SCIENTIFIC EBOOKS;Wiebe N, 2012, PHYSICAL REVIEW LETTERS;Bremner M, 2016, PHYSICAL REVIEW LETTERS;L. B, 2003, ;Schuld M, 2016, PHYSICAL REVIEW. A/PHYSICAL REVIEW, A;Wan K, 2017, NPJ QUANTUM INFORMATION;, 2006, ;Schuld M, 2017, EUROPHYSICS LETTERS (EPL);Madrid R, 2005, EUROPEAN JOURNAL OF PHYSICS;Bartlett S, 2002, PHYSICAL REVIEW A;Amin M, 2015, PHYSICAL REVIEW A;, 2005, ;Benedetti M, 2017, PHYSICAL REVIEW X;Low G, 2014, PHYSICAL REVIEW A;O’Gorman B, 2015, THE EUROPEAN PHYSICAL JOURNAL SPECIAL TOPICS;Douce T, 2017, PHYSICAL REVIEW LETTERS;Kivinen J, 2002, LECTURE NOTES IN COMPUTER SCIENCE;Flamini F, 2017, SCIENTIFIC REPORTS;Wittek P, 2017, SCIENTIFIC REPORTS;Wang S, 2015, JOURNAL OF MATHEMATICS RESEARCH;Langford J, 2012, ARXIV (CORNELL UNIVERSITY);Chatterjee R, 2017, QUANTUM INFORMATION AND COMPUTATION",,,OPENALEX,"Schuld M, 2019, PHYSICAL REVIEW LETTERS","Schuld M, 2019, PHYSICAL REVIEW LETTERS" +https://openalex.org/W2701059868,10.1109/sp.2017.12,SecureML: A System for Scalable Privacy-Preserving Machine Learning,2017,en,article,1847,,,Payman Mohassel;Yupeng Zhang,Payman Mohassel;Yupeng Zhang,Visa Research;University of Maryland,,"Machine learning is widely used in practice to produce predictive models for applications such as image processing, speech and text recognition. These models are more accurate when trained on large amount of data collected from different sources. However, the massive data collection raises privacy concerns. In this paper, we present new and efficient protocols for privacy preserving machine learning for linear regression, logistic regression and neural network training using the stochastic gradient descent method. Our protocols fall in the two-server model where data owners distribute their private data among two non-colluding servers who train various models on the joint data using secure two-party computation (2PC). 
We develop new techniques to support secure arithmetic operations on shared decimal numbers, and propose MPC-friendly alternatives to non-linear functions such as sigmoid and softmax that are superior to prior work. We implement our system in C++. Our experiments validate that our protocols are several orders of magnitude faster than the state of the art implementations for privacy preserving linear and logistic regressions, and scale to millions of data samples with thousands of features. We also implement the first privacy preserving system for training neural networks.",,,19,38,Computer science;Softmax function;Scalability;Server;Machine learning;Artificial neural network;Information privacy;Artificial intelligence;Stochastic gradient descent;MNIST database;Implementation;Data mining;Computer security;Computer network;Database,GB;US,"Lu Z, 2010, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES A (STATISTICS IN SOCIETY);Paillier P, 2007, LECTURE NOTES IN COMPUTER SCIENCE;Abadi M, 2016, ;Canetti R, 2001, ;Yao A, 1982, ;Shokri R, 2015, ;Agrawal R, 2000, ;Dowlin N, 2016, INTERNATIONAL CONFERENCE ON MACHINE LEARNING;Lindell Y, 2000, LECTURE NOTES IN COMPUTER SCIENCE;Demmler D, 2015, ;Malkhi D, 2004, ;Kolesnikov V, 2008, LECTURE NOTES IN COMPUTER SCIENCE;Ishai Y, 2003, LECTURE NOTES IN COMPUTER SCIENCE;Peikert C, 2008, LECTURE NOTES IN COMPUTER SCIENCE;Lindell Y, 2008, JOURNAL OF CRYPTOLOGY;Guyon I, 2004, EPRINTS SOTON (UNIVERSITY OF SOUTHAMPTON);Bellare M, 2012, ;Song S, 2013, ;Chaudhuri K, 2008, ;Nikolaenko V, 2013, ;Jagannathan G, 2005, ;Du W, 2004, ;Asharov G, 2013, ;Bellare M, 2013, ;Nikolaenko V, 2013, ;Damgård I, 2008, INTERNATIONAL JOURNAL OF APPLIED CRYPTOGRAPHY;Bunn P, 2007, ;Livni R, 2014, ;Yu H, 2006, LECTURE NOTES IN COMPUTER SCIENCE;Aono Y, 2016, ;Vaidya J, 2007, KNOWLEDGE AND INFORMATION SYSTEMS;Sanil A, 2004, ;Nayak K, 2015, ;Du W, 2005, ;Slavković A, 2007, ;Wu S, 2013, 人工知能学会全国大会論文集;Kamara S, 2011, IACR CRYPTOLOGY EPRINT ARCHIVE;Gascón A, 2016, IACR 
CRYPTOLOGY EPRINT ARCHIVE;Gilad-Bachrach R, 2016, ;Wang X, 2016, IACR CRYPTOLOGY EPRINT ARCHIVE",,,OPENALEX,"Mohassel P, 2017, ","Mohassel P, 2017, " +https://openalex.org/W3159649695,10.2139/ssrn.3399990,Foundations of Machine Learning,2019,en,article,1159,SSRN ELECTRONIC JOURNAL,SSRN Electronic Journal,Ajit Singh,Ajit Singh,"INSTITUT de DIPLOMATIE PUBLIQUE, United Kingdom;Patna University","Ajit Singh (corresponding author), INSTITUT de DIPLOMATIE PUBLIQUE, United Kingdom; Patna University",,,,,,Computer science;Artificial intelligence;Machine learning,IN,,,,OPENALEX,"Singh A, 2019, SSRN ELECTRONIC JOURNAL","Singh A, 2019, SSRN ELECTRONIC JOURNAL" +https://openalex.org/W1944672,,Foundations of Machine Learning,2012,nl,book,1162,MEDICAL ENTOMOLOGY AND ZOOLOGY,Medical Entomology and Zoology,Mehryar Mohri;Afshin Rostamizadeh;Ameet Talwalkar,Mehryar Mohri;Afshin Rostamizadeh;Ameet Talwalkar,University of California at Berkeley;New York University;New York University;University of California at Berkeley,,Fundamental topics in machine learning are presented along with theoretical and conceptual tools for the discussion and proof of algorithms.,135,43,2040,2040,Mathematical proof;Computer science;Artificial intelligence;Class (philosophy);Ranking (information retrieval);Key (lock);Machine learning;Management science;Mathematics;Engineering,US,", 2005, STATISTICS FOR BIOLOGY AND HEALTH;Shawe‐Taylor J, 2004, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Salzberg S, 1994, ;McCullagh P, 1980, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Cesa‐Bianchi N, 2006, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Dietterich T, 2000, MACHINE LEARNING;Lothaire M, 1984, HAL (LE CENTRE POUR LA COMMUNICATION SCIENTIFIQUE DIRECTE);Kearns M, 1994, THE MIT PRESS EBOOKS;Egan J, 1975, ;Schölkopf B, 2001, LECTURE NOTES IN COMPUTER SCIENCE;Айзерман М, 1964, AUTOMATION AND REMOTE CONTROL;Anthony M, 1999, LONDON SCHOOL OF ECONOMICS AND POLITICAL SCIENCE RESEARCH ONLINE (LONDON SCHOOL OF 
ECONOMICS AND POLITICAL SCIENCE);Bertsekas D, 1987, ;Rätsch G, 2001, MACHINE LEARNING;Ledoux M, 1991, ;Haussler D, 1999, ;Karush W, 2013, ;Weston J, 1999, THE EUROPEAN SYMPOSIUM ON ARTIFICIAL NEURAL NETWORKS;Sutton R, 1984, SCHOLARWORKS@UMASSAMHERST (UNIVERSITY OF MASSACHUSETTS AMHERST);Collins M, 2002, MACHINE LEARNING;Schapire R, 1997, QUT EPRINTS (QUEENSLAND UNIVERSITY OF TECHNOLOGY);Koltchinskii V, 2002, THE ANNALS OF STATISTICS;Berg C, 1984, ;Lothaire M, 2005, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Haagerup U, 1981, STUDIA MATHEMATICA;Littman M, 1996, ;Weinberger K, 2006, ;Koltchinskii V, 2000, BIRKHÄUSER BOSTON EBOOKS;Takimoto E, 2002, LECTURE NOTES IN COMPUTER SCIENCE;Bartlett P, 2002, LECTURE NOTES IN COMPUTER SCIENCE;Dvoretzky A, 1956, ;Trakhtenbrot B, 1973, MEDICAL ENTOMOLOGY AND ZOOLOGY;Vidyasagar M, 1997, SPRINGER EBOOKS;Kearns M, 1998, ;Agarwal S, 2005, LECTURE NOTES IN COMPUTER SCIENCE;Cortes C, 2007, LECTURE NOTES IN COMPUTER SCIENCE;Rudin C, 2005, LECTURE NOTES IN COMPUTER SCIENCE;Mansour Y, 2000, ;Cesa‐Bianchi N, 2005, LECTURE NOTES IN COMPUTER SCIENCE;Shalev‐Shwartz S, 2009, CONFERENCE ON LEARNING THEORY;Cortes C, 2007, LECTURE NOTES IN COMPUTER SCIENCE;Singh S, 1993, CHEMICO-BIOLOGICAL INTERACTIONS;Kontorovich L, 2006, LECTURE NOTES IN COMPUTER SCIENCE;Allauzen C, 2011, LECTURE NOTES IN COMPUTER SCIENCE;Bergadano F, 1995, LECTURE NOTES IN COMPUTER SCIENCE;Cesa‐Bianchi N, 2001, LECTURE NOTES IN COMPUTER SCIENCE;Tomaszewski B, 1982, COLLOQUIUM MATHEMATICUM;Widrow B, 1960, ;Ailon N, 2007, ARXIV.ORG;Friedman J, 2001, THE ANNALS OF STATISTICS;Freund Y, 1997, JOURNAL OF COMPUTER AND SYSTEM SCIENCES;Tenenbaum J, 2000, SCIENCE;Rosenblatt F, 1958, PSYCHOLOGICAL REVIEW;, 1981, MATHEMATICS AND COMPUTERS IN SIMULATION;Robbins H, 1951, THE ANNALS OF MATHEMATICAL STATISTICS;Friedman J, 2000, THE ANNALS OF STATISTICS;Valiant L, 1984, ;Gold E, 1967, INFORMATION AND CONTROL;Vapnik V, 2015, ;Dietterich T, 1995, JOURNAL OF ARTIFICIAL INTELLIGENCE RESEARCH;Schapire R, 
1998, ;Robinson P, 1985, ECONOMICA;Kiefer J, 1952, THE ANNALS OF MATHEMATICAL STATISTICS;Sion M, 1958, PACIFIC JOURNAL OF MATHEMATICS;Järvelin K, 2017, ACM SIGIR FORUM;Kimeldorf G, 1971, JOURNAL OF MATHEMATICAL ANALYSIS AND APPLICATIONS;Azuma K, 1967, TOHOKU MATHEMATICAL JOURNAL;Freund Y, 1998, ;Sauer N, 1972, JOURNAL OF COMBINATORIAL THEORY SERIES A;Saunders C, 1998, EPRINTS SOTON (UNIVERSITY OF SOUTHAMPTON);Gold E, 1978, INFORMATION AND CONTROL;Bennett G, 1962, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Schoenberg I, 1938, TRANSACTIONS OF THE AMERICAN MATHEMATICAL SOCIETY;Cesa‐Bianchi N, 1997, JOURNAL OF THE ACM;Ham J, 2004, ;Shelah S, 1972, PACIFIC JOURNAL OF MATHEMATICS;Angluin D, 1978, INFORMATION AND CONTROL;Khintchine A, 1923, MATHEMATISCHE ZEITSCHRIFT;Mohri M, 2009, MONOGRAPHS IN THEORETICAL COMPUTER SCIENCE;Ye Y, 2011, MATHEMATICS OF OPERATIONS RESEARCH;Dudley R, 1984, LECTURE NOTES IN MATHEMATICS;Bernstein S, 1927, MATHEMATISCHE ANNALEN;Hoffgen K, 1995, JOURNAL OF COMPUTER AND SYSTEM SCIENCES;Dudley R, 2010, ;Ehrenfeucht A, 1988, CONFERENCE ON LEARNING THEORY;Littlestone N, 1989, CONFERENCE ON LEARNING THEORY;Kivinen J, 1999, ;Pollard D, 1989, STATISTICAL SCIENCE;Kearns M, 1999, JOURNAL OF COMPUTER AND SYSTEM SCIENCES;Rudin C, 2004, ;Stoltz G, 2005, MACHINE LEARNING;Slud E, 1977, THE ANNALS OF PROBABILITY;Vijayakumar S, 1999, ;Kushner H, 2009, WILEY INTERDISCIPLINARY REVIEWS COMPUTATIONAL STATISTICS;Devroye L, 1995, PATTERN RECOGNITION;Lafferty J, 1999, ;Cesa‐Bianchi N, 1999, JOURNAL OF COMPUTER AND SYSTEM SCIENCES;Tate R, 1953, THE ANNALS OF MATHEMATICAL STATISTICS;Freund Y, 1993, ;Roweis S, 2000, SCIENCE;Boser B, 1992, ;Efron B, 2004, THE ANNALS OF STATISTICS;Hazeghi K, 1995, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Hoeffding W, 1994, SPRINGER SERIES IN STATISTICS;Hoerl A, 2000, TECHNOMETRICS;Spencer J, 1991, MEDICAL ENTOMOLOGY AND ZOOLOGY;Joachims T, 2002, ;Schapire R, 1990, MACHINE LEARNING;Nocedal J, 1980, MATHEMATICS OF 
COMPUTATION;Neumann J, 1928, MATHEMATISCHE ANNALEN;Schapire R, 2000, MACHINE LEARNING;Littlestone N, 1994, INFORMATION AND COMPUTATION;Tsochantaridis I, 2005, MPG.PURE (MAX PLANCK SOCIETY);Dwork C, 2001, ;Freund Y, 1995, INFORMATION AND COMPUTATION;Cucker F, 2001, BULLETIN OF THE AMERICAN MATHEMATICAL SOCIETY;Taskar B, 2003, ;Dasgupta S, 2002, RANDOM STRUCTURES AND ALGORITHMS;Haussler D, 1992, INFORMATION AND COMPUTATION;Mika S, 1998, ;Mason L, 1999, ;Freund Y, 1999, GAMES AND ECONOMIC BEHAVIOR;Angluin D, 1982, JOURNAL OF THE ACM;Shawe‐Taylor J, 1998, IEEE TRANSACTIONS ON INFORMATION THEORY;Cortes C, 2003, ;, 2000, APPLIED PHYSICS LETTERS;Crites R, 1995, ;Dudley R, 2010, ;Cesa‐Bianchi N, 2004, IEEE TRANSACTIONS ON INFORMATION THEORY;, 2000, APPLIED PHYSICS LETTERS;Freund Y, 1996, ;Koltchinskii V, 2001, IEEE TRANSACTIONS ON INFORMATION THEORY;Foster D, 1997, GAMES AND ECONOMIC BEHAVIOR;Even-Dar E, 2001, LECTURE NOTES IN COMPUTER SCIENCE;Haussler D, 1995, JOURNAL OF COMBINATORIAL THEORY SERIES A;Grove A, 1998, ;Agarwal S, 2005, ILLINOIS DIGITAL ENVIRONMENT FOR ACCESS TO LEARNING AND SCHOLARSHIP (UNIVERSITY OF ILLINOIS AT URBANA-CHAMPAIGN);Drucker H, 1995, NEURAL INFORMATION PROCESSING SYSTEMS;Cortes C, 2010, ;Blum A, 2007, ALGORITHMIC GAME THEORY;Rogers W, 1978, THE ANNALS OF STATISTICS;Pitt L, 1993, JOURNAL OF THE ACM;Ron D, 1995, ;Beimel A, 2000, JOURNAL OF THE ACM;Devroye L, 1979, IEEE TRANSACTIONS ON INFORMATION THEORY;Balcan M, 2008, MACHINE LEARNING;Blum A, 2005, LECTURE NOTES IN COMPUTER SCIENCE;Allauzen C, 2009, INTERNATIONAL JOURNAL OF FOUNDATIONS OF COMPUTER SCIENCE;Minsky M, 1969, ;Tibshirani R, 1996, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Boyd S, 2004, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Hanley J, 1982, RADIOLOGY;Quinlan J, 1986, MACHINE LEARNING;Lafferty J, 2001, SCHOLARLY COMMONS (UNIVERSITY OF PENNSYLVANIA);Pearson K, 1901, THE LONDON EDINBURGH AND DUBLIN PHILOSOPHICAL MAGAZINE AND JOURNAL OF SCIENCE;Belkin M, 2002, 
THE MIT PRESS EBOOKS;Bartlett P, 2001, LECTURE NOTES IN COMPUTER SCIENCE;Schapire R, 2003, LECTURE NOTES IN STATISTICS;Blumer A, 1989, JOURNAL OF THE ACM;Crammer K, 2002, ;Zinkevich M, 2003, ;, 1998, KYBERNETES;Tesauro G, 1995, COMMUNICATIONS OF THE ACM;Rifkin R, 2004, ;Littlestone N, 1988, MACHINE LEARNING;Hsieh C, 2008, ;Jaakkola T, 1994, NEURAL COMPUTATION;Kearns M, 1994, JOURNAL OF THE ACM;, 2000, APPLIED PHYSICS LETTERS;Tsitsiklis J, 1994, MACHINE LEARNING;Kalai A, 2004, JOURNAL OF COMPUTER AND SYSTEM SCIENCES;Vovk V, 1990, CONFERENCE ON LEARNING THEORY;Breiman L, 1999, NEURAL COMPUTATION;Kearns M, 1999, NEURAL COMPUTATION;Foster D, 1999, GAMES AND ECONOMIC BEHAVIOR;Mohri M, 2002, ;Massart P, 2000, ANNALES DE LA FACULTÉ DES SCIENCES DE TOULOUSE MATHÉMATIQUES;Devroye L, 1979, IEEE TRANSACTIONS ON INFORMATION THEORY;Long P, 2009, MACHINE LEARNING;Cossock D, 2008, IEEE TRANSACTIONS ON INFORMATION THEORY;Cortes C, 2004, ;Cortes C, 2004, NEURAL INFORMATION PROCESSING SYSTEMS;Cortes C, 2010, ;Mohri M, 2010, JOURNAL OF MACHINE LEARNING RESEARCH;Watkins C, 2000, THE MIT PRESS EBOOKS;Singh S, 1996, ;Oncina J, 1993, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Drucker H, 1993, INTERNATIONAL JOURNAL OF PATTERN RECOGNITION AND ARTIFICIAL INTELLIGENCE;Lebanon G, 2002, THE MIT PRESS EBOOKS;Assouad P, 1983, ANNALES DE L’INSTITUT FOURIER;Cortes C, 2008, ;Cortes C, 2008, ;Lehrer E, 2003, GAMES AND ECONOMIC BEHAVIOR;Rätsch G, 2002, LECTURE NOTES IN COMPUTER SCIENCE;Cortes C, 2007, THE MIT PRESS EBOOKS;Duffy N, 1999, ;Crammer K, 2000, ;Xu H, 2008, ;Mansour Y, 1999, ;Ryzin J, 1986, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Bertsekas D, 1997, JOURNAL OF THE OPERATIONAL RESEARCH SOCIETY;Watkins C, 1989, OPENGREY (INSTITUT DE L'INFORMATION SCIENTIFIQUE ET TECHNIQUE);McDiarmid C, 1989, CAMBRIDGE UNIVERSITY PRESS EBOOKS;, 2005, INTERDISCIPLINARY MATHEMATICAL SCIENCES;Bartlett P, 2002, MACHINE LEARNING;Johnson W, 1984, CONTEMPORARY MATHEMATICS - AMERICAN 
MATHEMATICAL SOCIETY",,,OPENALEX,"Mohri M, 2012, MEDICAL ENTOMOLOGY AND ZOOLOGY","Mohri M, 2012, MEDICAL ENTOMOLOGY AND ZOOLOGY" +https://openalex.org/W2170282673,10.1109/tpami.2008.275,Faster and Better: A Machine Learning Approach to Corner Detection,2008,en,article,1834,IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE,IEEE Transactions on Pattern Analysis and Machine Intelligence,Edward Rosten;Reid Porter;Tom Drummond,E. Rosten;R. Porter;T. Drummond,"Department of Engineering, Cambridge University, Cambridge, UK. er258@cam.ac.uk;Los Alamos National Laboratory, ISR2 Space and Remote Sensing, Los Alamos, NM, USA;Department of Engineering, University of Cambridge, Cambridge, UK",,"The repeatability and efficiency of a corner detector determines how likely it is to be useful in a real-world application. The repeatability is important because the same scene viewed from different positions should yield features which correspond to the same real-world 3D locations. The efficiency is important because this determines whether the detector combined with further processing can operate at frame rate. Three advances are described in this paper. First, we present a new heuristic for feature detection and, using machine learning, we derive a feature detector from this which can fully process live PAL video using less than 5 percent of the available processing time. By comparison, most other detectors cannot even operate at frame rate (Harris detector 115 percent, SIFT 195 percent). Second, we generalize the detector, allowing it to be optimized for repeatability, with little loss of efficiency. Third, we carry out a rigorous comparison of corner detectors based on the above repeatability criterion applied to 3D scenes. We show that, despite being principally constructed for speed, on these stringent tests, our heuristic detector significantly outperforms existing feature detectors. 
Finally, the comparison demonstrates that using machine learning produces significant improvements in repeatability, yielding a detector that is both very fast and of very high quality.",32,1,105,119,Detector;Repeatability;Frame rate;Artificial intelligence;Heuristic;Computer science;Scale-invariant feature transform;Computer vision;Corner detection;Feature (linguistics);Feature extraction;Frame (networking);Pattern recognition (psychology);Mathematics;Image (mathematics);Statistics,GB;US,"Rosten E, 2006, LECTURE NOTES IN COMPUTER SCIENCE;Ballard D, 1981, PATTERN RECOGNITION;Haralock R, 1991, ADDISON-WESLEY LONGMAN PUBLISHING CO., INC. EBOOKS;Schmid C, 2000, INTERNATIONAL JOURNAL OF COMPUTER VISION;Mikolajczyk K, 2002, LECTURE NOTES IN COMPUTER SCIENCE;Moravec H, 2018, RESEARCH SHOWCASE @ CARNEGIE MELLON UNIVERSITY (CARNEGIE MELLON UNIVERSITY);Lepetit V, 2006, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Kitchen L, 1982, PATTERN RECOGNITION LETTERS;Brown M, 2002, ;Schaffalitzky F, 2002, LECTURE NOTES IN COMPUTER SCIENCE;Rosenfeld A, 1973, IEEE TRANSACTIONS ON COMPUTERS;Trajković M, 1998, IMAGE AND VISION COMPUTING;Freeman, 1977, IEEE TRANSACTIONS ON COMPUTERS;Moreels P, 2006, INTERNATIONAL JOURNAL OF COMPUTER VISION;Rattarangsi A, 1992, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Schmid C, 2002, ;Wang H, 1995, IMAGE AND VISION COMPUTING;Trucco E, 1995, AI COMMUNICATIONS;Deriche R, 1993, INTERNATIONAL JOURNAL OF COMPUTER VISION;Rosenfeld A, 1975, IEEE TRANSACTIONS ON COMPUTERS;Tissainayagam P, 2004, IMAGE AND VISION COMPUTING;Rohr K, 1997, IMAGE AND VISION COMPUTING;Rosten E, 2005, LECTURE NOTES IN COMPUTER SCIENCE;Medioni G, 1987, COMPUTER VISION GRAPHICS AND IMAGE PROCESSING;Robbins B, 1997, IMAGE AND VISION COMPUTING;Shen F, 2002, PATTERN RECOGNITION LETTERS;Paler K, 1984, PATTERN RECOGNITION;Ray B, 2002, PATTERN RECOGNITION;Mohanna F, 2001, ;Urdiales C, 2003, ELECTRONICS LETTERS;Rohr K, 1992, INTERNATIONAL JOURNAL OF COMPUTER 
VISION;Sankar P, 1978, COMPUTER GRAPHICS AND IMAGE PROCESSING;Singh A, 1990, COMPUTER VISION GRAPHICS AND IMAGE PROCESSING;Bae S, 2002, PATTERN RECOGNITION LETTERS;Arrebola F, 1997, ELECTRONICS LETTERS;Guiducci A, 1988, PATTERN RECOGNITION LETTERS;Luo B, 1999, PATTERN RECOGNITION LETTERS;Bandera A, 2000, ELECTRONICS LETTERS;Liu S, 1990, PATTERN RECOGNITION;Langridge D, 1982, COMPUTER GRAPHICS AND IMAGE PROCESSING;Seeger U, 1994, PATTERN RECOGNITION LETTERS;Cheng F, 1988, PATTERN RECOGNITION LETTERS;Rangarajan K, 2005, ;Arrebola F, 1999, ELECTRONICS LETTERS;Wu Z, 1983, PATTERN RECOGNITION;Ogawa H, 1989, PATTERN RECOGNITION;Lee J, 2003, ;Lowe D, 2004, INTERNATIONAL JOURNAL OF COMPUTER VISION;Canny J, 1986, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Press W, 1994, ;Quinlan J, 1986, MACHINE LEARNING;Harris C, 1988, ;Quinlan J, 1986, MACHINE LEARNING;Shi J, 1994, ;Mikolajczyk K, 2005, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Duda R, 1972, COMMUNICATIONS OF THE ACM;Smith S, 1997, INTERNATIONAL JOURNAL OF COMPUTER VISION;Tomasi C, 1991, ;Rosten E, 2005, ;Mikolajczyk K, 2002, ;Sklar B, 1987, ;Teh C, 1989, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Mokhtarian F, 1998, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Saint-Marc P, 1991, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;He X, 2004, PROCEEDINGS OF THE 17TH INTERNATIONAL CONFERENCE ON PATTERN RECOGNITION, 2004. 
ICPR 2004.;Loy G, 2002, LECTURE NOTES IN COMPUTER SCIENCE;Noble J, 1988, IMAGE AND VISION COMPUTING;Schaffalitzky F, 2002, ;Ansari N, 1991, PATTERN RECOGNITION;Trujillo L, 2006, ;Triggs B, 2004, LECTURE NOTES IN COMPUTER SCIENCE;Beus H, 1987, PATTERN RECOGNITION;Mehrotra R, 1990, PATTERN RECOGNITION;Lee J, 1995, IEEE TRANSACTIONS ON IMAGE PROCESSING;Kenney C, 2005, ;Davies E, 1988, IEE PROCEEDINGS E COMPUTERS AND DIGITAL TECHNIQUES;Dias P, 2002, ;Ghosal S, 2002, ;Cooper D, 1993, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Li L, 1999, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Zuliani M, 2005, ;Kienzle W, 2006, ;Kenney C, 2003, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Quddus A, 1999, ELECTRONICS LETTERS;Beymer D, 2002, ;Chen W, 2002, ;Rajan P, 2003, ;Xie X, 1993, PATTERN RECOGNITION;Giraudon G, 2002, ;O’Gorman L, 2003, ;Zhang X, 2002, ;Cooper D, 1991, ;Sohn K, 2003, ;Luo B, 2004, ",,,OPENALEX,"Rosten E, 2008, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE","Rosten E, 2008, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE" +https://openalex.org/W2588978745,10.1148/rg.2017160130,Machine Learning for Medical Imaging,2017,en,review,1635,RADIOGRAPHICS,Radiographics,Bradley J. Erickson;Panagiotis Korfiatis;Zeynettin Akkus;Timothy L. Kline,Bradley J. Erickson;Panagiotis Korfiatis;Zeynettin Akkus;Timothy L. 
Kline,"From the Department of Radiology, Mayo Clinic, 200 First St SW, Rochester, MN 55905;From the Department of Radiology, Mayo Clinic, 200 First St SW, Rochester, MN 55905;From the Department of Radiology, Mayo Clinic, 200 First St SW, Rochester, MN 55905;From the Department of Radiology, Mayo Clinic, 200 First St SW, Rochester, MN 55905",,"RSNA, 2017.",37,2,505,515,Artificial intelligence;Machine learning;Medical diagnosis;Computer science;Medical imaging;Rendering (computer graphics);Feature (linguistics);Process (computing);Identification (biology);Deep learning;Metric (unit);Medicine,US,"Breiman L, 2001, MACHINE LEARNING;LeCun Y, 1998, PROCEEDINGS OF THE IEEE;Szegedy C, 2015, ;Russakovsky O, 2015, INTERNATIONAL JOURNAL OF COMPUTER VISION;Srivastava N, 2014, ;Hornik K, 1989, NEURAL NETWORKS;Cover T, 1967, IEEE TRANSACTIONS ON INFORMATION THEORY;Cristianini N, 2000, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Duda R, 1973, ;Comaniciu D, 2002, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Bezdek J, 1984, COMPUTERS & GEOSCIENCES;Dunn J, 1973, JOURNAL OF CYBERNETICS;Saeys Y, 2007, BIOINFORMATICS;Johnson S, 1967, PSYCHOMETRIKA;Arlot S, 2010, STATISTICS SURVEYS;Krishna K, 1999, IEEE TRANSACTIONS ON SYSTEMS MAN AND CYBERNETICS PART B (CYBERNETICS);Kononenko I, 2001, ARTIFICIAL INTELLIGENCE IN MEDICINE;Birant D, 2006, DATA & KNOWLEDGE ENGINEERING;Dahl G, 2013, ;Mitchell T, 2008, SCIENCE;Vapnik V, 1963, AUTOMATION AND REMOTE CONTROL;Bauer S, 2013, PHYSICS IN MEDICINE AND BIOLOGY;Hand D, 2001, INTERNATIONAL STATISTICAL REVIEW;Zhou Y, 1988, IEEE TRANSACTIONS ON ACOUSTICS SPEECH AND SIGNAL PROCESSING;Schoepf U, 2004, RADIOLOGY;Davatzikos C, 2006, NEUROBIOLOGY OF AGING;Jalalian A, 2012, CLINICAL IMAGING;Roberts S, 1998, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Lowd D, 2005, ;Dueck D, 2007, ;, 2012, ELSEVIER EBOOKS;Chan H, 1995, MEDICAL PHYSICS;Kohavi R, 1995, ELSEVIER EBOOKS;Prajapati G, 2010, ;Chellappa R, 1993, ACADEMIC PRESS EBOOKS;Suzuki 
K, 2012, INTERNATIONAL JOURNAL OF BIOMEDICAL IMAGING;Kühn M, 2013, ;Way T, 2010, MEDICAL PHYSICS;Yoshida H, 2007, COMPUTERIZED MEDICAL IMAGING AND GRAPHICS;Schoepf U, 2007, JOURNAL OF THORACIC IMAGING;Kim D, 2008, NEUROIMAGE;Dündar M, 2008, IEEE TRANSACTIONS ON BIOMEDICAL ENGINEERING;Zhou C, 2005, PATTERN RECOGNITION;Summers R, 2010, GASTROINTESTINAL ENDOSCOPY CLINICS OF NORTH AMERICA;Lee H, 2011, LECTURE NOTES IN COMPUTER SCIENCE;Zhang T, 2001, LECTURE NOTES IN COMPUTER SCIENCE;Brosch T, 2016, CIRCLE (UNIVERSITY OF BRITISH COLUMBIA);Abadi M, 2016, ARXIV (CORNELL UNIVERSITY);Buhmann M, 2003, ;Hosmer D, 2000, ;Breiman L, 1996, MACHINE LEARNING;Quinlan J, 1986, MACHINE LEARNING;Arlot S, 2009, ;Flach P, 2012, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Rokach L, 2013, SERIES IN MACHINE PERCEPTION AND ARTIFICIAL INTELLIGENCE;Cun Y, 1990, NEUROCOMPUTING",,,OPENALEX,"Erickson B, 2017, RADIOGRAPHICS","Erickson B, 2017, RADIOGRAPHICS" +https://openalex.org/W2607662938,,Proceedings of The 32nd International Conference on Machine Learning,2015,en,article,2038,,,Mathieu Germain;Karol Gregor;Iain Murray;Hugo Larochelle,Mathieu Germain;Karol Gregor;Iain Murray;Hugo Larochelle,School of Informatics,,,,,,,Engineering ethics;Computer science;Engineering,,,,,OPENALEX,"Germain M, 2015, ","Germain M, 2015, " +https://openalex.org/W3112020351,,UCI Repository of Machine Learning Databases,1994,en,article,1812,MEDICAL ENTOMOLOGY AND ZOOLOGY,Medical Entomology and Zoology,Patrick M. Murphy,Patrick M. Murphy,,"Patrick M. Murphy (corresponding author), ",,,,,,Computer science;Database;Artificial intelligence,,,,,OPENALEX,"Murphy P, 1994, MEDICAL ENTOMOLOGY AND ZOOLOGY","Murphy P, 1994, MEDICAL ENTOMOLOGY AND ZOOLOGY" +https://openalex.org/W2158485497,10.1016/j.neuroimage.2008.11.007,Machine learning classifiers and fMRI: A tutorial overview,2008,en,review,1673,NEUROIMAGE,NeuroImage,Francisco Pereira;Tom M. 
Mitchell;Matthew Botvinick,Francisco Pereira;Tom Mitchell;Matthew Botvinick,"Princeton Neuroscience Institute/Psychology Department, Princeton University, Princeton, NJ 08540, USA. fpereira@princeton.edu;Princeton Neuroscience Institute/Psychology Department, Princeton University, Princeton, NJ 08540, USA;Machine Learning Department, Carnegie Mellon University, Pittsburgh, PA 15213, USA;Princeton Neuroscience Institute/Psychology Department, Princeton University, Princeton, NJ 08540, USA","Francisco Pereira (corresponding author), Princeton Neuroscience Institute/Psychology Department, Princeton University, Princeton, NJ 08540, USA. fpereira@princeton.edu; Princeton Neuroscience Institute/Psychology Department, Princeton University, Princeton, NJ 08540, USA",,45,1,S199,S209,Computer science;Artificial intelligence;Machine learning;Popularity;Key (lock);Variable (mathematics);Point (geometry);Pattern recognition (psychology);Multivariate statistics;Psychology;Mathematics,US,"Hastie T, 2013, ;Lu Z, 2010, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES A (STATISTICS IN SOCIETY);Schölkopf B, 1999, ;Genovese C, 2002, NEUROIMAGE;Joachims T, 2006, TECHNICAL REPORTS;Haxby J, 2001, SCIENCE;, 2000, APPLIED PHYSICS LETTERS;Penny W, 2007, UCL DISCOVERY (UNIVERSITY COLLEGE LONDON);Brown L, 2001, STATISTICAL SCIENCE;Dietterich T, 1995, JOURNAL OF ARTIFICIAL INTELLIGENCE RESEARCH;Norman K, 2006, TRENDS IN COGNITIVE SCIENCES;Kriegeskorte N, 2006, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Kamitani Y, 2005, NATURE NEUROSCIENCE;Haynes J, 2006, NATURE REVIEWS. 
NEUROSCIENCE;Ledoit O, 2003, JOURNAL OF EMPIRICAL FINANCE;Schölkopf B, 1997, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Kay K, 2008, NATURE;Mitchell T, 2008, SCIENCE;McIntosh A, 2004, NEUROIMAGE;Chalupa L, 2004, ;Polyn S, 2005, SCIENCE;Good P, 2005, SPRINGER SERIES IN STATISTICS;Friston K, 2003, ;Mitchell T, 2004, MACHINE LEARNING;Davatzikos C, 2005, NEUROIMAGE;Golland P, 2003, LECTURE NOTES IN COMPUTER SCIENCE;Haynes J, 2005, CURRENT BIOLOGY;O’Toole A, 2005, JOURNAL OF COGNITIVE NEUROSCIENCE;Wasserman L, 2021, ;Lowry R, 2014, ;Langford J, 2005, ;Hanson S, 2004, NEUROIMAGE;Carlson T, 2003, JOURNAL OF COGNITIVE NEUROSCIENCE;Strother S, 2002, NEUROIMAGE;O’Toole A, 2007, JOURNAL OF COGNITIVE NEUROSCIENCE;Strother S, 2000, NEUROIMAGE;Carlson T, 2003, JOURNAL OF COGNITIVE NEUROSCIENCE;Aguirre G, 2007, NEUROIMAGE;Strother S, 2006, IEEE ENGINEERING IN MEDICINE AND BIOLOGY MAGAZINE;Mourão-Miranda J, 2007, NEUROIMAGE;Calhoun V, 2003, TECHNICAL UNIVERSITY OF DENMARK, DTU ORBIT (TECHNICAL UNIVERSITY OF DENMARK, DTU);Hansen L, 1999, NEUROIMAGE;Hanson S, 2007, NEURAL COMPUTATION;LangfordJohn, 2005, JOURNAL OF MACHINE LEARNING RESEARCH;Pereira F, 2006, ;Mitchell T, 2007, ;Hutchinson R, 2006, ;Kohavi R, 1995, ;, 2009, ",,,OPENALEX,"Pereira F, 2008, NEUROIMAGE","Pereira F, 2008, NEUROIMAGE" +https://openalex.org/W2015811642,10.1080/00107514.2014.964942,An introduction to quantum machine learning,2014,en,article,1244,CONTEMPORARY PHYSICS,Contemporary Physics,Maria Schuld;Ilya Sinayskiy;Francesco Petruccione,Maria Schuld;Ilya Sinayskiy;Francesco Petruccione,"Quantum Research Group, School of Chemistry and Physics, University of KwaZulu-Natal, Durban 4001, South Africa;Quantum Research Group, School of Chemistry and Physics, University of KwaZulu-Natal, Durban 4001, South Africa. National Institute for Theoretical Physics (NITheP), KwaZulu-Natal, South Africa;Quantum Research Group, School of Chemistry and Physics, University of KwaZulu-Natal, Durban 4001, South Africa. National 
Institute for Theoretical Physics (NITheP), KwaZulu-Natal, South Africa","Maria Schuld (corresponding author), Quantum Research Group, School of Chemistry and Physics, University of KwaZulu-Natal, Durban 4001, South Africa","Machine learning algorithms learn a desired input-output relation from examples in order to interpret new inputs. This is important for tasks such as image and speech recognition or strategy optimisation, with growing applications in the IT industry. In the last couple of years, researchers investigated if quantum computing can help to improve classical machine learning algorithms. Ideas range from running computationally costly algorithms or their subroutines efficiently on a quantum computer to the translation of stochastic methods into the language of quantum theory. This contribution gives a systematic overview of the emerging field of quantum machine learning. It presents the approaches as well as technical details in an accessible way, and discusses the potential of a future theory of quantum learning.",56,2,172,185,Quantum machine learning;Subroutine;Machine translation;Quantum computer;Field (mathematics);Quantum;Relation (database);Computational learning theory;Range (aeronautics),ZA,"Rumelhart D, 1986, NATURE;Rabiner L, 1989, PROCEEDINGS OF THE IEEE;Hinton G, 2006, NEURAL COMPUTATION;Rosenblatt F, 1958, PSYCHOLOGICAL REVIEW;Nielsen M, 2012, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Hamming R, 1950, BELL SYSTEM TECHNICAL JOURNAL;Harrow A, 2009, PHYSICAL REVIEW LETTERS;Georgescu I, 2014, REVIEWS OF MODERN PHYSICS;Rebentrost P, 2014, PHYSICAL REVIEW LETTERS;Hilbert M, 2011, SCIENCE;Samuel A, 2000, IBM JOURNAL OF RESEARCH AND DEVELOPMENT;Buhrman H, 2001, PHYSICAL REVIEW LETTERS;Eisert J, 1999, PHYSICAL REVIEW LETTERS;Schuld M, 2014, QUANTUM INFORMATION PROCESSING;Du J, 2002, PHYSICAL REVIEW LETTERS;Ventura D, 2000, INFORMATION SCIENCES;Plenio M, 2001, CONTEMPORARY PHYSICS;Aı̈meur E, 2012, MACHINE LEARNING;Gupta S, 2001, JOURNAL OF COMPUTER AND 
SYSTEM SCIENCES;Piotrowski E, 2003, INTERNATIONAL JOURNAL OF THEORETICAL PHYSICS;Aı̈meur E, 2006, LECTURE NOTES IN COMPUTER SCIENCE;Bisio A, 2010, PHYSICAL REVIEW A;Trugenberger C, 2001, PHYSICAL REVIEW LETTERS;Lu S, 2013, QUANTUM INFORMATION PROCESSING;Gammelmark S, 2013, PHYSICAL REVIEW A;Purushothaman G, 1997, IEEE TRANSACTIONS ON NEURAL NETWORKS;Pudenz K, 2012, QUANTUM INFORMATION PROCESSING;Trugenberger C, 2002, QUANTUM INFORMATION PROCESSING;Schützhold R, 2003, PHYSICAL REVIEW A;Tóth G, 1996, SUPERLATTICES AND MICROSTRUCTURES;Wiebe N, 2014, PHYSICAL REVIEW A;Panella M, 2009, INTERNATIONAL JOURNAL OF CIRCUIT THEORY AND APPLICATIONS;Barry J, 2014, ARXIV (CORNELL UNIVERSITY);Sasaki M, 2001, PHYSICAL REVIEW A;Sentís G, 2012, SCIENTIFIC REPORTS;Neigovzen R, 2009, PHYSICAL REVIEW A;Silva A, 2011, NEUROCOMPUTING;Rigatos G, 2007, INTEGRATED COMPUTER-AIDED ENGINEERING;Guţă M, 2010, NEW JOURNAL OF PHYSICS;Clark L, 2014, EMERGENCE, COMPLEXITY AND COMPUTATION;Gammelmark S, 2009, NEW JOURNAL OF PHYSICS;Wiesner K, 2008, PHYSICA D NONLINEAR PHENOMENA;Hunziker M, 2009, QUANTUM INFORMATION PROCESSING;Landsburg S, 2011, WILEY ENCYCLOPEDIA OF OPERATIONS RESEARCH AND MANAGEMENT SCIENCE",,,OPENALEX,"Schuld M, 2014, CONTEMPORARY PHYSICS","Schuld M, 2014, CONTEMPORARY PHYSICS" +https://openalex.org/W2996061341,10.1145/3359786,Techniques for interpretable machine learning,2019,en,article,1240,COMMUNICATIONS OF THE ACM,Communications of the ACM,Mengnan Du;Ninghao Liu;Xia Hu,Mengnan Du;Ninghao Liu;Xia Hu,"Texas A&M University, College Station, TX;Texas A&M University, College Station, TX;Texas A&M University, College Station, TX",,Uncovering the mysterious ways machine learning models make decisions.,63,1,68,77,Computer science;Artificial intelligence;Machine learning,US,"Chen T, 2016, ;McCullagh P, 1989, ;Flach P, 2015, ;Teymur O, 2016, ;Ribeiro M, 2016, ;Bach S, 2015, PLOS ONE;Nguyen A, 2015, ;Altmann A, 2010, BIOINFORMATICS;Quinlan J, 1987, INTERNATIONAL JOURNAL OF MAN-MACHINE 
STUDIES;Ribeiro M, 2018, PROCEEDINGS OF THE AAAI CONFERENCE ON ARTIFICIAL INTELLIGENCE;Caruana R, 2015, ;Fong R, 2017, ;Zhang Q, 2018, ;Molnar C, 2018, THE JOURNAL OF OPEN SOURCE SOFTWARE;Freitas A, 2014, ACM SIGKDD EXPLORATIONS NEWSLETTER;Molnar C, 2020, ;Ancona M, 2018, REPOSITORY FOR PUBLICATIONS AND RESEARCH DATA (ETH ZURICH);Kádár Á, 2017, COMPUTATIONAL LINGUISTICS;Du M, 2018, ;Liu N, 2018, ;Du M, 2019, ;Liu N, 2019, ;Bahdanau D, 2014, ARXIV (CORNELL UNIVERSITY);Goodfellow I, 2014, ARXIV (CORNELL UNIVERSITY);Xu K, 2015, ARXIV (CORNELL UNIVERSITY);Doshi‐Velez F, 2017, ARXIV (CORNELL UNIVERSITY);Springenberg J, 2014, ARXIV (CORNELL UNIVERSITY);, , VIEW;A V, 2017, OXFORD UNIVERSITY RESEARCH ARCHIVE (ORA) (UNIVERSITY OF OXFORD);Karpathy A, 2015, ARXIV (CORNELL UNIVERSITY);Simonyan K, 2013, ARXIV (CORNELL UNIVERSITY);Wachter S, 2017, SSRN ELECTRONIC JOURNAL;Nguyen A, 2016, ARXIV (CORNELL UNIVERSITY);Nguyen A, 2016, ARXIV (CORNELL UNIVERSITY);Tomsett R, 2018, ORCA ONLINE RESEARCH @CARDIFF (CARDIFF UNIVERSITY)",,,OPENALEX,"Du M, 2019, COMMUNICATIONS OF THE ACM","Du M, 2019, COMMUNICATIONS OF THE ACM" +https://openalex.org/W2569349941,10.1016/j.renene.2016.12.095,Machine learning methods for solar radiation forecasting: A review,2017,en,review,1703,RENEWABLE ENERGY,Renewable Energy,Cyril Voyant;Gilles Notton;Soteris A. Kalogirou;Marie Laure Nivet;Christophe Paoli;Fabrice Motte;Alexis Fouilloy,Cyril Voyant;Gilles Notton;Soteris Kalogirou;Marie-Laure Nivet;Christophe Paoli;Fabrice Motte;Alexis Fouilloy,"University of Corsica/CNRS UMR SPE 6134, Campus Grimaldi, 20250, Corte, France;CHD Castelluccio, Radiophysics Unit, B.P85 20177, Ajaccio, France;University of Corsica/CNRS UMR SPE 6134, Campus Grimaldi, 20250, Corte, France;Department of Mechanical Engineering and Materials Science and Engineering, Cyprus University of Technology, P.O. 
Box 50329, Limassol, 3401, Cyprus;University of Corsica/CNRS UMR SPE 6134, Campus Grimaldi, 20250, Corte, France;University of Corsica/CNRS UMR SPE 6134, Campus Grimaldi, 20250, Corte, France;Galatasaray University, Çırağan Cad. No: 36, 34349, Ortaköy, İstanbul, Turkey;University of Corsica/CNRS UMR SPE 6134, Campus Grimaldi, 20250, Corte, France;University of Corsica/CNRS UMR SPE 6134, Campus Grimaldi, 20250, Corte, France","Cyril Voyant (corresponding author), University of Corsica/CNRS UMR SPE 6134, Campus Grimaldi, 20250, Corte, France; CHD Castelluccio, Radiophysics Unit, B.P85 20177, Ajaccio, France",,105,,569,582,Computer science;Solar irradiance;Probabilistic forecasting;Random forest;Gradient boosting;Context (archaeology);Artificial neural network;Machine learning;Support vector machine;Solar power;Solar energy;Ensemble forecasting;Ensemble learning;Artificial intelligence;Data mining;Meteorology;Probabilistic logic;Engineering;Power (physics),FR;CY;TR,"Liaw A, 2007, ;Rasmussen C, 2005, THE MIT PRESS EBOOKS;Dietterich T, 2000, LECTURE NOTES IN COMPUTER SCIENCE;Tso K, 2007, ENERGY;Diagne M, 2013, RENEWABLE AND SUSTAINABLE ENERGY REVIEWS;Taieb S, 2012, EXPERT SYSTEMS WITH APPLICATIONS;Reikard G, 2008, SOLAR ENERGY;Bouveyron C, 2012, COMPUTATIONAL STATISTICS & DATA ANALYSIS;Perez R, 2010, SOLAR ENERGY;Mellit A, 2008, RENEWABLE AND SUSTAINABLE ENERGY REVIEWS;Bouzerdoum M, 2013, SOLAR ENERGY;Marquez R, 2011, SOLAR ENERGY;Hammer A, 1999, SOLAR ENERGY;Lara-Fanego V, 2011, SOLAR ENERGY;Lauret P, 2015, SOLAR ENERGY;Zamo M, 2014, SOLAR ENERGY;Almeida M, 2015, SOLAR ENERGY;Paulescu M, 2012, GREEN ENERGY AND TECHNOLOGY;Chu Y, 2013, SOLAR ENERGY;Salcedo‐Sanz S, 2014, SOLAR ENERGY;Long H, 2014, APPLIED ENERGY;Lorenz E, 2009, EU PVSEC;Perez R, 2006, SOLAR ENERGY;Bădescu V, 2014, ;Pedro H, 2015, RENEWABLE ENERGY;Wu Y, 2014, INTERNATIONAL JOURNAL OF PHOTOENERGY;Gala Y, 2015, NEUROCOMPUTING;Moreno A, 2011, SOLAR ENERGY;Aggarwal S, 2014, ENERGY;Bilionis I, 2014, SOLAR 
ENERGY;Kemmoku Y, 1999, SOLAR ENERGY;Trapero J, 2015, ENERGY;Chaabene M, 2007, RENEWABLE ENERGY;Remund J, 2008, 23RD EUROPEAN PHOTOVOLTAIC SOLAR ENERGY CONFERENCE AND EXHIBITION, 1-5 SEPTEMBER 2008, VALENCIA, SPAIN;Felice M, 2015, RENEWABLE ENERGY;Huang J, 2014, RENEWABLE ENERGY;Lazzaroni M, 2014, MEASUREMENT;Hejase H, 2012, ISRN RENEWABLE ENERGY;Chakraborty P, 2021, PROCEEDINGS OF THE AAAI CONFERENCE ON ARTIFICIAL INTELLIGENCE;Alobaidi M, 2014, IEEE JOURNAL OF SELECTED TOPICS IN APPLIED EARTH OBSERVATIONS AND REMOTE SENSING;McGovern A, 2015, BULLETIN OF THE AMERICAN METEOROLOGICAL SOCIETY;Demirtaş M, 2012, ;Mori H, 2012, ;Mori H, 2002, 2001 IEEE POWER ENGINEERING SOCIETY WINTER MEETING. CONFERENCE PROCEEDINGS (CAT. NO.01CH37194);Zarzo M, 2011, APPLIED ENERGY;Şen Z, 2008, ENERGY CONVERSION AND MANAGEMENT;Ferrari S, 2012, ;Krömer P, 2014, ;Fernández Á, 2014, LECTURE NOTES IN COMPUTER SCIENCE;Saguan M, 2009, REVUE D ÉCONOMIE INDUSTRIELLE;Breiman L, 2001, MACHINE LEARNING;Breiman L, 1996, MACHINE LEARNING;Breiman L, 1996, MACHINE LEARNING;Ziegel E, 1992, TECHNOMETRICS;Zhang G, 2003, NEUROCOMPUTING;Fernández-Delgado M, 2014, JOURNAL OF MACHINE LEARNING RESEARCH;Vilariño D, 2017, JOURNAL OF APPLIED REMOTE SENSING;Gooijer J, 2006, INTERNATIONAL JOURNAL OF FORECASTING;Kalogirou S, 2001, RENEWABLE AND SUSTAINABLE ENERGY REVIEWS;Inman R, 2013, PROGRESS IN ENERGY AND COMBUSTION SCIENCE;Bacher P, 2009, SOLAR ENERGY;Oger R, 1991, BIOMETRICS;Yang H, 2014, IEEE TRANSACTIONS ON SUSTAINABLE ENERGY;Ray W, 1990, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES A (STATISTICS IN SOCIETY);Paoli C, 2010, SOLAR ENERGY;Hand D, 1998, THE AMERICAN STATISTICIAN;Pelland S, 2011, PROGRESS IN PHOTOVOLTAICS RESEARCH AND APPLICATIONS;David M, 2016, SOLAR ENERGY;Cao J, 2008, ENERGY CONVERSION AND MANAGEMENT;Anderson D, 2003, ENERGY POLICY;Folorunsho O, 2012, INTERNATIONAL JOURNAL OF INFORMATION ENGINEERING AND ELECTRONIC BUSINESS;Chu Y, 2015, SOLAR ENERGY;Dong Z, 2015, ENERGY;Podestá G, 2003, 
AGRICULTURAL AND FOREST METEOROLOGY;Lorenz E, 2004, OSTI OAI (U.S. DEPARTMENT OF ENERGY OFFICE OF SCIENTIFIC AND TECHNICAL INFORMATION);Mihalakakou G, 2000, THEORETICAL AND APPLIED CLIMATOLOGY;Troncoso A, 2015, RENEWABLE ENERGY;Heinemann D, 2006, ;Chaouachi A, 2010, JOURNAL OF ADVANCED COMPUTATIONAL INTELLIGENCE AND INTELLIGENT INFORMATICS;Wold H, 1948, THE ANNALS OF MATHEMATICAL STATISTICS;Moreno‐Muñoz A, 2008, CONFERENCE RECORD OF THE IEEE PHOTOVOLTAIC SPECIALISTS CONFERENCE;Wu J, 2013, RENEWABLE ENERGY;Diagne H, 2012, HAL (LE CENTRE POUR LA COMMUNICATION SCIENTIFIQUE DIRECTE);Sperati S, 2015, ENERGIES;Lorenz E, 2012, EU PVSEC;Espinar B, 2010, HAL (LE CENTRE POUR LA COMMUNICATION SCIENTIFIQUE DIRECTE);Prokop L, 2013, NEURAL NETWORK WORLD;Burrows W, 1997, JOURNAL OF APPLIED METEOROLOGY;Podesta G, 2004, AGRICULTURAL METEOROLOGY;Cheng H, 2016, RENEWABLE ENERGY;Gaillard L, 2015, ENERGY PROCEDIA;Gastón M, 2010, HAL (LE CENTRE POUR LA COMMUNICATION SCIENTIFIQUE DIRECTE);Voyant C, 2013, ;Rasmussen C, 2022, DIRECTORY OF OPEN ACCESS BOOKS (OAPEN FOUNDATION);Voyant C, 2013, ARXIV (CORNELL UNIVERSITY);Join C, 2016, ARXIV (CORNELL UNIVERSITY)",,,OPENALEX,"Voyant C, 2017, RENEWABLE ENERGY","Voyant C, 2017, RENEWABLE ENERGY" +https://openalex.org/W2892741787,10.1038/s41551-018-0304-0,Explainable machine-learning predictions for the prevention of hypoxaemia during surgery,2018,en,article,1993,NATURE BIOMEDICAL ENGINEERING,Nature Biomedical Engineering,Scott Lundberg;Bala G. Nair;Monica S. Vavilala;Mayumi Horibe;Michael J. Eisses;Trevor Adams;David E. Liston;Daniel King‐Wai Low;Shu-Fang Newman;Jerry W. Kim;Su‐In Lee,Scott M. Lundberg;Bala Nair;Monica S. Vavilala;Mayumi Horibe;Michael J. Eisses;Trevor Adams;David E. Liston;Daniel King-Wai Low;Shu-Fang Newman;Jerry Kim;Su-In Lee,"Paul G. 
Allen School of Computer Science and Engineering, University of Washington, Seattle, WA, USA;Center for Perioperative and Pain initiatives in Quality Safety Outcome, University of Washington, Seattle, WA, USA;Department of Anesthesiology and Pain Medicine, University of Washington, Seattle, WA, USA;Harborview Injury Prevention and Research Center, University of Washington, Seattle, WA, USA;Center for Perioperative and Pain initiatives in Quality Safety Outcome, University of Washington, Seattle, WA, USA;Department of Anesthesiology and Pain Medicine, University of Washington, Seattle, WA, USA;Harborview Injury Prevention and Research Center, University of Washington, Seattle, WA, USA;Veterans Affairs Puget Sound Health Care System, Seattle, WA, USA;Department of Anesthesiology and Pain Medicine, University of Washington, Seattle, WA, USA;Seattle Children's Hospital, Seattle, WA, USA;Department of Anesthesiology and Pain Medicine, University of Washington, Seattle, WA, USA;Seattle Children's Hospital, Seattle, WA, USA;Department of Anesthesiology and Pain Medicine, University of Washington, Seattle, WA, USA;Seattle Children's Hospital, Seattle, WA, USA;Department of Anesthesiology and Pain Medicine, University of Washington, Seattle, WA, USA;Seattle Children's Hospital, Seattle, WA, USA;Center for Perioperative and Pain initiatives in Quality Safety Outcome, University of Washington, Seattle, WA, USA;Department of Anesthesiology and Pain Medicine, University of Washington, Seattle, WA, USA;Department of Anesthesiology and Pain Medicine, University of Washington, Seattle, WA, USA;Seattle Children's Hospital, Seattle, WA, USA;Paul G. Allen School of Computer Science and Engineering, University of Washington, Seattle, WA, USA. suinlee@cs.washington.edu;Paul G. Allen School of Computer Science and Engineering, University of Washington, Seattle, WA, USA","Su-In Lee (corresponding author), Paul G. 
Allen School of Computer Science and Engineering, University of Washington, Seattle, WA, USA. suinlee@cs.washington.edu; Paul G. Allen School of Computer Science and Engineering, University of Washington, Seattle, WA, USA",,2,10,749,760,Hypoxemia;Medicine;Intensive care medicine;Anesthesia,US,"Chen T, 2016, ;Friedman J, 2001, THE ANNALS OF STATISTICS;Ribeiro M, 2016, ;Deo R, 2015, CIRCULATION;Garg A, 2005, JAMA;Štrumbelj E, 2013, KNOWLEDGE AND INFORMATION SYSTEMS;Caruana R, 2015, ;Myles P, 2004, THE LANCET;Gawande A, 1999, SURGERY;Avidan M, 2008, NEW ENGLAND JOURNAL OF MEDICINE;Weiser T, 2015, THE LANCET;Roth A, 1991, ;Henry K, 2015, SCIENCE TRANSLATIONAL MEDICINE;Maier‐Hein L, 2017, NATURE BIOMEDICAL ENGINEERING;Kable A, 2002, INTERNATIONAL JOURNAL FOR QUALITY IN HEALTH CARE;Mędrzycka‐Dąbrowska W, 2017, EUROPEAN NEUROLOGY;Tarassenko L, 2006, BRITISH JOURNAL OF ANAESTHESIA;Saria S, 2010, SCIENCE TRANSLATIONAL MEDICINE;Kooij F, 2008, ANESTHESIA & ANALGESIA;Memarian N, 2015, COMPUTERS IN BIOLOGY AND MEDICINE;Guay J, 2015, COCHRANE DATABASE OF SYSTEMATIC REVIEWS;Nair B, 2016, ANESTHESIA & ANALGESIA;Ehrenfeld J, 2010, CANADIAN JOURNAL OF ANESTHESIA/JOURNAL CANADIEN D ANESTHÉSIE;Guay J, 2018, COCHRANE DATABASE OF SYSTEMATIC REVIEWS;Epstein R, 2015, ANESTHESIA & ANALGESIA;Dunham C, 2014, BMC ANESTHESIOLOGY;ElMoaqet H, 2016, IEEE TRANSACTIONS ON CYBERNETICS;Dyagilev K, 2015, MACHINE LEARNING;Kendale S, 2016, JOURNAL OF CLINICAL ANESTHESIA;Strachan L, 2001, PUBMED;Lumachi F, 2011, PUBMED;Summers R, 2014, PUBMED;Lundberg S, 2017, ARXIV (CORNELL UNIVERSITY);Lipton Z, 2015, ARXIV (CORNELL UNIVERSITY)",,,OPENALEX,"Lundberg S, 2018, NATURE BIOMEDICAL ENGINEERING","Lundberg S, 2018, NATURE BIOMEDICAL ENGINEERING" +https://openalex.org/W1529533208,10.1109/jproc.2015.2483592,A Review of Relational Machine Learning for Knowledge Graphs,2015,en,review,1648,PROCEEDINGS OF THE IEEE,Proceedings of the IEEE,Maximilian Nickel;Kevin Murphy;Volker Tresp;Evgeniy Gabrilovich,Maximilian 
Nickel;Kevin Murphy;Volker Tresp;Evgeniy Gabrilovich,"Laboratory for Computational and Statistical Learning (LCSL), Istituto Italiano di Tecnologia, Genova, MA, Italy;[Laboratory for Computational and Statistical Learning (LCSL), Massachusetts Institute of Technology, Cambridge, MA, USA];Google Inc., Mountain View, CA, USA;Google Inc., Mountain View, CA USA;Siemens AG, Corporate Technology, Ludwig Maximilian University of Munich, Munich, Germany;Siemens AG, Corporate Technology, Munich, Germany#TAB#;Google Inc., Mountain View, CA, USA;Google Inc., Mountain View, CA USA",,"Relational machine learning studies methods for the statistical analysis of relational, or graph-structured, data. In this paper, we provide a review of how such statistical models can be “trained” on large knowledge graphs, and then used to predict new facts about the world (which is equivalent to predicting new edges in the graph). In particular, we discuss two fundamentally different kinds of statistical relational models, both of which can scale to massive data sets. The first is based on latent feature models such as tensor factorization and multiway neural networks. The second is based on mining observable patterns in the graph. We also show how to combine these latent and observable models to get improved modeling power at decreased computational cost. Finally, we discuss how such statistical models of graphs can be combined with text-based information extraction methods for automatically constructing knowledge graphs from the Web. 
To this end, we also discuss Google's knowledge vault project as an example of such combination.",104,1,11,33,Statistical relational learning;Computer science;Knowledge graph;Artificial intelligence;Machine learning;Graph;Theoretical computer science;Data mining;Observable;Relational model;Relational database,IT;US;DE,"Barabási A, 1999, SCIENCE;Murphy K, 2012, ;Wolpert D, 1992, NEURAL NETWORKS;Koller D, 2009, ;Davis J, 2006, ;Bottou L, 2010, ;Auer S, 2007, LECTURE NOTES IN COMPUTER SCIENCE;Koren Y, 2008, ;McGuinness D, 2004, MEDICAL ENTOMOLOGY AND ZOOLOGY;Rahm E, 2001, THE VLDB JOURNAL;Lü L, 2010, PHYSICA A STATISTICAL MECHANICS AND ITS APPLICATIONS;Richardson M, 2006, MACHINE LEARNING;Bishan Y, 2014, ARXIV (CORNELL UNIVERSITY);Carlson J, 2010, PROCEEDINGS OF THE AAAI CONFERENCE ON ARTIFICIAL INTELLIGENCE;Nickel M, 2011, ;, 2007, THE MIT PRESS EBOOKS;Quinlan J, 1990, MACHINE LEARNING;Halford G, 1998, BEHAVIORAL AND BRAIN SCIENCES;Riedel S, 2013, SCHOLARWORKS@UMASSAMHERST (UNIVERSITY OF MASSACHUSETTS AMHERST);Lao N, 2018, FIGSHARE;Sun Y, 2012, SYNTHESIS LECTURES ON DATA MINING AND KNOWLEDGE DISCOVERY;Nakashole N, 2012, MPG.PURE (MAX PLANCK SOCIETY);Halpin H, 2010, LECTURE NOTES IN COMPUTER SCIENCE;Li X, 2012, PROCEEDINGS OF THE VLDB ENDOWMENT;Poon H, 2006, ;Suh B, 2009, ;Dong X, 2014, PROCEEDINGS OF THE VLDB ENDOWMENT;Franz T, 2009, LECTURE NOTES IN COMPUTER SCIENCE;Orbanz P, 2014, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Dong X, 2015, PROCEEDINGS OF THE VLDB ENDOWMENT;Kimmig A, 2012, LIRIAS (KU LEUVEN);Jensen D, 2002, ;Kolda T, 2006, ;Rettinger A, 2012, DATA MINING AND KNOWLEDGE DISCOVERY;Lösch U, 2012, LECTURE NOTES IN COMPUTER SCIENCE;Nickel M, 2013, LECTURE NOTES IN COMPUTER SCIENCE;Culotta A, 2005, ;Miettinen P, 2011, ;Angeli G, 2013, ;Ji H, 2013, KNOWLEDGE AND INFORMATION SYSTEMS;Krompaß D, 2014, LECTURE NOTES IN COMPUTER SCIENCE;Pujara J, 2015, AI MAGAZINE;Hong L, 2015, ;d’Amato C, 2006, CINECA IRIS INSTITUTIONAL RESEARCH INFORMATION 
SYSTEM (UNIVERSITY OF BARI ALDO MORO);Mikolov T, 2013, ARXIV (CORNELL UNIVERSITY);Sowa J, 2000, CERN DOCUMENT SERVER (EUROPEAN ORGANIZATION FOR NUCLEAR RESEARCH);Hogan A, 2010, ARROW@DIT (DUBLIN INSTITUTE OF TECHNOLOGY);Bach S, 2015, ARXIV (CORNELL UNIVERSITY);Nickel M, 2013, ARXIV (CORNELL UNIVERSITY);Nickel M, 2013, ELECTRONIC THESES OF LMU MUNICH (LUDWIG-MAXIMILIANS-UNIVERSITÄT MÜNCHEN);Brin S, 1998, COMPUTER NETWORKS AND ISDN SYSTEMS;Miller G, 1995, COMMUNICATIONS OF THE ACM;Koren Y, 2009, COMPUTER;Fortunato S, 2009, PHYSICS REPORTS;Kolda T, 2009, SIAM REVIEW;Bordes A, 2015, ;Bollacker K, 2008, ;Bizer C, 2009, INTERNATIONAL JOURNAL ON SEMANTIC WEB AND INFORMATION SYSTEMS;Newman M, 2001, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Suchanek F, 2007, ;Katz L, 1953, PSYCHOMETRIKA;Vrandečić D, 2014, COMMUNICATIONS OF THE ACM;Holland P, 1983, SOCIAL NETWORKS;Hoff P, 2002, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Lenat D, 1995, COMMUNICATIONS OF THE ACM;Socher R, 2013, ;Dong X, 2014, ;Muggleton S, 1995, NEW GENERATION COMPUTING;Rendle S, 2012, ACM TRANSACTIONS ON INTELLIGENT SYSTEMS AND TECHNOLOGY;Hoffart J, 2012, ARTIFICIAL INTELLIGENCE;Getoor L, 2005, ACM SIGKDD EXPLORATIONS NEWSLETTER;Smolensky P, 1990, ARTIFICIAL INTELLIGENCE;Leicht E, 2006, PHYSICAL REVIEW E;Newcombe H, 1959, SCIENCE;Rendle S, 2010, ;Lao N, 2010, MACHINE LEARNING;Liu W, 2010, EUROPHYSICS LETTERS (EPL);Kemp C, 2006, ;Minsky M, 1997, THE MIT PRESS EBOOKS;Džeroski S, 2006, ;Galárraga L, 2015, THE VLDB JOURNAL;Taskar B, 2003, ;Nickel M, 2012, ;Neville J, 2007, THE MIT PRESS EBOOKS;Kok S, 2007, ;Sutskever I, 2009, DSPACE@MIT (MASSACHUSETTS INSTITUTE OF TECHNOLOGY);Nakashole N, 2011, ;Chang K, 2014, ;Wang D, 2008, PROCEEDINGS OF THE VLDB ENDOWMENT;Anderson C, 1992, SOCIAL NETWORKS;Ruttenberg A, 2009, BRIEFINGS IN BIOINFORMATICS;Drumond L, 2012, ;Zhang C, 2013, ;Whang S, 2012, ;Fan J, 2010, ;Erdös D, 2013, ;Jenatton R, 2012, HAL (LE CENTRE POUR LA COMMUNICATION SCIENTIFIQUE DIRECTE);Xu Z, 
2012, ARXIV (CORNELL UNIVERSITY);James G, 2013, SPRINGER TEXTS IN STATISTICS;Minsky M, 1988, ELSEVIER EBOOKS;Bodenreider O, 2003, NUCLEIC ACIDS RESEARCH;Adamic L, 2003, SOCIAL NETWORKS;Bleiholder J, 2009, ACM COMPUTING SURVEYS;Ferrucci D, 2010, AI MAGAZINE;Fader A, 2011, ;Platt J, 2000, THE MIT PRESS EBOOKS;Toutanova K, 2015, ;Muggleton S, 1991, NEW GENERATION COMPUTING;Bordes A, 2011, ;Lee N, 2024, ENCYCLOPEDIA OF COMPUTER GRAPHICS AND GAMES;Belleau F, 2008, JOURNAL OF BIOMEDICAL INFORMATICS;Schmitz M, 2012, ;Davis R, 1993, ;Quinlan J, 1990, MACHINE LEARNING;Galárraga L, 2013, ;Etzioni O, 2011, ;Barnden J, 1998, ;Singla P, 2006, PROCEEDINGS;Raedt L, 2008, LECTURE NOTES IN COMPUTER SCIENCE;Suciu D, 2008, ACM SIGACT NEWS;Tejada S, 2001, INFORMATION SYSTEMS;West R, 2014, ;Globerson A, 2007, ;Kolda T, 2006, ;Lehmann J, 2009, ;Sun Y, 2012, SYNTHESIS LECTURES ON DATA MINING AND KNOWLEDGE DISCOVERY;Weikum G, 2010, ;Xu Z, 2006, ;Rendle S, 2013, PROCEEDINGS OF THE VLDB ENDOWMENT;Niu F, 2012, INTERNATIONAL JOURNAL ON SEMANTIC WEB AND INFORMATION SYSTEMS;Nickel M, 2014, ;Tresp V, 2009, ;Tibshirani R, 2007, ;LehmannJens, 2009, JOURNAL OF MACHINE LEARNING RESEARCH;Biega J, 2013, ;Jiang S, 2012, ;Huang Y, 2014, SEMANTIC WEB;Jiang X, 2012, ;Krompaß D, 2014, ;Lisi F, 2010, THEORY AND PRACTICE OF LOGIC PROGRAMMING;Krompaß D, 2014, ;Minervini P, 2014, ;Hoff P, 2007, ARXIV.ORG;Li X, 2015, ARXIV (CORNELL UNIVERSITY);Liben‐Nowell D, 2007, JOURNAL OF THE AMERICAN SOCIETY FOR INFORMATION SCIENCE AND TECHNOLOGY;Klyne G, 2004, ;Ooi V, 2019, EDINBURGH UNIVERSITY PRESS EBOOKS;Minsky M, 1979, ;, 2004, ELSEVIER EBOOKS;Bottou L, 2011, CHAPMAN & HALL/CRC SERIES IN COMPUTER SCIENCE & DATA ANALYSIS/SERIES IN COMPUTER SCIENCE AND DATA ANALYSIS;, 2001, ;De R, 2008, COGNITIVE TECHNOLOGIES;Suciu D, 2011, SYNTHESIS LECTURES ON DATA MANAGEMENT;Suciu D, 2011, SYNTHESIS LECTURES ON DATA MANAGEMENT;Smolensky P, 1991, THE MIT PRESS EBOOKS;Dězeroski S, 2001, SPRINGER EBOOKS;Newman M, 2011, PRINCETON 
UNIVERSITY PRESS EBOOKS;Hoff P, 2001, ;Sowa J, 2005, ENCYCLOPEDIA OF COGNITIVE SCIENCE;Kenny J, 2005, ",,,OPENALEX,"Nickel M, 2015, PROCEEDINGS OF THE IEEE","Nickel M, 2015, PROCEEDINGS OF THE IEEE" +https://openalex.org/W2469230926,10.1109/tsp.2017.2690524,Tensor Decomposition for Signal Processing and Machine Learning,2017,en,article,1599,IEEE TRANSACTIONS ON SIGNAL PROCESSING,IEEE Transactions on Signal Processing,Nicholas D. Sidiropoulos;Lieven De Lathauwer;Xiao Fu;Kejun Huang;Evangelos E. Papalexakis;Christos Faloutsos,Nicholas D. Sidiropoulos;Lieven De Lathauwer;Xiao Fu;Kejun Huang;Evangelos E. Papalexakis;Christos Faloutsos,"Department of Electrical and Computer Engineering, University of Minnesota, Minneapolis, MN, USA;KU Leuven, Leuven, Belgium;Department of Electrical and Computer Engineering, University of Minnesota, Minneapolis, MN, USA;Department of Electrical and Computer Engineering, University of Minnesota, Minneapolis, MN, USA;Department of Computer Science, University of California, Riverside, CA, USA;Department of Computer Science, Carnegie Mellon University, Pittsburgh, PA, USA",,"Tensors or multiway arrays are functions of three or more indices (i, j, k, . . . )-similar to matrices (two-way arrays), which are functions of two indices (r, c) for (row, column). Tensors have a rich history, stretching over almost a century, and touching upon numerous disciplines; but they have only recently become ubiquitous in signal and data analytics at the confluence of signal processing, statistics, data mining, and machine learning. This overview article aims to provide a good starting point for researchers and practitioners interested in learning about and working with tensors. 
As such, it focuses on fundamentals and motivation (using various application examples), aiming to strike an appropriate balance of breadth and depth that will enable someone having taken first graduate courses in matrix algebra and probability to get started doing research and/or developing tensor algorithms and software. Some background in applied optimization is useful but not strictly required. The material covered includes tensor rank and rank decomposition; basic tensor factorization models and their relationships and properties (including fairly good coverage of identifiability); broad coverage of algorithms ranging from alternating optimization to stochastic gradient; statistical performance analysis; and applications ranging from source separation to collaborative filtering, mixture and topic modeling, classification, and multilinear subspace learning.",65,13,3551,3582,Tensor (intrinsic definition);Identifiability;Multilinear algebra;Multilinear map;Computer science;Matrix decomposition;Signal processing;Rank (graph theory);Ranging;Subspace topology;Statistical signal processing;Analytics;Artificial intelligence;Theoretical computer science;Machine learning;Algebra over a field;Data mining;Mathematics;Digital signal processing,US;BE,"Sengijpta S, 1995, TECHNOMETRICS;Carroll J, 1970, PSYCHOMETRIKA;Lathauwer L, 2000, SIAM JOURNAL ON MATRIX ANALYSIS AND APPLICATIONS;Oseledets I, 2011, SIAM JOURNAL ON SCIENTIFIC COMPUTING;Lathauwer L, 2000, SIAM JOURNAL ON MATRIX ANALYSIS AND APPLICATIONS;Comon P, 2010, HAL (LE CENTRE POUR LA COMMUNICATION SCIENTIFIQUE DIRECTE);Lahat D, 2015, PROCEEDINGS OF THE IEEE;Duchi J, 2008, ;Hackbusch W, 2012, SPRINGER SERIES IN COMPUTATIONAL MATHEMATICS;Xiong L, 2010, ;Grasedyck L, 2013, GAMM-MITTEILUNGEN;Kroonenberg P, 2007, WILEY SERIES IN PROBABILITY AND STATISTICS;Lathauwer L, 2008, SIAM JOURNAL ON MATRIX ANALYSIS AND APPLICATIONS;Sorber L, 2013, SIAM JOURNAL ON OPTIMIZATION;Cattell R, 1944, PSYCHOMETRIKA;Hsu D, 2013, ;Mørup M, 
2011, WILEY INTERDISCIPLINARY REVIEWS DATA MINING AND KNOWLEDGE DISCOVERY;Stegeman A, 2006, LINEAR ALGEBRA AND ITS APPLICATIONS;Smith S, 2015, ;Oseledets I, 2008, SIAM JOURNAL ON MATRIX ANALYSIS AND APPLICATIONS;Papalexakis E, 2012, LECTURE NOTES IN COMPUTER SCIENCE;Huang K, 2016, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Uschmajew A, 2012, SIAM JOURNAL ON MATRIX ANALYSIS AND APPLICATIONS;Kolda T, 2006, ;Phan A, 2013, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Papalexakis E, 2012, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Lathauwer L, 2011, SIAM JOURNAL ON MATRIX ANALYSIS AND APPLICATIONS;Savas B, 2010, SIAM JOURNAL ON SCIENTIFIC COMPUTING;Chiantini L, 2012, SIAM JOURNAL ON MATRIX ANALYSIS AND APPLICATIONS;Sørensen M, 2015, SIAM JOURNAL ON MATRIX ANALYSIS AND APPLICATIONS;Liavas A, 2015, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Domanov I, 2014, SIAM JOURNAL ON MATRIX ANALYSIS AND APPLICATIONS;Caiafa C, 2010, LINEAR ALGEBRA AND ITS APPLICATIONS;Acar E, 2013, CHEMOMETRICS AND INTELLIGENT LABORATORY SYSTEMS;Beutel A, 2014, ;Papalexakis E, 2014, ;Acar E, 2005, LECTURE NOTES IN COMPUTER SCIENCE;Paatero P, 2000, JOURNAL OF CHEMOMETRICS;Bro R, 1998, CHEMOMETRICS AND INTELLIGENT LABORATORY SYSTEMS;Sørensen M, 2012, SIAM JOURNAL ON MATRIX ANALYSIS AND APPLICATIONS;Papalexakis E, 2014, ;Yang Q, 2016, IEEE TRANSACTIONS ON INFORMATION THEORY;Sørensen M, 2015, SIAM JOURNAL ON MATRIX ANALYSIS AND APPLICATIONS;Tichavský P, 2013, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Vannieuwenhoven N, 2015, SIAM JOURNAL ON SCIENTIFIC COMPUTING;Stegeman A, 2009, SIAM JOURNAL ON MATRIX ANALYSIS AND APPLICATIONS;Sorber L, 2015, COMPUTATIONAL OPTIMIZATION AND APPLICATIONS;Derksen H, 2011, LINEAR ALGEBRA AND ITS APPLICATIONS;Acar E, 2011, ARXIV (CORNELL UNIVERSITY);Kofidis E, 2001, CONTEMPORARY MATHEMATICS - AMERICAN MATHEMATICAL SOCIETY;Kolda T, 2009, SIAM REVIEW;Recht B, 2010, SIAM REVIEW;Bro R, 1997, CHEMOMETRICS AND INTELLIGENT LABORATORY SYSTEMS;Kruskal J, 1977, LINEAR ALGEBRA AND ITS APPLICATIONS;Cichocki 
A, 2015, IEEE SIGNAL PROCESSING MAGAZINE;Razaviyayn M, 2013, SIAM JOURNAL ON OPTIMIZATION;Andersson C, 2000, CHEMOMETRICS AND INTELLIGENT LABORATORY SYSTEMS;Hillar C, 2013, JOURNAL OF THE ACM;Roy R, 1986, IEEE TRANSACTIONS ON ACOUSTICS SPEECH AND SIGNAL PROCESSING;Vasilescu M, 2002, LECTURE NOTES IN COMPUTER SCIENCE;Anandkumar A, 2014, CALTECHAUTHORS (CALIFORNIA INSTITUTE OF TECHNOLOGY);Smilde A, 2004, ;Karatzoglou A, 2010, ;Landsberg J, 2011, GRADUATE STUDIES IN MATHEMATICS;Sidiropoulos N, 2000, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Håstad J, 1990, JOURNAL OF ALGORITHMS;Veen A, 1996, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Bader B, 2007, SIAM JOURNAL ON SCIENTIFIC COMPUTING;Goreinov S, 1997, LINEAR ALGEBRA AND ITS APPLICATIONS;Lathauwer L, 2006, SIAM JOURNAL ON MATRIX ANALYSIS AND APPLICATIONS;Tomasi G, 2004, COMPUTATIONAL STATISTICS & DATA ANALYSIS;Lu H, 2011, PATTERN RECOGNITION;Yan S, 2006, IEEE TRANSACTIONS ON IMAGE PROCESSING;Stoica P, 1998, IEEE SIGNAL PROCESSING LETTERS;Huang K, 2013, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Kang U, 2012, ;Stoica P, 2001, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Carroll J, 1980, PSYCHOMETRIKA;Rajih M, 2008, SIAM JOURNAL ON MATRIX ANALYSIS AND APPLICATIONS;Sorber L, 2015, IEEE JOURNAL OF SELECTED TOPICS IN SIGNAL PROCESSING;Nion D, 2009, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Strassen V, 1983, LINEAR ALGEBRA AND ITS APPLICATIONS;Vervliet N, 2014, IEEE SIGNAL PROCESSING MAGAZINE;Domanov I, 2013, SIAM JOURNAL ON MATRIX ANALYSIS AND APPLICATIONS;Bro R, 2009, JOURNAL OF CHEMOMETRICS;Ishteva M, 2011, SIAM JOURNAL ON MATRIX ANALYSIS AND APPLICATIONS;Phan A, 2013, SIAM JOURNAL ON MATRIX ANALYSIS AND APPLICATIONS;Swami A, 2002, SIGNAL PROCESSING;Mahoney M, 2008, SIAM JOURNAL ON MATRIX ANALYSIS AND APPLICATIONS;Krijnen W, 2008, PSYCHOMETRIKA;Chiantini L, 2014, SIAM JOURNAL ON MATRIX ANALYSIS AND APPLICATIONS;Sidiropoulos N, 2014, IEEE SIGNAL PROCESSING MAGAZINE;Savas B, 2011, LINEAR ALGEBRA AND ITS APPLICATIONS;Ravindran N, 2014, 2014 
48TH ASILOMAR CONFERENCE ON SIGNALS, SYSTEMS AND COMPUTERS;Huang K, 2014, IEEE SIGNAL PROCESSING MAGAZINE;Swami A, 1996, SIGNAL PROCESSING;Kolda T, 2003, SIAM JOURNAL ON MATRIX ANALYSIS AND APPLICATIONS;Sidiropoulos N, 2001, IEEE TRANSACTIONS ON INFORMATION THEORY;Bergqvist G, 2011, LINEAR ALGEBRA AND ITS APPLICATIONS;Choi J, 2014, ARXIV (CORNELL UNIVERSITY);Boyd S, 2004, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Boyd S, 2010, NOW PUBLISHERS, INC. EBOOKS;Rendle S, 2010, ;Belouchrani A, 1997, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Harshman R, 1970, ;Petersen K, 2012, ZENODO (CERN EUROPEAN ORGANIZATION FOR NUCLEAR RESEARCH);Hitchcock F, 1927, JOURNAL OF MATHEMATICS AND PHYSICS;Gasca M, 2000, ADVANCES IN COMPUTATIONAL MATHEMATICS;Sidiropoulos N, 2000, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Magnus J, 1979, THE ANNALS OF STATISTICS;Sidiropoulos N, 2000, JOURNAL OF CHEMOMETRICS;Acar E, 2008, IEEE TRANSACTIONS ON KNOWLEDGE AND DATA ENGINEERING;Yeredor A, 2002, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Papalexakis E, 2016, ACM TRANSACTIONS ON INTELLIGENT SYSTEMS AND TECHNOLOGY;Comon P, 2014, IEEE SIGNAL PROCESSING MAGAZINE;Gorman J, 1990, IEEE TRANSACTIONS ON INFORMATION THEORY;Nion D, 2010, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Tomasi G, 2004, CHEMOMETRICS AND INTELLIGENT LABORATORY SYSTEMS;Boor C, 1994, ;Kolda T, 2006, ;Liu X, 2001, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Jiang T, 2004, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Lim L, 2009, JOURNAL OF CHEMOMETRICS;Sidiropoulos N, 2012, IEEE SIGNAL PROCESSING LETTERS;Vorobyov S, 2005, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Nion D, 2009, IEEE TRANSACTIONS ON AUDIO SPEECH AND LANGUAGE PROCESSING;Vervliet N, 2016, ;Vervliet N, 2015, IEEE JOURNAL OF SELECTED TOPICS IN SIGNAL PROCESSING;Domanov I, 2015, SIAM JOURNAL ON MATRIX ANALYSIS AND APPLICATIONS;Huang K, 2016, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Jiang T, 2001, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Harshman R, 1972, ;Ben‐Haim Z, 2009, IEEE SIGNAL PROCESSING LETTERS;Fu X, 2015, IEEE 
TRANSACTIONS ON SIGNAL PROCESSING;Chiantini L, 2015, TRANSACTIONS OF THE AMERICAN MATHEMATICAL SOCIETY;Li Z, 2015, SIAM JOURNAL ON OPTIMIZATION;Guo X, 2012, SIAM JOURNAL ON MATRIX ANALYSIS AND APPLICATIONS;Mohlenkamp M, 2011, LINEAR ALGEBRA AND ITS APPLICATIONS;Qian C, 2016, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Fu X, 2015, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Sidiropoulos N, 2001, IEEE SIGNAL PROCESSING LETTERS;Bresler Y, 2000, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Huang K, 2016, ;Stegeman A, 2012, SIAM JOURNAL ON MATRIX ANALYSIS AND APPLICATIONS;Berge J, 2008, JOURNAL OF CHEMOMETRICS;Vannieuwenhoven N, 2014, IMA JOURNAL OF NUMERICAL ANALYSIS;Qian C, 2016, ;Espig M, 2015, RWTH PUBLICATIONS (RWTH AACHEN);Boyd S, 2011, FOUNDATIONS AND TRENDS® IN MACHINE LEARNING;Nocedal J, 2006, ;Parlar M, 2000, BIRKHÄUSER BOSTON EBOOKS;Strassen V, 1983, LINEAR ALGEBRA AND ITS APPLICATIONS;Tomasi G, 2006, ;Bro R, 1998, JOURNAL OF CHEMOMETRICS;AnandkumarAnimashree, 2015, JOURNAL OF MACHINE LEARNING RESEARCH;Espig M, 2015, ARXIV (CORNELL UNIVERSITY);Savas B, 2010, ARXIV (CORNELL UNIVERSITY)",,,OPENALEX,"Sidiropoulos N, 2017, IEEE TRANSACTIONS ON SIGNAL PROCESSING","Sidiropoulos N, 2017, IEEE TRANSACTIONS ON SIGNAL PROCESSING" +https://openalex.org/W2295598076,10.1145/2939672.2939785,XGBoost,2016,en,article,48425,,,Tianqi Chen;Carlos Guestrin,Tianqi Chen;Carlos Guestrin,"University of Washington, Seattle, WA, USA;University of Washington, Seattle, WA, USA",,"Tree boosting is a highly effective and widely used machine learning method. In this paper, we describe a scalable end-to-end tree boosting system called XGBoost, which is used widely by data scientists to achieve state-of-the-art results on many machine learning challenges. We propose a novel sparsity-aware algorithm for sparse data and weighted quantile sketch for approximate tree learning. More importantly, we provide insights on cache access patterns, data compression and sharding to build a scalable tree boosting system. 
By combining these insights, XGBoost scales beyond billions of examples using far fewer resources than existing systems.",,,785,794,Boosting (machine learning);Scalability;Sketch;Tree (set theory);Decision tree;Gradient boosting;Cache,US,"Breiman L, 2001, MACHINE LEARNING;Friedman J, 2001, THE ANNALS OF STATISTICS;PedregosaFabian, 2011, JOURNAL OF MACHINE LEARNING RESEARCH;Friedman J, 2002, COMPUTATIONAL STATISTICS & DATA ANALYSIS;Friedman J, 2000, THE ANNALS OF STATISTICS;He X, 2014, ;Ridgeway G, 2006, ;Greenwald M, 2001, ;Li P, 2007, ;FanRong-En, 2008, JOURNAL OF MACHINE LEARNING RESEARCH;Ye J, 2009, ;Panda B, 2009, PROCEEDINGS OF THE VLDB ENDOWMENT;Bekkerman R, 2011, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Johnson R, 2013, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Tyree S, 2011, ;Zhang Q, 2007, INTERNATIONAL CONFERENCE ON SCIENTIFIC AND STATISTICAL DATABASE MANAGEMENT",,,OPENALEX,"Chen T, 2016, ","Chen T, 2016, " +https://openalex.org/W2963784900,10.1038/s41524-017-0056-5,Machine learning in materials informatics: recent applications and prospects,2017,en,article,1760,NPJ COMPUTATIONAL MATERIALS,npj Computational Materials,Rampi Ramprasad;Rohit Batra;Ghanshyam Pilania;Arun Mannodi‐Kanakkithodi;Chiho Kim,Rampi Ramprasad;Rohit Batra;Ghanshyam Pilania;Arun Mannodi-Kanakkithodi;Chiho Kim,"Department of Materials Science & Engineering and Institute of Materials Science, University of Connecticut, 97 North Eagleville Rd., Unit 3136, Storrs, CT, 06269-3136, USA;Department of Materials Science & Engineering and Institute of Materials Science, University of Connecticut, 97 North Eagleville Rd., Unit 3136, Storrs, CT, 06269-3136, USA;Fritz-Haber-Institut der Max-Planck-Gesellschaft, Faradayweg 4-6, 14195, Berlin, Germany;Materials Science and Technology Division, Los Alamos National Laboratory, Los Alamos, NM, 87545, USA;Center for Nanoscale Materials, Lamont National Laboratory, 9700 S. 
Cass Ave., Lemont, IL, 60439, USA;Department of Materials Science & Engineering and Institute of Materials Science, University of Connecticut, 97 North Eagleville Rd., Unit 3136, Storrs, CT, 06269-3136, USA;Department of Materials Science & Engineering and Institute of Materials Science, University of Connecticut, 97 North Eagleville Rd., Unit 3136, Storrs, CT, 06269-3136, USA","Rampi Ramprasad (corresponding author), Department of Materials Science & Engineering and Institute of Materials Science, University of Connecticut, 97 North Eagleville Rd., Unit 3136, Storrs, CT, 06269-3136, USA","Abstract Propelled partly by the Materials Genome Initiative, and partly by the algorithmic developments and the resounding successes of data-driven efforts in other domains, informatics strategies are beginning to take shape within materials science. These approaches lead to surrogate machine learning models that enable rapid predictions based purely on past data rather than by direct experimentation or by computations/simulations in which fundamental equations are explicitly solved. Data-centric informatics methods are becoming useful to determine material properties that are hard to measure or compute using traditional methods—due to the cost, time or effort involved—but for which reliable data either already exists or can be generated for at least a subset of the critical cases. Predictions are typically interpolative, involving fingerprinting a material numerically first, and then following a mapping (established via a learning algorithm) between the fingerprint and the property of interest. Fingerprints, also referred to as “descriptors”, may be of many types and scales, as dictated by the application domain and needs. Predictions may also be extrapolative—extending into new materials spaces—provided prediction uncertainties are properly taken into account. 
This article attempts to provide an overview of some of the recent successful data-driven “materials informatics” strategies undertaken in the last decade, with particular emphasis on the fingerprint or descriptor choices. The review also identifies some challenges the community is facing and those that should be overcome in the near future.",3,1,,,Computer science;Informatics;Data science;Domain (mathematical analysis);Fingerprint (computing);Property (philosophy);Artificial intelligence;Machine learning;Materials informatics;Computation;Data mining;Management science;Health informatics;Engineering informatics;Algorithm;Mathematics;Engineering,US;DE,"Jordan M, 2015, SCIENCE;Behler J, 2007, PHYSICAL REVIEW LETTERS;Feynman R, 1939, PHYSICAL REVIEW;Bartók A, 2010, PHYSICAL REVIEW LETTERS;Schmidt M, 2009, SCIENCE;Bartók A, 2013, PHYSICAL REVIEW B;Sánchez J, 1984, PHYSICA A STATISTICAL MECHANICS AND ITS APPLICATIONS;, 1991, CHOICE REVIEWS ONLINE;Ercolessi F, 1994, EUROPHYSICS LETTERS (EPL);Thompson A, 2014, JOURNAL OF COMPUTATIONAL PHYSICS;Forrester A, 2007, PROCEEDINGS OF THE ROYAL SOCIETY A MATHEMATICAL PHYSICAL AND ENGINEERING SCIENCES;Walle A, 2002, JOURNAL OF PHASE EQUILIBRIA AND DIFFUSION;Pilania G, 2013, SCIENTIFIC REPORTS;Snyder J, 2012, PHYSICAL REVIEW LETTERS;Li Z, 2015, PHYSICAL REVIEW LETTERS;Bartók A, 2015, INTERNATIONAL JOURNAL OF QUANTUM CHEMISTRY;Hautier G, 2010, CHEMISTRY OF MATERIALS;Fontaine D, 1994, SOLID STATE PHYSICS;, 2008, CHOICE REVIEWS ONLINE;Seko A, 2015, PHYSICAL REVIEW LETTERS;Behler J, 2014, JOURNAL OF PHYSICS CONDENSED MATTER;Faber F, 2016, PHYSICAL REVIEW LETTERS;Rupp M, 2015, INTERNATIONAL JOURNAL OF QUANTUM CHEMISTRY;Laks D, 1992, PHYSICAL REVIEW. B, CONDENSED MATTER;Lee J, 2016, PHYSICAL REVIEW. B./PHYSICAL REVIEW. 
B;Szlachta W, 2014, PHYSICAL REVIEW B;Fernández M, 2014, THE JOURNAL OF PHYSICAL CHEMISTRY LETTERS;Behler J, 2008, PHYSICAL REVIEW LETTERS;Nelson L, 2013, PHYSICAL REVIEW B;Gupta A, 2015, ACTA MATERIALIA;Petch N, 1986, ACTA METALLURGICA;Aryal S, 2014, PHYSICA STATUS SOLIDI (B);Ryzhov I, 2012, OPERATIONS RESEARCH;Huan T, 2015, PHYSICAL REVIEW B;Dey P, 2013, COMPUTATIONAL MATERIALS SCIENCE;Seko A, 2009, PHYSICAL REVIEW B;Snyder J, 2013, THE JOURNAL OF CHEMICAL PHYSICS;Lorenzini R, 2013, POLYMER;Theodoridis S, 2015, MACHINE LEARNING;Zunger A, 1994, NATO ASI SERIES. SERIES B : PHYSICS;Hume-Rothery W, 1947, ;Chatterjee S, 2007, MATERIALS SCIENCE AND TECHNOLOGY;Dudiy S, 2006, PHYSICAL REVIEW LETTERS;Mueller T, 2010, PHYSICAL REVIEW B;Cockayne E, 2010, PHYSICAL REVIEW B;Kalidindi S, 2012, ISRN MATERIALS SCIENCE;Adamson G, 1974, NATURE;Sanders J, 2015, ACS CENTRAL SCIENCE;Adamson G, 1974, JOURNAL OF CHEMICAL DOCUMENTATION;Powell W, 2011, WILEY ENCYCLOPEDIA OF OPERATIONS RESEARCH AND MANAGEMENT SCIENCE;Hall E, 1951, PROCEEDINGS OF THE PHYSICAL SOCIETY SECTION B;Rupp M, 2012, PHYSICAL REVIEW LETTERS;Ward L, 2016, NPJ COMPUTATIONAL MATERIALS;Ghiringhelli L, 2015, PHYSICAL REVIEW LETTERS;De S, 2016, PHYSICAL CHEMISTRY CHEMICAL PHYSICS;Xue D, 2016, NATURE COMMUNICATIONS;Meredig B, 2014, PHYSICAL REVIEW B;Micchelli C, 2004, NEURAL COMPUTATION;Mannodi‐Kanakkithodi A, 2016, SCIENTIFIC REPORTS;Botu V, 2014, INTERNATIONAL JOURNAL OF QUANTUM CHEMISTRY;Oliynyk A, 2016, CHEMISTRY OF MATERIALS;Huan T, 2016, PROGRESS IN MATERIALS SCIENCE;Álvarez M, 2012, FOUNDATIONS AND TRENDS® IN MACHINE LEARNING;Panchal J, 2012, COMPUTER-AIDED DESIGN;Sharma V, 2014, NATURE COMMUNICATIONS;Hong W, 2015, THE JOURNAL OF PHYSICAL CHEMISTRY C;Seko A, 2014, PHYSICAL REVIEW B;Kusne A, 2014, SCIENTIFIC REPORTS;Emery A, 2016, CHEMISTRY OF MATERIALS;Kim C, 2016, CHEMISTRY OF MATERIALS;Li Z, 2016, CATALYSIS TODAY;Ward L, 2016, CURRENT OPINION IN SOLID STATE AND MATERIALS SCIENCE;Huan T, 2016, SCIENTIFIC 
DATA;Pilania G, 2016, FRONTIERS IN MATERIALS;Kim C, 2016, THE JOURNAL OF PHYSICAL CHEMISTRY C;Botu V, 2015, PHYSICAL REVIEW B;Mannodi‐Kanakkithodi A, 2016, ADVANCED MATERIALS;Lookman T, 2015, SPRINGER SERIES IN MATERIALS SCIENCE;Perdikaris P, 2015, PROCEEDINGS OF THE ROYAL SOCIETY A MATHEMATICAL PHYSICAL AND ENGINEERING SCIENCES;Powell W, 2012, WILEY SERIES IN PROBABILITY AND STATISTICS;Deml A, 2016, PHYSICAL REVIEW. B./PHYSICAL REVIEW. B;Hattrick‐Simpers J, 2016, APL MATERIALS;Ashton M, 2016, PHYSICAL REVIEW. B./PHYSICAL REVIEW. B;Kusne A, 2015, NANOTECHNOLOGY;Pilania G, 2015, PHYSICAL REVIEW B;Liu C, 2012, THE JOURNAL OF PHYSICAL CHEMISTRY A;Kalidindi S, 2016, MRS BULLETIN;Brough D, 2016, CURRENT OPINION IN SOLID STATE AND MATERIALS SCIENCE;Pilania G, 2017, CHEMISTRY OF MATERIALS;Kalidindi S, 2015, NANOTECHNOLOGY;Bunn J, 2016, JOM;Bialon A, 2016, CHEMISTRY OF MATERIALS;Srinivasan S, 2004, UNIVERSITY PRESS EBOOKS;Snyder J, 2015, INTERNATIONAL JOURNAL OF QUANTUM CHEMISTRY;Fancher C, 2016, SCIENTIFIC REPORTS;Bianchini F, 2016, MODELLING AND SIMULATION IN MATERIALS SCIENCE AND ENGINEERING;Felsenstein J, 2008, SPRINGER SERIES IN STATISTICS;Mueller T, 2016, REVIEWS IN COMPUTATIONAL CHEMISTRY;Chmiela S, 2017, SCIENCE ADVANCES;Ercolessi F, 1994, ;Krevelen D, 2009, ELSEVIER EBOOKS;Deringer V, 2017, PHYSICAL REVIEW. B./PHYSICAL REVIEW. B;Botu V, 2016, THE JOURNAL OF PHYSICAL CHEMISTRY C;Pilania G, 2016, COMPUTATIONAL MATERIALS SCIENCE;Green M, 2017, APPLIED PHYSICS REVIEWS;Jong M, 2016, SCIENTIFIC REPORTS;Glielmo A, 2017, PHYSICAL REVIEW. B./PHYSICAL REVIEW. B;Xue D, 2016, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Legrain F, 2017, CHEMISTRY OF MATERIALS;Medasani B, 2016, NPJ COMPUTATIONAL MATERIALS;Ghiringhelli L, 2017, NEW JOURNAL OF PHYSICS;Lookman T, 2016, CURRENT OPINION IN SOLID STATE AND MATERIALS SCIENCE;Goldsmith B, 2017, NEW JOURNAL OF PHYSICS;Brough D, 2017, INTEGRATING MATERIALS AND MANUFACTURING INNOVATION;Theodoridis S, 2015, MACHINE LEARNING;L. 
G, 2017, MAX PLANCK DIGITAL LIBRARY;Mannodi‐Kanakkithodi A, 2017, CHEMISTRY OF MATERIALS;Jindal S, 2017, THE JOURNAL OF CHEMICAL PHYSICS;Botu V, 2017, COMPUTATIONAL MATERIALS SCIENCE;Brough D, 2017, INTEGRATING MATERIALS AND MANUFACTURING INNOVATION;Treich G, 2017, IEEE TRANSACTIONS ON DIELECTRICS AND ELECTRICAL INSULATION;J.W.C., 1961, JOURNAL OF THE LESS COMMON METALS;Gopnik A, 2017, SCIENTIFIC AMERICAN",,,OPENALEX,"Ramprasad R, 2017, NPJ COMPUTATIONAL MATERIALS","Ramprasad R, 2017, NPJ COMPUTATIONAL MATERIALS" +https://openalex.org/W2284729062,10.1177/1745691617693393,Choosing Prediction Over Explanation in Psychology: Lessons From Machine Learning,2017,en,review,1997,PERSPECTIVES ON PSYCHOLOGICAL SCIENCE,Perspectives on Psychological Science,Tal Yarkoni;Jacob Westfall,Tal Yarkoni;Jacob Westfall,University of Texas at Austin;University of Texas at Austin,"Tal Yarkoni (corresponding author), University of Texas at Austin","Psychology has historically been concerned, first and foremost, with explaining the causal mechanisms that give rise to behavior. Randomized, tightly controlled experiments are enshrined as the gold standard of psychological research, and there are endless investigations of the various mediating and moderating variables that govern various behaviors. We argue that psychology's near-total focus on explaining the causes of behavior has led much of the field to be populated by research programs that provide intricate theories of psychological mechanism but that have little (or unknown) ability to predict future behaviors with any appreciable accuracy. We propose that principles and techniques from the field of machine learning can help psychology become a more predictive science. We review some of the fundamental concepts and tools of machine learning and point out examples where these concepts have been used to conduct interesting and important psychological research that focuses on predictive research questions. 
We suggest that an increased focus on prediction, rather than explanation, can ultimately lead us to greater understanding of behavior.",12,6,1100,1122,Psychology;Field (mathematics);Psychological research;Mechanism (biology);Focus (optics);Psychological science;Cognitive psychology;Basic science;Experimental psychology;Contrast (vision);Point (geometry);Cognitive science;Epistemology;Social psychology;Artificial intelligence;Cognition;Computer science,US,"Cohen J, 1992, PSYCHOLOGICAL BULLETIN;Friedman J, 2001, THE ANNALS OF STATISTICS;Hastie T, 2001, SPRINGER SERIES IN STATISTICS;Schmidhuber J, 2014, NEURAL NETWORKS;Zeiler M, 2014, LECTURE NOTES IN COMPUTER SCIENCE;Gelman A, 2006, CAMBRIDGE UNIVERSITY PRESS EBOOKS;C. P, 2001, TECHNOMETRICS;Ekman P, 1992, COGNITION & EMOTION;Aarts A, 2015, SCIENCE;Essen D, 2013, NEUROIMAGE;Biswal B, 2010, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Brysbaert M, 2009, BEHAVIOR RESEARCH METHODS;Bond R, 2012, NATURE;Dawes R, 1979, AMERICAN PSYCHOLOGIST;Vrieze S, 2012, PSYCHOLOGICAL METHODS;Pennebaker J, 1999, JOURNAL OF PERSONALITY AND SOCIAL PSYCHOLOGY;Shao J, 1993, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Ebstein R, 1996, NATURE GENETICS;Browne M, 2000, JOURNAL OF MATHEMATICAL PSYCHOLOGY;Klein R, 2014, SOCIAL PSYCHOLOGY;Ripke S, 2012, MOLECULAR PSYCHIATRY;Brewer J, 1998, SCIENCE;Meehl P, 1990, PSYCHOLOGICAL REPORTS;Du S, 2014, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Wainer H, 1976, PSYCHOLOGICAL BULLETIN;Gosling S, 2011, CYBERPSYCHOLOGY BEHAVIOR AND SOCIAL NETWORKING;Yarkoni T, 2010, JOURNAL OF RESEARCH IN PERSONALITY;Baayen R, 2011, PSYCHOLOGICAL REVIEW;Yarkoni T, 2009, PERSPECTIVES ON PSYCHOLOGICAL SCIENCE;Whelan R, 2014, NATURE;McNeish D, 2015, MULTIVARIATE BEHAVIORAL RESEARCH;Fast L, 2008, JOURNAL OF PERSONALITY AND SOCIAL PSYCHOLOGY;Browne M, 2002, PSYCHOLOGICAL METHODS;Fleet D, 2014, LECTURE NOTES IN COMPUTER SCIENCE;Munafò M, 2011, TRENDS IN COGNITIVE SCIENCES;Horn J, 2013, BRAIN IMAGING AND BEHAVIOR;Gelman 
A, 2009, AMERICAN SCIENTIST;Kurtz A, 1948, PERSONNEL PSYCHOLOGY;Wu S, 2007, THE CANADIAN JOURNAL OF CHEMICAL ENGINEERING;Mosier C, 1951, EDUCATIONAL AND PSYCHOLOGICAL MEASUREMENT;Hagerty M, 1991, PSYCHOMETRIKA;Strube M, 2006, BEHAVIOR RESEARCH METHODS;Davis‐Stober C, 2013, BEHAVIOR RESEARCH METHODS;Shear B, 2013, EDUCATIONAL AND PSYCHOLOGICAL MEASUREMENT;Jonas K, 2015, COMPREHENSIVE RESULTS IN SOCIAL PSYCHOLOGY;Yarkoni T, 2015, PEERJ;Wherry R, 1975, PERSONNEL PSYCHOLOGY;Schmitt N, 1977, PSYCHOLOGICAL BULLETIN;Wherry R, 1951, EDUCATIONAL AND PSYCHOLOGICAL MEASUREMENT;, 2017, ANNUAL REVIEW OF PSYCHOLOGY;Deng J, 2009, 2009 IEEE CONFERENCE ON COMPUTER VISION AND PATTERN RECOGNITION;Tibshirani R, 1996, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Cohen P, 2014, PSYCHOLOGY PRESS EBOOKS;Wolpert D, 1997, IEEE TRANSACTIONS ON EVOLUTIONARY COMPUTATION;Burnham K, 2004, SOCIOLOGICAL METHODS & RESEARCH;Simmons J, 2011, PSYCHOLOGICAL SCIENCE;Lesch K, 1996, SCIENCE;Breiman L, 2001, STATISTICAL SCIENCE;Tibshirani R, 2011, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Domingos P, 2012, COMMUNICATIONS OF THE ACM;Kosiński M, 2013, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Balota D, 2007, BEHAVIOR RESEARCH METHODS;Poropat A, 2009, PSYCHOLOGICAL BULLETIN;John L, 2012, PSYCHOLOGICAL SCIENCE;Wood A, 2014, NATURE GENETICS;Varma S, 2006, BMC BIOINFORMATICS;Cawley G, 2010, ;Cohen J, 1962, JOURNAL OF ABNORMAL & SOCIAL PSYCHOLOGY;Rentfrow P, 2003, JOURNAL OF PERSONALITY AND SOCIAL PSYCHOLOGY;Dwan K, 2008, PLOS ONE;Ioannidis J, 2008, EPIDEMIOLOGY;Breiman L, 2001, ;Back M, 2010, PSYCHOLOGICAL SCIENCE;Wagenmakers E, 2012, PERSPECTIVES ON PSYCHOLOGICAL SCIENCE;Krstajić D, 2014, JOURNAL OF CHEMINFORMATICS;New B, 2004, BEHAVIOR RESEARCH METHODS, INSTRUMENTS, & COMPUTERS;Westfall J, 2014, JOURNAL OF EXPERIMENTAL PSYCHOLOGY GENERAL;Bakker M, 2012, PERSPECTIVES ON PSYCHOLOGICAL SCIENCE;Sedlmeier P, 1989, PSYCHOLOGICAL BULLETIN;Nosek B, 
2014, SOCIAL PSYCHOLOGY;Yarkoni T, 2008, PSYCHONOMIC BULLETIN & REVIEW;Miller G, 2012, PERSPECTIVES ON PSYCHOLOGICAL SCIENCE;Ferguson C, 2012, PERSPECTIVES ON PSYCHOLOGICAL SCIENCE;Ioannidis J, 2012, PERSPECTIVES ON PSYCHOLOGICAL SCIENCE;Westfall J, 2016, PLOS ONE;Judd C, 2016, ANNUAL REVIEW OF PSYCHOLOGY;Vazire S, 2004, JOURNAL OF PERSONALITY AND SOCIAL PSYCHOLOGY;Rowe M, 2008, DEVELOPMENTAL SCIENCE;Perry C, 2010, COGNITIVE PSYCHOLOGY;Holbert R, 2002, HUMAN COMMUNICATION RESEARCH;Apté C, 1997, FUTURE GENERATION COMPUTER SYSTEMS;Vitaro F, 1999, ADDICTION;Greenwald A, 2012, PERSPECTIVES ON PSYCHOLOGICAL SCIENCE;Yap M, 2011, JOURNAL OF EXPERIMENTAL PSYCHOLOGY HUMAN PERCEPTION & PERFORMANCE;Bunea F, 2010, NEUROIMAGE;Rissman J, 2010, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Rawlings D, 1997, PSYCHOLOGY OF MUSIC;Xu K, 2014, JOURNAL OF OPEN PSYCHOLOGY DATA;Yarkoni T, 2012, CURRENT DIRECTIONS IN PSYCHOLOGICAL SCIENCE;Bentley R, 2014, BEHAVIORAL AND BRAIN SCIENCES;Breiman L, 2001, MACHINE LEARNING;LeCun Y, 2015, NATURE;Hastie T, 2009, SPRINGER SERIES IN STATISTICS;Pinheiro J, 2000, STATISCTICS AND COMPUTING/STATISTICS AND COMPUTING;Cohen J, 1992, PSYCHOLOGICAL BULLETIN;Wood S, 2006, ;Cohen ⁄, 2013, ;Shmueli G, 2010, STATISTICAL SCIENCE;Nosek B, 2012, PERSPECTIVES ON PSYCHOLOGICAL SCIENCE;Ebersole C, 2016, JOURNAL OF EXPERIMENTAL SOCIAL PSYCHOLOGY;Pennebaker J, 1999, JOURNAL OF PERSONALITY AND SOCIAL PSYCHOLOGY;Smıth D, 2016, MOLECULAR PSYCHIATRY;Browne M, 2002, PSYCHOLOGICAL METHODS;Loftus E, 1996, ;Ebersole C, 2016, ;Smıth D, 2016, MOLECULAR PSYCHIATRY",,,OPENALEX,"Yarkoni T, 2017, PERSPECTIVES ON PSYCHOLOGICAL SCIENCE","Yarkoni T, 2017, PERSPECTIVES ON PSYCHOLOGICAL SCIENCE" +https://openalex.org/W2911964244,10.1023/a:1010933404324,Random Forests,2001,en,article,125821,MACHINE LEARNING,Machine Learning,Leo Breiman,Leo Breiman,"Statistics Department, University of California, Berkeley, CA, 94720","Leo Breiman (corresponding author), Statistics Department, 
University of California, Berkeley, CA, 94720",,45,1,5,32,Random forest;Mathematics;AdaBoost;Statistics;Tree (set theory);Generalization;Support vector machine;Generalization error;Measure (data warehouse);Artificial intelligence;Pattern recognition (psychology);Computer science;Artificial neural network;Data mining;Combinatorics,US,"Breiman L, 1996, MACHINE LEARNING;Freund Y, 1996, ;Ho T, 1998, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Dietterich T, 2000, MACHINE LEARNING;Bauer E, 1999, MACHINE LEARNING;Bartlett P, 1998, THE ANNALS OF STATISTICS;Amit Y, 1997, NEURAL COMPUTATION;Breiman L, 1998, THE ANNALS OF STATISTICS;Acuña A, 2012, ;Breiman L, 2000, MACHINE LEARNING;Grove A, 1998, ;Wolpert D, 1999, MACHINE LEARNING;Kleinberg E, 2000, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Wolpert D, 1996, REPEC: RESEARCH PAPERS IN ECONOMICS",,,OPENALEX,"Breiman L, 2001, MACHINE LEARNING","Breiman L, 2001, MACHINE LEARNING" +https://openalex.org/W1985987493,10.1109/sp.2010.25,Outside the Closed World: On Using Machine Learning for Network Intrusion Detection,2010,en,article,1860,,,Robin Sommer;Vern Paxson,Robin Sommer;Vern Paxson,"Lawrence Berkeley National Laboratory, International Computer Science Institute, USA;Int. Comput. Sci. Inst., Lawrence Berkeley Nat. Lab., Berkeley, CA, USA;International Computer Science Institute, University of California, Berkeley, USA;Int. Comput. Sci. Inst., Univ. of California, Berkeley, CA, USA",,"In network intrusion detection research, one popular strategy for finding attacks is monitoring a network's activity for anomalies: deviations from profiles of normality previously learned from benign traffic, typically identified using tools borrowed from the machine learning community. 
However, despite extensive academic research one finds a striking gap in terms of actual deployments of such systems: compared with other intrusion detection approaches, machine learning is rarely employed in operational ""real world"" settings. We examine the differences between the network intrusion detection problem and other areas where machine learning regularly finds much more success. Our main claim is that the task of finding attacks is fundamentally different from these other applications, making it significantly harder for the intrusion detection community to employ machine learning effectively. We support this claim by identifying challenges particular to network intrusion detection, and provide a set of guidelines meant to strengthen future research on anomaly detection.",,,305,316,Intrusion detection system;Computer science;Machine learning;Artificial intelligence;Anomaly detection;Anomaly-based intrusion detection system;Task (project management);Intrusion;Set (abstract data type);Intrusion prevention system;Learning network;Data mining;Engineering,US,"Witten I, 2011, ELSEVIER EBOOKS;Chandola V, 2009, ACM COMPUTING SURVEYS;Linden G, 2003, IEEE INTERNET COMPUTING;Denning D, 1987, IEEE TRANSACTIONS ON SOFTWARE ENGINEERING;Paxson V, 1999, COMPUTER NETWORKS;Narayanan A, 2008, PROCEEDINGS - IEEE SYMPOSIUM ON SECURITY AND PRIVACY/PROCEEDINGS OF THE ... 
IEEE SYMPOSIUM ON SECURITY AND PRIVACY;Smith R, 2007, PROCEEDINGS OF THE INTERNATIONAL CONFERENCE ON DOCUMENT ANALYSIS AND RECOGNITION;Duda R, 2000, WILEY-INTERSCIENCE EBOOKS;Halevy A, 2009, IEEE INTELLIGENT SYSTEMS;McHugh J, 2000, ACM TRANSACTIONS ON INFORMATION AND SYSTEM SECURITY;Och F, 2004, COMPUTATIONAL LINGUISTICS;Lippmann R, 2000, COMPUTER NETWORKS;Gill P, 2007, ;Barreno M, 2006, ;Floyd S, 2001, IEEE/ACM TRANSACTIONS ON NETWORKING;Gu G, 2007, ;Ptacek T, 1998, DEFENSE TECHNICAL INFORMATION CENTER (DTIC);Kruegel C, 2003, ;Allauzen C, 2007, LECTURE NOTES IN COMPUTER SCIENCE;Lee W, 2002, ;Mahoney M, 2003, LECTURE NOTES IN COMPUTER SCIENCE;Ko C, 2002, ;Axelsson S, 1999, ;Provos N, 2007, ;Forrest S, 1999, ;Nazir A, 2008, ;Sinclair C, 2003, ;Pang R, 2006, ACM SIGCOMM COMPUTER COMMUNICATION REVIEW;Hu W, 2003, INTERNATIONAL CONFERENCE ON MACHINE LEARNING AND APPLICATIONS;Anderson D, 2007, ESCHOLARSHIP (CALIFORNIA DIGITAL LIBRARY);Fogla P, 2006, ;Wright C, 2007, ;Feldmann A, 1998, ;Javitz H, 1994, ;Anagnostakis K, 2005, ;Paxson V, 2004, ;Ellis D, 2004, ;Tan K, 2005, ;Cormack G, 2007, ACM TRANSACTIONS ON INFORMATION SYSTEMS;Gates C, 2006, ;Coull S, 2007, ;Lippmann R, 1999, RECENT ADVANCES IN INTRUSION DETECTION;Vincent L, 2007, PROCEEDINGS OF THE INTERNATIONAL CONFERENCE ON DOCUMENT ANALYSIS AND RECOGNITION;Kumar A, 2005, ;Xu J, 2001, ;Yen T, 2009, LECTURE NOTES IN COMPUTER SCIENCE;Beusekom J, 2008, ;Killourhy K, 2007, ;Sommer R, 2005, MEDIATUM – THE MEDIA AND PUBLICATIONS REPOSITORY OF THE TECHNICAL UNIVERSITY MUNICH (TECHNICAL UNIVERSITY MUNICH);Witten I, 2008, ;Abe S, 2001, ;Willinger W, 1997, IEEE/ACM TRANSACTIONS ON NETWORKING;Bennett J, 2007, ;Musa T, 2021, LECTURE NOTES IN NETWORKS AND SYSTEMS;Zhang Z, 2001, ;Kruegel C, 2003, ;Feldmann A, 1998, ACM SIGCOMM COMPUTER COMMUNICATION REVIEW;Kumar A, 2005, ;Xu J, 2001, ;Mittal P, 2009, ",,,OPENALEX,"Sommer R, 2010, ","Sommer R, 2010, " +https://openalex.org/W2949767632,10.1109/access.2019.2923707,Effective Heart 
Disease Prediction Using Hybrid Machine Learning Techniques,2019,en,article,1869,IEEE ACCESS,IEEE Access,Senthilkumar Mohan;Chandrasegar Thirumalai;Gautam Srivastava,Senthilkumar Mohan;Chandrasegar Thirumalai;Gautam Srivastava,"School of Information Technology and Engineering, VIT University, Vellore, India;School of Information Technology and Engineering, VIT University, Vellore, India;Department of Mathematics and Computer Science, Brandon University, Brandon, MB, Canada",,"Heart disease is one of the most significant causes of mortality in the world today. Prediction of cardiovascular disease is a critical challenge in the area of clinical data analysis. Machine learning (ML) has been shown to be effective in assisting in making decisions and predictions from the large quantity of data produced by the healthcare industry. We have also seen ML techniques being used in recent developments in different areas of the Internet of Things (IoT). Various studies give only a glimpse into predicting heart disease with ML techniques. In this paper, we propose a novel method that aims at finding significant features by applying machine learning techniques resulting in improving the accuracy in the prediction of cardiovascular disease. The prediction model is introduced with different combinations of features and several known classification techniques. 
We produce an enhanced performance level with an accuracy level of 88.7% through the prediction model for heart disease with the hybrid random forest with a linear model (HRFLM).",7,,81542,81554,Computer science;Machine learning;Random forest;Heart disease;Artificial intelligence;Predictive modelling;Disease;Internet of Things;Support vector machine;The Internet;Data mining;Medicine,IN;CA,"Li H, 2018, IEEE NETWORK;Daş R, 2008, EXPERT SYSTEMS WITH APPLICATIONS;Liberatore M, 2007, EUROPEAN JOURNAL OF OPERATIONAL RESEARCH;Amin M, 2018, TELEMATICS AND INFORMATICS;Samuel O, 2016, EXPERT SYSTEMS WITH APPLICATIONS;Meidan Y, 2017, ;Nahar J, 2012, EXPERT SYSTEMS WITH APPLICATIONS;Anooj P, 2011, JOURNAL OF KING SAUD UNIVERSITY - COMPUTER AND INFORMATION SCIENCES;Gavhane A, 2018, 2018 SECOND INTERNATIONAL CONFERENCE ON ELECTRONICS, COMMUNICATION AND AEROSPACE TECHNOLOGY (ICECA);Nahar J, 2012, EXPERT SYSTEMS WITH APPLICATIONS;Thomas J, 2016, ;Uyar K, 2017, PROCEDIA COMPUTER SCIENCE;Wu J, 2018, IEEE TRANSACTIONS ON NETWORK AND SERVICE MANAGEMENT;Vivekanandan T, 2017, COMPUTERS IN BIOLOGY AND MEDICINE;Wu J, 2017, IEEE TRANSACTIONS ON EMERGING TOPICS IN COMPUTING;Li G, 2018, IEEE TRANSACTIONS ON INDUSTRIAL INFORMATICS;Shah S, 2017, PHYSICA A STATISTICAL MECHANICS AND ITS APPLICATIONS;Gandhi M, 2015, ;Wu J, 2016, IEEE ACCESS;Shao Y, 2013, APPLIED SOFT COMPUTING;Alkeshuosh A, 2017, ;Baccour L, 2018, EXPERT SYSTEMS WITH APPLICATIONS;Rajamhoana S, 2018, ;Krishnaiah V, 2016, INTERNATIONAL JOURNAL OF COMPUTER APPLICATIONS;Sowmiya C, 2017, ;Esfahani H, 2017, ;Cheng C, 2017, ;Wu J, 2018, IEEE INTERNET OF THINGS JOURNAL;Sonawane J, 2014, ;Abdullah A, 2012, ;Banu N, 2016, ;Zhang W, 2017, COMPUTING IN CARDIOLOGY;Ravish D, 2014, ;Durairaj M, 2015, INTERNATIONAL JOURNAL OF SCIENTIFIC AND TECHNOLOGY RESEARCH;Radhimeenakshi S, 2016, INTERNATIONAL CONFERENCE ON COMPUTING FOR SUSTAINABLE GLOBAL DEVELOPMENT;Dammak F, 2015, ;Rathnayakc B, 2018, ;Sabahi F, 2018, JOURNAL OF BIOMEDICAL 
INFORMATICS;Kelwade J, 2016, ;Nagar P, 2017, IJARCCE;Kumar P, 2016, INTERNATIONAL JOURNAL OF BIO-SCIENCE AND BIO-TECHNOLOGY;Zaman S, 2017, ;Mahboob T, 2017, ;Rao S, 2017, INDIAN HEART JOURNAL;Tarle B, 2017, ;Tran V, 2017, 2017 INTERNATIONAL CONFERENCE ON ELECTRICAL AND COMPUTING TECHNOLOGIES AND APPLICATIONS (ICECTA)",,,OPENALEX,"Mohan S, 2019, IEEE ACCESS","Mohan S, 2019, IEEE ACCESS" +https://openalex.org/W2936573766,10.1186/s12874-019-0681-4,Machine learning in medicine: a practical introduction,2019,en,article,1272,BMC MEDICAL RESEARCH METHODOLOGY,BMC Medical Research Methodology,Jenni A. M. Sidey-Gibbons;Chris Sidey‐Gibbons,Jenni A. M. Sidey-Gibbons;Chris J. Sidey-Gibbons,"Department of Engineering, University of Cambridge, Trumpington Street, Cambridge, CB2 1PZ, UK;Department of Surgery, Brigham and Women's Hospital, 75 Francis Street, Boston, 01225, Massachusetts, USA. cgibbons2@bwh.harvard.edu;Department of Surgery, Harvard Medical School, 25 Shattuck Street, Boston, 01225, Massachusetts, USA. cgibbons2@bwh.harvard.edu;University of Cambridge Psychometrics Centre, Trumpington Street, Cambridge, CB2 1AG, UK. cgibbons2@bwh.harvard.edu;Department of Surgery, Harvard Medical School, 25 Shattuck Street, Boston, 01225, Massachusetts, USA","Chris J. Sidey-Gibbons (corresponding author), Department of Surgery, Brigham and Women's Hospital, 75 Francis Street, Boston, 01225, Massachusetts, USA. cgibbons2@bwh.harvard.edu; Department of Surgery, Harvard Medical School, 25 Shattuck Street, Boston, 01225, Massachusetts, USA. cgibbons2@bwh.harvard.edu; University of Cambridge Psychometrics Centre, Trumpington Street, Cambridge, CB2 1AG, UK. cgibbons2@bwh.harvard.edu; Department of Surgery, Harvard Medical School, 25 Shattuck Street, Boston, 01225, Massachusetts, USA","BACKGROUND: Following visible successes on a wide range of predictive tasks, machine learning techniques are attracting substantial interest from medical researchers and clinicians. 
We address the need for capacity development in this area by providing a conceptual introduction to machine learning alongside a practical guide to developing and evaluating predictive algorithms using freely-available open source software and public domain data. METHODS: We demonstrate the use of machine learning techniques by developing three predictive models for cancer diagnosis using descriptions of nuclei sampled from breast masses. These algorithms include regularized General Linear Model regression (GLMs), Support Vector Machines (SVMs) with a radial basis function kernel, and single-layer Artificial Neural Networks. The publicly-available dataset describing the breast mass samples (N=683) was randomly split into evaluation (n=456) and validation (n=227) samples. We trained algorithms on data from the evaluation sample before they were used to predict the diagnostic outcome in the validation dataset. We compared the predictions made on the validation datasets with the real-world diagnostic decisions to calculate the accuracy, sensitivity, and specificity of the three models. We explored the use of averaging and voting ensembles to improve predictive performance. We provide a step-by-step guide to developing algorithms using the open-source R statistical programming environment. RESULTS: The trained algorithms were able to classify cell nuclei with high accuracy (.94 -.96), sensitivity (.97 -.99), and specificity (.85 -.94). Maximum accuracy (.96) and area under the curve (.97) was achieved using the SVM algorithm. Prediction performance increased marginally (accuracy =.97, sensitivity =.99, specificity =.95) when algorithms were arranged into a voting ensemble. CONCLUSIONS: We use a straightforward example to demonstrate the theory and practice of machine learning for clinicians and medical researchers. 
The principals which we demonstrate here can be readily applied to other complex tasks including natural language processing and image recognition.",19,1,64,64,Machine learning;Artificial intelligence;Computer science;Support vector machine;Artificial neural network;Cross-validation;Sensitivity (control systems);Data mining;Algorithm,GB;US,"Krizhevsky A, 2017, COMMUNICATIONS OF THE ACM;Bland J, 1986, THE LANCET;Cortes C, 1995, MACHINE LEARNING;Maaten L, 2008, JOURNAL OF MACHINE LEARNING RESEARCH;Cortes C, 1995, MACHINE LEARNING;Blei D, 2003, JOURNAL OF MACHINE LEARNING RESEARCH;Hanley J, 1982, RADIOLOGY;Zou H, 2005, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Ribeiro M, 2016, ;Esteva A, 2017, NATURE;Jordan M, 2015, SCIENCE;Kosiński M, 2013, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Lazer D, 2014, SCIENCE;Hecht-Nielsen, 1989, ;Beam A, 2018, JAMA;Dahl G, 2013, ;Feinerer I, 2008, JOURNAL OF STATISTICAL SOFTWARE;Wolberg W, 1990, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Efron B, 2016, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Loo M, 2014, ;Bedi G, 2015, SCHIZOPHRENIA;Özkale M, 2016, ;Friedman C, 2010, SCIENCE TRANSLATIONAL MEDICINE;Haider A, 2008, ARCHIVES OF SURGERY;Darcy A, 2016, JAMA;Hastie T, 2001, SPRINGER SERIES IN STATISTICS;Greaves F, 2013, JOURNAL OF MEDICAL INTERNET RESEARCH;Wolberg W, 1994, CANCER LETTERS;Banerjee S, 2017, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Hawkins J, 2015, BMJ QUALITY & SAFETY;Brantingham P, 2018, STATISTICS AND PUBLIC POLICY;Leí T, 2016, ;Anderson J, 2015, JOURNAL OF DIABETES SCIENCE AND TECHNOLOGY;Bennett K, 1992, MINDS AT UW (UNIVERSITY OF WISCONSIN);Wagland R, 2015, BMJ QUALITY & SAFETY;Sidey‐Gibbons C, 2017, JOURNAL OF MEDICAL INTERNET RESEARCH;Ong M, 2012, JOURNAL OF THE AMERICAN MEDICAL INFORMATICS ASSOCIATION;Jolliffe I, 2014, WILEY STATSREF: STATISTICS REFERENCE ONLINE",,,OPENALEX,"Sidey-Gibbons J, 2019, BMC MEDICAL RESEARCH METHODOLOGY","Sidey-Gibbons J, 2019, BMC MEDICAL RESEARCH 
METHODOLOGY" +https://openalex.org/W2540093921,10.7551/mitpress/8291.003.0006,Foundations of Machine Learning,2012,en,book-chapter,1088,THE MIT PRESS EBOOKS,The MIT Press eBooks,,,,,"Foundations of Machine LearningSoon we will embark on a theoretical study of AdaBoost in order to understand its properties, particularly its ability as a learning algorithm to generalize, that is, to make accurate predictions on data not seen during training.Before this will be possible, however, it will be necessary to take a step back to outline our approach to the more general problem of machine learning, including some fundamental general-purpose tools that will be invaluable in our analysis of AdaBoost.We study the basic problem of inferring from a set of training examples a classification rule whose predictions are highly accurate on freshly observed test data.On first encounter, it may seem questionable whether this kind of learning should even be possible.After all, why should there be any connection between the training and test examples, and why should it be possible to generalize from a relatively small number of training examples to a potentially vast universe of test examples?Although such objections have indeed often been the subject of philosophical debate, in this chapter we will identify an idealized but realistic model of the inference problem in which this kind of learning can be proved to be entirely feasible when certain conditions are satisfied.In particular, we will see that if we can find a simple rule that fits the training data well, and if the training set is not too small, then this rule will in fact generalize well, providing accurate predictions on previously unseen test examples.This is the basis of the approach presented in this chapter, and we will often use the general analysis on which it is founded to guide us in understanding how, why, and when learning is possible.We also outline in this chapter a mathematical framework for studying machine 
learning, one in which a precise formulation of the boosting problem can be clearly and naturally expressed.Note that, unlike the rest of the book, this chapter omits nearly all of the proofs of the main results since these have largely all appeared in various texts and articles.",,,23,52,Computer science;Artificial intelligence,,,,,OPENALEX,"NA, 2012, THE MIT PRESS EBOOKS","NA, 2012, THE MIT PRESS EBOOKS" +https://openalex.org/W2041616772,10.1007/978-3-642-42051-1_16,Challenges in Representation Learning: A Report on Three Machine Learning Contests,2013,en,book-chapter,1531,LECTURE NOTES IN COMPUTER SCIENCE,Lecture notes in computer science,Ian Goodfellow;Dumitru Erhan;Pierre Carrier;Aaron Courville;Mehdi Mirza;Ben Hamner;Will Cukierski;Yichuan Tang;David S. Thaler;Dong‐Hyun Lee;Yingbo Zhou;Chetan Ramaiah;Fangxiang Feng;Ruifan Li;Xiaojie Wang;Dimitris Athanasakis;John Shawe‐Taylor;Maxim Milakov;John Park;Radu Tudor Ionescu;Marius Popescu;Cristian Grozea;James Bergstra;Jingjing Xie;Łukasz Romaszko;Bing Xu;Chuang Zhang;Yoshua Bengio,Ian J. 
Goodfellow;Dumitru Erhan;Pierre Luc Carrier;Aaron Courville;Mehdi Mirza;Ben Hamner;Will Cukierski;Yichuan Tang;David Thaler;Dong-Hyun Lee;Yingbo Zhou;Chetan Ramaiah;Fangxiang Feng;Ruifan Li;Xiaojie Wang;Dimitris Athanasakis;John Shawe-Taylor;Maxim Milakov;John Park;Radu Ionescu;Marius Popescu;Cristian Grozea;James Bergstra;Jingjing Xie;Lukasz Romaszko;Bing Xu;Zhang Chuang;Yoshua Bengio,"Université de Montréal, Montréal, Canada;UNIVERSITE DE MONTREAL;Google, Venice, USA;Google†#TAB#;Université de Montréal, Montréal, Canada;UNIVERSITE DE MONTREAL;Université de Montréal, Montréal, Canada;UNIVERSITE DE MONTREAL;Université de Montréal, Montréal, Canada;UNIVERSITE DE MONTREAL;Université de Montréal, Montréal, Canada;UNIVERSITE DE MONTREAL;Université de Montréal, Montréal, Canada;UNIVERSITE DE MONTREAL;Université de Montréal, Montréal, Canada;UNIVERSITE DE MONTREAL;Université de Montréal, Montréal, Canada;UNIVERSITE DE MONTREAL;Université de Montréal, Montréal, Canada;UNIVERSITE DE MONTREAL;Université de Montréal, Montréal, Canada;UNIVERSITE DE MONTREAL;Université de Montréal, Montréal, Canada;UNIVERSITE DE MONTREAL;Université de Montréal, Montréal, Canada;UNIVERSITE DE MONTREAL;Université de Montréal, Montréal, Canada;UNIVERSITE DE MONTREAL;Université de Montréal, Montréal, Canada;UNIVERSITE DE MONTREAL;Université de Montréal, Montréal, Canada;UNIVERSITE DE MONTREAL;Université de Montréal, Montréal, Canada;UNIVERSITE DE MONTREAL;Université de Montréal, Montréal, Canada;UNIVERSITE DE MONTREAL;Université de Montréal, Montréal, Canada;UNIVERSITE DE MONTREAL;Université de Montréal, Montréal, Canada;UNIVERSITE DE MONTREAL;Université de Montréal, Montréal, Canada;UNIVERSITE DE MONTREAL;Université de Montréal, Montréal, Canada;UNIVERSITE DE MONTREAL;Université de Montréal, Montréal, Canada;UNIVERSITE DE MONTREAL;Université de Montréal, Montréal, Canada;UNIVERSITE DE MONTREAL;Université de Montréal, Montréal, Canada;UNIVERSITE DE MONTREAL;Université de Montréal, Montréal, 
Canada;UNIVERSITE DE MONTREAL;Université de Montréal, Montréal, Canada;UNIVERSITE DE MONTREAL;Université de Montréal, Montréal, Canada;UNIVERSITE DE MONTREAL",,,,,117,124,Computer science;Representation (politics);Artificial intelligence;Feature learning;Machine learning;Learning to learn;Data science;Mathematics education;Psychology,CA;US,"Breiman L, 2001, MACHINE LEARNING;Cortes C, 1995, MACHINE LEARNING;Lowe D, 1999, ;Bengio Y, 2013, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Vincent P, 2008, ;Bradski G, 2000, MEDICAL ENTOMOLOGY AND ZOOLOGY;Ahn L, 2004, ;Grandvalet Y, 2004, ;Goodfellow I, 2013, ;Bengio Y, 2012, ;Ngiam J, 2011, NEURAL INFORMATION PROCESSING SYSTEMS;Ionescu R, 2014, ;Guyon I, 2011, ;Netzer Y, 2024, ;Tang Y, 2013, ARXIV (CORNELL UNIVERSITY);Goodfellow I, 2013, ARXIV (CORNELL UNIVERSITY);Goodfellow I, 2012, ARXIV (CORNELL UNIVERSITY);Feng F, 2013, ARXIV (CORNELL UNIVERSITY)",,,OPENALEX,"Goodfellow I, 2013, LECTURE NOTES IN COMPUTER SCIENCE","Goodfellow I, 2013, LECTURE NOTES IN COMPUTER SCIENCE" +https://openalex.org/W2995098893,10.1186/s12911-019-1004-8,Comparing different supervised machine learning algorithms for disease prediction,2019,en,article,1628,BMC MEDICAL INFORMATICS AND DECISION MAKING,BMC Medical Informatics and Decision Making,Shahadat Uddin;Arif Khan;Md Ekramul Hossain;Mohammad Ali Moni,Shahadat Uddin;Arif Khan;Md Ekramul Hossain;Mohammad Ali Moni,"Complex Systems Research Group, Faculty of Engineering, The University of Sydney, Room 524, SIT Building (J12), Darlington, NSW, 2008, Australia. 
shahadat.uddin@sydney.edu.au;Complex Systems Research Group, Faculty of Engineering, The University of Sydney, Room 524, SIT Building (J12), Darlington, NSW, 2008, Australia;Complex Systems Research Group, Faculty of Engineering, The University of Sydney, Room 524, SIT Building (J12), Darlington, NSW, 2008, Australia;Health Market Quality Research Stream, Capital Markets CRC, Level 3, 55 Harrington Street, Sydney, NSW, Australia;Complex Systems Research Group, Faculty of Engineering, The University of Sydney, Room 524, SIT Building (J12), Darlington, NSW, 2008, Australia;Faculty of Medicine and Health, School of Medical Sciences, The University of Sydney, Camperdown, NSW, 2006, Australia","Shahadat Uddin (corresponding author), Complex Systems Research Group, Faculty of Engineering, The University of Sydney, Room 524, SIT Building (J12), Darlington, NSW, 2008, Australia. shahadat.uddin@sydney.edu.au; Complex Systems Research Group, Faculty of Engineering, The University of Sydney, Room 524, SIT Building (J12), Darlington, NSW, 2008, Australia","BACKGROUND: Supervised machine learning algorithms have been a dominant method in the data mining field. Disease prediction using health data has recently shown a potential application area for these methods. This study ai7ms to identify the key trends among different types of supervised machine learning algorithms, and their performance and usage for disease risk prediction. METHODS: In this study, extensive research efforts were made to identify those studies that applied more than one supervised machine learning algorithm on single disease prediction. Two databases (i.e., Scopus and PubMed) were searched for different types of search items. Thus, we selected 48 articles in total for the comparison among variants supervised machine learning algorithms for disease prediction. 
RESULTS: We found that the Support Vector Machine (SVM) algorithm is applied most frequently (in 29 studies) followed by the Naïve Bayes algorithm (in 23 studies). However, the Random Forest (RF) algorithm showed superior accuracy comparatively. Of the 17 studies where it was applied, RF showed the highest accuracy in 9 of them, i.e., 53%. This was followed by SVM which topped in 41% of the studies it was considered. CONCLUSION: This study provides a wide overview of the relative performance of different variants of supervised machine learning algorithms for disease prediction. This important information of relative performance can be used to aid researchers in the selection of an appropriate supervised machine learning algorithm for their studies.",19,1,281,281,Health informatics;Computer science;Machine learning;Artificial intelligence;Algorithm;Public health;Medicine;Nursing,AU,"Moher D, 2009, BMJ;Moher D, 2009, ANNALS OF INTERNAL MEDICINE;W H, 1990, CHOICE REVIEWS ONLINE;Rumelhart D, 1986, NATURE;Fawcett T, 2005, PATTERN RECOGNITION LETTERS;McCulloch W, 1943, BULLETIN OF MATHEMATICAL BIOLOGY;Cover T, 1967, IEEE TRANSACTIONS ON INFORMATION THEORY;Quinlan J, 1986, MACHINE LEARNING;Demšar J, 2006, ;Sebastiani F, 2002, ACM COMPUTING SURVEYS;Falagas M, 2007, THE FASEB JOURNAL;Michalski R, 2013, ;Rish I, 2001, ;Levy O, 2015, TRANSACTIONS OF THE ASSOCIATION FOR COMPUTATIONAL LINGUISTICS;Delen D, 2004, ARTIFICIAL INTELLIGENCE IN MEDICINE;Sahami M, 1998, ;Cruz J, 2007, PUBMED;Palaniappan S, 2008, ;Aleskerov E, 2002, ;Chen H, 2012, EXPERT SYSTEMS WITH APPLICATIONS;Sinclair C, 2003, ;Long N, 2015, EXPERT SYSTEMS WITH APPLICATIONS;Culler S, 1998, MEDICAL CARE;Ayer T, 2009, RADIOGRAPHICS;Lundin J, 1999, ONCOLOGY;Farran B, 2013, BMJ OPEN;Davis D, 2009, DATA MINING AND KNOWLEDGE DISCOVERY;Zupan B, 2000, ARTIFICIAL INTELLIGENCE IN MEDICINE;Mani S, 2012, PUBMED;Kim J, 2015, HEALTHCARE INFORMATICS RESEARCH;Kim E, 2003, DECISION SUPPORT SYSTEMS;Eski̇dere Ö, 2011, EXPERT SYSTEMS 
WITH APPLICATIONS;Tapak L, 2013, HEALTHCARE INFORMATICS RESEARCH;Mahadevan S, 1998, ;Thenmozhi K, 2014, ;Bahadur S, 2013, IOSR JOURNAL OF AGRICULTURE AND VETERINARY SCIENCE;Cai L, 2015, PLOS ONE;Tang Z, 2013, PLOS ONE;Yao D, 2013, JOURNAL OF COMPUTERS;Uddin S, 2011, JOURNAL FOR HEALTHCARE QUALITY;Panicker N, 2016, INTERNATIONAL JOURNAL OF TELEMEDICINE AND APPLICATIONS;Lee P, 2007, OPHTHALMOLOGY;Yang J, 2014, LECTURE NOTES IN COMPUTER SCIENCE;Aneja S, 2014, ;Yiannakoulias N, 2009, CHRONIC DISEASES AND INJURIES IN CANADA;Fisher E, 1990, INTERNATIONAL JOURNAL OF TECHNOLOGY ASSESSMENT IN HEALTH CARE;Toshniwal D, 2015, LECTURE NOTES IN COMPUTER SCIENCE;Chen C, 2012, ;Pedregosa F, 2012, ARXIV (CORNELL UNIVERSITY);McCormick T, 2011, SSRN ELECTRONIC JOURNAL;Breiman L, 2001, MACHINE LEARNING;Moher D, 2009, PUBMED;Hosmer D, 2013, WILEY SERIES IN PROBABILITY AND STATISTICS;Touretzky D, 1989, ;Kavakiotis I, 2017, COMPUTATIONAL AND STRUCTURAL BIOTECHNOLOGY JOURNAL;Chen M, 2017, IEEE ACCESS;Lindley D, 1958, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Sisodia D, 2018, PROCEDIA COMPUTER SCIENCE;Charlton P, 2014, ;Lynch C, 2017, INTERNATIONAL JOURNAL OF MEDICAL INFORMATICS;LG A, 2013, JOURNAL OF HEALTH & MEDICAL INFORMATICS;Anbarasi M, 2010, ;Hung C, 2017, ;Bhatla N, 2012, ;Jin B, 2018, IEEE ACCESS;Khateeb N, 2017, ;Hussain L, 2017, CANCER BIOMARKERS;Taslimitehrani V, 2016, JOURNAL OF BIOMEDICAL INFORMATICS;Alonso D, 2018, JOURNAL OF NUCLEAR CARDIOLOGY;Mansoor H, 2017, HEART & LUNG;Behroozi M, 2016, INTERNATIONAL JOURNAL OF TELEMEDICINE AND APPLICATIONS;Mustaqeem A, 2017, ;Malik S, 2016, SPRINGERPLUS;Ani R, 2016, ;Juhola M, 2018, SCIENTIFIC REPORTS;Mohaimenul I, 2018, STUDIES IN HEALTH TECHNOLOGY AND INFORMATICS;Lu P, 2018, JOURNAL OF HEALTHCARE ENGINEERING;Atlas L, 1990, PROCEEDINGS OF THE IEEE;Marikani T, 2017, INTERNATIONAL JOURNAL OF COMPUTER APPLICATIONS;Borah M, 2018, INTERNATIONAL JOURNAL OF MACHINE LEARNING AND COMPUTING;Forssen H, 2017, 
STUDIES IN HEALTH TECHNOLOGY AND INFORMATICS;Puyalnithi T, 2016, INDIAN JOURNAL OF SCIENCE AND TECHNOLOGY;Zaret B, 1994, JOURNAL OF NUCLEAR CARDIOLOGY",,,OPENALEX,"Uddin S, 2019, BMC MEDICAL INFORMATICS AND DECISION MAKING","Uddin S, 2019, BMC MEDICAL INFORMATICS AND DECISION MAKING" +https://openalex.org/W2347129741,10.1038/nature17439,Machine-learning-assisted materials discovery using failed experiments,2016,en,article,1710,NATURE,Nature,Paul Raccuglia;Katherine C. Elbert;Philip Adler;Casey Falk;Malia B. Wenny;Aurelio Mollo;Mat­thias Zeller;Sorelle A. Friedler;Joshua Schrier;Alexander J. Norquist,Paul Raccuglia;Katherine C. Elbert;Philip D. F. Adler;Casey Falk;Malia B. Wenny;Aurelio Mollo;Matthias Zeller;Sorelle A. Friedler;Joshua Schrier;Alexander J. Norquist,"Haverford College, 370 Lancaster Avenue, Haverford, Pennsylvania 19041, USA;Haverford College, 370 Lancaster Avenue, Haverford, 19041, Pennsylvania, USA;Haverford College, 370 Lancaster Avenue, Haverford, Pennsylvania 19041, USA;Haverford College, 370 Lancaster Avenue, Haverford, 19041, Pennsylvania, USA;Haverford College, 370 Lancaster Avenue, Haverford, Pennsylvania 19041, USA;Haverford College, 370 Lancaster Avenue, Haverford, 19041, Pennsylvania, USA;Haverford College, 370 Lancaster Avenue, Haverford, Pennsylvania 19041, USA;Haverford College, 370 Lancaster Avenue, Haverford, 19041, Pennsylvania, USA;Haverford College, 370 Lancaster Avenue, Haverford, Pennsylvania 19041, USA;Haverford College, 370 Lancaster Avenue, Haverford, 19041, Pennsylvania, USA;Haverford College, 370 Lancaster Avenue, Haverford, Pennsylvania 19041, USA;Haverford College, 370 Lancaster Avenue, Haverford, 19041, Pennsylvania, USA;Department of Chemistry, Purdue University, 560 Oval Drive, West Lafayette, Indiana 47907-2084, USA;Department of Chemistry, Purdue University, 560 Oval Drive, West Lafayette, 47907-2084, Indiana, USA;Haverford College, 370 Lancaster Avenue, Haverford, Pennsylvania 19041, USA;Haverford College, 370 
Lancaster Avenue, Haverford, 19041, Pennsylvania, USA;Haverford College, 370 Lancaster Avenue, Haverford, Pennsylvania 19041, USA;Haverford College, 370 Lancaster Avenue, Haverford, 19041, Pennsylvania, USA;Haverford College, 370 Lancaster Avenue, Haverford, Pennsylvania 19041, USA;Haverford College, 370 Lancaster Avenue, Haverford, 19041, Pennsylvania, USA","Alexander J. Norquist (corresponding author), Haverford College, 370 Lancaster Avenue, Haverford, Pennsylvania 19041, USA; Haverford College, 370 Lancaster Avenue, Haverford, 19041, Pennsylvania, USA",,533,7601,73,76,Computer science;Data science;Artificial intelligence,US,"Chang C, 2011, ACM TRANSACTIONS ON INTELLIGENT SYSTEMS AND TECHNOLOGY;Cortes C, 1995, MACHINE LEARNING;Hastie T, 2001, SPRINGER SERIES IN STATISTICS;Hall M, 2009, ACM SIGKDD EXPLORATIONS NEWSLETTER;Allen F, 2002, ACTA CRYSTALLOGRAPHICA SECTION B STRUCTURAL SCIENCE;Joachims T, 1998, LECTURE NOTES IN COMPUTER SCIENCE;Zhou H, 2012, CHEMICAL REVIEWS;Stranks S, 2015, NATURE NANOTECHNOLOGY;Cheetham A, 1999, ANGEWANDTE CHEMIE INTERNATIONAL EDITION;Cundy C, 2003, CHEMICAL REVIEWS;Férey G, 2001, CHEMISTRY OF MATERIALS;Li Y, 2014, CHEMICAL REVIEWS;Hautier G, 2010, CHEMISTRY OF MATERIALS;Leach A, 2007, ;Gaultois M, 2013, CHEMISTRY OF MATERIALS;Colón Y, 2014, CHEMICAL SOCIETY REVIEWS;Riniker S, 2013, JOURNAL OF CHEMINFORMATICS;Sokolov A, 2011, NATURE COMMUNICATIONS;Üstün B, 2005, CHEMOMETRICS AND INTELLIGENT LABORATORY SYSTEMS;Fernández M, 2014, THE JOURNAL OF PHYSICAL CHEMISTRY LETTERS;Kalidindi S, 2015, ANNUAL REVIEW OF MATERIALS RESEARCH;Hachmann J, 2013, ENERGY & ENVIRONMENTAL SCIENCE;Hastie T, 2008, SPRINGER SERIES IN STATISTICS;Rao C, 2006, CHEMICAL SOCIETY REVIEWS;Barakat N, 2008, ZENODO (CERN EUROPEAN ORGANIZATION FOR NUCLEAR RESEARCH);Beran G, 2014, ANGEWANDTE CHEMIE INTERNATIONAL EDITION;Haushalter R, 1992, CHEMISTRY OF MATERIALS;Férey G, 1995, JOURNAL OF FLUORINE CHEMISTRY;Rao C, 2000, JOURNAL OF THE AMERICAN CHEMICAL SOCIETY;Wicker J, 2014, 
CRYSTENGCOMM;Martin R, 2013, THE JOURNAL OF PHYSICAL CHEMISTRY C;Thakur T, 2014, ANNUAL REVIEW OF PHYSICAL CHEMISTRY;Thangavelu S, 2015, CRYSTAL GROWTH & DESIGN;Zhao J, 2014, CHINESE SCIENCE BULLETIN;Groom C, 2014, ACTA CRYSTALLOGRAPHICA SECTION B STRUCTURAL SCIENCE CRYSTAL ENGINEERING AND MATERIALS;Yang S, 2009, THE JOURNAL OF PHYSICAL CHEMISTRY C;Olshansky J, 2014, INORGANIC CHEMISTRY",,,OPENALEX,"Raccuglia P, 2016, NATURE","Raccuglia P, 2016, NATURE" +https://openalex.org/W2083780116,10.1016/s0933-3657(01)00077-x,"Machine learning for medical diagnosis: history, state of the art and perspective",2001,en,review,1667,ARTIFICIAL INTELLIGENCE IN MEDICINE,Artificial Intelligence in Medicine,Igor Kononenko,Igor Kononenko,"Faculty of Computer and Information Science, University of Ljubljana, Trzaska 25, 1001, Ljubljana, Slovenia. igor.kononenko@fri.uni-lj.si;Faculty of Computer and Information Science, University of Ljubljana, Traška 25, 1001 Ljubljana, Slovenia#TAB#","Igor Kononenko (corresponding author), Faculty of Computer and Information Science, University of Ljubljana, Trzaska 25, 1001, Ljubljana, Slovenia. 
igor.kononenko@fri.uni-lj.si; Faculty of Computer and Information Science, University of Ljubljana, Traška 25, 1001 Ljubljana, Slovenia#TAB#",,23,1,89,109,Artificial intelligence;Computer science;Machine learning;Perspective (graphical);Bayesian network;Classifier (UML);Artificial neural network;State (computer science);Reliability (semiconductor);Point (geometry);Data science,SI,"Haykin S, 1998, ;Quinlan J, 1992, ;Hopfield J, 1982, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Quinlan J, 1986, MACHINE LEARNING;Hopfield J, 1984, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Rumelhart D, 1988, ELSEVIER EBOOKS;Kira K, 1992, ELSEVIER EBOOKS;Kononenko I, 1994, LECTURE NOTES IN COMPUTER SCIENCE;Diamond G, 1979, NEW ENGLAND JOURNAL OF MEDICINE;Mullin A, 1963, AMERICAN MATHEMATICAL MONTHLY;Fulkerson B, 1995, TECHNOMETRICS;Kira K, 1992, NATIONAL CONFERENCE ON ARTIFICIAL INTELLIGENCE;Langley P, 1986, MACHINE LEARNING;Clark P, 1991, LECTURE NOTES IN COMPUTER SCIENCE;M. P, 1950, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES A (GENERAL);Weigend A, 1990, INTERNATIONAL JOURNAL OF NEURAL SYSTEMS;Spiegelhalter D, 1993, STATISTICAL SCIENCE;Hunt E, 1966, MEDICAL ENTOMOLOGY AND ZOOLOGY;Sparks J, 1967, AMERICAN EDUCATIONAL RESEARCH JOURNAL;Robnik‐Šikonja M, 1997, ;Cestnik B, 1990, EUROPEAN CONFERENCE ON ARTIFICIAL INTELLIGENCE;Cestnik B, 1987, MEDICAL ENTOMOLOGY AND ZOOLOGY;Catlett J, 1991, LECTURE NOTES IN COMPUTER SCIENCE;Shavlik J, 1991, ;Liu H, 1996, ACM SIGART BULLETIN;Kononenko I, 1991, LECTURE NOTES IN COMPUTER SCIENCE;Kononenko I, 1993, APPLIED ARTIFICIAL INTELLIGENCE;Kukar M, 1999, ARTIFICIAL INTELLIGENCE IN MEDICINE;Gupta M, 1982, ELSEVIER EBOOKS;Quinlan J, 1987, ;Pazzani M, 1996, LECTURE NOTES IN STATISTICS;Kononenko I, 1991, MACHINE LEARNING;, 1991, ARTIFICIAL INTELLIGENCE IN MEDICINE;Gaines B, 1990, MEDICAL ENTOMOLOGY AND ZOOLOGY;Langley P, 1993, LECTURE NOTES IN COMPUTER SCIENCE;Baim P, 1988, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Craven M, 
1993, ELSEVIER EBOOKS;Kukar M, 1996, ARTIFICIAL INTELLIGENCE IN MEDICINE;Connell C, 1989, JOURNAL OF THE OPERATIONAL RESEARCH SOCIETY;Ragavan H, 1993, ELSEVIER EBOOKS;Muggleton S, 1989, ERA;Horn K, 1985, AUSTRALIAN COMPUTER JOURNAL;Kukar M, 1999, LECTURE NOTES IN COMPUTER SCIENCE;Lesmo L, 1993, ELSEVIER EBOOKS;Pirnat V, 1989, LECTURE NOTES IN MEDICAL INFORMATICS;Kononenko I, 1999, REPOSITORY OF THE UNIVERSITY OF LJUBLJANA (UNIVERSITY OF LJUBLJANA);Kononenko I, 2000, REPOSITORY OF THE UNIVERSITY OF LJUBLJANA (UNIVERSITY OF LJUBLJANA);Rumelhart D, 1985, ;Pompe U, 1997, LECTURE NOTES IN COMPUTER SCIENCE;Ryzin J, 1986, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Kononenko I, 1991, MACHINE LEARNING",,,OPENALEX,"Kononenko I, 2001, ARTIFICIAL INTELLIGENCE IN MEDICINE","Kononenko I, 2001, ARTIFICIAL INTELLIGENCE IN MEDICINE" +https://openalex.org/W2981679558,10.1371/journal.pone.0224365,Machine learning algorithm validation with a limited sample size,2019,en,article,1687,PLOS ONE,PLoS ONE,Andrius Vabalas;Emma Gowen;Ellen Poliakoff;Alexander J. Casson,Andrius Vabalas;Emma Gowen;Ellen Poliakoff;Alexander J. 
Casson,"Materials, Devices and Systems Division, School of Electrical and Electronic Engineering, The University of Manchester, Manchester, England, United Kingdom;School of Biological Sciences, The University of Manchester, Manchester, England, United Kingdom;School of Biological Sciences, The University of Manchester, Manchester, England, United Kingdom;Materials, Devices and Systems Division, School of Electrical and Electronic Engineering, The University of Manchester, Manchester, England, United Kingdom","Andrius Vabalas (corresponding author), Materials, Devices and Systems Division, School of Electrical and Electronic Engineering, The University of Manchester, Manchester, England, United Kingdom","Advances in neuroimaging, genomic, motion tracking, eye-tracking and many other technology-based data collection methods have led to a torrent of high dimensional datasets, which commonly have a small number of samples because of the intrinsic high cost of data collection involving human participants. High dimensional data with a small number of samples is of critical importance for identifying biomarkers and conducting feasibility and pilot work, however it can lead to biased machine learning (ML) performance estimates. Our review of studies which have applied ML to predict autistic from non-autistic individuals showed that small sample size is associated with higher reported classification accuracy. Thus, we have investigated whether this bias could be caused by the use of validation methods which do not sufficiently control overfitting. Our simulations show that K-fold Cross-Validation (CV) produces strongly biased performance estimates with small sample sizes, and the bias is still evident with sample size of 1000. Nested CV and train/test split approaches produce robust and unbiased performance estimates regardless of sample size. 
We also show that feature selection if performed on pooled training and testing data is contributing to bias considerably more than parameter tuning. In addition, the contribution to bias by data dimensionality, hyper-parameter space and number of CV folds was explored, and validation methods were compared with discriminable data. The results suggest how to design robust testing methodologies when working with small datasets and how to interpret the results of other studies based on what validation method was used.",14,11,e0224365,e0224365,Overfitting;Sample size determination;Computer science;Artificial intelligence;Cross-validation;Data collection;Machine learning;Selection bias;Sample (material);Statistics;Feature selection;Data mining;Pattern recognition (psychology);Mathematics;Artificial neural network,GB,"Chang C, 2011, ACM TRANSACTIONS ON INTELLIGENT SYSTEMS AND TECHNOLOGY;Sudlow C, 2015, PLOS MEDICINE;Boser B, 1992, ;Stone M, 1974, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Ferrari V, 2008, MEDICAL ENTOMOLOGY AND ZOOLOGY;Guyon I, 2002, MACHINE LEARNING;Saeys Y, 2007, BIOINFORMATICS;Libbrecht M, 2015, NATURE REVIEWS GENETICS;Varma S, 2006, BMC BIOINFORMATICS;Cawley G, 2010, ;Raudys Š, 1991, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Krstajić D, 2014, JOURNAL OF CHEMINFORMATICS;Hira Z, 2015, ADVANCES IN BIOINFORMATICS;Arbabshirani M, 2016, NEUROIMAGE;Varoquaux G, 2016, NEUROIMAGE;Bolón‐Canedo V, 2014, INFORMATION SCIENCES;Combrisson E, 2015, JOURNAL OF NEUROSCIENCE METHODS;Jain A, 1982, HANDBOOK OF STATISTICS;Figueroa R, 2012, BMC MEDICAL INFORMATICS AND DECISION MAKING;Beleites C, 2012, ANALYTICA CHIMICA ACTA;Hua J, 2004, COMPUTER APPLICATIONS IN THE BIOSCIENCES;Devos O, 2008, CHEMOMETRICS AND INTELLIGENT LABORATORY SYSTEMS;Mukherjee S, 2003, JOURNAL OF COMPUTATIONAL BIOLOGY;Hyde K, 2019, REVIEW JOURNAL OF AUTISM AND DEVELOPMENTAL DISORDERS;Bone D, 2014, JOURNAL OF AUTISM AND DEVELOPMENTAL 
DISORDERS;Kassraian P, 2016, FRONTIERS IN PSYCHIATRY;Dernoncourt D, 2013, COMPUTATIONAL STATISTICS & DATA ANALYSIS;Devadoss F, 2014, JOURNAL OF CHEMINFORMATICS;Kanal L, 1971, PATTERN RECOGNITION;Pedregosa F, 2012, ARXIV (CORNELL UNIVERSITY)",,,OPENALEX,"Vabalas A, 2019, PLOS ONE","Vabalas A, 2019, PLOS ONE" +https://openalex.org/W2186615578,10.48550/arxiv.1512.01274,MXNet: A Flexible and Efficient Machine Learning Library for Heterogeneous Distributed Systems,2015,en,preprint,1922,ARXIV (CORNELL UNIVERSITY),arXiv (Cornell University),Tianqi Chen;Mu Li;Yutian Li;Min Lin;Naiyan Wang;Minjie Wang;Tianjun Xiao;Bing Xu;Chiyuan Zhang;Zheng Zhang,"Chen, Tianqi;Li, Mu;Li, Yutian;Lin, Min;Wang, Naiyan;Wang, Minjie;Xiao, Tianjun;Xu, Bing;Zhang, Chiyuan;Zhang, Zheng",Carnegie Mellon University,,"MXNet is a multi-language machine learning (ML) library to ease the development of ML algorithms, especially for deep neural networks. Embedded in the host language, it blends declarative symbolic expression with imperative tensor computation. It offers auto differentiation to derive gradients. MXNet is computation and memory efficient and runs on various heterogeneous systems, ranging from mobile devices to distributed GPU clusters. This paper describes both the API design and the system implementation of MXNet, and explains how embedding of both symbolic expression and tensor operation is handled in a unified fashion. 
Our preliminary experiments reveal promising results on large scale deep neural network applications using multiple GPU machines.",,,,,Computer science;Distributed computing;Artificial intelligence,US,"Russakovsky O, 2015, INTERNATIONAL JOURNAL OF COMPUTER VISION;Jia Y, 2014, ;Dean J, 2012, ;Collobert R, 2011, INFOSCIENCE (ECOLE POLYTECHNIQUE FÉDÉRALE DE LAUSANNE);Li M, 2014, OPERATING SYSTEMS DESIGN AND IMPLEMENTATION;Li M, 2014, NEURAL INFORMATION PROCESSING SYSTEMS;Ioffe S, 2024, ARXIV (CORNELL UNIVERSITY);Bastien F, 2012, ARXIV (CORNELL UNIVERSITY);Lin M, 2014, ARXIV (CORNELL UNIVERSITY)",,,OPENALEX,"Chen T, 2015, ARXIV (CORNELL UNIVERSITY)","Chen T, 2015, ARXIV (CORNELL UNIVERSITY)" +https://openalex.org/W391985582,10.7551/mitpress/8291.003.0026,Adaptive Computation and Machine Learning,2012,en,book-chapter,1435,THE MIT PRESS EBOOKS,The MIT Press eBooks,,,,,,,,527,527,Computer science;Computation;Artificial intelligence;Machine learning;Algorithm,,", 2005, STATISTICS FOR BIOLOGY AND HEALTH;Koza J, 1992, MEDICAL ENTOMOLOGY AND ZOOLOGY;Jukes T, 1969, ELSEVIER EBOOKS;Chung F, 1996, REGIONAL CONFERENCE SERIES IN MATHEMATICS;Lodish H, 1986, ;Neal R, 1996, LECTURE NOTES IN STATISTICS;Hofacker I, 1994, MONATSHEFTE FÜR CHEMIE - CHEMICAL MONTHLY;Godson G, 1978, NATURE;Pearson W, 1990, METHODS IN ENZYMOLOGY ON CD-ROM/METHODS IN ENZYMOLOGY;, 1995, COMPUTERS & MATHEMATICS WITH APPLICATIONS;Kearns M, 1994, THE MIT PRESS EBOOKS;Huber P, 1997, SPRINGER SERIES IN STATISTICS;Sejnowski T, 1987, ;Lee C, 1999, SCIENCE;N. M, 1980, COMPUTER LANGUAGES;Ghahramani Z, 1997, MACHINE LEARNING;Marcotte E, 1999, NATURE;R. 
S, 2004, ;Schervish M, 1996, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Gilks W, 1994, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES D (THE STATISTICIAN);Jensen F, 1990, VBN FORSKNINGSPORTAL (AALBORG UNIVERSITET);Schölkopf B, 1995, ;Hansen J, 1998, GLYCOCONJUGATE JOURNAL;Kolakowski L, 1994, PUBMED;Hirata R, 1990, JOURNAL OF BIOLOGICAL CHEMISTRY;Kulp D, 1996, PUBMED;Pearson W, 1996, METHODS IN ENZYMOLOGY ON CD-ROM/METHODS IN ENZYMOLOGY;Zuker M, 1989, METHODS IN ENZYMOLOGY ON CD-ROM/METHODS IN ENZYMOLOGY;Gull S, 1989, ;Gull S, 1988, ;Kauffman S, 1974, JOURNAL OF THEORETICAL BIOLOGY;Wang W, 1999, NATURE STRUCTURAL BIOLOGY;Gilks W, 1993, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Gelman A, 1993, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);James B, 1989, METHODS IN ENZYMOLOGY ON CD-ROM/METHODS IN ENZYMOLOGY;Hein J, 1990, METHODS IN ENZYMOLOGY ON CD-ROM/METHODS IN ENZYMOLOGY;Khorana H, 1988, JOURNAL OF BIOLOGICAL CHEMISTRY;Hansen J, 1995, BIOCHEMICAL JOURNAL;Winkler H, 1920, G. 
FISCHER EBOOKS;Schneider T, 1996, METHODS IN ENZYMOLOGY ON CD-ROM/METHODS IN ENZYMOLOGY;Krogh A, 1995, PUBMED;Uberbacher E, 1996, METHODS IN ENZYMOLOGY ON CD-ROM/METHODS IN ENZYMOLOGY;Savageau M, 1996, ;Huang Z, 1996, CURRENT TOPICS IN MICROBIOLOGY AND IMMUNOLOGY;Lapedes A, 2018, ;Koza J, 1994, PUBMED;Zhu J, 1997, PUBMED;Handley S, 1995, PUBMED;Mackay D, 1996, ;Kullback S, 1951, THE ANNALS OF MATHEMATICAL STATISTICS;Sturm J, 1999, OPTIMIZATION METHODS & SOFTWARE;Saffran J, 1996, SCIENCE;Fleischmann R, 1995, SCIENCE;Kauffman S, 1969, JOURNAL OF THEORETICAL BIOLOGY;Lauritzen S, 1988, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Hartwell L, 1999, NATURE;Jordan M, 1998, ;Marinari E, 1992, EUROPHYSICS LETTERS (EPL);King J, 1969, SCIENCE;Gribskov M, 1987, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Waterman M, 1995, ;McAdams H, 1999, TRENDS IN GENETICS;Lari K, 1991, COMPUTER SPEECH & LANGUAGE;Wess J, 1997, THE FASEB JOURNAL;LeCun Y, 1995, ;Jefferys W, 1992, AMERICAN SCIENTIST;Kane P, 1990, SCIENCE;Sellers P, 1974, SIAM JOURNAL ON APPLIED MATHEMATICS;Henikoff S, 1994, JOURNAL OF MOLECULAR BIOLOGY;Giles C, 1992, NEURAL COMPUTATION;Goodsell D, 1994, NUCLEIC ACIDS RESEARCH;Rumelhart D, 1995, ;Woese C, 1967, ;Krogh A, 1997, PUBMED;Karlin S, 1996, JOURNAL OF MOLECULAR BIOLOGY;Sankoff D, 1975, MATHEMATICAL PROGRAMMING;Gibbs M, 1997, ;Spitzer F, 1971, AMERICAN MATHEMATICAL MONTHLY;Pagel M, 1992, PROCEEDINGS OF THE ROYAL SOCIETY B BIOLOGICAL SCIENCES;Stormo G, 1982, NUCLEIC ACIDS RESEARCH;Lin S, 1975, CELL;Pedersen A, 1997, PUBMED;, 1993, CHOICE REVIEWS ONLINE;Kahn P, 1995, SCIENCE;Lattman E, 1993, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Koehl P, 1999, NATURE STRUCTURAL BIOLOGY;Krogh A, 1997, PHYSICAL REVIEW. 
E, STATISTICAL PHYSICS, PLASMAS, FLUIDS, AND RELATED INTERDISCIPLINARY TOPICS;Zhu X, 2018, RESEARCH SHOWCASE @ CARNEGIE MELLON UNIVERSITY (CARNEGIE MELLON UNIVERSITY);Riis S, 1996, JOURNAL OF COMPUTATIONAL BIOLOGY;Stamm S, 1994, NUCLEIC ACIDS RESEARCH;Nowak R, 1995, SCIENCE;Searls D, 1997, COMPUTER APPLICATIONS IN THE BIOSCIENCES;Hubbard T, 1995, PROTEINS STRUCTURE FUNCTION AND BIOINFORMATICS;Francesco V, 1997, JOURNAL OF MOLECULAR BIOLOGY;Green P, 1993, SCIENCE;Heyer L, 1999, GENOME RESEARCH;Neal R, 1997, ARXIV.ORG;Wolfsberg T, 1999, GENOME RESEARCH;Shannon C, 1948, BELL SYSTEM TECHNICAL JOURNAL;Geman S, 1984, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Kabsch W, 1983, BIOPOLYMERS;Pearson W, 1988, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Tatusov R, 1997, SCIENCE;Rost B, 1993, JOURNAL OF MOLECULAR BIOLOGY;Welch, 1984, COMPUTER;Lawrence C, 1993, SCIENCE;Smith A, 1993, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Rost B, 1994, PROTEINS STRUCTURE FUNCTION AND BIOINFORMATICS;Harsanyi J, 1977, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Hajek B, 1988, MATHEMATICS OF OPERATIONS RESEARCH;Hinton G, 1995, SCIENCE;Tarjan R, 1984, SIAM JOURNAL ON COMPUTING;Probst W, 1992, DNA AND CELL BIOLOGY;Tinoco I, 1971, NATURE;Lauritzen S, 1990, NETWORKS;Rost B, 1993, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Richards F, 1988, PROTEINS STRUCTURE FUNCTION AND BIOINFORMATICS;Miklos G, 1996, CELL;Zvelebil M, 1987, JOURNAL OF MOLECULAR BIOLOGY;Rapoport T, 1992, SCIENCE;Henikoff S, 1994, GENOMICS;Krogh A, 1994, NUCLEIC ACIDS RESEARCH;Snyder E, 1995, JOURNAL OF MOLECULAR BIOLOGY;Ornstein R, 1978, BIOPOLYMERS;Taylor F, 1989, BIOSYSTEMS;Goldberg A, 1966, SCIENCE;Swanson R, 1984, BULLETIN OF MATHEMATICAL BIOLOGY;White J, 1994, MATHEMATICAL BIOSCIENCES;Kreegipuu A, 1998, FEBS LETTERS;Prabhu V, 1993, NUCLEIC ACIDS RESEARCH;Karp P, 1999, TRENDS IN BIOTECHNOLOGY;Jungnickel B, 1994, FEBS LETTERS;White S, 1993, JOURNAL OF MOLECULAR EVOLUTION;Rivals 
É, 1996, BIOCHIMIE;Levin J, 1993, PROTEIN ENGINEERING DESIGN AND SELECTION;Fox G, 1975, JOURNAL OF MOLECULAR EVOLUTION;Ferrán E, 1994, PROTEIN SCIENCE;Gelman S, 1999, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Klingler T, 1994, PUBMED;Russell R, 1993, JOURNAL OF MOLECULAR BIOLOGY;Volkenstein M, 1966, BIOCHIMICA ET BIOPHYSICA ACTA (BBA) - NUCLEIC ACIDS AND PROTEIN SYNTHESIS;Wachtel S, 1993, COMPARATIVE BIOCHEMISTRY AND PHYSIOLOGY PART B COMPARATIVE BIOCHEMISTRY;Hayward S, 1992, PROTEINS STRUCTURE FUNCTION AND BIOINFORMATICS;Thanaraj T, 1999, NUCLEIC ACIDS RESEARCH;Kaplan N, 1979, JOURNAL OF MOLECULAR EVOLUTION;Wouters F, 1995, BIOCHIMICA ET BIOPHYSICA ACTA (BBA) - LIPIDS AND LIPID METABOLISM;Fedorov B, 1976, FEBS LETTERS;Nirenberg M, 1963, COLD SPRING HARBOR SYMPOSIA ON QUANTITATIVE BIOLOGY;Kirkpatrick S, 1983, SCIENCE;Jaynes E, 1957, PHYSICAL REVIEW;Fitch W, 1967, SCIENCE;Zuker M, 1981, NUCLEIC ACIDS RESEARCH;Schneider T, 1990, NUCLEIC ACIDS RESEARCH;Swendsen R, 1987, PHYSICAL REVIEW LETTERS;Fraser C, 1995, SCIENCE;Hornik K, 1990, NEURAL NETWORKS;Geyer C, 1992, STATISTICAL SCIENCE;Shore J, 1980, IEEE TRANSACTIONS ON INFORMATION THEORY;Heijne G, 1990, THE JOURNAL OF MEMBRANE BIOLOGY;Salzberg S, 1998, NUCLEIC ACIDS RESEARCH;Reynolds M, 1988, TECHNOMETRICS;Helden J, 1998, JOURNAL OF MOLECULAR BIOLOGY;Gill P, 1994, NATURE GENETICS;Lawrence C, 1990, PROTEINS STRUCTURE FUNCTION AND BIOINFORMATICS;Mjolsness E, 1991, JOURNAL OF THEORETICAL BIOLOGY;Sakakibara Y, 1994, NUCLEIC ACIDS RESEARCH;Иванов П, 1996, NATURE GENETICS;Riddle D, 1997, NATURE STRUCTURAL BIOLOGY;Shindyalov I, 1994, PROTEIN ENGINEERING DESIGN AND SELECTION;Hassan M, 1996, JOURNAL OF MOLECULAR BIOLOGY;Forsdyke D, 1995, JOURNAL OF MOLECULAR EVOLUTION;Kneller D, 1990, JOURNAL OF MOLECULAR BIOLOGY;Uberbacher E, 1991, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Rost B, 1994, JOURNAL OF MOLECULAR BIOLOGY;Hinegardner R, 1968, THE AMERICAN NATURALIST;Huang Z, 1996, FOLDING AND 
DESIGN;Lehman N, 1993, NATURE;Thodberg H, 1996, IEEE TRANSACTIONS ON NEURAL NETWORKS;Gerstein M, 1994, JOURNAL OF MOLECULAR BIOLOGY;Taylor W, 1994, PROTEIN ENGINEERING DESIGN AND SELECTION;Hlavacek W, 1997, JOURNAL OF MOLECULAR BIOLOGY;Heijne G, 1988, BIOCHIMICA ET BIOPHYSICA ACTA (BBA) - REVIEWS ON BIOMEMBRANES;Kaiser D, 1993, CELL;Zhang X, 1992, JOURNAL OF MOLECULAR BIOLOGY;Schneider D, 1992, JOURNAL OF MOLECULAR BIOLOGY;Rose G, 1994, PROTEINS STRUCTURE FUNCTION AND BIOINFORMATICS;Maclin R, 1993, MACHINE LEARNING;Hasegawa M, 1980, ORIGINS OF LIFE AND EVOLUTION OF BIOSPHERES;Hanke J, 1999, TRENDS IN GENETICS;Mackay A, 1967, NATURE;York J, 1992, ARTIFICIAL INTELLIGENCE;Lawton J, 1989, NUCLEIC ACIDS RESEARCH;Tolstrup N, 1994, JOURNAL OF MOLECULAR BIOLOGY;Heijne G, 1977, JOURNAL OF MOLECULAR BIOLOGY;Wang Z, 1994, NATURE STRUCTURAL BIOLOGY;Schneider F, 1978, DIE NATURWISSENSCHAFTEN;Ivanov O, 1986, ORIGINS OF LIFE AND EVOLUTION OF BIOSPHERES;Woese C, 1966, COLD SPRING HARBOR SYMPOSIA ON QUANTITATIVE BIOLOGY;Felsenstein J, 1981, JOURNAL OF MOLECULAR EVOLUTION;Needleman S, 1970, JOURNAL OF MOLECULAR BIOLOGY;Smith T, 1981, JOURNAL OF MOLECULAR BIOLOGY;Rissanen J, 1978, AUTOMATICA;Garnier J, 1978, JOURNAL OF MOLECULAR BIOLOGY;Woese C, 1977, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Krogh A, 1994, JOURNAL OF MOLECULAR BIOLOGY;Sander C, 1991, PROTEINS STRUCTURE FUNCTION AND BIOINFORMATICS;Larsen N, 1993, NUCLEIC ACIDS RESEARCH;Schneider T, 1986, JOURNAL OF MOLECULAR BIOLOGY;Pebay‐Peyroula E, 1997, SCIENCE;Huelsenbeck J, 1997, SCIENCE;Spiegelhalter D, 1993, STATISTICAL SCIENCE;Wong J, 1975, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Stormo G, 1982, NUCLEIC ACIDS RESEARCH;Presta L, 1988, SCIENCE;Neal R, 1992, ARTIFICIAL INTELLIGENCE;Seung H, 1992, PHYSICAL REVIEW A;Holley L, 1989, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;White K, 1999, SCIENCE;Thorne J, 1991, JOURNAL OF MOLECULAR EVOLUTION;Gamow G, 1954, NATURE;Fill J, 1991, THE ANNALS OF APPLIED 
PROBABILITY;Lim V, 1974, JOURNAL OF MOLECULAR BIOLOGY;Wang J, 1990, PHYSICA A STATISTICAL MECHANICS AND ITS APPLICATIONS;Trifonov E, 1987, JOURNAL OF MOLECULAR BIOLOGY;R. V, 1979, SCIENCE;Sakakibara Y, 1992, INFORMATION AND COMPUTATION;Hornik K, 1994, NEURAL COMPUTATION;Neal R, 1993, ;Epstein C, 1966, NATURE;Jukes T, 1973, NATURE;Wu C, 1997, COMPUTERS & CHEMISTRY;Karkas J, 1968, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Venkatesh B, 1996, JOURNAL OF MOLECULAR BIOLOGY;Isham V, 1981, INTERNATIONAL STATISTICAL REVIEW;Ovchinnikov Y, 1979, FEBS LETTERS;Stolorz P, 1992, JOURNAL OF MOLECULAR BIOLOGY;Sibbald P, 1990, JOURNAL OF MOLECULAR BIOLOGY;Pardo L, 1992, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Karlin S, 1992, PROTEIN ENGINEERING DESIGN AND SELECTION;Gerhold D, 1996, BIOESSAYS;Stoneking M, 1995, NATURE GENETICS;Rose G, 1997, NATURE STRUCTURAL BIOLOGY;Forcada M, 1995, NEURAL COMPUTATION;Kawabata T, 1997, PROTEINS STRUCTURE FUNCTION AND BIOINFORMATICS;Gouy M, 1985, BIOCHIMIE;Schweitzer R, 1995, NATURE GENETICS;Zhang M, 1999, GENOME RESEARCH;Rabiner L, 1989, PROCEEDINGS OF THE IEEE;Hastings W, 1970, BIOMETRIKA;Golub T, 1999, SCIENCE;Kaelbling L, 1996, JOURNAL OF ARTIFICIAL INTELLIGENCE RESEARCH;, 1959, JOURNAL OF THE FRANKLIN INSTITUTE;John H, 1994, NEURAL NETWORKS;Matthews B, 1975, BIOCHIMICA ET BIOPHYSICA ACTA (BBA) - PROTEIN STRUCTURE;Spellman P, 1998, MOLECULAR BIOLOGY OF THE CELL;Heijne G, 1986, NUCLEIC ACIDS RESEARCH;Hansen L, 1990, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Tierney L, 1994, THE ANNALS OF STATISTICS;Henderson R, 1990, JOURNAL OF MOLECULAR BIOLOGY;Tamayo P, 1999, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Frishman D, 1995, PROTEINS STRUCTURE FUNCTION AND BIOINFORMATICS;Krogh A, 1994, ;Marcotte E, 1999, SCIENCE;Sonnhammer E, 1997, PROTEINS STRUCTURE FUNCTION AND BIOINFORMATICS;Qian N, 1988, JOURNAL OF MOLECULAR BIOLOGY;Baker W, 2000, NUCLEIC ACIDS RESEARCH;McEliece R, 1998, IEEE JOURNAL ON SELECTED AREAS IN 
COMMUNICATIONS;Hobohm U, 1992, PROTEIN SCIENCE;Lin T, 1996, IEEE TRANSACTIONS ON NEURAL NETWORKS;Turner D, 1988, ANNUAL REVIEW OF BIOPHYSICS AND BIOPHYSICAL CHEMISTRY;Thomas C, 1971, ANNUAL REVIEW OF GENETICS;Shachter R, 1988, OPERATIONS RESEARCH;Frasconi P, 1998, IEEE TRANSACTIONS ON NEURAL NETWORKS;Solovyev V, 1994, NUCLEIC ACIDS RESEARCH;Sjölander K, 1996, COMPUTER APPLICATIONS IN THE BIOSCIENCES;Mackay D, 1995, NATURAL LANGUAGE ENGINEERING;Mackay D, 1999, NEURAL COMPUTATION;Giudicelli V, 1999, BIOINFORMATICS;Saul L, 1995, ;Gorodkin J, 1997, NUCLEIC ACIDS RESEARCH;Woese C, 1983, MICROBIOLOGICAL REVIEWS;Qumsiyeh M, 1994, JOURNAL OF HEREDITY;Gudermann T, 1997, ANNUAL REVIEW OF NEUROSCIENCE;Lund O, 1997, PROTEIN ENGINEERING DESIGN AND SELECTION;Modestino J, 1992, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Pedersen A, 1998, JOURNAL OF MOLECULAR BIOLOGY;Shachter R, 1994, ELSEVIER EBOOKS;Parsons R, 1995, PUBMED;Trower M, 1996, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;White S, 1994, ANNUAL REVIEW OF BIOPHYSICS AND BIOMOLECULAR STRUCTURE;Jaakkola T, 1996, ;Jones D, 1996, PROTEINS STRUCTURE FUNCTION AND BIOINFORMATICS;Fujiwara Y, 1994, PUBMED;Orgel L, 1974, ;Levin E, 1992, NEURAL INFORMATION PROCESSING SYSTEMS;Krogh A, 1995, CERN DOCUMENT SERVER (EUROPEAN ORGANIZATION FOR NUCLEAR RESEARCH);Everitt B, 2005, ENCYCLOPEDIA OF STATISTICS IN BEHAVIORAL SCIENCE;Vapnik V, 1995, ;Press W, 1994, ;Fitch W, 1971, SYSTEMATIC BIOLOGY;Jones D, 1999, JOURNAL OF MOLECULAR BIOLOGY;Nielsen H, 1997, PROTEIN ENGINEERING DESIGN AND SELECTION;Jacobs R, 1991, NEURAL COMPUTATION;Goffeau A, 1996, SCIENCE;Dietterich T, 1998, NEURAL COMPUTATION;Lindsay B, 1987, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Sprinzl M, 2004, NUCLEIC ACIDS RESEARCH;Goodall C, 1991, TECHNOMETRICS;Engelman D, 1986, ANNUAL REVIEW OF BIOPHYSICS AND BIOPHYSICAL CHEMISTRY;Drucker H, 1999, IEEE TRANSACTIONS ON NEURAL NETWORKS;Perou C, 1999, PROCEEDINGS OF THE NATIONAL ACADEMY OF 
SCIENCES;Wingender E, 2000, NUCLEIC ACIDS RESEARCH;Jaynes E, 1968, IEEE TRANSACTIONS ON SYSTEMS SCIENCE AND CYBERNETICS;Enright A, 1999, NATURE;Strader C, 1994, ANNUAL REVIEW OF BIOCHEMISTRY;Levinson S, 1983, BELL SYSTEM TECHNICAL JOURNAL;Göbel U, 1994, PROTEINS STRUCTURE FUNCTION AND BIOINFORMATICS;Hebsgaard S, 1996, NUCLEIC ACIDS RESEARCH;Yuh C, 1998, SCIENCE;Heckerman D, 1997, DATA MINING AND KNOWLEDGE DISCOVERY;Nielsen H, 1998, PUBMED;Everitt B, 1984, ;King R, 1996, PROTEIN SCIENCE;Zemła A, 1999, PROTEINS STRUCTURE FUNCTION AND BIOINFORMATICS;Chapelle O, 1999, ;Khorana H, 1979, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Smyth P, 1997, NEURAL COMPUTATION;Stolcke A, 1992, ;Schneider R, 1997, NUCLEIC ACIDS RESEARCH;Gorodkin J, 1997, COMPUTER APPLICATIONS IN THE BIOSCIENCES;Gregory P, 1992, ;Nielsen H, 1996, PROTEINS STRUCTURE FUNCTION AND BIOINFORMATICS;Jordan M, 1996, CAMBRIDGE UNIVERSITY ENGINEERING DEPARTMENT PUBLICATIONS DATABASE;Wiens B, 1999, THE AMERICAN STATISTICIAN;Neer E, 1994, PROTEIN SCIENCE;McGregor M, 1989, PROTEIN ENGINEERING DESIGN AND SELECTION;Presnell S, 1993, ANNUAL REVIEW OF BIOPHYSICS AND BIOMOLECULAR STRUCTURE;Gorodkin J, 1997, PUBMED;Wolpert D, 2018, ;Heijne G, 1987, MEDICAL ENTOMOLOGY AND ZOOLOGY;Engelfriet J, 2005, LECTURE NOTES IN COMPUTER SCIENCE;Perlwitz M, 1988, ADVANCES IN APPLIED MATHEMATICS;Garrett R, 1996, CURRENT BIOLOGY;Xu L, 1995, ;, 2019, ;, 1958, JOURNAL OF THE FRANKLIN INSTITUTE;Yang Y, 1997, ;Mackay D, 1992, NEURAL COMPUTATION;Jaynes E, 1986, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Smith A, 1991, PHILOSOPHICAL TRANSACTIONS OF THE ROYAL SOCIETY OF LONDON SERIES A PHYSICAL AND ENGINEERING SCIENCES;Fearn T, 1990, BIOMETRICS;Frigessi A, 1993, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Konopka A, 1994, BIOCOMPUTING;Kauffman S, 2018, ;Rumelhart D, 1985, ",,,OPENALEX,"NA, 2012, THE MIT PRESS EBOOKS-a","NA, 2012, THE MIT PRESS EBOOKS" +https://openalex.org/W2182361439,,Efficient and robust automated 
machine learning,2015,en,article,1259,NEURAL INFORMATION PROCESSING SYSTEMS,Neural Information Processing Systems,Matthias Feurer;Aaron Klein;Katharina Eggensperger;Jost Tobias Springenberg;Manuel Blum;Frank Hutter,Matthias Feurer;Aaron Klein;Katharina Eggensperger;Jost Tobias Springenberg;Manuel Blum;Frank Hutter,"Department of Computer Science , University of Freiburg , Germany;Department of Computer Science , University of Freiburg , Germany;Department of Computer Science , University of Freiburg , Germany;Department of Computer Science , University of Freiburg , Germany;Department of Computer Science , University of Freiburg , Germany;Department of Computer Science , University of Freiburg , Germany",,"The success of machine learning in a broad range of applications has led to an ever-growing demand for machine learning systems that can be used off the shelf by non-experts. To be effective in practice, such systems need to automatically choose a good algorithm and feature preprocessing steps for a new dataset at hand, and also set their respective hyperparameters. Recent work has started to tackle this automated machine learning (AutoML) problem with the help of efficient Bayesian optimization methods. Building on this, we introduce a robust new AutoML system based on scikit-learn (using 15 classifiers, 14 feature preprocessing methods, and 4 data preprocessing methods, giving rise to a structured hypothesis space with 110 hyperparameters). This system, which we dub AUTO-SKLEARN, improves on existing AutoML methods by automatically taking into account past performance on similar datasets, and by constructing ensembles from the models evaluated during the optimization. Our system won the first phase of the ongoing ChaLearn AutoML challenge, and our comprehensive analysis on over 100 diverse datasets shows that it substantially outperforms the previous state of the art in AutoML. 
We also demonstrate the performance gains due to each of our contributions and derive insights into the effectiveness of the individual components of AUTO-SKLEARN.",28,,2755,2763,Hyperparameter;Machine learning;Artificial intelligence;Computer science;Preprocessor;Bayesian optimization;Feature (linguistics);Set (abstract data type);Bayesian probability,DE,"Breiman L, 2001, MACHINE LEARNING;Hall M, 2009, ACM SIGKDD EXPLORATIONS NEWSLETTER;Wolpert D, 1992, NEURAL NETWORKS;Bergstra J, 2011, ;Hutter F, 2011, LECTURE NOTES IN COMPUTER SCIENCE;Fulkerson B, 1995, TECHNOMETRICS;Thornton C, 2013, ;Vanschoren J, 2014, ACM SIGKDD EXPLORATIONS NEWSLETTER;Caruana R, 2004, ;Hamerly G, 2003, ;Brazdil P, 2009, ;Feurer M, 2015, PROCEEDINGS OF THE AAAI CONFERENCE ON ARTIFICIAL INTELLIGENCE;Pfahringer B, 2000, ;Komer B, 2014, PROCEEDINGS OF THE PYTHON IN SCIENCE CONFERENCES;Bardenet R, 2013, HAL (LE CENTRE POUR LA COMMUNICATION SCIENTIFIQUE DIRECTE);Guyon I, 2010, ;Reif M, 2012, MACHINE LEARNING;Yogatama D, 2014, INTERNATIONAL CONFERENCE ON ARTIFICIAL INTELLIGENCE AND STATISTICS;Gomes T, 2011, NEUROCOMPUTING;Guyon I, 2015, ;Caruana R, 2006, PROCEEDINGS;Lacoste A, 2014, ;Pedregosa F, 2012, ARXIV (CORNELL UNIVERSITY);Snoek J, 2012, ARXIV (CORNELL UNIVERSITY);Brochu E, 2010, ARXIV (CORNELL UNIVERSITY);Kalousis A, 2002, ARCHIVE OUVERTE UNIGE (UNIVERSITY OF GENEVA)",,,OPENALEX,"Feurer M, 2015, NEURAL INFORMATION PROCESSING SYSTEMS","Feurer M, 2015, NEURAL INFORMATION PROCESSING SYSTEMS" +https://openalex.org/W1549998098,10.1201/b17476,Machine Learning: An Algorithmic Perspective,2009,en,book,1082,,,Stephen Marsland,Stephen Marsland,,"Stephen Marsland (corresponding author), ","Traditional books on machine learning can be divided into two groups - those aimed at advanced undergraduates or early postgraduates with reasonable mathematical knowledge and those that are primers on how to code algorithms. 
The field is ready for a text that not only demonstrates how to use the algorithms that make up machine learning methods, but also provides the background needed to understand how and why these algorithms work. Machine Learning: An Algorithmic Perspective is that text.Theory Backed up by Practical ExamplesThe book covers neural networks, graphical models, reinforcement le",,,,,Python (programming language);Computer science;Implementation;Perspective (graphical);Artificial intelligence;Reinforcement learning;Dimensionality reduction;Theoretical computer science;Machine learning;Programming language,,"Seel N, 2012, ;Hastie T, 2013, ;Naur P, 1991, RESEARCH AT THE UNIVERSITY OF COPENHAGEN (UNIVERSITY OF COPENHAGEN);Naur P, 1995, STUDIES IN COGNITIVE SYSTEMS",,,OPENALEX,"Marsland S, 2009, ","Marsland S, 2009, " +https://openalex.org/W2513506629,10.1126/science.aaf7894,Combining satellite imagery and machine learning to predict poverty,2016,en,article,1642,SCIENCE,Science,Neal Jean;Marshall Burke;Sang Michael Xie;W. Matthew Davis;David B. Lobell;Stefano Ermon,Neal Jean;Marshall Burke;Michael Xie;W. Matthew Davis;David B. 
Lobell;Stefano Ermon,"Department of Computer Science, Stanford University, Stanford, CA, USA;Department of Electrical Engineering, Stanford University, Stanford, CA, USA;Center on Food Security and the Environment, Stanford University, Stanford, CA, USA;Department of Earth System Science, Stanford University, Stanford, CA, USA;National Bureau of Economic Research, Boston, MA, USA;Department of Computer Science, Stanford University, Stanford, CA, USA;Center on Food Security and the Environment, Stanford University, Stanford, CA, USA;Center on Food Security and the Environment, Stanford University, Stanford, CA, USA;Department of Earth System Science, Stanford University, Stanford, CA, USA;Department of Computer Science, Stanford University, Stanford, CA, USA","Marshall Burke (corresponding author), Center on Food Security and the Environment, Stanford University, Stanford, CA, USA; Department of Earth System Science, Stanford University, Stanford, CA, USA; National Bureau of Economic Research, Boston, MA, USA","Reliable data on economic livelihoods remain scarce in the developing world, hampering efforts to study these outcomes and to design policies that improve them. Here we demonstrate an accurate, inexpensive, and scalable method for estimating consumption expenditure and asset wealth from high-resolution satellite imagery. Using survey and satellite data from five African countries--Nigeria, Tanzania, Uganda, Malawi, and Rwanda--we show how a convolutional neural network can be trained to identify image features that can explain up to 75% of the variation in local-level economic outcomes. Our method, which requires only publicly available data, could transform efforts to track and target poverty in developing countries. 
It also demonstrates how powerful machine learning techniques can be applied in a setting with limited training data, suggesting broad potential application across many scientific domains.",353,6301,790,794,Proxy (statistics);Poverty;Satellite imagery;Satellite;Daytime;Consumption (sociology);Perspective (graphical);Computer science;Remote sensing;Meteorology;Artificial intelligence;Machine learning;Geography;Economic growth;Economics;Geology;Sociology;Atmospheric sciences;Engineering,US,"LeCun Y, 2015, NATURE;Krizhevsky A, 2017, COMMUNICATIONS OF THE ACM;Russakovsky O, 2015, INTERNATIONAL JOURNAL OF COMPUTER VISION;Pan S, 2009, IEEE TRANSACTIONS ON KNOWLEDGE AND DATA ENGINEERING;Filmer D, 2001, DEMOGRAPHY;Razavian A, 2014, ;Chatfield K, 2014, ;Filmer D, 2001, DEMOGRAPHY;Chen X, 2011, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Sahn D, 2003, REVIEW OF INCOME AND WEALTH;Blumenstock J, 2015, SCIENCE;Mnih V, 2010, LECTURE NOTES IN COMPUTER SCIENCE;Michalopoulos S, 2013, THE QUARTERLY JOURNAL OF ECONOMICS;Balk D, 2006, ADVANCES IN PARASITOLOGY/ADVANCES IN PARASITOLOGY;Xie S, 2016, PROCEEDINGS OF THE AAAI CONFERENCE ON ARTIFICIAL INTELLIGENCE;Pinkovskiy M, 2016, THE QUARTERLY JOURNAL OF ECONOMICS;Devarajan S, 2013, REVIEW OF INCOME AND WEALTH;Sandefur J, 2015, THE JOURNAL OF DEVELOPMENT STUDIES;Jerven M, 2013, VERFASSUNG IN RECHT UND ÜBERSEE;Hand E, 2015, SCIENCE;Hong L, 2016, PROCEEDINGS OF THE AAAI CONFERENCE ON ARTIFICIAL INTELLIGENCE",,,OPENALEX,"Jean N, 2016, SCIENCE","Jean N, 2016, SCIENCE" +https://openalex.org/W2990168612,10.1007/978-3-319-20010-1,An Introduction to Machine Learning,2015,en,book,1019,,,Miroslav Kubát,Miroslav Kubat,"Department of Electrical and Computer Engineering, University of Miami, Coral Gables, USA","Miroslav Kubat (corresponding author), Department of Electrical and Computer Engineering, University of Miami, Coral Gables, USA",,,,,,Advice (programming);Computer science;Simple (philosophy);Artificial intelligence;Mathematics 
education;Human–computer interaction;Machine learning;Psychology;Programming language;Epistemology,US,,,,OPENALEX,"Kubát M, 2015, ","Kubát M, 2015, " +https://openalex.org/W1983989471,10.1198/jasa.2008.s236,Pattern Recognition and Machine Learning,2008,en,article,1114,JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION,Journal of the American Statistical Association,Thomas Burr,Thomas Burr,Los Alamos National Laboratory;LOS Alamos National Laboratory,"Thomas Burr (corresponding author), Los Alamos National Laboratory; LOS Alamos National Laboratory","""Pattern Recognition and Machine Learning."" Journal of the American Statistical Association, 103(482), pp. 886–887",103,482,886,887,Artificial intelligence;Computer science;Machine learning;Pattern recognition (psychology),US,"Neal R, 2007, TECHNOMETRICS;Hjort N, 2003, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Aitchison J, 1976, BIOMETRIKA;Ye J, 1998, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Black M, 1999, INTELLIGENT DATA ANALYSIS",,,OPENALEX,"Burr T, 2008, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION","Burr T, 2008, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION" +https://openalex.org/W2464725281,10.1038/npjcompumats.2016.28,A general-purpose machine learning framework for predicting properties of inorganic materials,2016,en,article,1689,NPJ COMPUTATIONAL MATERIALS,npj Computational Materials,Logan Ward;Ankit Agrawal;Alok Choudhary;Christopher Wolverton,Logan Ward;Ankit Agrawal;Alok Choudhary;Christopher Wolverton,"Department of Materials Science and Engineering, Northwestern University, Evanston, IL, USA;Department of Electrical Engineering and Computer Science, Northwestern University, Evanston, IL, USA;Department of Electrical Engineering and Computer Science, Northwestern University, Evanston, IL, USA;Department of Materials Science and Engineering, Northwestern University, Evanston, IL, USA","Christopher Wolverton (corresponding author), Department of Materials Science and Engineering, 
Northwestern University, Evanston, IL, USA","Abstract A very active area of materials research is to devise methods that use machine learning to automatically extract predictive models from existing materials data. While prior examples have demonstrated successful models for some applications, many more applications exist where machine learning can make a strong impact. To enable faster development of machine-learning-based models for such applications, we have created a framework capable of being applied to a broad range of materials data. Our method works by using a chemically diverse list of attributes, which we demonstrate are suitable for describing a wide variety of properties, and a novel method for partitioning the data set into groups of similar materials to boost the predictive accuracy. In this manuscript, we demonstrate how this new method can be used to predict diverse properties of crystalline and amorphous materials, such as band gap energy and glass-forming ability.",2,1,,,Computer science;Machine learning;Variety (cybernetics);Set (abstract data type);Artificial intelligence;Range (aeronautics);Materials science,US,"Hall M, 2009, ACM SIGKDD EXPLORATIONS NEWSLETTER;Shockley W, 1961, JOURNAL OF APPLIED PHYSICS;Jain A, 2013, APL MATERIALS;Ho T, 1998, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Inoue A, 2000, ACTA MATERIALIA;Callister W, 1985, ;Todeschini R, 2000, METHODS AND PRINCIPLES IN MEDICINAL CHEMISTRY;Bartók A, 2010, PHYSICAL REVIEW LETTERS;King D, 2009, ;Wang W, 2004, MATERIALS SCIENCE AND ENGINEERING R REPORTS;Denton A, 1991, PHYSICAL REVIEW A;Saal J, 2013, JOM;Rupp M, 2012, PHYSICAL REVIEW LETTERS;Curtarolo S, 2013, NATURE MATERIALS;Rodríguez J, 2006, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Belsky A, 2002, ACTA CRYSTALLOGRAPHICA SECTION B STRUCTURAL SCIENCE;Thornton C, 2013, ;Curtarolo S, 2012, COMPUTATIONAL MATERIALS SCIENCE;Jain A, 2011, COMPUTATIONAL MATERIALS SCIENCE;Pilania G, 2013, SCIENTIFIC 
REPORTS;Liu M, 2014, ENERGY & ENVIRONMENTAL SCIENCE;Schütt K, 2014, PHYSICAL REVIEW B;Fischer C, 2006, NATURE MATERIALS;Faber F, 2015, INTERNATIONAL JOURNAL OF QUANTUM CHEMISTRY;Curtarolo S, 2003, PHYSICAL REVIEW LETTERS;Gautier R, 2015, NATURE CHEMISTRY;Hautier G, 2010, INORGANIC CHEMISTRY;Dalsin J, 2005, MATERIALS TODAY;Seko A, 2014, PHYSICAL REVIEW B;Rajan K, 2015, ANNUAL REVIEW OF MATERIALS RESEARCH;Yang K, 2012, NATURE MATERIALS;Kalidindi S, 2015, ANNUAL REVIEW OF MATERIALS RESEARCH;Ding S, 2014, NATURE MATERIALS;Kirklin S, 2012, ADVANCED ENERGY MATERIALS;Dey P, 2013, COMPUTATIONAL MATERIALS SCIENCE;Sparks T, 2015, SCRIPTA MATERIALIA;Weber C, 2013, JOURNAL OF MATERIALS CHEMISTRY A;Ruiz‐Blanco Y, 2015, BMC BIOINFORMATICS;Li Y, 2007, MRS BULLETIN;Bhadeshia H, 2009, MATERIALS SCIENCE AND TECHNOLOGY;Chatterjee S, 2007, MATERIALS SCIENCE AND TECHNOLOGY;Seko A, 2014, PHYSICAL REVIEW B;Villars P, 2003, JOURNAL OF ALLOYS AND COMPOUNDS;Y K, 1997, ;Kong C, 2012, JOURNAL OF CHEMICAL INFORMATION AND MODELING;Yang L, 2013, PHYSICAL REVIEW B;Srinivasan S, 2013, MATERIALS;Hautier G, 2013, TOPICS IN CURRENT CHEMISTRY;Hou Z, 1997, APPLIED CATALYSIS A GENERAL;Zhang T, 2002, MATERIALS TRANSACTIONS;Breiman L, 2001, MACHINE LEARNING;Clayton C, 1987, MATERIALS SCIENCE AND ENGINEERING;Kirklin S, 2015, NPJ COMPUTATIONAL MATERIALS;Agrawal A, 2016, APL MATERIALS;Ghiringhelli L, 2015, PHYSICAL REVIEW LETTERS;Meredig B, 2014, PHYSICAL REVIEW B;Shockley W, 2018, RENEWABLE ENERGY;Mauri A, 2006, BOA (UNIVERSITY OF MILANO-BICOCCA);Kalinin S, 2015, NATURE MATERIALS;, 1995, ANALYTICAL PROCEEDINGS;Rajan K, 2005, STATISTICAL ANALYSIS AND DATA MINING THE ASA DATA SCIENCE JOURNAL;Chen H, 2012, CHEMISTRY OF MATERIALS;Deml A, 2016, PHYSICAL REVIEW. B./PHYSICAL REVIEW. 
B;, 2014, TOPICS IN CURRENT CHEMISTRY;Sumpter B, 1996, ANNUAL REVIEW OF MATERIALS SCIENCE;Meredig B, 2014, CHEMISTRY OF MATERIALS;Lookman T, 2016, APL MATERIALS;Mulholland G, 2016, APL MATERIALS;Wada T, 2003, MATERIALS TRANSACTIONS;Sumpter B, 1996, ANNUAL REVIEW OF MATERIALS SCIENCE;Faber F, 2015, ARXIV (CORNELL UNIVERSITY)",,,OPENALEX,"Ward L, 2016, NPJ COMPUTATIONAL MATERIALS","Ward L, 2016, NPJ COMPUTATIONAL MATERIALS" +https://openalex.org/W2508393166,10.1109/tsp.2016.2601299,"Majorization-Minimization Algorithms in Signal Processing, Communications, and Machine Learning",2016,en,article,1739,IEEE TRANSACTIONS ON SIGNAL PROCESSING,IEEE Transactions on Signal Processing,Ying Sun;Prabhu Babu;Daniel P. Palomar,Ying Sun;Prabhu Babu;Daniel P. Palomar,"Department of Electronic and Computer Engineering, Hong Kong University of Science and Technology, Hong Kong;School of Industrial Engineering, Purdue University, West-Lafayette, IN, USA;CARE, IIT Delhi, Delhi, India;Department of Electronic and Computer Engineering, Hong Kong University of Science and Technology, Hong Kong",,"This paper gives an overview of the majorization-minimization (MM) algorithmic framework, which can provide guidance in deriving problem-driven algorithms with low computational cost. A general introduction of MM is presented, including a description of the basic principle and its convergence results. The extensions, acceleration schemes, and connection to other algorithmic frameworks are also covered. To bridge the gap between theory and practice, upperbounds for a large number of basic functions, derived based on the Taylor expansion, convexity, and special inequalities, are provided as ingredients for constructing surrogate functions. 
With the pre-requisites established, the way of applying MM to solving specific problems is elaborated by a wide range of applications in signal processing, communications, and machine learning.",65,3,794,816,Signal processing;Computer science;Convexity;Algorithm;Minification;Convergence (economics);Range (aeronautics);Majorization;Acceleration;Mathematical optimization;Mathematics;Digital signal processing,HK;US;IN,"Dempster A, 1977, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Gerchberg R, 1972, OPTIK;Zou H, 2006, JOURNAL OF COMPUTATIONAL AND GRAPHICAL STATISTICS;Louis T, 1982, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Combettes P, 2011, SPRINGER OPTIMIZATION AND ITS APPLICATIONS;Shi Q, 2011, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Tseng P, 2001, JOURNAL OF OPTIMIZATION THEORY AND APPLICATIONS;Fienup J, 1978, OPTICS LETTERS;Razaviyayn M, 2013, SIAM JOURNAL ON OPTIMIZATION;Blumensath T, 2008, JOURNAL OF FOURIER ANALYSIS AND APPLICATIONS;Marks B, 1978, OPERATIONS RESEARCH;Geman D, 1995, IEEE TRANSACTIONS ON IMAGE PROCESSING;Beaton A, 1974, TECHNOMETRICS;Thi H, 2005, ANNALS OF OPERATIONS RESEARCH;Lange K, 2000, JOURNAL OF COMPUTATIONAL AND GRAPHICAL STATISTICS;Horst R, 1999, JOURNAL OF OPTIMIZATION THEORY AND APPLICATIONS;Tyler D, 1987, THE ANNALS OF STATISTICS;Costa J, 2006, ACM TRANSACTIONS ON SENSOR NETWORKS;Figueiredo M, 2007, IEEE TRANSACTIONS ON IMAGE PROCESSING;B�hning D, 1992, ANNALS OF THE INSTITUTE OF STATISTICAL MATHEMATICS;Laird N, 1987, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Lange K, 1995, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Meilijson I, 1989, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Varadhan R, 2008, SCANDINAVIAN JOURNAL OF STATISTICS;Scutari G, 2014, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Kuhn H, 1973, MATHEMATICAL PROGRAMMING;Lange K, 1995, IEEE TRANSACTIONS ON IMAGE PROCESSING;Song J, 2015, IEEE 
TRANSACTIONS ON SIGNAL PROCESSING;Stoica P, 2011, SIGNAL PROCESSING;Böhning D, 1988, ANNALS OF THE INSTITUTE OF STATISTICAL MATHEMATICS;Sriperumbudur B, 2010, MACHINE LEARNING;Chouzenoux E, 2013, SIAM JOURNAL ON IMAGING SCIENCES;Yang Y, 2016, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Wiesel A, 2011, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Bolte J, 2016, MATHEMATICS OF OPERATIONS RESEARCH;Sun Y, 2014, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Jamshidian M, 1993, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Ba D, 2013, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Wu T, 2010, STATISTICAL SCIENCE;Song J, 2015, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Beck A, 2008, SIAM JOURNAL ON OPTIMIZATION;Naghsh M, 2014, IEEE TRANSACTIONS ON AEROSPACE AND ELECTRONIC SYSTEMS;Florescu A, 2013, SIGNAL PROCESSING;Lange K, 2014, INTERNATIONAL STATISTICAL REVIEW;Facchinei F, 2014, ;Labat C, 2007, JOURNAL OF OPTIMIZATION THEORY AND APPLICATIONS;Babu P, 2012, ;Jenatton R, 2009, ARXIV.ORG;Bauer E, 2013, ARXIV (CORNELL UNIVERSITY);Quoc T, 2011, ARXIV.ORG;Beck A, 2009, SIAM JOURNAL ON IMAGING SCIENCES;Lee D, 2000, ;Kushary D, 1998, TECHNOMETRICS;Daubechies I, 2004, COMMUNICATIONS ON PURE AND APPLIED MATHEMATICS;Candès E, 2008, JOURNAL OF FOURIER ANALYSIS AND APPLICATIONS;Wu C, 1983, THE ANNALS OF STATISTICS;Witten D, 2009, BIOSTATISTICS;Fan J, 2014, NATIONAL SCIENCE REVIEW;Daubechies I, 2009, COMMUNICATIONS ON PURE AND APPLIED MATHEMATICS;Charbonnier P, 1997, IEEE TRANSACTIONS ON IMAGE PROCESSING;Geman D, 1992, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Candès E, 2015, IEEE TRANSACTIONS ON INFORMATION THEORY;Chiang M, 2007, IEEE TRANSACTIONS ON WIRELESS COMMUNICATIONS;Shi Q, 2011, ;Févotte C, 2011, NEURAL COMPUTATION;Stoica P, 2010, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Stoica P, 2009, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Hjørungnes A, 2011, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Yuille A, 2001, ;Xiao J, 2008, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Markovsky I, 2008, AUTOMATICA;Mairal 
J, 2015, SIAM JOURNAL ON OPTIMIZATION;Yaghoobi M, 2009, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Leeuw J, 1988, JOURNAL OF CLASSIFICATION;Zhou H, 2009, STATISTICS AND COMPUTING;Bioucas‐Dias J, 2006, ;Facchinei F, 2015, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Jacobson M, 2007, IEEE TRANSACTIONS ON IMAGE PROCESSING;Lee J, 2010, CALTECHAUTHORS (CALIFORNIA INSTITUTE OF TECHNOLOGY);Repetti A, 2014, IEEE SIGNAL PROCESSING LETTERS;Févotte C, 2011, ;Jamshidian M, 1997, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Sha F, 2002, SCHOLARLYCOMMONS (UNIVERSITY OF PENNSYLVANIA);Eggermont P, 1990, LINEAR ALGEBRA AND ITS APPLICATIONS;Becker M, 1997, STATISTICAL METHODS IN MEDICAL RESEARCH;Allain M, 2006, IEEE TRANSACTIONS ON IMAGE PROCESSING;Salakhutdinov R, 2003, ;Helmbold D, 1995, ;Blumensath T, 2007, ;Oğuz-Ekim P, 2011, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Bertsekas D, 1994, SIAM JOURNAL ON OPTIMIZATION;Lange K, 2012, MATHEMATICAL PROGRAMMING;Lewitt R, 1986, IEEE TRANSACTIONS ON MEDICAL IMAGING;Chrétien S, 2000, IEEE TRANSACTIONS ON INFORMATION THEORY;Fuchs J, 2007, IEEE JOURNAL OF SELECTED TOPICS IN SIGNAL PROCESSING;Shamsi D, 2013, FIELDS INSTITUTE COMMUNICATIONS;Chouzenoux É, 2012, INVERSE PROBLEMS;Journée M, 2008, ARXIV.ORG;Yuan X, 2011, ARXIV (CORNELL UNIVERSITY);Mairal J, 2013, ARXIV (CORNELL UNIVERSITY);Seung D, 2001, ;Magnus J, 1988, BIOMETRICS;Neal R, 1998, ;Parikh N, 2014, FOUNDATIONS AND TRENDS® IN OPTIMIZATION;Hunter D, 2004, THE AMERICAN STATISTICIAN;Parlar M, 2000, BIRKHÄUSER BOSTON EBOOKS;Wipf D, 2004, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Fessler J, 1994, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Krishnapuram B, 2005, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Lipp T, 2015, OPTIMIZATION AND ENGINEERING;d’Aspremont A, 2007, SIAM REVIEW;Pierro A, 1995, IEEE TRANSACTIONS ON MEDICAL IMAGING;Song J, 2015, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Beck A, 2009, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Lange K, 2000, JOURNAL OF 
COMPUTATIONAL AND GRAPHICAL STATISTICS;Ziegel E, 1989, TECHNOMETRICS;Song J, 2016, IEEE TRANSACTIONS ON SIGNAL PROCESSING;JournéeMichel, 2010, JOURNAL OF MACHINE LEARNING RESEARCH;Chouzenoux É, 2013, JOURNAL OF OPTIMIZATION THEORY AND APPLICATIONS;Figueiredo M, 2005, ;Dinh Q, 2010, ;Chouzenoux É, 2016, JOURNAL OF GLOBAL OPTIMIZATION;Sun Y, 2016, IEEE TRANSACTIONS ON SIGNAL PROCESSING;YuanXiao-Tong, 2013, JOURNAL OF MACHINE LEARNING RESEARCH;Vaida F, 2005, ;Audet C, 2016, OPTIMIZATION AND ENGINEERING;Scutari G, 2014, ARXIV (CORNELL UNIVERSITY);Palacios-Gómez F, 1982, MANAGEMENT SCIENCE;Qiu T, 2016, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Chouzenoux É, 2011, IEEE TRANSACTIONS ON IMAGE PROCESSING;, 1987, IEEE TRANSACTIONS ON MEDICAL IMAGING;Sun Y, 2015, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Lange K, 2016, ;Chouzenoux É, 2017, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Sun Y, 2015, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Jamshidian M, 1993, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Korkmaz S, 2009, ;Becker M, 1997, STATISTICAL METHODS IN MEDICAL RESEARCH;Helmbold D, 1997, MACHINE LEARNING;Chouzenoux E, 2016, IEEE SIGNAL PROCESSING LETTERS;Qiu T, 2015, ;Zhou H, 2015, ARXIV (CORNELL UNIVERSITY);Marjanovic G, 2014, ARXIV (CORNELL UNIVERSITY);I. E, 1971, MATHEMATICS OF COMPUTATION;Bonnans J, 2006, ;Heiser W, 1995, ;Quoc T, 2011, ARXIV (CORNELL UNIVERSITY);Chouzenoux E, 2015, ARXIV (CORNELL UNIVERSITY)",,,OPENALEX,"Sun Y, 2016, IEEE TRANSACTIONS ON SIGNAL PROCESSING","Sun Y, 2016, IEEE TRANSACTIONS ON SIGNAL PROCESSING" +https://openalex.org/W4299828299,10.48550/arxiv.1207.4676,Proceedings of the 29th International Conference on Machine Learning (ICML-12),2012,en,preprint,1618,ARXIV (CORNELL UNIVERSITY),arXiv (Cornell University),John Langford;Joëlle Pineau,"Langford, John;Pineau, Joelle",Editors;Editors,,"This is an index to the papers that appear in the Proceedings of the 29th International Conference on Machine Learning (ICML-12). 
The conference was held in Edinburgh, Scotland, June 27th - July 3rd, 2012.",,,,,Library science;Geography;Forestry;Computer science,US,,,,OPENALEX,"Langford J, 2012, ARXIV (CORNELL UNIVERSITY)","Langford J, 2012, ARXIV (CORNELL UNIVERSITY)" +https://openalex.org/W2547447472,10.1063/1.4966192,Perspective: Machine learning potentials for atomistic simulations,2016,en,article,1490,THE JOURNAL OF CHEMICAL PHYSICS,The Journal of Chemical Physics,Jörg Behler,Jörg Behler,"Lehrstuhl für Theoretische Chemie, Ruhr-Universität Bochum , D-44780 Bochum, Germany","Jörg Behler (corresponding author), Lehrstuhl für Theoretische Chemie, Ruhr-Universität Bochum , D-44780 Bochum, Germany","Nowadays, computer simulations have become a standard tool in essentially all fields of chemistry, condensed matter physics, and materials science. In order to keep up with state-of-the-art experiments and the ever growing complexity of the investigated problems, there is a constantly increasing need for simulations of more realistic, i.e., larger, model systems with improved accuracy. In many cases, the availability of sufficiently efficient interatomic potentials providing reliable energies and forces has become a serious bottleneck for performing these simulations. To address this problem, currently a paradigm change is taking place in the development of interatomic potentials. Since the early days of computer simulations simplified potentials have been derived using physical approximations whenever the direct application of electronic structure methods has been too demanding. Recent advances in machine learning (ML) now offer an alternative approach for the representation of potential-energy surfaces by fitting large data sets from electronic structure calculations. 
In this perspective, the central ideas underlying these ML potentials, solved problems and remaining challenges are reviewed along with a discussion of their current applicability and limitations.",145,17,170901,170901,Bottleneck;Representation (politics);Computer science;Perspective (graphical);Interatomic potential;Statistical physics;Electronic structure;Computational science;Theoretical computer science;Artificial intelligence;Molecular dynamics;Physics;Quantum mechanics,DE,"McCulloch W, 1943, BULLETIN OF MATHEMATICAL BIOLOGY;Cybenko G, 1989, MATHEMATICS OF CONTROL SIGNALS AND SYSTEMS;Allen F, 2002, ACTA CRYSTALLOGRAPHICA SECTION B STRUCTURAL SCIENCE;Hornik K, 1991, NEURAL NETWORKS;Behler J, 2007, PHYSICAL REVIEW LETTERS;Steinhardt P, 1983, PHYSICAL REVIEW. B, CONDENSED MATTER;Bartók A, 2010, PHYSICAL REVIEW LETTERS;Tersoff J, 1990, PHYSICAL REVIEW. B, CONDENSED MATTER;Bartók A, 2013, PHYSICAL REVIEW B;Rupp M, 2012, PHYSICAL REVIEW LETTERS;Behler J, 2011, THE JOURNAL OF CHEMICAL PHYSICS;, 2000, ;Thompson A, 2014, JOURNAL OF COMPUTATIONAL PHYSICS;Kohn W, 1996, PHYSICAL REVIEW LETTERS;Braams B, 2009, INTERNATIONAL REVIEWS IN PHYSICAL CHEMISTRY;Behler J, 2015, INTERNATIONAL JOURNAL OF QUANTUM CHEMISTRY;Pilania G, 2013, SCIENTIFIC REPORTS;Hansen K, 2015, THE JOURNAL OF PHYSICAL CHEMISTRY LETTERS;Behler J, 2011, PHYSICAL CHEMISTRY CHEMICAL PHYSICS;Li Z, 2015, PHYSICAL REVIEW LETTERS;Bartók A, 2015, INTERNATIONAL JOURNAL OF QUANTUM CHEMISTRY;Gasteiger J, 1993, ANGEWANDTE CHEMIE INTERNATIONAL EDITION IN ENGLISH;Blank T, 1995, THE JOURNAL OF CHEMICAL PHYSICS;Behler J, 2014, JOURNAL OF PHYSICS CONDENSED MATTER;Artrith N, 2011, PHYSICAL REVIEW B;Lorenz S, 2004, CHEMICAL PHYSICS LETTERS;Rupp M, 2015, INTERNATIONAL JOURNAL OF QUANTUM CHEMISTRY;Bernstein F, 1978, ARCHIVES OF BIOCHEMISTRY AND BIOPHYSICS;Khaliullin R, 2011, NATURE MATERIALS;Handley C, 2010, THE JOURNAL OF PHYSICAL CHEMISTRY A;Ghasemi S, 2015, PHYSICAL REVIEW B;Manzhos S, 2006, THE JOURNAL OF CHEMICAL 
PHYSICS;Rupp M, 2015, THE JOURNAL OF PHYSICAL CHEMISTRY LETTERS;Balabin R, 2011, PHYSICAL CHEMISTRY CHEMICAL PHYSICS;Manzhos S, 2005, THE JOURNAL OF PHYSICAL CHEMISTRY A;Geiger P, 2013, THE JOURNAL OF CHEMICAL PHYSICS;Agrafiotis D, 2002, JOURNAL OF CHEMICAL INFORMATION AND COMPUTER SCIENCES;Behler J, 2007, THE JOURNAL OF CHEMICAL PHYSICS;Handley C, 2009, PHYSICAL CHEMISTRY CHEMICAL PHYSICS;Jose K, 2012, THE JOURNAL OF CHEMICAL PHYSICS;Sosso G, 2012, PHYSICAL REVIEW B;Gassner H, 1998, THE JOURNAL OF PHYSICAL CHEMISTRY A;Valle M, 2010, ACTA CRYSTALLOGRAPHICA SECTION A FOUNDATIONS OF CRYSTALLOGRAPHY;Handley C, 2014, THE EUROPEAN PHYSICAL JOURNAL B;Ishida T, 1999, CHEMICAL PHYSICS LETTERS;Houlding S, 2007, INTERNATIONAL JOURNAL OF QUANTUM CHEMISTRY;Keil M, 2004, JOURNAL OF COMPUTATIONAL CHEMISTRY;Hobday S, 1999, MODELLING AND SIMULATION IN MATERIALS SCIENCE AND ENGINEERING;Laio A, 2007, LECTURE NOTES IN PHYSICS;Thomsen J, 1989, JOURNAL OF MAGNETIC RESONANCE (1969);Born M, 1927, ANNALEN DER PHYSIK;Tersoff J, 1989, PHYSICAL REVIEW. B, CONDENSED MATTER;Marx D, 2009, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Ghiringhelli L, 2015, PHYSICAL REVIEW LETTERS;De S, 2016, PHYSICAL CHEMISTRY CHEMICAL PHYSICS;Morawietz T, 2016, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Jain A, 2016, JOURNAL OF MATERIALS RESEARCH/PRATT'S GUIDE TO VENTURE CAPITAL SOURCES;Ward L, 2016, CURRENT OPINION IN SOLID STATE AND MATERIALS SCIENCE;Natarajan S, 2016, PHYSICAL CHEMISTRY CHEMICAL PHYSICS;Artrith N, 2014, NANO LETTERS;Elias J, 2016, ACS CATALYSIS;Zhu L, 2016, THE JOURNAL OF CHEMICAL PHYSICS;J. 
B, 2007, MAX PLANCK INSTITUTE FOR PLASMA PHYSICS;Sumpter B, 1994, ANNUAL REVIEW OF PHYSICAL CHEMISTRY;Hellström M, 2016, THE JOURNAL OF PHYSICAL CHEMISTRY LETTERS",,,OPENALEX,"Behler J, 2016, THE JOURNAL OF CHEMICAL PHYSICS","Behler J, 2016, THE JOURNAL OF CHEMICAL PHYSICS" +https://openalex.org/W2109722477,10.7551/mitpress/7503.003.0040,Map-Reduce for Machine Learning on Multicore,2007,en,book-chapter,1253,THE MIT PRESS EBOOKS,The MIT Press eBooks,Cheng-Tao Chu;Sang Kyun Kim;Yi-An Lin;Yuanyuan Yu;Gary Bradski;Andrew Y. Ng;Kunle Olukotun,Cheng-Tao Chu;Sang Kyun Kim;Yi-An Lin;YuanYuan Yu;Gary Bradski;Andrew Y. Ng;Kunle Olukotun,"CS. Department, Stanford University, Stanford CA#TAB#;CS. Department, Stanford University, Stanford CA#TAB#;CS. Department, Stanford University, Stanford CA#TAB#;CS. Department, Stanford University, Stanford CA#TAB#;CS. Department, Stanford University, Stanford CA and Rexee Inc.#TAB#;CS. Department, Stanford University, Stanford CA#TAB#;CS. Department, Stanford University, Stanford CA#TAB#",,"We are at the beginning of the multicore era. Computers will have increasingly many cores (processors), but there is still no good programming framework for these architectures, and thus no simple and unified way for machine learning to take advantage of the potential speed up. In this paper, we develop a broadly applicable parallel programming method, one that is easily applied to many different learning algorithms. Our work is in distinct contrast to the tradition in machine learning of designing (often ingenious) ways to speed up a single algorithm at a time. Specifically, we show that algorithms that fit the Statistical Query model [15] can be written in a certain summation form, which allows them to be easily parallelized on multicore computers. 
We adapt Google's map-reduce [7] paradigm to demonstrate this parallel speed up technique on a variety of learning algorithms including locally weighted linear regression (LWLR), k-means, logistic regression (LR), naive Bayes (NB), SVM, ICA, PCA, gaussian discriminant analysis (GDA), EM, and backpropagation (NN). Our experimental results show basically linear speedup with an increasing number of processors.",,,281,288,Computer science;Speedup;Machine learning;Artificial intelligence;Multi-core processor;Support vector machine;Parallel computing;Algorithm,US,"Wold S, 1987, CHEMOMETRICS AND INTELLIGENT LABORATORY SYSTEMS;Bell A, 1995, NEURAL COMPUTATION;Hartigan J, 1975, ;Cleveland W, 1988, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Platt J, 1998, THE MIT PRESS EBOOKS;Valiant L, 1984, ;Moon T, 1996, IEEE SIGNAL PROCESSING MAGAZINE;Dayalan M, 2018, INTERNATIONAL JOURNAL OF RESEARCH AND ENGINEERING;Vapnik V, 2006, INFORMATION SCIENCE AND STATISTICS;Lewis D, 1998, LECTURE NOTES IN COMPUTER SCIENCE;Kearns M, 1994, THE MIT PRESS EBOOKS;Pregibon D, 1981, THE ANNALS OF STATISTICS;Langley P, 1992, ;Hastie T, 1996, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Chapelle O, 2007, NEURAL COMPUTATION;Moore G, 1975, INTERNATIONAL ELECTRON DEVICES MEETING;Kearns M, 1998, JOURNAL OF THE ACM;Sutter H, 2005, QUEUE;Graf H, 2004, ;Frank D, 2002, IBM JOURNAL OF RESEARCH AND DEVELOPMENT;Csanky L, 1976, SIAM JOURNAL ON COMPUTING;Gelsinger P, 2002, ;Jin R, 2004, IEEE TRANSACTIONS ON KNOWLEDGE AND DATA ENGINEERING;Caragea D, 2004, INTERNATIONAL JOURNAL OF HYBRID INTELLIGENT SYSTEMS;Jin R, 2002, SIAM INTERNATIONAL CONFERENCE ON DATA MINING;Welsch R, 1977, NATIONAL BUREAU OF ECONOMIC RESEARCH",,,OPENALEX,"Chu C, 2007, THE MIT PRESS EBOOKS","Chu C, 2007, THE MIT PRESS EBOOKS" +https://openalex.org/W1569512666,,Neural Networks And Learning Machines,2010,en,book,6573,,,S. Haykin,S. Haykin,,"S. 
Haykin (corresponding author), ",,,,,,Artificial neural network;Computer science;Artificial intelligence;Cognitive science;Machine learning;Psychology,,,,,OPENALEX,"Haykin S, 2010, ","Haykin S, 2010, " +https://openalex.org/W1723619723,10.18637/jss.v017.b05,Pattern Recognition and Machine Learning,2007,en,article,1389,JOURNAL OF STATISTICAL SOFTWARE,Journal of Statistical Software,John H. Maindonald,John Maindonald,,"John Maindonald (corresponding author), ",Abstracts not available for BookReviews,17,Book Review 5,,,Computer science;Artificial intelligence;Pattern recognition (psychology);Machine learning,,"Ripley B, 1996, CAMBRIDGE UNIVERSITY PRESS EBOOKS",,,OPENALEX,"Maindonald J, 2007, JOURNAL OF STATISTICAL SOFTWARE","Maindonald J, 2007, JOURNAL OF STATISTICAL SOFTWARE" +https://openalex.org/W2789970635,10.1056/nejmp1714229,Implementing Machine Learning in Health Care — Addressing Ethical Challenges,2018,en,article,1625,NEW ENGLAND JOURNAL OF MEDICINE,New England Journal of Medicine,Danton Char;Nigam H. Shah;David Magnus,Danton S. Char;Nigam H. 
Shah;David Magnus,"From the Department of Anesthesiology, Division of Pediatric Cardiac Anesthesia (D.S.C.), the Center for Biomedical Ethics (D.S.C., D.M.), and the Center for Biomedical Informatics Research (N.S.), Stanford University School of Medicine, Stanford, CA;Center for Biomedical Ethics;Department of Anesthesiology, Division of Pediatric Cardiac Anesthesia;From the Department of Anesthesiology, Division of Pediatric Cardiac Anesthesia (D.S.C.), the Center for Biomedical Ethics (D.S.C., D.M.), and the Center for Biomedical Informatics Research (N.S.), Stanford University School of Medicine, Stanford, CA;From the Department of Anesthesiology, Division of Pediatric Cardiac Anesthesia (D.S.C.), the Center for Biomedical Ethics (D.S.C., D.M.), and the Center for Biomedical Informatics Research (N.S.), Stanford University School of Medicine, Stanford, CA;Center for Biomedical Ethics",,"We need to consider the ethical challenges inherent in implementing machine learning in health care if its benefits are to be realized. 
Some of these challenges are straightforward, whereas others have less obvious risks but raise broader ethical concerns.",378,11,981,983,Ethical issues;Health care;Engineering ethics;Risk analysis (engineering);Psychology;Computer science;Business;Political science;Engineering,US,"Obermeyer Z, 2016, NEW ENGLAND JOURNAL OF MEDICINE;Gijsberts C, 2015, PLOS ONE;Longhurst C, 2014, HEALTH AFFAIRS;Siegler M, 1982, NEW ENGLAND JOURNAL OF MEDICINE",,,OPENALEX,"Char D, 2018, NEW ENGLAND JOURNAL OF MEDICINE","Char D, 2018, NEW ENGLAND JOURNAL OF MEDICINE" +https://openalex.org/W2883583109,10.1126/science.aat2663,Inverse molecular design using machine learning: Generative models for matter engineering,2018,en,review,2057,SCIENCE,Science,Benjamín Sánchez-Lengeling;Alán Aspuru‐Guzik,Benjamin Sanchez-Lengeling;Alán Aspuru-Guzik,"Department of Chemistry and Chemical Biology, Harvard University, 12 Oxford Street, Cambridge, MA 02138, USA;Canadian Institute for Advanced Research (CIFAR) Senior Fellow, Toronto, Ontario M5S 1M1, Canada;Department of Chemistry and Department of Computer Science, University of Toronto, Toronto, Ontario M5S 3H6, Canada;Vector Institute for Artificial Intelligence, Toronto, Ontario M5S 1M1, Canada","Alán Aspuru-Guzik (corresponding author), Canadian Institute for Advanced Research (CIFAR) Senior Fellow, Toronto, Ontario M5S 1M1, Canada; Department of Chemistry and Department of Computer Science, University of Toronto, Toronto, Ontario M5S 3H6, Canada; Vector Institute for Artificial Intelligence, Toronto, Ontario M5S 1M1, Canada","The discovery of new materials can bring enormous societal and technological progress. In this context, exploring completely the large space of potential materials is computationally intractable. Here, we review methods for achieving inverse design, which aims to discover tailored materials from the starting point of a particular desired functionality. 
Recent advances from the rapidly growing field of artificial intelligence, mostly from the subfield of machine learning, have resulted in a fertile exchange of ideas, where approaches to inverse molecular design are being proposed and employed at a rapid pace. Among these, deep generative models have been applied to numerous classes of materials: rational design of prospective drugs, synthetic routes to organic compounds, and optimization of photovoltaics and redox flow batteries, as well as a variety of other solid-state materials.",361,6400,360,365,Generative grammar;Context (archaeology);Computer science;Variety (cybernetics);Pace;Artificial intelligence;Generative Design;Biochemical engineering;Machine learning;Engineering;Biology,US;CA,"Hochreiter S, 1997, NEURAL COMPUTATION;Silver D, 2016, NATURE;Jordan M, 2015, SCIENCE;Rogers D, 2010, JOURNAL OF CHEMICAL INFORMATION AND MODELING;Newman D, 2016, JOURNAL OF NATURAL PRODUCTS;Shahriari B, 2015, PROCEEDINGS OF THE IEEE;Scharber M, 2006, ADVANCED MATERIALS;Gómez-Bombarelli R, 2018, ACS CENTRAL SCIENCE;Bartók A, 2013, PHYSICAL REVIEW B;Rupp M, 2012, PHYSICAL REVIEW LETTERS;Raccuglia P, 2016, NATURE;Kearnes S, 2016, JOURNAL OF COMPUTER-AIDED MOLECULAR DESIGN;Behler J, 2011, THE JOURNAL OF CHEMICAL PHYSICS;Huskinson B, 2014, NATURE;Schütt K, 2017, NATURE COMMUNICATIONS;Weininger D, 1989, JOURNAL OF CHEMICAL INFORMATION AND COMPUTER SCIENCES;Gómez‐Bombarelli R, 2016, NATURE MATERIALS;Jain A, 2011, COMPUTATIONAL MATERIALS SCIENCE;Schneider G, 2005, NATURE REVIEWS DRUG DISCOVERY;Hansen K, 2015, THE JOURNAL OF PHYSICAL CHEMISTRY LETTERS;Reymond J, 2015, ACCOUNTS OF CHEMICAL RESEARCH;Hoelder S, 2012, MOLECULAR ONCOLOGY;Lin K, 2016, NATURE ENERGY;Ley S, 2015, ANGEWANDTE CHEMIE INTERNATIONAL EDITION;Wei J, 2016, ACS CENTRAL SCIENCE;Nikolaev P, 2016, NPJ COMPUTATIONAL MATERIALS;Cheng L, 2014, THE JOURNAL OF PHYSICAL CHEMISTRY LETTERS;Virshup A, 2013, JOURNAL OF THE AMERICAN CHEMICAL SOCIETY;Pyzer‐Knapp E, 2015, ANNUAL REVIEW 
OF MATERIALS RESEARCH;Broach J, 1996, NATURE;Carnero A, 2006, CLINICAL & TRANSLATIONAL ONCOLOGY;Robbins D, 2011, SCIENCE;Hachmann J, 2013, ENERGY & ENVIRONMENTAL SCIENCE;Petousis I, 2017, SCIENTIFIC DATA;Qu X, 2015, COMPUTATIONAL MATERIALS SCIENCE;Kanal I, 2013, THE JOURNAL OF PHYSICAL CHEMISTRY LETTERS;Yan Q, 2017, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Ikebata H, 2017, JOURNAL OF COMPUTER-AIDED MOLECULAR DESIGN;Wang M, 2006, JOURNAL OF THE AMERICAN CHEMICAL SOCIETY;Lilienfeld O, 2013, INTERNATIONAL JOURNAL OF QUANTUM CHEMISTRY;Rupakheti C, 2015, JOURNAL OF CHEMICAL INFORMATION AND MODELING;Hirn M, 2017, MULTISCALE MODELING AND SIMULATION;Supady A, 2015, JOURNAL OF CHEMICAL INFORMATION AND MODELING;Xiao D, 2011, JOURNAL OF THE AMERICAN CHEMICAL SOCIETY;Kühn C, 1996, THE JOURNAL OF PHYSICAL CHEMISTRY;Olah C, 2016, DISTILL;Weymuth T, 2014, INTERNATIONAL JOURNAL OF QUANTUM CHEMISTRY;Meredig B, 2014, CHEMISTRY OF MATERIALS;Qian C, 2014, SMALL;Mnih V, 2013, ARXIV (CORNELL UNIVERSITY);Yu L, 2017, PROCEEDINGS OF THE AAAI CONFERENCE ON ARTIFICIAL INTELLIGENCE;Gómez‐Bombarelli R, 2018, CAMBRIDGE UNIVERSITY ENGINEERING DEPARTMENT PUBLICATIONS DATABASE;Segler M, 2018, NATURE;Olivecrona M, 2017, JOURNAL OF CHEMINFORMATICS;Popova M, 2018, SCIENCE ADVANCES;Kadurin A, 2017, MOLECULAR PHARMACEUTICS;Zhou Z, 2017, ACS CENTRAL SCIENCE;Zunger A, 2018, NATURE REVIEWS CHEMISTRY;Kirkpatrick P, 2004, NATURE;Yang X, 2017, SCIENCE AND TECHNOLOGY OF ADVANCED MATERIALS;Maine E, 2006, RESEARCH POLICY;Lopez S, 2017, JOULE;Kitson P, 2018, SCIENCE;Roch L, 2018, SCIENCE ROBOTICS;Sánchez-Lengeling B, 2017, CHEMRXIV;Bouchacourt D, 2018, PROCEEDINGS OF THE AAAI CONFERENCE ON ARTIFICIAL INTELLIGENCE;Aspuru‐Guzik A, 2018, ACS CENTRAL SCIENCE;Mullard A, 2017, NATURE;Jørgensen P, 2018, MOLECULAR INFORMATICS;Yoshikawa N, 2018, CHEMISTRY LETTERS;A. 
S, 2015, MAX PLANCK DIGITAL LIBRARY;Sánchez-Lengeling B, 2017, CHEMRXIV;Aspuru‐Guzik A, 2018, DIGITAL ACCESS TO SCHOLARSHIP AT HARVARD (DASH) (HARVARD UNIVERSITY);Saxena S, 2021, INTECHOPEN EBOOKS;Weymuth T, 2014, CHEMINFORM",,,OPENALEX,"Sánchez-Lengeling B, 2018, SCIENCE","Sánchez-Lengeling B, 2018, SCIENCE" +https://openalex.org/W3014596384,10.1007/s10994-021-05946-3,Aleatoric and epistemic uncertainty in machine learning: an introduction to concepts and methods,2021,en,article,1473,MACHINE LEARNING,Machine Learning,Eyke Hüllermeier;Willem Waegeman,Eyke Hüllermeier;Willem Waegeman,"Heinz Nixdorf Institute and Department of Computer Science, Paderborn University, Paderborn, Germany;Department of Mathematical Modelling, Statistics and Bioinformatics, Ghent University, Ghent, Belgium","Eyke Hüllermeier (corresponding author), Heinz Nixdorf Institute and Department of Computer Science, Paderborn University, Paderborn, Germany","Abstract The notion of uncertainty is of major importance in machine learning and constitutes a key element of machine learning methodology. In line with the statistical tradition, uncertainty has long been perceived as almost synonymous with standard probability and probabilistic predictions. Yet, due to the steadily increasing relevance of machine learning for practical applications and related issues such as safety requirements, new problems and challenges have recently been identified by machine learning scholars, and these problems may call for new methodological developments. In particular, this includes the importance of distinguishing between (at least) two different types of uncertainty, often referred to as aleatoric and epistemic . 
In this paper, we provide an introduction to the topic of uncertainty in machine learning as well as an overview of attempts so far at handling uncertainty in general and formalizing this distinction in particular.",110,3,457,506,Probabilistic logic;Relevance (law);Algorithmic learning theory;Key (lock);Computational learning theory;Element (criminal law),DE;BE,"Jordan M, 1999, MACHINE LEARNING;Tax D, 2003, MACHINE LEARNING;Mackay D, 1992, NEURAL COMPUTATION;Kiureghian A, 2008, STRUCTURAL SAFETY;Jeffreys H, 1946, PROCEEDINGS OF THE ROYAL SOCIETY OF LONDON A MATHEMATICAL AND PHYSICAL SCIENCES;Smets P, 1994, ARTIFICIAL INTELLIGENCE;Hartley R, 1928, BELL SYSTEM TECHNICAL JOURNAL;Wolpert D, 1996, NEURAL COMPUTATION;Seeger M, 2004, INTERNATIONAL JOURNAL OF NEURAL SYSTEMS;Endres D, 2003, IEEE TRANSACTIONS ON INFORMATION THEORY;Bernardo J, 1979, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Zadrozny B, 2002, ;Graves A, 2011, ;Chow C, 1970, IEEE TRANSACTIONS ON INFORMATION THEORY;Zadrozny B, 2001, ;Khan S, 2014, THE KNOWLEDGE ENGINEERING REVIEW;Dubois D, 2006, COMPUTATIONAL STATISTICS & DATA ANALYSIS;Nguyen H, 1978, JOURNAL OF MATHEMATICAL ANALYSIS AND APPLICATIONS;Shilkret N, 1971, INDAGATIONES MATHEMATICAE (PROCEEDINGS);Mitchell T, 1977, INTERNATIONAL JOINT CONFERENCE ON ARTIFICIAL INTELLIGENCE;Hora S, 1996, RELIABILITY ENGINEERING & SYSTEM SAFETY;Denker J, 1990, ;Hellman M, 1970, IEEE TRANSACTIONS ON SYSTEMS SCIENCE AND CYBERNETICS;Herbei R, 2006, CANADIAN JOURNAL OF STATISTICS;Papadopoulos H, 2008, INTECH EBOOKS;Bernard J, 2004, INTERNATIONAL JOURNAL OF APPROXIMATE REASONING;Balasubramanian V, 2014, ;Gama J, 2012, PROGRESS IN ARTIFICIAL INTELLIGENCE;Senge R, 2013, INFORMATION SCIENCES;Yang F, 2009, BMC BIOINFORMATICS;Destercke S, 2008, INTERNATIONAL JOURNAL OF APPROXIMATE REASONING;Denœux T, 2013, INTERNATIONAL JOURNAL OF APPROXIMATE REASONING;Hühn J, 2008, IEEE TRANSACTIONS ON FUZZY SYSTEMS;Kruse R, 1991, ARTIFICIAL INTELLIGENCE;Dubois D, 
1997, JOURNAL OF MATHEMATICAL ANALYSIS AND APPLICATIONS;Zaffalon M, 2002, JOURNAL OF STATISTICAL PLANNING AND INFERENCE;Dubois D, 1996, IEEE TRANSACTIONS ON SYSTEMS MAN AND CYBERNETICS - PART A SYSTEMS AND HUMANS;Wasserman L, 1990, CANADIAN JOURNAL OF STATISTICS;Kruppa J, 2014, BIOMETRICAL JOURNAL;Hüllermeier E, 2008, FUZZY SETS AND SYSTEMS;Zaffalon M, 2012, INTERNATIONAL JOURNAL OF APPROXIMATE REASONING;Corani G, 2008, ;Coz J, 2009, JOURNAL OF MACHINE LEARNING RESEARCH;Lambrou A, 2010, IEEE TRANSACTIONS ON INFORMATION TECHNOLOGY IN BIOMEDICINE;Bi W, 2015, IEEE TRANSACTIONS ON KNOWLEDGE AND DATA ENGINEERING;Dubois D, 2006, INTERNATIONAL JOURNAL OF APPROXIMATE REASONING;Klir G, 1987, FUZZY SETS AND SYSTEMS;Gammerman A, 2002, THEORETICAL COMPUTER SCIENCE;Corani G, 2009, ;Gal Y, 2015, ARXIV (CORNELL UNIVERSITY);Breiman L, 2001, MACHINE LEARNING;Shafer G, 1976, PRINCETON UNIVERSITY PRESS EBOOKS;Liu F, 2008, ;Walley P, 1991, ;Pukelsheim F, 2006, SOCIETY FOR INDUSTRIAL AND APPLIED MATHEMATICS EBOOKS;Frieden B, 2004, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Yager R, 1983, INTERNATIONAL JOURNAL OF GENERAL SYSTEMS;Cozman F, 2000, ARTIFICIAL INTELLIGENCE;Csiszár I, 2008, ENTROPY;Sato M, 2018, ;Kull M, 2017, BRISTOL RESEARCH (UNIVERSITY OF BRISTOL);Blum M, 2013, THE EUROPEAN SYMPOSIUM ON ARTIFICIAL NEURAL NETWORKS;Abellán J, 2006, INTERNATIONAL JOURNAL OF GENERAL SYSTEMS;Barber R, 2020, INFORMATION AND INFERENCE A JOURNAL OF THE IMA;Freitas A, 2007, IGI GLOBAL EBOOKS;Sourati J, 2017, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Flach P, 2017, ENCYCLOPEDIA OF MACHINE LEARNING AND DATA MINING;Nguyen V, 2018, ;Varshney K, 2017, BIG DATA;Yang G, 2016, IEEE TRANSACTIONS ON CYBERNETICS;Kull M, 2014, LECTURE NOTES IN COMPUTER SCIENCE;Oh S, 2017, PROCEEDINGS OF THE AAAI CONFERENCE ON ARTIFICIAL INTELLIGENCE;ABELLAN J, 2000, INTERNATIONAL JOURNAL OF UNCERTAINTY FUZZINESS AND KNOWLEDGE-BASED SYSTEMS;Perelló-Nieto M, 2016, ;Yang G, 2016, SOFT COMPUTING;Linusson H, 2018, 
LECTURE NOTES IN COMPUTER SCIENCE;Lassiter D, 2019, PHILOSOPHICAL STUDIES;Klir G, 1994, JOHN WILEY & SONS, INC. EBOOKS;Linusson H, 2016, LECTURE NOTES IN COMPUTER SCIENCE;Johansson U, 2018, KTH PUBLICATION DATABASE DIVA (KTH ROYAL INSTITUTE OF TECHNOLOGY);Tan M, 2019, ARXIV (CORNELL UNIVERSITY);Hendrycks D, 2016, ARXIV (CORNELL UNIVERSITY);Lee K, 2018, ARXIV (CORNELL UNIVERSITY);Lakshminarayanan B, 2016, ARXIV (CORNELL UNIVERSITY);Liang S, 2017, ARXIV (CORNELL UNIVERSITY);Şensoy M, 2018, ARXIV (CORNELL UNIVERSITY);Papernot N, 2018, ARXIV (CORNELL UNIVERSITY);Malinin A, 2018, ARXIV (CORNELL UNIVERSITY);DeVries T, 2018, ARXIV (CORNELL UNIVERSITY);Depeweg S, 2017, ARXIV (CORNELL UNIVERSITY);Gneiting T, 2005, ;Feng J, 2019, ARXIV (CORNELL UNIVERSITY);Ramaswamy H, 2015, ARXIV (CORNELL UNIVERSITY)",,,OPENALEX,"Hüllermeier E, 2021, MACHINE LEARNING","Hüllermeier E, 2021, MACHINE LEARNING" +https://openalex.org/W2530417694,10.48550/arxiv.1610.02527,Federated Optimization: Distributed Machine Learning for On-Device Intelligence,2016,en,preprint,1653,ARXIV (CORNELL UNIVERSITY),arXiv (Cornell University),Jakub Konečný;H. Brendan McMahan;Daniel Ramage;Peter Richtárik,"Konečný, Jakub;McMahan, H. Brendan;Ramage, Daniel;Richtárik, Peter","University of Edinburgh,",,"We introduce a new and increasingly relevant setting for distributed optimization in machine learning, where the data defining the optimization are unevenly distributed over an extremely large number of nodes. The goal is to train a high-quality centralized model. We refer to this setting as Federated Optimization. In this setting, communication efficiency is of the utmost importance and minimizing the number of rounds of communication is the principal goal. A motivating example arises when we keep the training data locally on users' mobile devices instead of logging it to a data center for training. 
In federated optimziation, the devices are used as compute nodes performing computation on their local data in order to update a global model. We suppose that we have extremely large number of devices in the network --- as many as the number of users of a given service, each of which has only a tiny fraction of the total data available. In particular, we expect the number of data points available locally to be much smaller than the number of devices. Additionally, since different users generate data with different patterns, it is reasonable to assume that no device has a representative sample of the overall distribution. We show that existing algorithms are not suitable for this setting, and propose a new algorithm which shows encouraging experimental results for sparse convex problems. This work also sets a path for future research needed in the context of \federated optimization.",,,,,Computer science;Artificial intelligence;Distributed learning;Federated learning;Distributed computing;Machine learning;Psychology,GB,"Boyd S, 2010, NOW PUBLISHERS, INC. 
EBOOKS;Robbins H, 1951, THE ANNALS OF MATHEMATICAL STATISTICS;Liu D, 1989, MATHEMATICAL PROGRAMMING;Vapnik V, 1999, IEEE TRANSACTIONS ON NEURAL NETWORKS;Bertsekas D, 1989, DSPACE@MIT (MASSACHUSETTS INSTITUTE OF TECHNOLOGY);Zaharia M, 2010, ;Dean J, 2012, ;Polyak B, 1964, USSR COMPUTATIONAL MATHEMATICS AND MATHEMATICAL PHYSICS;Nemirovski A, 2009, SIAM JOURNAL ON OPTIMIZATION;Zinkevich M, 2010, ;Ngiam J, 2011, INTERNATIONAL CONFERENCE ON MACHINE LEARNING;Deng W, 2015, JOURNAL OF SCIENTIFIC COMPUTING;Bach F, 2011, ;Needell D, 2015, MATHEMATICAL PROGRAMMING;Shalf J, 2011, LECTURE NOTES IN COMPUTER SCIENCE;Bekkerman R, 2011, ;Liu J, 2015, SIAM JOURNAL ON OPTIMIZATION;Ma C, 2017, OPTIMIZATION METHODS & SOFTWARE;Duchi J, 2014, JOURNAL OF THE ACM;Zhang Y, 2013, ARXIV (CORNELL UNIVERSITY);Schmidt M, 2016, MATHEMATICAL PROGRAMMING;Zhang Y, 2015, ;Konečný J, 2017, FRONTIERS IN APPLIED MATHEMATICS AND STATISTICS;Yang T, 2013, ;Shamir O, 2014, ;Gürbüzbalaban M, 2019, MATHEMATICAL PROGRAMMING;Lin C, 2014, ;Duchi J, 2013, ;Qu Z, 2015, EDINBURGH RESEARCH EXPLORER (UNIVERSITY OF EDINBURGH);Mareček J, 2015, SPRINGER PROCEEDINGS IN MATHEMATICS & STATISTICS;Zhuang Y, 2015, LECTURE NOTES IN COMPUTER SCIENCE;Mania H, 2017, SIAM JOURNAL ON OPTIMIZATION;Konečný J, 2017, OPTIMIZATION METHODS & SOFTWARE;McMahan H, 2016, ARXIV (CORNELL UNIVERSITY);Konečný J, 2015, ARXIV (CORNELL UNIVERSITY);Dekel O, 2010, ARXIV (CORNELL UNIVERSITY);Reddi S, 2016, ARXIV (CORNELL UNIVERSITY);Bradley J, 2011, ARXIV (CORNELL UNIVERSITY);Reddi S, 2015, ARXIV (CORNELL UNIVERSITY);Takáč M, 2013, ARXIV (CORNELL UNIVERSITY);Frostig R, 2015, ARXIV (CORNELL UNIVERSITY);Duchi J, 2015, ARXIV (CORNELL UNIVERSITY);Lee J, 2015, ARXIV (CORNELL UNIVERSITY);Takáč M, 2015, ARXIV (CORNELL UNIVERSITY);Defazio A, 2016, ARXIV (CORNELL UNIVERSITY);Csiba D, 2015, ARXIV (CORNELL UNIVERSITY);Allen-Zhu Z, 2016, ARXIV (CORNELL UNIVERSITY);Smith V, 2015, ARXIV (CORNELL UNIVERSITY);Mahajan D, 2013, ARXIV (CORNELL UNIVERSITY);Abadi M, 2016, 
;Нестеров Ю, 2014, MEDICAL ENTOMOLOGY AND ZOOLOGY;Shamir O, 2014, ;Bottou L, 2018, SIAM REVIEW;Moritz P, 2016, INTERNATIONAL CONFERENCE ON ARTIFICIAL INTELLIGENCE AND STATISTICS;Allen-Zhu Z, 2017, ;Babanezhad R, 2015, NEURAL INFORMATION PROCESSING SYSTEMS;Niu F, 2011, ARXIV (CORNELL UNIVERSITY);Defazio A, 2014, ARXIV (CORNELL UNIVERSITY);Reddi S, 2016, ARXIV (CORNELL UNIVERSITY);Woodworth B, 2016, ARXIV (CORNELL UNIVERSITY);Wang H, 2014, ARXIV (CORNELL UNIVERSITY);Gower R, 2016, ARXIV (CORNELL UNIVERSITY);Leblond R, 2016, ARXIV (CORNELL UNIVERSITY);Csiba D, 2016, ARXIV (CORNELL UNIVERSITY)",,,OPENALEX,"Konečný J, 2016, ARXIV (CORNELL UNIVERSITY)","Konečný J, 2016, ARXIV (CORNELL UNIVERSITY)" +https://openalex.org/W3148181069,10.38094/jastt20165,Classification Based on Decision Tree Algorithm for Machine Learning,2021,en,article,1820,JOURNAL OF APPLIED SCIENCE AND TECHNOLOGY TRENDS,Journal of Applied Science and Technology Trends,Bahzad Charbuty;Adnan Mohsin Abdulazeez,Bahzad Charbuty;Adnan Abdulazeez,"IT Department, Technical College of Informatics Akre, Duhok Polytechnic University, Duhok, Kurdistan Region, Iraq;Duhok Polytechnic University, Duhok, Kurdistan Region, Iraq",,"Decision tree classifiers are regarded to be a standout of the most well-known methods to data classification representation of classifiers. Different researchers from various fields and backgrounds have considered the problem of extending a decision tree from available data, such as machine study, pattern recognition, and statistics. In various fields such as medical disease analysis, text classification, user smartphone classification, images, and many more the employment of Decision tree classifiers has been proposed in many ways. This paper provides a detailed approach to the decision trees. Furthermore, paper specifics, such as algorithms/approaches used, datasets, and outcomes achieved, are evaluated and outlined comprehensively. 
In addition, all of the approaches analyzed were discussed to illustrate the themes of the authors and identify the most accurate classifiers. As a result, the uses of different types of datasets are discussed and their findings are analyzed.",2,01,20,28,Decision tree;Computer science;Machine learning;Artificial intelligence;ID3 algorithm;Decision tree learning;Incremental decision tree;Representation (politics);Tree (set theory);Statistical classification;Data mining;Pattern recognition (psychology);Mathematics,IQ,"Kotsiantis S, 2007, ;Carleo G, 2019, REVIEWS OF MODERN PHYSICS;Libbrecht M, 2015, NATURE REVIEWS GENETICS;Kotsiantis S, 2006, ARTIFICIAL INTELLIGENCE REVIEW;Palo V, 2015, ;Lim T, 2000, MACHINE LEARNING;Song Y, 2015, PUBMED;Tso K, 2007, ENERGY;Zou Q, 2018, FRONTIERS IN GENETICS;Rokach L, 2005, IEEE TRANSACTIONS ON SYSTEMS MAN AND CYBERNETICS PART C (APPLICATIONS AND REVIEWS);Janikow C, 1998, IEEE TRANSACTIONS ON SYSTEMS MAN AND CYBERNETICS PART B (CYBERNETICS);Loh W, 2014, INTERNATIONAL STATISTICAL REVIEW;Raileanu L, 2004, ANNALS OF MATHEMATICS AND ARTIFICIAL INTELLIGENCE;Swain P, 1977, IEEE TRANSACTIONS ON GEOSCIENCE ELECTRONICS;Mántaras R, 1991, MACHINE LEARNING;Eesa A, 2014, EXPERT SYSTEMS WITH APPLICATIONS;Brodley C, 1995, MACHINE LEARNING;Barros R, 2011, IEEE TRANSACTIONS ON SYSTEMS MAN AND CYBERNETICS PART C (APPLICATIONS AND REVIEWS);Zhao Y, 2007, ADVANCES IN SPACE RESEARCH;Stein G, 2005, ;Liang J, 2019, IEEE TRANSACTIONS ON DEPENDABLE AND SECURE COMPUTING;Zeebaree D, 2018, ;Pérez‐Ortiz M, 2016, ENERGIES;Shang C, 2013, KNOWLEDGE-BASED SYSTEMS;Zeebaree D, 2019, ;Maszczyk T, 2008, LECTURE NOTES IN COMPUTER SCIENCE;Linty N, 2018, IEEE TRANSACTIONS ON AEROSPACE AND ELECTRONIC SYSTEMS;Damanik I, 2019, JOURNAL OF PHYSICS CONFERENCE SERIES;Suresh A, 2019, SOFT COMPUTING;Feng C, 2017, ;Taneja S, 2014, ;Gavankar S, 2017, ;Wang J, 2005, LECTURE NOTES IN COMPUTER SCIENCE;Kumar R, 2012, ;Bengio Y, 2010, COMPUTATIONAL INTELLIGENCE;Kuang W, 2019, IEEE 
TRANSACTIONS ON CIRCUITS AND SYSTEMS FOR VIDEO TECHNOLOGY;Ahmed N, 2018, ;Anuradha, 2014, ;Nagra A, 2019, CONNECTION SCIENCE;Patil D, 2006, ;Zhang Y, 2019, ;Shamim A, 2010, ;Cheushev V, 2002, ;Hussain D, 2018, JOURNAL OF X-RAY SCIENCE AND TECHNOLOGY;Hassan O, 2018, ;Ahmim A, 2019, ;Eesa A, 2017, SCIENCE JOURNAL OF UNIVERSITY OF ZAKHO;Liu Y, 2013, ;Maulud D, 2020, JOURNAL OF APPLIED SCIENCE AND TECHNOLOGY TRENDS;Zebari R, 2020, JOURNAL OF APPLIED SCIENCE AND TECHNOLOGY TRENDS;Ling Q, 2023, APPLIED AND COMPUTATIONAL ENGINEERING;Brodley C, 1995, MACHINE LEARNING;Priyanka N, 2020, INTERNATIONAL JOURNAL OF INFORMATION AND DECISION SCIENCES;, 2017, INTERNATIONAL JOURNAL OF MODERN TRENDS IN ENGINEERING & RESEARCH;Hillel T, 2020, JOURNAL OF CHOICE MODELLING;Pahwa K, 2019, ;Li M, 2019, ENTROPY;Yang F, 2019, ;Omar A, 2019, ;Assegie T, 2019, INTERNATIONAL JOURNAL OF POWER ELECTRONICS AND DRIVE SYSTEMS/INTERNATIONAL JOURNAL OF ELECTRICAL AND COMPUTER ENGINEERING;Nandhini S, 2020, 2020 INTERNATIONAL CONFERENCE ON EMERGING TRENDS IN INFORMATION TECHNOLOGY AND ENGINEERING (IC-ETITE);Sulaiman M, 2020, INTERNATIONAL JOURNAL OF SUSTAINABLE CONSTRUCTION ENGINEERING AND TECHNOLOGY (UNIVERSITI TUN HUSSEIN ONN MALAYSIA);Pathan S, 2019, JOURNAL OF APPLIED BIOMEDICINE;Kumar D, 2020, INTERNATIONAL JOURNAL OF INFORMATION AND DECISION SCIENCES;Patil S, 2019, 2019 3RD INTERNATIONAL CONFERENCE ON TRENDS IN ELECTRONICS AND INFORMATICS (ICOEI);Sathiyanarayanan P, 2019, 2019 IEEE INTERNATIONAL CONFERENCE ON SYSTEM, COMPUTATION, AUTOMATION AND NETWORKING (ICSCAN);Dash S, 2020, SMART INNOVATION, SYSTEMS AND TECHNOLOGIES;Arowolo M, 2020, ;Mrva J, 2019, ;Jiao S, 2020, JOURNAL OF PHYSICS CONFERENCE SERIES;Taloba A, 2019, ;Ramadhan I, 2020, ;Felice F, 2019, JOURNAL OF CANCER RESEARCH AND CLINICAL ONCOLOGY;Issa A, 2011, ENGINEERING AND TECHNOLOGY JOURNAL;Chen X, 2019, 2019 3RD INTERNATIONAL CONFERENCE ON ELECTRONIC INFORMATION TECHNOLOGY AND COMPUTER ENGINEERING (EITCE);Vishwakarma A, 2024, ;M�ntaras R, 
1991, MACHINE LEARNING;Batitis V, 2020, ;Goyal P, 2020, ",,,OPENALEX,"Charbuty B, 2021, JOURNAL OF APPLIED SCIENCE AND TECHNOLOGY TRENDS","Charbuty B, 2021, JOURNAL OF APPLIED SCIENCE AND TECHNOLOGY TRENDS" +https://openalex.org/W2569214105,10.1016/j.csbj.2016.12.005,Machine Learning and Data Mining Methods in Diabetes Research,2017,en,review,1416,COMPUTATIONAL AND STRUCTURAL BIOTECHNOLOGY JOURNAL,Computational and Structural Biotechnology Journal,Ioannis Kavakiotis;O. Tsave;Athanasios Salifoglou;Nicos Maglaveras;Ioannis Vlahavas;Ioanna Chouvarda,Ioannis Kavakiotis;Olga Tsave;Athanasios Salifoglou;Nicos Maglaveras;Ioannis Vlahavas;Ioanna Chouvarda,"Department of Informatics, Aristotle University of Thessaloniki, Thessaloniki 54124, Greece;Institute of Applied Biosciences, CERTH, Thessaloniki, Greece;Laboratory of Inorganic Chemistry, Department of Chemical Engineering, Aristotle University of Thessaloniki, Thessaloniki 54124, Greece;Laboratory of Inorganic Chemistry, Department of Chemical Engineering, Aristotle University of Thessaloniki, Thessaloniki 54124, Greece;Institute of Applied Biosciences, CERTH, Thessaloniki, Greece;Lab of Computing and Medical Informatics, Medical School, Aristotle University of Thessaloniki, Thessaloniki 54124, Greece;Department of Informatics, Aristotle University of Thessaloniki, Thessaloniki 54124, Greece;Institute of Applied Biosciences, CERTH, Thessaloniki, Greece;Lab of Computing and Medical Informatics, Medical School, Aristotle University of Thessaloniki, Thessaloniki 54124, Greece","Ioannis Kavakiotis (corresponding author), Department of Informatics, Aristotle University of Thessaloniki, Thessaloniki 54124, Greece; Institute of Applied Biosciences, CERTH, Thessaloniki, Greece","The remarkable advances in biotechnology and health sciences have led to a significant production of data, such as high throughput genetic data and clinical information, generated from large Electronic Health Records (EHRs). 
To this end, application of machine learning and data mining methods in biosciences is presently, more than ever before, vital and indispensable in efforts to transform intelligently all available information into valuable knowledge. Diabetes mellitus (DM) is defined as a group of metabolic disorders exerting significant pressure on human health worldwide. Extensive research in all aspects of diabetes (diagnosis, etiopathophysiology, therapy, etc.) has led to the generation of huge amounts of data. The aim of the present study is to conduct a systematic review of the applications of machine learning, data mining techniques and tools in the field of diabetes research with respect to a) Prediction and Diagnosis, b) Diabetic Complications, c) Genetic Background and Environment, and e) Health Care and Management with the first category appearing to be the most popular. A wide range of machine learning algorithms were employed. In general, 85% of those used were characterized by supervised learning approaches and 15% by unsupervised ones, and more specifically, association rules. Support vector machines (SVM) arise as the most successful and widely used algorithm. Concerning the type of data, clinical datasets were mainly used. 
The title applications in the selected articles project the usefulness of extracting valuable knowledge leading to new hypotheses targeting deeper understanding and further investigation in DM.",15,,104,116,Machine learning;Support vector machine;Artificial intelligence;Computer science;Field (mathematics);Data science;Knowledge extraction;Data mining;Health care;Mathematics,GR,"Witten I, 2011, ELSEVIER EBOOKS;Agrawal R, 1994, VERY LARGE DATA BASES;Després J, 2006, NATURE;Robnik‐Šikonja M, 2003, MACHINE LEARNING;Krentz A, 2005, DRUGS;Laing S, 2003, DIABETOLOGIA;Tapp R, 2003, DIABETES CARE;Roychowdhury S, 2014, IEEE JOURNAL OF BIOMEDICAL AND HEALTH INFORMATICS;Giancardo L, 2011, MEDICAL IMAGE ANALYSIS;Özçift A, 2011, COMPUTER METHODS AND PROGRAMS IN BIOMEDICINE;Georga E, 2012, IEEE JOURNAL OF BIOMEDICAL AND HEALTH INFORMATICS;Wright A, 2014, JOURNAL OF BIOMEDICAL INFORMATICS;Chikh M, 2011, JOURNAL OF MEDICAL SYSTEMS;Choi S, 2014, COMPUTATIONAL AND MATHEMATICAL METHODS IN MEDICINE;Huang G, 2015, BMC BIOINFORMATICS;El–Sappagh S, 2015, ARTIFICIAL INTELLIGENCE IN MEDICINE;Mani S, 2012, PUBMED;Anderson A, 2015, JOURNAL OF BIOMEDICAL INFORMATICS;Herrero P, 2014, IEEE JOURNAL OF BIOMEDICAL AND HEALTH INFORMATICS;Tapak L, 2013, HEALTHCARE INFORMATICS RESEARCH;Georga E, 2013, DIABETES TECHNOLOGY & THERAPEUTICS;Worachartcheewan A, 2015, THE SCIENTIFIC WORLD JOURNAL;Jin H, 2015, PREVENTING CHRONIC DISEASE;Simon G, 2014, IEEE TRANSACTIONS ON KNOWLEDGE AND DATA ENGINEERING;Cai L, 2015, PLOS ONE;Jelinek H, 2006, AUSTRALIAN JOURNAL OF PRIMARY HEALTH;Yusuf N, 2015, BMC BIOINFORMATICS;Török Z, 2015, JOURNAL OF DIABETES RESEARCH;Pinhas‐Hamiel O, 2013, INTERNATIONAL JOURNAL OF EATING DISORDERS;Ibrahim S, 2015, MEDICAL & BIOLOGICAL ENGINEERING & COMPUTING;Jonnagaddala J, 2015, BIOMED RESEARCH INTERNATIONAL;Lagani V, 2015, JOURNAL OF DIABETES AND ITS COMPLICATIONS;Nantasenamat C, 2015, DRUG DESIGN DEVELOPMENT AND THERAPY;Chen L, 2012, INTELLIGENT DATA ANALYSIS;Huang J, 2013, 
TALANTA;Shankaracharya N, 2012, THE REVIEW OF DIABETIC STUDIES;Halevas E, 2015, JOURNAL OF INORGANIC BIOCHEMISTRY;Simon G, 2013, PUBMED;Bradley P, 2013, BIG DATA;Gregori D, 2009, JOURNAL OF MEDICAL SYSTEMS;Tsave O, 2015, JOURNAL OF INORGANIC BIOCHEMISTRY;Nimmagadda S, 2014, INTERNATIONAL JOURNAL OF ENVIRONMENTAL RESEARCH AND PUBLIC HEALTH;Deja R, 2015, BIOMEDICAL ENGINEERING ONLINE;Schrom J, 2013, PUBMED;Lee J, 2013, COMPUTERS IN BIOLOGY AND MEDICINE;Pakhomov S, 2011, PUBMED;Quellec G, 2013, ;Karahoca A, 2012, EXPERT SYSTEMS WITH APPLICATIONS;Yarimizu M, 2015, ADVANCES IN BIOINFORMATICS;Kavakiotis I, 2013, ;Han J, 2012, CHOICE REVIEWS ONLINE;Kumar D, 1995, CHOICE REVIEWS ONLINE;Cover T, 1967, IEEE TRANSACTIONS ON INFORMATION THEORY;Fan J, 2008, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Marx V, 2013, NATURE;Monte S, 2008, JOURNAL OF DIABETES SCIENCE AND TECHNOLOGY;Cade W, 2008, PHYSICAL THERAPY;Cryer P, 2003, PUBMED;Kaprio J, 1992, DIABETOLOGIA;Meng X, 2012, THE KAOHSIUNG JOURNAL OF MEDICAL SCIENCES;Malley J, 2011, METHODS OF INFORMATION IN MEDICINE;Sudharsan B, 2014, JOURNAL OF DIABETES SCIENCE AND TECHNOLOGY;Mattmann C, 2013, NATURE;Ganji M, 2011, EXPERT SYSTEMS WITH APPLICATIONS;Nguyen C, 2013, DIABETES;Han L, 2014, IEEE JOURNAL OF BIOMEDICAL AND HEALTH INFORMATICS;Lee B, 2015, IEEE JOURNAL OF BIOMEDICAL AND HEALTH INFORMATICS;Çalişir D, 2011, EXPERT SYSTEMS WITH APPLICATIONS;Zhang B, 2013, IEEE TRANSACTIONS ON BIOMEDICAL ENGINEERING;Robertson G, 2011, JOURNAL OF ELECTRICAL AND COMPUTER ENGINEERING;Beloufa F, 2013, COMPUTER METHODS AND PROGRAMS IN BIOMEDICINE;Aslam M, 2013, EXPERT SYSTEMS WITH APPLICATIONS;Habibi S, 2015, GLOBAL JOURNAL OF HEALTH SCIENCE;Cox M, 2009, CLINICAL DIABETES;Oh E, 2013, BMC MEDICAL INFORMATICS AND DECISION MAKING;Leung R, 2013, BMC NEPHROLOGY;Lagani V, 2015, JOURNAL OF DIABETES AND ITS COMPLICATIONS;Anjos S, 2004, MOLECULAR GENETICS AND METABOLISM;Georga E, 2015, MEDICAL & BIOLOGICAL ENGINEERING & 
COMPUTING;Ramezankhani A, 2014, MEDICAL DECISION MAKING;Lee B, 2013, IEEE JOURNAL OF BIOMEDICAL AND HEALTH INFORMATICS;Stranieri A, 2013, ARTIFICIAL INTELLIGENCE IN MEDICINE;Pires R, 2013, IEEE TRANSACTIONS ON BIOMEDICAL ENGINEERING;Török Z, 2013, BMC OPHTHALMOLOGY;Lopes M, 2014, GENOMICS;Lee W, 2012, ASIAN JOURNAL OF SURGERY;Wang K, 2015, JOURNAL OF BIOMEDICAL INFORMATICS;Ramezankhani A, 2015, INTERNATIONAL JOURNAL OF ENDOCRINOLOGY AND METABOLISM;Park S, 2011, BMC SYSTEMS BIOLOGY;Abawajy J, 2013, COMPUTERS IN BIOLOGY AND MEDICINE;Caveney E, 2011, JOURNAL OF DIABETES SCIENCE AND TECHNOLOGY;Li B, 2013, CURRENT DIABETES REPORTS;Marling C, 2013, JOURNAL OF DIABETES SCIENCE AND TECHNOLOGY;Lee J, 2011, OSONG PUBLIC HEALTH AND RESEARCH PERSPECTIVES;Jensen M, 2014, JOURNAL OF DIABETES SCIENCE AND TECHNOLOGY;Bujac S, 2014, DIABETES THERAPY;Namayanja J, 2012, JOURNAL OF MEDICAL SYSTEMS;Renard L, 2011, BMC MEDICAL INFORMATICS AND DECISION MAKING;Narasimhan K, 2014, CNS & NEUROLOGICAL DISORDERS - DRUG TARGETS;Ozery-Flato M, 2013, DIABETOLOGY & METABOLIC SYNDROME;Breiman L, 2001, MACHINE LEARNING;Agrawal R, 1993, ;Association A, 2010, DIABETES CARE;GuyonIsabelle, 2003, JOURNAL OF MACHINE LEARNING RESEARCH;Fayyad U, 1996, ;Zeevi D, 2015, CELL;Blundell J, 2017, PUBMED;Association A, 2008, DIABETES CARE;, 2019, SERIES IN MACHINE PERCEPTION AND ARTIFICIAL INTELLIGENCE;Cryer P, 2003, DIABETES CARE;Thomas N, 2016, JAYPEE BROTHERS MEDICAL PUBLISHERS (P) LTD. 
EBOOKS;Bocca J, 1994, ;Razavian N, 2015, BIG DATA;Bagherzadeh‐Khiabani F, 2015, JOURNAL OF CLINICAL EPIDEMIOLOGY;Agarwal V, 2016, JOURNAL OF THE AMERICAN MEDICAL INFORMATICS ASSOCIATION;Batal I, 2012, ;Allalou A, 2016, DIABETES;Farran B, 2013, BMJ OPEN;Bashir S, 2015, JOURNAL OF BIOMEDICAL INFORMATICS;Rau H, 2015, COMPUTER METHODS AND PROGRAMS IN BIOMEDICINE;Anderson J, 2015, JOURNAL OF DIABETES SCIENCE AND TECHNOLOGY;Malik S, 2016, SPRINGERPLUS;Oh W, 2016, BIG DATA;Collins A, 1988, ELSEVIER EBOOKS;Dubrava S, 2016, PAIN MEDICINE;Ogunyemi O, 2015, PUBMED;Belciug S, 2014, JOURNAL OF BIOMEDICAL INFORMATICS;Jin J, 2015, JOURNAL OF DIABETES RESEARCH;Lee Y, 2012, OBESITY RESEARCH & CLINICAL PRACTICE;Tsave O, 2016, JOURNAL OF INORGANIC BIOCHEMISTRY;Jelinek H, 2016, COMPUTERS IN BIOLOGY AND MEDICINE;Ibrahim Z, 2013, WILEY EBOOKS;Sideris C, 2016, COMPUTERS IN BIOLOGY AND MEDICINE;Patra J, 2010, JOURNAL OF COMPUTATIONAL CHEMISTRY;Fong S, 2013, BIOMED RESEARCH INTERNATIONAL;Haifeng L, 2013, STUDIES IN HEALTH TECHNOLOGY AND INFORMATICS;Sacchi L, 2015, ;Prentašić P, 2014, ;Worachartcheewan A, 2013, PUBMED;Worachartcheewan A, 2013, PUBMED;Zhao L, 2016, GENETIC EPIDEMIOLOGY;Krishnamoorthy S, 2015, PLOS ONE;Hoyt R, 2016, PUBMED;Anderson A, 2015, ARXIV (CORNELL UNIVERSITY)",,,OPENALEX,"Kavakiotis I, 2017, COMPUTATIONAL AND STRUCTURAL BIOTECHNOLOGY JOURNAL","Kavakiotis I, 2017, COMPUTATIONAL AND STRUCTURAL BIOTECHNOLOGY JOURNAL" +https://openalex.org/W4402843978,10.58248/pn633,Interpretable machine learning,2020,en,report,987,,,Parliamentary Office of Science and Technology;Lorna Christie,Parliamentary Office of Science and Technology;Lorna Christie,,"Parliamentary Office of Science and Technology (corresponding author), ","Machine learning (ML, a type of artificial intelligence) is increasingly being used to support decision making in a variety of applications including recruitment and clinical diagnoses. 
While ML has many advantages, there are concerns that in some cases it may not be possible to explain completely how its outputs have been produced. This POSTnote gives an overview of ML and its role in decision-making. It examines the challenges of understanding how a complex ML system has reached its output, and some of the technical approaches to making ML easier to interpret. It also gives a brief overview of some of the proposed tools for making ML systems more accountable.",,,,,Artificial intelligence;Computer science;Machine learning,,,,,OPENALEX,"Technology P, 2020, ","Technology P, 2020, " +https://openalex.org/W1569098853,10.7551/mitpress/8996.001.0001,Optimization for Machine Learning,2011,en,book,897,THE MIT PRESS EBOOKS,The MIT Press eBooks,Suvrit Sra,"Sra, Suvrit 1976-","Dept. Empirical Inference, Max Planck Institute for Intelligent Systems, Max Planck Society","Sra, Suvrit 1976- (corresponding author), Dept. Empirical Inference, Max Planck Institute for Intelligent Systems, Max Planck Society","An up-to-date account of the interplay between optimization and machine learning, accessible to students and researchers in both communities. The interplay between optimization and machine learning is one of the most important developments in modern computational science. Optimization formulations and methods are proving to be vital in designing algorithms to extract essential knowledge from huge volumes of data. Machine learning, however, is not simply a consumer of optimization technology but a rapidly evolving field that is itself generating new optimization ideas. This book captures the state of the art of the interaction between optimization and machine learning in a way that is accessible to researchers in both fields. Optimization approaches have enjoyed prominence in machine learning because of their wide applicability and attractive theoretical properties. 
The increasing complexity, size, and variety of today's machine learning models call for the reassessment of existing assumptions. This book starts the process of reassessment. It describes the resurgence in novel contexts of established frameworks such as first-order methods, stochastic approximations, convex relaxations, interior-point methods, and proximal methods. It also devotes attention to newer themes such as regularized optimization, robust optimization, gradient and subgradient methods, splitting techniques, and second-order methods. Many of these techniques draw inspiration from other fields, including operations research, theoretical computer science, and subfields of optimization. The book will enrich the ongoing cross-fertilization between the machine learning community and these other fields, and within the broader optimization community.",,,,,Computer science;Artificial intelligence;Machine learning;Field (mathematics);Stochastic optimization;Optimization problem;Computational learning theory;Online machine learning;Variety (cybernetics);Process (computing);Subgradient method;Mathematical optimization;Active learning (machine learning);Algorithm;Mathematics,DE,"Lee D, 1999, NATURE;Horn R, 1991, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Shawe‐Taylor J, 2004, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Neal R, 1996, LECTURE NOTES IN STATISTICS;Joachims T, 2006, TECHNICAL REPORTS;, 1982, ELSEVIER EBOOKS;Devroye L, 1996, STOCHASTIC MODELLING AND APPLIED PROBABILITY;Cesa‐Bianchi N, 2006, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Kocsis L, 2006, LECTURE NOTES IN COMPUTER SCIENCE;Wright S, 1997, SOCIETY FOR INDUSTRIAL AND APPLIED MATHEMATICS EBOOKS;Kuybeda O, 2012, JOURNAL OF STRUCTURAL BIOLOGY;Combettes P, 2011, SPRINGER OPTIMIZATION AND ITS APPLICATIONS;Benveniste A, 1990, ;Shapiro A, 2009, ;, 2006, FOUNDATIONS OF ARTIFICIAL INTELLIGENCE;, 1984, MATHEMATICS AND COMPUTERS IN SIMULATION;Moreau J, 1965, BULLETIN DE LA SOCIÉTÉ MATHÉMATIQUE DE FRANCE;Anthony M, 1999, LONDON SCHOOL OF 
ECONOMICS AND POLITICAL SCIENCE RESEARCH ONLINE (LONDON SCHOOL OF ECONOMICS AND POLITICAL SCIENCE);Vapnik V, 1963, AUTOMATION AND REMOTE CONTROL;Borwein J, 2000, UNIVERSITY OF MARIBOR DIGITAL LIBRARY (UNIVERSITY OF MARIBOR);Klee V, 1970, ;Kim S, 2018, FIGSHARE;Bubeck S, 2009, LECTURE NOTES IN COMPUTER SCIENCE;Schmidt M, 2007, LECTURE NOTES IN COMPUTER SCIENCE;Jenatton R, 2010, FOOD CHEMISTRY;Sören S, 2010, MAX PLANCK INSTITUTE FOR PLASMA PHYSICS;Yuan G, 2010, ;Abernethy J, 2008, SCHOLARLYCOMMONS (UNIVERSITY OF PENNSYLVANIA);Maros I, 2003, INTERNATIONAL SERIES IN MANAGEMENT SCIENCE/OPERATIONS RESEARCH/INTERNATIONAL SERIES IN OPERATIONS RESEARCH & MANAGEMENT SCIENCE;Rosasco L, 2009, ;Kivinen J, 2001, MACHINE LEARNING;Bertsimas D, 2011, SIAM REVIEW;Domingo C, 2002, DATA MINING AND KNOWLEDGE DISCOVERY;Komodakis N, 2008, LECTURE NOTES IN COMPUTER SCIENCE;Bellavia S, 1998, JOURNAL OF OPTIMIZATION THEORY AND APPLICATIONS;Bartlett P, 2008, QUT EPRINTS (QUEENSLAND UNIVERSITY OF TECHNOLOGY);Franc V, 2009, ;Gondzio J, 2006, LECTURE NOTES IN COMPUTER SCIENCE;Hazan E, 2009, ;Tomioka R, 2010, ;Varadarajan J, 2010, INFOSCIENCE (ECOLE POLYTECHNIQUE FÉDÉRALE DE LAUSANNE);Caramanis C, 2008, ;Jenatton R, 2009, ARXIV.ORG;Sontag D, 2012, ARXIV (CORNELL UNIVERSITY);Maurer A, 2009, ARXIV.ORG;Bach F, 2008, ARXIV.ORG;Mairal J, 2010, ARXIV (CORNELL UNIVERSITY);Bubeck S, 2010, THESES.FR (ABES);Audibert J, 2010, ARXIV (CORNELL UNIVERSITY);Saha A, 2009, ARXIV.ORG;Chen S, 1998, SIAM JOURNAL ON SCIENTIFIC COMPUTING;Hearst M, 1998, IEEE INTELLIGENT SYSTEMS AND THEIR APPLICATIONS;Johnson S, 1967, PSYCHOMETRIKA;Rosenblatt M, 1956, THE ANNALS OF MATHEMATICAL STATISTICS;Rockafellar R, 1976, SIAM JOURNAL ON CONTROL AND OPTIMIZATION;Combettes P, 2005, MULTISCALE MODELING AND SIMULATION;Lai T, 1985, ADVANCES IN APPLIED MATHEMATICS;Robbins H, 1952, BULLETIN OF THE AMERICAN MATHEMATICAL SOCIETY;Grötschel M, 1981, COMBINATORICA;Lions P, 1979, SIAM JOURNAL ON NUMERICAL ANALYSIS;Ben‐Tal A, 2000, MATHEMATICAL 
PROGRAMMING;Douglas J, 1956, TRANSACTIONS OF THE AMERICAN MATHEMATICAL SOCIETY;Ng A, 2004, ;Bach F, 2004, ;Yin W, 2008, SIAM JOURNAL ON IMAGING SCIENCES;Oden J, 1992, MATHEMATICS OF COMPUTATION;Fazel M, 2001, ;Vanderbei R, 1998, JOURNAL OF THE OPERATIONAL RESEARCH SOCIETY;Tseng P, 2007, MATHEMATICAL PROGRAMMING;Nilim A, 2005, OPERATIONS RESEARCH;Zhao P, 2009, THE ANNALS OF STATISTICS;Flaxman A, 2005, SYMPOSIUM ON DISCRETE ALGORITHMS;Renegar J, 1988, MATHEMATICAL PROGRAMMING;Osborne M, 2000, JOURNAL OF COMPUTATIONAL AND GRAPHICAL STATISTICS;Figueiredo M, 2007, IEEE TRANSACTIONS ON IMAGE PROCESSING;Koh K, 2007, ;Tseng P, 1991, SIAM JOURNAL ON CONTROL AND OPTIMIZATION;Gill P, 1986, MATHEMATICAL PROGRAMMING;Lemaréchal C, 1995, MATHEMATICAL PROGRAMMING;Alon N, 1997, JOURNAL OF THE ACM;Weaver J, 1991, MAGNETIC RESONANCE IN MEDICINE;Gaussier É, 2005, ;Setzer S, 2010, INTERNATIONAL JOURNAL OF COMPUTER VISION;Brucker P, 1984, OPERATIONS RESEARCH LETTERS;Forrest J, 1992, MATHEMATICAL PROGRAMMING;Awerbuch B, 2007, JOURNAL OF COMPUTER AND SYSTEM SCIENCES;Teo C, 2007, ;Röth V, 2008, ;Tomioka R, 2009, NEUROIMAGE;Cheney E, 1959, NUMERISCHE MATHEMATIK;Kiwiel K, 1983, MATHEMATICAL PROGRAMMING;Han S, 1988, SIAM JOURNAL ON CONTROL AND OPTIMIZATION;Tomioka R, 2007, ;Trafali̇s T, 2006, OPTIMIZATION METHODS & SOFTWARE;Schmidt M, 2010, ;Dani V, 2006, ;Bhattacharyya C, 2004, JOURNAL OF COMPUTATIONAL BIOLOGY;Maculan N, 1989, OPERATIONS RESEARCH LETTERS;Kumar M, 2008, ;Goldfarb D, 2004, MATHEMATICAL PROGRAMMING;Rudin L, 1992, PHYSICA D NONLINEAR PHENOMENA;Beck A, 2009, SIAM JOURNAL ON IMAGING SCIENCES;Boser B, 1992, ;Efron B, 2004, THE ANNALS OF STATISTICS;Lustig M, 2007, MAGNETIC RESONANCE IN MEDICINE;Hoeffding W, 1994, SPRINGER SERIES IN STATISTICS;Cai J, 2010, SIAM JOURNAL ON OPTIMIZATION;Donoho D, 1995, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Eckstein J, 1992, MATHEMATICAL PROGRAMMING;Gabay D, 1976, COMPUTERS & MATHEMATICS WITH APPLICATIONS;Hestenes M, 1969, JOURNAL OF 
OPTIMIZATION THEORY AND APPLICATIONS;Auer P, 2002, SIAM JOURNAL ON COMPUTING;Tsochantaridis I, 2005, MPG.PURE (MAX PLANCK SOCIETY);Dembo R, 1982, SIAM JOURNAL ON NUMERICAL ANALYSIS;Kelley J, 1960, JOURNAL OF THE SOCIETY FOR INDUSTRIAL AND APPLIED MATHEMATICS;Chvátal V, 2009, ;Figueiredo M, 2003, IEEE TRANSACTIONS ON IMAGE PROCESSING;Evgeniou T, 2000, ADVANCES IN COMPUTATIONAL MATHEMATICS;Bartlett P, 1998, IEEE TRANSACTIONS ON INFORMATION THEORY;Ghaoui L, 1997, SIAM JOURNAL ON MATRIX ANALYSIS AND APPLICATIONS;Fu W, 1998, JOURNAL OF COMPUTATIONAL AND GRAPHICAL STATISTICS;Thompson J, 1987, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Bach F, 2011, NOW PUBLISHERS, INC. EBOOKS;Bottou L, 2003, ;Yuan M, 2007, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Poggio T, 2004, NATURE CELL BIOLOGY;Zien A, 2007, ;Gertz E, 2003, ACM TRANSACTIONS ON MATHEMATICAL SOFTWARE;Steinwart I, 2005, IEEE TRANSACTIONS ON INFORMATION THEORY;Kloft M, 2009, ;Ferris M, 2002, SIAM JOURNAL ON OPTIMIZATION;Mnih V, 2008, ;Franc V, 2008, ;Bubeck S, 2008, REPEC: RESEARCH PAPERS IN ECONOMICS;Wang C, 2009, ;Argyriou A, 2006, ;Gondzio J, 2010, COMPUTATIONAL OPTIMIZATION AND APPLICATIONS;Werner T, 2008, ;Hall J, 2005, COMPUTATIONAL OPTIMIZATION AND APPLICATIONS;Le Q, 2007, NEURAL INFORMATION PROCESSING SYSTEMS;Grove A, 2001, MACHINE LEARNING;Marsten R, 1990, INFORMS JOURNAL ON APPLIED ANALYTICS;Colombo M, 2007, COMPUTATIONAL OPTIMIZATION AND APPLICATIONS;Varadarajan J, 2010, ;Kort B, 1976, SIAM JOURNAL ON CONTROL AND OPTIMIZATION;Sprechmann P, 2010, ;Ibaraki S, 1992, COMPUTATIONAL OPTIMIZATION AND APPLICATIONS;Bach F, 2007, ARXIV.ORG;Pierre-Arnuad C, 2014, ARXIV (CORNELL UNIVERSITY);Zhang X, 2012, ARXIV (CORNELL UNIVERSITY);Tibshirani R, 1996, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Cortes C, 1995, MACHINE LEARNING;Zou H, 2005, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Parzen E, 1962, THE ANNALS OF 
MATHEMATICAL STATISTICS;Yuan M, 2005, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Candès E, 2006, IEEE TRANSACTIONS ON INFORMATION THEORY;Dantzig G, 1963, RAND CORPORATION EBOOKS;Daubechies I, 2004, COMMUNICATIONS ON PURE AND APPLIED MATHEMATICS;Bertsimas D, 2004, OPERATIONS RESEARCH;Goldstein T, 2009, SIAM JOURNAL ON IMAGING SCIENCES;Tropp J, 2004, IEEE TRANSACTIONS ON INFORMATION THEORY;Recht B, 2010, SIAM REVIEW;Wainwright M, 2007, NOW PUBLISHERS, INC. EBOOKS;Tibshirani R, 2004, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Hoyer P, 2004, ;Bickel P, 2009, THE ANNALS OF STATISTICS;Wright S, 2009, IEEE TRANSACTIONS ON SIGNAL PROCESSING;Daubechies I, 2009, COMMUNICATIONS ON PURE AND APPLIED MATHEMATICS;Wainwright M, 2009, IEEE TRANSACTIONS ON INFORMATION THEORY;Rockafellar R, 1976, MATHEMATICS OF OPERATIONS RESEARCH;Bottou L, 2011, THE MIT PRESS EBOOKS;Srebro N, 2004, ;Hazan E, 2007, MACHINE LEARNING;Dullerud G, 2000, ;Fine S, 2002, ;Combettes P, 2007, IEEE JOURNAL OF SELECTED TOPICS IN SIGNAL PROCESSING;Varma M, 2009, ;Werner T, 2007, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Mannor S, 2004, ;Globerson A, 2006, ;Komodakis N, 2007, ;Dagum P, 2000, SIAM JOURNAL ON COMPUTING;Teo C, 2010, ;Dani V, 2007, SCHOLARLYCOMMONS (UNIVERSITY OF PENNSYLVANIA);Sun Z, 2010, NEURAL INFORMATION PROCESSING SYSTEMS;Xu Z, 2008, ;Kim S, 2005, ;Roux N, 2012, ;Cui R, 1999, NEUROIMAGE;Tomioka R, 2009, IEEE SIGNAL PROCESSING LETTERS;Gao J, 2007, ;Besnerais G, 1999, IEEE TRANSACTIONS ON INFORMATION THEORY;Jagarlapudi S, 2009, NEURAL INFORMATION PROCESSING SYSTEMS;Szafranski M, 2008, ;Hazan E, 2009, ;Woodsend K, 2009, COMPUTATIONAL OPTIMIZATION AND APPLICATIONS;Ortiz L, 2000, ;Friedman J, 2010, ARXIV (CORNELL UNIVERSITY);Xu H, 2010, ARXIV (CORNELL UNIVERSITY);Boyd S, 2004, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Donoho D, 2004, ;Candès E, 2006, IEEE TRANSACTIONS ON INFORMATION THEORY;Donoho D, 1995, IEEE TRANSACTIONS 
ON INFORMATION THEORY;Bell J, 1978, MATHEMATICS OF COMPUTATION;Auer P, 2002, MACHINE LEARNING;Karmarkar N, 1984, COMBINATORICA;Ramsay P, 1993, TECHNOMETRICS;Nesterov Y, 2004, MATHEMATICAL PROGRAMMING;Bartlett P, 2001, LECTURE NOTES IN COMPUTER SCIENCE;Zhao P, 2006, ;Argyriou A, 2007, THE MIT PRESS EBOOKS;Sonnenburg S, 2006, MPG.PURE (MAX PLANCK SOCIETY);Deza M, 1997, ALGORITHMS AND COMBINATORICS;Nesterov Y, 2007, REPEC: RESEARCH PAPERS IN ECONOMICS;Krishnapuram B, 2005, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Genkin A, 2007, TECHNOMETRICS;d’Aspremont A, 2007, SIAM REVIEW;Lanckriet G, 2004, BIOINFORMATICS;Blankertz B, 2004, IEEE TRANSACTIONS ON BIOMEDICAL ENGINEERING;Ghaoui L, 1997, ;Kalai A, 2004, JOURNAL OF COMPUTER AND SYSTEM SCIENCES;Audibert J, 2009, THEORETICAL COMPUTER SCIENCE;Obozinski G, 2009, STATISTICS AND COMPUTING;Shevade S, 2003, BIOINFORMATICS;Blankertz B, 2002, THE MIT PRESS EBOOKS;Even-Dar E, 2006, JOURNAL OF MACHINE LEARNING RESEARCH;Wipf D, 2007, ;Negahban S, 2012, STATISTICAL SCIENCE;Rakotomamonjy A, 2007, ;Maron O, 2018, RESEARCH SHOWCASE @ CARNEGIE MELLON UNIVERSITY (CARNEGIE MELLON UNIVERSITY);Mukherjee S, 2006, ADVANCES IN COMPUTATIONAL MATHEMATICS;Shivaswamy P, 2006, NOT FOUND REPOSITORY (INDIAN INSTITUTE OF SCIENCE BANGALORE);Sontag D, 2007, ;Subrahmanya N, 2009, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Koster A, 1998, OPERATIONS RESEARCH LETTERS;Xu Z, 2009, ;Werner T, 2009, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Woodsend K, 2009, ;, 2011, SIAM EBOOKS;Hazan E, 2008, ;Szafranski M, 2007, INFOSCIENCE (ECOLE POLYTECHNIQUE FÉDÉRALE DE LAUSANNE);Goldfarb D, 2008, IMA JOURNAL OF NUMERICAL ANALYSIS;Xu H, 2008, ARXIV (CORNELL UNIVERSITY);Bach F, 2007, ARXIV.ORG;Johnson J, 2007, ARXIV.ORG;Sontag D, 2007, DSPACE@MIT (MASSACHUSETTS INSTITUTE OF TECHNOLOGY);Braun W, 2021, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Bertsekas D, 1997, JOURNAL OF THE OPERATIONAL RESEARCH SOCIETY;Zhou K, 1997, 
;Нестеров Ю, 2014, MEDICAL ENTOMOLOGY AND ZOOLOGY;Infanger G, 2010, INTERNATIONAL SERIES IN MANAGEMENT SCIENCE/OPERATIONS RESEARCH/INTERNATIONAL SERIES IN OPERATIONS RESEARCH & MANAGEMENT SCIENCE;Wu T, 2008, ;Bartlett P, 2002, ;Moreau J, 1962, HAL (LE CENTRE POUR LA COMMUNICATION SCIENTIFIQUE DIRECTE);Hoerl A, 1962, ;Bach F, 2011, THE MIT PRESS EBOOKS;Lounici K, 2009, ;Khemani D, 2024, ;Lemarã‰chal C, 1995, REPEC: RESEARCH PAPERS IN ECONOMICS;Huang J, 2009, ARXIV.ORG;Xu H, 2010, ARXIV (CORNELL UNIVERSITY);Tomioka R, 2009, ARXIV (CORNELL UNIVERSITY)",,,OPENALEX,"Sra S, 2011, THE MIT PRESS EBOOKS","Sra S, 2011, THE MIT PRESS EBOOKS" +https://openalex.org/W2138178898,10.1147/rd.441.0206,Some studies in machine learning using the game of checkers,2000,en,article,1443,IBM JOURNAL OF RESEARCH AND DEVELOPMENT,IBM Journal of Research and Development,Arthur L. Samuel,A. L. Samuel,,"A. L. Samuel (corresponding author), ","Two machine-learning procedures have been investigated in some detail using the game of checkers. Enough work has been done to verify the fact that a computer can be programmed so that it will learn to play a better game of checkers than can be played by the person who wrote the program. Furthermore, it can learn to do this in a remarkably short period of time (8 or 10 hours of machine-playing time) when given only the rules of the game, a sense of direction, and a redundant and incomplete list of parameters which are thought to have something to do with the game, but whose correct signs and relative weights are unknown and unspecified. 
The principles of machine learning verified by these experiments are, of course, applicable to many other situations.",44,1.2,206,226,Computer science;Artificial intelligence;Machine learning,,"Rosenblatt F, 1958, PSYCHOLOGICAL REVIEW;Shannon, 1950, THE PHILOSOPHICAL MAGAZINE A JOURNAL OF THEORETICAL EXPERIMENTAL AND APPLIED PHYSICS;Newell A, 1958, IBM JOURNAL OF RESEARCH AND DEVELOPMENT;Rochester N, 1956, IEEE TRANSACTIONS ON INFORMATION THEORY;Strachey C, 1952, ;Bernstein A, 1958, SCIENTIFIC AMERICAN;Kister J, 1957, JOURNAL OF THE ACM;McCulloch W, 1949, ",,,OPENALEX,"Samuel A, 2000, IBM JOURNAL OF RESEARCH AND DEVELOPMENT","Samuel A, 2000, IBM JOURNAL OF RESEARCH AND DEVELOPMENT" +https://openalex.org/W4206070770,10.1108/k.2010.06739hae.001,Introduction to Machine Learning,2010,en,article,1198,KYBERNETES,Kybernetes,Bernhard Schölkopf,Bernhard Schölkopf,,"Bernhard Schölkopf (corresponding author), ",,39,8,,,Cybernetics;Computer science;Artificial intelligence,,,,,OPENALEX,"Schölkopf B, 2010, KYBERNETES","Schölkopf B, 2010, KYBERNETES" +https://openalex.org/W2464234006,10.1080/21693277.2016.1192517,"Machine learning in manufacturing: advantages, challenges, and applications",2016,en,article,1255,PRODUCTION & MANUFACTURING RESEARCH,Production & Manufacturing Research,Thorsten Wuest;D. R. 
Weimer;Christopher Irgens;Klaus‐Dieter Thoben,Thorsten Wuest;Daniel Weimer;Christopher Irgens;Klaus-Dieter Thoben,"Industrial and Management Systems Engineering, West Virginia University, Morgantown, WV 26506, USA;ICT Applications for Production, BIBA – Bremer Institut fuer Produktion und Logistik, 28359 Bremen, Germany;Design, Manufacture & Engineering Management, University of Strathclyde, Glasgow G1 1XJ, UK;Department of Integrated Product Development, University of Bremen, 28359 Bremen, Germany","Thorsten Wuest (corresponding author), Industrial and Management Systems Engineering, West Virginia University, Morgantown, WV 26506, USA","The nature of manufacturing systems faces ever more complex, dynamic and at times even chaotic behaviors. In order to being able to satisfy the demand for high-quality products in an efficient manner, it is essential to utilize all means available. One area, which saw fast pace developments in terms of not only promising results but also usability, is machine learning. Promising an answer to many of the old and new challenges of manufacturing, machine learning is widely discussed by researchers and practitioners alike. However, the field is very broad and even confusing which presents a challenge and a barrier hindering wide application. Here, this paper contributes in presenting an overview of available machine learning techniques and structuring this rather complicated area. 
A special focus is laid on the potential benefit, and examples of successful applications in a manufacturing environment.",4,1,23,45,Computer science;Pace;Structuring;Field (mathematics);Quality (philosophy);Usability;Artificial intelligence;Risk analysis (engineering);Human–computer interaction,US;GB;DE,"Nasrabadi N, 2007, JOURNAL OF ELECTRONIC IMAGING;Freund Y, 1997, JOURNAL OF COMPUTER AND SYSTEM SCIENCES;Jain A, 1999, ACM COMPUTING SURVEYS;D M, 2009, CHOICE REVIEWS ONLINE;Johnson J, 2000, NEUROCOMPUTING;Dietterich T, 2000, LECTURE NOTES IN COMPUTER SCIENCE;Kotsiantis S, 2007, ;Sammut C, 2010, ;Yu L, 2003, ;Zhou Z, 2012, ;, 2005, CHOICE REVIEWS ONLINE;Susto G, 2014, IEEE TRANSACTIONS ON INDUSTRIAL INFORMATICS;Ben‐Hur A, 2009, METHODS IN MOLECULAR BIOLOGY;Burbidge R, 2001, COMPUTERS & CHEMISTRY;Kabacoff R, 2011, OPEN MIND;Tay F, 2002, NEUROCOMPUTING;Li H, 2008, CHEMOMETRICS AND INTELLIGENT LABORATORY SYSTEMS;Wuest T, 2013, JOURNAL OF INTELLIGENT MANUFACTURING;Wiendahl H, 1994, CIRP ANNALS;Salahshoor K, 2010, ENERGY;Monostori L, 1996, CIRP ANNALS;Pham D, 2005, PROCEEDINGS OF THE INSTITUTION OF MECHANICAL ENGINEERS PART B JOURNAL OF ENGINEERING MANUFACTURE;Azadeh A, 2012, APPLIED SOFT COMPUTING;Kim D, 2011, EXPERT SYSTEMS WITH APPLICATIONS;Evgeniou T, 2000, INTERNATIONAL JOURNAL OF COMPUTER VISION;Elangovan M, 2015, PROCEDIA COMPUTER SCIENCE;Simon H, 1983, MACHINE LEARNING;Monostori L, 1993, CIRP ANNALS;Manallack D, 1999, EUROPEAN JOURNAL OF MEDICINAL CHEMISTRY;Sun J, 2004, INTERNATIONAL JOURNAL OF MACHINE TOOLS AND MANUFACTURE;Li T, 2007, EXPERT SYSTEMS WITH APPLICATIONS;Yang K, 2004, CHOICE REVIEWS ONLINE;Corne D, 2012, EUROPEAN JOURNAL OF OPERATIONAL RESEARCH;Kang P, 2008, PATTERN RECOGNITION;Do T, 2010, STUDIES IN COMPUTATIONAL INTELLIGENCE;Cherkassky V, 2009, NEURAL NETWORKS;Wuest T, 2014, PROCEDIA CIRP;Lee J, 2008, INFORMATION SCIENCES;Gagliardi F, 2011, ARTIFICIAL INTELLIGENCE IN MEDICINE;Wang K, 2005, PRODUCTION PLANNING & CONTROL;Thomas A, 2012, JOURNAL OF 
MANUFACTURING TECHNOLOGY MANAGEMENT;Shiang L, 2011, ASIA PACIFIC BUSINESS REVIEW;Akay D, 2011, SAFETY SCIENCE;Doltsinis S, 2012, ;Steel D, 2011, ELSEVIER EBOOKS;Cook D, 2005, EUROPEAN JOURNAL OF OPERATIONAL RESEARCH;Bar-Or A, 2005, ;Monostori L, 1998, LECTURE NOTES IN COMPUTER SCIENCE;Scheidat T, 2009, ;Hickey R, 2001, KNOWLEDGE-BASED SYSTEMS;Hochreiter S, 1997, NEURAL COMPUTATION;Krizhevsky A, 2017, COMMUNICATIONS OF THE ACM;Cortes C, 1995, MACHINE LEARNING;S S, 2005, IEEE TRANSACTIONS ON NEURAL NETWORKS;LeCun Y, 1989, NEURAL COMPUTATION;Kaelbling L, 1996, JOURNAL OF ARTIFICIAL INTELLIGENCE RESEARCH;Opitz D, 1999, JOURNAL OF ARTIFICIAL INTELLIGENCE RESEARCH;, 2007, CHOICE REVIEWS ONLINE;Furey T, 2000, BIOINFORMATICS;Keerthi S, 2003, NEURAL COMPUTATION;Widodo A, 2007, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Carpenter G, 1988, COMPUTER;Lee J, 2013, MANUFACTURING LETTERS;Huang Z, 2003, DECISION SUPPORT SYSTEMS;Wiering M, 2012, ADAPTATION, LEARNING, AND OPTIMIZATION;Oladipupo T, 2010, INTECH EBOOKS;Brunato M, 2005, COMPUTER NETWORKS;El-Naqa I, 2002, IEEE TRANSACTIONS ON MEDICAL IMAGING;Harding J, 2005, JOURNAL OF MANUFACTURING SCIENCE AND ENGINEERING;Köksal G, 2011, EXPERT SYSTEMS WITH APPLICATIONS;Siklóssy L, 1993, ENGINEERING APPLICATIONS OF ARTIFICIAL INTELLIGENCE;Günther J, 2014, PROCEDIA TECHNOLOGY;Borin A, 2006, ANALYTICA CHIMICA ACTA;Çaydaş U, 2010, JOURNAL OF INTELLIGENT MANUFACTURING;Hwa R, 2003, ;Monostori L, 2003, ENGINEERING APPLICATIONS OF ARTIFICIAL INTELLIGENCE;Jurković Z, 2016, JOURNAL OF INTELLIGENT MANUFACTURING;Kang P, 2016, EXPERT SYSTEMS WITH APPLICATIONS;Loyer J, 2016, INTERNATIONAL JOURNAL OF PRODUCTION ECONOMICS;Ribeiro B, 2005, IEEE TRANSACTIONS ON SYSTEMS MAN AND CYBERNETICS PART C (APPLICATIONS AND REVIEWS);Chinnam R, 2002, INTERNATIONAL JOURNAL OF PRODUCTION RESEARCH;Evgeniou T, 2002, COMPUTATIONAL STATISTICS & DATA ANALYSIS;Wu Q, 2009, JOURNAL OF COMPUTATIONAL AND APPLIED MATHEMATICS;Lu S, 1990, COMPUTERS IN INDUSTRY;Kwak D, 2011, EXPERT 
SYSTEMS WITH APPLICATIONS;Rastogi R, 2007, EUROPEAN JOURNAL OF OPERATIONAL RESEARCH;Hansson K, 2016, DALARNA UNIVERSITY COLLEGE ELECTRONIC ARCHIVE;Wuest T, 2015, SPRINGER THESES;Dutt V, 2012, COMPUTERS IN HUMAN BEHAVIOR;Gordon J, 2001, INTERNATIONAL JOURNAL OF OPERATIONS & PRODUCTION MANAGEMENT;Apte C, 2002, ;Guo X, 2007, EXPERT SYSTEMS WITH APPLICATIONS;Filipič B, 2000, COMPUTERS IN INDUSTRY;Zheng Y, 2010, EXPERT SYSTEMS WITH APPLICATIONS;Wang X, 2012, COMPUTATIONAL AND MATHEMATICAL METHODS IN MEDICINE;Okamoto S, 2003, THEORETICAL COMPUTER SCIENCE;Koltchinskii V, 2001, APPLIED MATHEMATICS AND COMPUTATION;González C, 2012, JOURNAL OF COMPUTATIONAL SCIENCE;Margolis D, 2011, PROCEDIA COMPUTER SCIENCE;Breiman L, 2001, MACHINE LEARNING;LeCun Y, 2015, NATURE;Samuel A, 1959, IBM JOURNAL OF RESEARCH AND DEVELOPMENT;Zhou Z, 2012, ;Graham J, 2012, ;Martens D, 2007, EUROPEAN JOURNAL OF OPERATIONAL RESEARCH;Davis J, 2015, ANNUAL REVIEW OF CHEMICAL AND BIOMOLECULAR ENGINEERING;, 2019, INTERNATIONAL JOURNAL ON COMPUTER SCIENCE AND ENGINEERING",,,OPENALEX,"Wuest T, 2016, PRODUCTION & MANUFACTURING RESEARCH","Wuest T, 2016, PRODUCTION & MANUFACTURING RESEARCH" +https://openalex.org/W3079760979,10.1016/j.compag.2020.105709,Crop yield prediction using machine learning: A systematic literature review,2020,en,article,1695,COMPUTERS AND ELECTRONICS IN AGRICULTURE,Computers and Electronics in Agriculture,Thomas van Klompenburg;Ayalew Kassahun;Cagatay Catal,Thomas van Klompenburg;Ayalew Kassahun;Cagatay Catal,"Information Technology Group, Wageningen University & Research, Wageningen, the Netherlands;Information Technology Group, Wageningen University & Research, Wageningen, the Netherlands;Department of Computer Engineering, Bahcesehir University, Istanbul, Turkey","Cagatay Catal (corresponding author), Department of Computer Engineering, Bahcesehir University, Istanbul, Turkey","Machine learning is an important decision support tool for crop yield prediction, including supporting 
decisions on what crops to grow and what to do during the growing season of the crops. Several machine learning algorithms have been applied to support crop yield prediction research. In this study, we performed a Systematic Literature Review (SLR) to extract and synthesize the algorithms and features that have been used in crop yield prediction studies. Based on our search criteria, we retrieved 567 relevant studies from six electronic databases, of which we have selected 50 studies for further analysis using inclusion and exclusion criteria. We investigated these selected studies carefully, analyzed the methods and features used, and provided suggestions for further research. According to our analysis, the most used features are temperature, rainfall, and soil type, and the most applied algorithm is Artificial Neural Networks in these models. After this observation based on the analysis of machine learning-based 50 papers, we performed an additional search in electronic databases to identify deep learning-based studies, reached 30 deep learning-based papers, and extracted the applied deep learning algorithms. 
According to this additional analysis, Convolutional Neural Networks (CNN) is the most widely used deep learning algorithm in these studies, and the other widely used deep learning algorithms are Long-Short Term Memory (LSTM) and Deep Neural Networks (DNN).",177,,105709,105709,Artificial intelligence;Machine learning;Deep learning;Computer science;Convolutional neural network;Artificial neural network;Support vector machine;Yield (engineering),NL;TR,"Mnih V, 2015, NATURE;Witten I, 2011, ELSEVIER EBOOKS;Vincent P, 2008, ;Baldi P, 2011, ;Jeong J, 2016, PLOS ONE;Pantazi X, 2015, COMPUTERS AND ELECTRONICS IN AGRICULTURE;You J, 2017, PROCEEDINGS OF THE AAAI CONFERENCE ON ARTIFICIAL INTELLIGENCE;Bargoti S, 2017, JOURNAL OF FIELD ROBOTICS;Everingham Y, 2016, AGRONOMY FOR SUSTAINABLE DEVELOPMENT;Šmite D, 2009, EMPIRICAL SOFTWARE ENGINEERING;González-Sánchez A, 2014, SPANISH JOURNAL OF AGRICULTURAL RESEARCH;Goldstein A, 2017, PRECISION AGRICULTURE;Gandhi N, 2016, ;Li B, 2018, PLANTS;McQueen R, 1995, COMPUTERS AND ELECTRONICS IN AGRICULTURE;Ali I, 2016, IEEE JOURNAL OF SELECTED TOPICS IN APPLIED EARTH OBSERVATIONS AND REMOTE SENSING;Paul M, 2015, ;Bose P, 2016, IEEE TRANSACTIONS ON GEOSCIENCE AND REMOTE SENSING;Cheng H, 2017, JOURNAL OF IMAGING;Su Y, 2017, SAUDI JOURNAL OF BIOLOGICAL SCIENCES;Fernandes J, 2017, INTERNATIONAL JOURNAL OF REMOTE SENSING;Ahamed A, 2015, ;Matsumura K, 2014, THE JOURNAL OF AGRICULTURAL SCIENCE;Barh D, 2015, ;Sujatha R, 2016, ;Shekoofa A, 2014, PLOS ONE;Romero J, 2013, COMPUTERS AND ELECTRONICS IN AGRICULTURE;Ruß G, 2008, LECTURE NOTES IN COMPUTER SCIENCE;Everingham Y, 2008, AGRICULTURAL AND FOREST METEOROLOGY;Gandhi N, 2016, ;Marizel B, 2018, INTERNATIONAL JOURNAL OF ADVANCED COMPUTER SCIENCE AND APPLICATIONS;Rahnemoonfar M, 2017, PROCEEDINGS OF SPIE, THE INTERNATIONAL SOCIETY FOR OPTICAL ENGINEERING/PROCEEDINGS OF SPIE;Ananthara M, 2013, ;Mola‐Yudego B, 2015, GCB BIOENERGY;Çakır Y, 2014, ;Osama K, 2015, ;Rozman Č, 2012, ERWERBS-OBSTBAU;Gandhi N, 
2016, AUSTRALASIAN JOURNAL OF PARAMEDICINE;Rahman M, 2014, ;Baral S, 2011, COMMUNICATIONS IN COMPUTER AND INFORMATION SCIENCE;Kunapuli S, 2015, ;Pantazi X, 2014, LECTURE NOTES IN COMPUTER SCIENCE;Ruß G, 2010, LECTURE NOTES IN COMPUTER SCIENCE;Carlosena A, 2006, 2006 49TH IEEE INTERNATIONAL MIDWEST SYMPOSIUM ON CIRCUITS AND SYSTEMS;Osman T, 2016, ADVANCES IN INTELLIGENT SYSTEMS AND COMPUTING;Johnson M, 2013, CIRCLE (UNIVERSITY OF BRITISH COLUMBIA);Ren S, 2015, ARXIV (CORNELL UNIVERSITY);, 2007, ;Ruder S, 2017, ARXIV (CORNELL UNIVERSITY);Λιάκος Κ, 2018, SENSORS;Chlingaryan A, 2018, COMPUTERS AND ELECTRONICS IN AGRICULTURE;Maimaitijiang M, 2019, REMOTE SENSING OF ENVIRONMENT;Khaki S, 2019, FRONTIERS IN PLANT SCIENCE;Khaki S, 2020, FRONTIERS IN PLANT SCIENCE;Nevavuori P, 2019, COMPUTERS AND ELECTRONICS IN AGRICULTURE;Crane‐Droesch A, 2018, ENVIRONMENTAL RESEARCH LETTERS;Elavarasan D, 2020, IEEE ACCESS;Schwalbert R, 2020, AGRICULTURAL AND FOREST METEOROLOGY;Yang Q, 2019, FIELD CROPS RESEARCH;Sun J, 2019, SENSORS;Elavarasan D, 2018, COMPUTERS AND ELECTRONICS IN AGRICULTURE;Khanal S, 2018, COMPUTERS AND ELECTRONICS IN AGRICULTURE;Filippi P, 2019, PRECISION AGRICULTURE;Wang X, 2020, REMOTE SENSING;Wang A, 2018, ;Kang Y, 2020, ENVIRONMENTAL RESEARCH LETTERS;Jiang H, 2019, GLOBAL CHANGE BIOLOGY;Saeed K, 2019, IOWA STATE UNIVERSITY DIGITAL REPOSITORY (IOWA STATE UNIVERSITY);Chen Y, 2019, REMOTE SENSING;Kouadio L, 2018, COMPUTERS AND ELECTRONICS IN AGRICULTURE;Wolanin A, 2020, ENVIRONMENTAL RESEARCH LETTERS;Ghazvinei P, 2018, ENGINEERING APPLICATIONS OF COMPUTATIONAL FLUID MECHANICS;Wang Y, 2020, REMOTE SENSING;Chu Z, 2020, COMPUTERS AND ELECTRONICS IN AGRICULTURE;Zhang L, 2019, REMOTE SENSING;Xu X, 2019, ECOLOGICAL INDICATORS;Bhojani S, 2020, NEURAL COMPUTING AND APPLICATIONS;Ahmad I, 2018, JOURNAL OF THE INDIAN SOCIETY OF REMOTE SENSING;Tedesco D, 2020, COMPUTERS AND ELECTRONICS IN AGRICULTURE;Shah A, 2018, LECTURE NOTES ON DATA ENGINEERING AND COMMUNICATIONS 
TECHNOLOGIES;Ranjan A, 2019, SPATIAL INFORMATION RESEARCH;Shidnal S, 2019, INTERNATIONAL JOURNAL OF INFORMATION TECHNOLOGY;Terliksiz A, 2019, ;Charoen-Ung P, 2018, ADVANCES IN INTELLIGENT SYSTEMS AND COMPUTING;Nguyen L, 2019, LECTURE NOTES IN COMPUTER SCIENCE;Lee S, 2019, SUSTAINABILITY;Yalçın H, 2019, ;Alwis S, 2019, LECTURE NOTES IN COMPUTER SCIENCE;Girish L, 2019, ;Beulah R, 2019, INTERNATIONAL JOURNAL OF COMPUTER SCIENCES AND ENGINEERING;Ju S, 2020, ;Saravi B, 2019, NEURAL COMPUTING AND APPLICATIONS;Zhong H, 2018, ENVIRONMENT SYSTEMS & DECISIONS;Rao T, 2019, INTERNATIONAL JOURNAL ON FUTURE REVOLUTION IN COMPUTER SCIENCE & COMMUNICATION ENGINEERING;Fathi M, 2020, ADVANCES IN INTELLIGENT SYSTEMS AND COMPUTING;Mayuri K, 2018, INTERNATIONAL JOURNAL OF ADVANCED RESEARCH IN COMPUTER SCIENCE;Monga T, 2018, LECTURE NOTES IN COMPUTER SCIENCE;, 2018, ;Kacprzyk J, 2023, LECTURE NOTES IN NETWORKS AND SYSTEMS;, 2019, ",,,OPENALEX,"Klompenburg T, 2020, COMPUTERS AND ELECTRONICS IN AGRICULTURE","Klompenburg T, 2020, COMPUTERS AND ELECTRONICS IN AGRICULTURE" +https://openalex.org/W2972418846,10.1002/inf2.12028,Machine learning in materials science,2019,en,article,1001,INFOMAT,InfoMat,Jing Wei;Xuan Chu;Xiangyu Sun;Kun Xu;Hui‐Xiong Deng;Ji-Gen Chen;Zhongming Wei;Ming Lei,Jing Wei;Xuan Chu;Xiang‐Yu Sun;Kun Xu;Hui‐Xiong Deng;Jigen Chen;Zhongming Wei;Ming Lei,"State Key Laboratory of Information Photonics and Optical Communications Beijing University of Posts and Telecommunications Beijing China;State Key Laboratory of Information Photonics and Optical Communications, Beijing University of Posts and Telecommunications, Beijing, China;State Key Laboratory of Information Photonics and Optical Communications Beijing University of Posts and Telecommunications Beijing China;State Key Laboratory of Information Photonics and Optical Communications, Beijing University of Posts and Telecommunications, Beijing, China;State Key Laboratory of Information Photonics and Optical Communications 
Beijing University of Posts and Telecommunications Beijing China;State Key Laboratory of Information Photonics and Optical Communications, Beijing University of Posts and Telecommunications, Beijing, China;State Key Laboratory of Information Photonics and Optical Communications Beijing University of Posts and Telecommunications Beijing China;State Key Laboratory of Information Photonics and Optical Communications, Beijing University of Posts and Telecommunications, Beijing, China;State Key Laboratory of Superlattices and Microstructures, Institute of Semiconductors, Chinese Academy of Sciences, Center of Materials Science and Optoelectronics Engineering University of Chinese Academy of Sciences Beijing China;State Key Laboratory of Superlattices and Microstructures, Institute of Semiconductors, Chinese Academy of Sciences, Center of Materials Science and Optoelectronics Engineering, University of Chinese Academy of Sciences, Beijing, China;Zhejiang Provincial Key Laboratory for Cutting Tools Taizhou University Taizhou China;Zhejiang Provincial Key Laboratory for Cutting Tools, Taizhou University, Taizhou, China;Beijing Academy of Quantum Information Sciences Beijing China;State Key Laboratory of Superlattices and Microstructures, Institute of Semiconductors, Chinese Academy of Sciences, Center of Materials Science and Optoelectronics Engineering University of Chinese Academy of Sciences Beijing China;Beijing Academy of Quantum Information Sciences, Beijing, China;State Key Laboratory of Superlattices and Microstructures, Institute of Semiconductors, Chinese Academy of Sciences, Center of Materials Science and Optoelectronics Engineering, University of Chinese Academy of Sciences, Beijing, China;State Key Laboratory of Information Photonics and Optical Communications Beijing University of Posts and Telecommunications Beijing China;State Key Laboratory of Information Photonics and Optical Communications, Beijing University of Posts and Telecommunications, Beijing, 
China","Ming Lei (corresponding author), State Key Laboratory of Information Photonics and Optical Communications Beijing University of Posts and Telecommunications Beijing China; State Key Laboratory of Information Photonics and Optical Communications, Beijing University of Posts and Telecommunications, Beijing, China","Abstract Traditional methods of discovering new materials, such as the empirical trial and error method and the density functional theory (DFT)‐based method, are unable to keep pace with the development of materials science today due to their long development cycles, low efficiency, and high costs. Accordingly, due to its low computational cost and short development cycle, machine learning is coupled with powerful data processing and high prediction performance and is being widely used in material detection, material analysis, and material design. In this article, we discuss the basic operational procedures in analyzing material properties via machine learning, summarize recent applications of machine learning algorithms to several mature fields in materials science, and discuss the improvements that are required for wide‐ranging application.",1,3,338,358,Pace;Computer science;Machine learning;Artificial intelligence;Development (topology);Ranging;Industrial engineering;Engineering;Mathematics,CN,"Szegedy C, 2015, ;Jain A, 2013, APL MATERIALS;Allen F, 2002, ACTA CRYSTALLOGRAPHICA SECTION B STRUCTURAL SCIENCE;Rogers D, 2010, JOURNAL OF CHEMICAL INFORMATION AND MODELING;Waser R, 2009, ADVANCED MATERIALS;Arlot S, 2010, STATISTICS SURVEYS;Farabet C, 2012, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Bartók A, 2013, PHYSICAL REVIEW B;Saal J, 2013, JOM;Rupp M, 2012, PHYSICAL REVIEW LETTERS;Brown M, 2000, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Kononenko I, 2001, ARTIFICIAL INTELLIGENCE IN MEDICINE;Xiong H, 2014, SCIENCE;Ma J, 2015, JOURNAL OF CHEMICAL INFORMATION AND MODELING;Curtarolo S, 2012, COMPUTATIONAL MATERIALS 
SCIENCE;Helmstaedter M, 2013, NATURE;Ruoff R, 1993, THE JOURNAL OF PHYSICAL CHEMISTRY;Cambria E, 2014, IEEE COMPUTATIONAL INTELLIGENCE MAGAZINE;Cully A, 2015, NATURE;Hand D, 2001, INTERNATIONAL STATISTICAL REVIEW;Pilania G, 2013, SCIENTIFIC REPORTS;Hansen K, 2015, THE JOURNAL OF PHYSICAL CHEMISTRY LETTERS;Hansen K, 2013, JOURNAL OF CHEMICAL THEORY AND COMPUTATION;Tsai C, 2014, IEEE COMMUNICATIONS SURVEYS & TUTORIALS;Hautier G, 2010, CHEMISTRY OF MATERIALS;Schütt K, 2014, PHYSICAL REVIEW B;Seko A, 2014, PHYSICAL REVIEW B;Kalidindi S, 2015, ANNUAL REVIEW OF MATERIALS RESEARCH;Hachmann J, 2013, ENERGY & ENVIRONMENTAL SCIENCE;Nantasenamat C, 2010, EXPERT OPINION ON DRUG DISCOVERY;Epa V, 2012, NANO LETTERS;Feng N, 2013, INFORMATION SCIENCES;Dey P, 2013, COMPUTATIONAL MATERIALS SCIENCE;Butcher J, 2013, COMPUTER-AIDED CIVIL AND INFRASTRUCTURE ENGINEERING;Ciodaro T, 2012, JOURNAL OF PHYSICS CONFERENCE SERIES;Maghsoudi M, 2014, SPECTROCHIMICA ACTA PART A MOLECULAR AND BIOMOLECULAR SPECTROSCOPY;Mántaras R, 1998, DATA & KNOWLEDGE ENGINEERING;Sadowski Ł, 2012, ARCHIVES OF CIVIL AND MECHANICAL ENGINEERING;Cho K, 2002, JOURNAL OF MOLECULAR STRUCTURE;Amato F, 2012, TALANTA;Zhao G, 2013, ;Postolache O, 2010, COMPUTER STANDARDS & INTERFACES;Yang L, 2013, PHYSICAL REVIEW B;Akbarpour H, 2013, COMPUTATIONAL MATERIALS SCIENCE;Castelli I, 2014, MODELLING AND SIMULATION IN MATERIALS SCIENCE AND ENGINEERING;Prabhu D, 1993, REVIEW OF PROGRESS IN QUANTITATIVE NONDESTRUCTIVE EVALUATION;Pyrgiotakis G, 2011, JOURNAL OF RAMAN SPECTROSCOPY;Bardsley W, 1986, BIOMETRIKA;Cifarelli C, 2006, OPTIMIZATION METHODS & SOFTWARE;Barton T, 2002, ;Burges C, 1998, DATA MINING AND KNOWLEDGE DISCOVERY;Silver D, 2016, NATURE;Hinton G, 2012, IEEE SIGNAL PROCESSING MAGAZINE;Joachims T, 1998, LECTURE NOTES IN COMPUTER SCIENCE;Shin H, 2016, IEEE TRANSACTIONS ON MEDICAL IMAGING;Gómez-Bombarelli R, 2018, ACS CENTRAL SCIENCE;Wu Z, 2017, CHEMICAL SCIENCE;Kirklin S, 2015, NPJ COMPUTATIONAL MATERIALS;Smith J, 2017, 
CHEMICAL SCIENCE;Raccuglia P, 2016, NATURE;Kearnes S, 2016, JOURNAL OF COMPUTER-AIDED MOLECULAR DESIGN;Ward L, 2016, NPJ COMPUTATIONAL MATERIALS;Schütt K, 2017, NATURE COMMUNICATIONS;Agrawal A, 2016, APL MATERIALS;Janssens O, 2016, JOURNAL OF SOUND AND VIBRATION;Li L, 2016, SENSORS;Meredig B, 2014, PHYSICAL REVIEW B;Zhao R, 2017, SENSORS;Hachmann J, 2011, THE JOURNAL OF PHYSICAL CHEMISTRY LETTERS;Jing L, 2017, MEASUREMENT;Ulissi Z, 2017, NATURE COMMUNICATIONS;Zhang P, 1993, THE ANNALS OF STATISTICS;Lal T, 2004, IEEE TRANSACTIONS ON BIOMEDICAL ENGINEERING;Oh E, 2016, NATURE NANOTECHNOLOGY;Leung M, 2014, BIOINFORMATICS;Gibert X, 2016, IEEE TRANSACTIONS ON INTELLIGENT TRANSPORTATION SYSTEMS;Ward L, 2017, PHYSICAL REVIEW. B./PHYSICAL REVIEW. B;Oliynyk A, 2016, CHEMISTRY OF MATERIALS;Warmuth M, 2003, JOURNAL OF CHEMICAL INFORMATION AND COMPUTER SCIENCES;Liu H, 2017, PHYSICS OF LIFE REVIEWS;Agrawal A, 2014, INTEGRATING MATERIALS AND MANUFACTURING INNOVATION;Lima A, 2016, EXPERT OPINION ON DRUG DISCOVERY;Zhao R, 2016, ;Meng M, 2017, NEUROCOMPUTING;Legrain F, 2017, THE JOURNAL OF PHYSICAL CHEMISTRY B;Saad Y, 2012, PHYSICAL REVIEW B;Giben X, 2015, ;Pankajakshan P, 2017, CHEMISTRY OF MATERIALS;Hirn M, 2017, MULTISCALE MODELING AND SIMULATION;Serra J, 2007, COMBINATORIAL CHEMISTRY & HIGH THROUGHPUT SCREENING;Dong J, 2017, JOURNAL OF CHEMINFORMATICS;Garcia‐Molina G, 2003, EURASIP JOURNAL ON ADVANCES IN SIGNAL PROCESSING;Liu J, 2015, SIGNAL PROCESSING;Uruchurtu J, 2012, RECENT PATENTS ON CORROSION SCIENCE;Field A, 1921, JOURNAL OF THE AMERICAN CHEMICAL SOCIETY;Duvenaud D, 2015, ARXIV (CORNELL UNIVERSITY);Wallach I, 2015, ARXIV (CORNELL UNIVERSITY);Goh G, 2017, ARXIV (CORNELL UNIVERSITY);LeCun Y, 2015, NATURE;Cortes C, 1995, MACHINE LEARNING;Quinlan J, 1986, MACHINE LEARNING;Silver D, 2017, NATURE;Butler K, 2018, NATURE;Arlot S, 2009, ;Deng L, 2014, FOUNDATIONS AND TRENDS® IN SIGNAL PROCESSING;Zhao R, 2018, MECHANICAL SYSTEMS AND SIGNAL PROCESSING;Schütt K, 2018, THE JOURNAL OF 
CHEMICAL PHYSICS;Sánchez-Lengeling B, 2018, SCIENCE;Gómez‐Bombarelli R, 2018, CAMBRIDGE UNIVERSITY ENGINEERING DEPARTMENT PUBLICATIONS DATABASE;Cha Y, 2017, COMPUTER-AIDED CIVIL AND INFRASTRUCTURE ENGINEERING;Zhang A, 2017, COMPUTER-AIDED CIVIL AND INFRASTRUCTURE ENGINEERING;Lu S, 2018, NATURE COMMUNICATIONS;Faber F, 2017, JOURNAL OF CHEMICAL THEORY AND COMPUTATION;Lin Y, 2017, COMPUTER-AIDED CIVIL AND INFRASTRUCTURE ENGINEERING;, 1998, MACHINE LEARNING;Wainberg M, 2018, NATURE BIOTECHNOLOGY;Feinberg E, 2018, ACS CENTRAL SCIENCE;Jia F, 2017, NEUROCOMPUTING;Jha D, 2018, SCIENTIFIC REPORTS;Kim E, 2017, CHEMISTRY OF MATERIALS;Zhou Z, 2017, ACS CENTRAL SCIENCE;Carrete J, 2014, ZENODO (CERN EUROPEAN ORGANIZATION FOR NUCLEAR RESEARCH);Atha D, 2017, STRUCTURAL HEALTH MONITORING;Cecen A, 2017, ACTA MATERIALIA;Elton D, 2018, SCIENTIFIC REPORTS;Wei J, 2018, ADVANCED MATERIALS;Jang K, 2019, STRUCTURAL HEALTH MONITORING;Nash W, 2018, NPJ MATERIALS DEGRADATION;Zhu Q, 2018, NATURE COMMUNICATIONS;Kim K, 2018, NPJ COMPUTATIONAL MATERIALS;Wang X, 2017, ;Hou W, 2018, JOURNAL OF PHYSICS CONFERENCE SERIES;Wang M, 2019, SMALL METHODS;Sun B, 2017, JOURNAL OF CHEMICAL INFORMATION AND MODELING;Burman P, 1989, BIOMETRIKA;Widener A, 2013, CHEMICAL & ENGINEERING NEWS;Liu L, 2018, ;Huang B, 2017, ARXIV (CORNELL UNIVERSITY);Wu W, 2018, ZHONGGUO KEXUE. WULIXUE LIXUE TIANWENXUE;Sutskever I, 2014, ARXIV (CORNELL UNIVERSITY);Gilmer J, 2017, ARXIV (CORNELL UNIVERSITY)",,,OPENALEX,"Wei J, 2019, INFOMAT","Wei J, 2019, INFOMAT" +https://openalex.org/W2342603028,10.21037/atm.2016.03.37,Introduction to machine learning: k-nearest neighbors,2016,en,article,1255,ANNALS OF TRANSLATIONAL MEDICINE,Annals of Translational Medicine,Zhongheng Zhang,Zhongheng Zhang,"Department of Critical Care Medicine, Jinhua Municipal Central Hospital, Jinhua Hospital of Zhejiang University, Jinhua 321000, China;Municipal Central Hospital. 
He graduated from School of Medicine, Zhejiang University in 2009;Department of Critical Care Medicine, Jinhua Municipal Central Hospital, Jinhua Hospital of Zhejiang University;351#, Mingyue Road, Jinhua 321000, China","Zhongheng Zhang (corresponding author), Department of Critical Care Medicine, Jinhua Municipal Central Hospital, Jinhua Hospital of Zhejiang University, Jinhua 321000, China; Municipal Central Hospital. He graduated from School of Medicine, Zhejiang University in 2009; Department of Critical Care Medicine, Jinhua Municipal Central Hospital, Jinhua Hospital of Zhejiang University; 351#, Mingyue Road, Jinhua 321000, China","Machine learning techniques have been widely used in many scientific fields, but its use in medical literature is limited partly because of technical difficulties. k-nearest neighbors (kNN) is a simple method of machine learning. The article introduces some basic ideas underlying the kNN algorithm, and then focuses on how to perform kNN modeling with R. The dataset should be prepared before running the knn() function in R. After prediction of outcome with kNN algorithm, the diagnostic performance of the model should be checked. Average accuracy is the mostly widely used statistic to reflect the kNN algorithm. 
Factors such as k value, distance calculation and choice of appropriate predictors all have significant impact on the model performance.",4,11,218,218,k-nearest neighbors algorithm;Computer science;Statistic;Artificial intelligence;Machine learning;Simple (philosophy);Value (mathematics);Function (biology);Data mining;Statistics;Mathematics,CN,"Weinberger K, 2009, JOURNAL OF MACHINE LEARNING RESEARCH;Linden A, 2006, JOURNAL OF EVALUATION IN CLINICAL PRACTICE;Cost S, 1993, MACHINE LEARNING;James D, 1996, TECHNOMETRICS;Short R, 1981, IEEE TRANSACTIONS ON INFORMATION THEORY;Zhang Z, 2014, PUBMED;Hernández-Torruco J, 2015, ;Thompson J, 2001, STATISTICS IN MEDICINE;, 2023, IEEE TRANSACTIONS ON INFORMATION THEORY",,,OPENALEX,"Zhang Z, 2016, ANNALS OF TRANSLATIONAL MEDICINE","Zhang Z, 2016, ANNALS OF TRANSLATIONAL MEDICINE" +https://openalex.org/W2803881474,10.1109/access.2018.2836950,Machine Learning and Deep Learning Methods for Cybersecurity,2018,en,article,1188,IEEE ACCESS,IEEE Access,Yang Xin;Lingshuang Kong;Zhi Liu;Yuling Chen;Yanmiao Li;Hongliang Zhu;Mingcheng Gao;Haixia Hou;Chunhua Wang,Yang Xin;Lingshuang Kong;Zhi Liu;Yuling Chen;Yanmiao Li;Hongliang Zhu;Mingcheng Gao;Haixia Hou;Chunhua Wang,"Guizhou Provincial Key Laboratory of Public Big Data, Guizhou University, Guiyang, China;School of Information Science and Engineering, Shandong University, Jinan, China;School of Information Science and Engineering, Shandong University, Jinan, China;Guizhou Provincial Key Laboratory of Public Big Data, Guizhou University, Guiyang, China;Centre of Information Security, Beijing University of Posts and Telecommunications, Beijing, China;Centre of Information Security, Beijing University of Posts and Telecommunications, Beijing, China;Centre of Information Security, Beijing University of Posts and Telecommunications, Beijing, China;Centre of Information Security, Beijing University of Posts and Telecommunications, Beijing, China;China Changfeng Science Technology Industry Group 
Corporation, Beijing, China",,"With the development of the Internet, cyber-attacks are changing rapidly and the cyber security situation is not optimistic. This survey report describes key literature surveys on machine learning (ML) and deep learning (DL) methods for network analysis of intrusion detection and provides a brief tutorial description of each ML/DL method. Papers representing each method were indexed, read, and summarized based on their temporal or thermal correlations. Because data are so important in ML/DL methods, we describe some of the commonly used network datasets used in ML/DL, discuss the challenges of using ML/DL for cybersecurity and provide suggestions for research directions.",6,,35365,35381,Computer science;Key (lock);Intrusion detection system;Deep learning;Artificial intelligence;The Internet;Network security;Machine learning;Computer security;Intrusion;Cyber threats;Data science;World Wide Web,CN,"LeCun Y, 1998, PROCEEDINGS OF THE IEEE;Jordan M, 2015, SCIENCE;Tavallaee M, 2009, ;Buczak A, 2015, IEEE COMMUNICATIONS SURVEYS & TUTORIALS;Deng L, 2014, NOW PUBLISHERS, INC. 
EBOOKS;Patcha A, 2007, COMPUTER NETWORKS;Lippmann R, 2002, ;Modi C, 2012, JOURNAL OF NETWORK AND COMPUTER APPLICATIONS;Hinton G, 2009, SCHOLARPEDIA;Goetz J, 2015, COMPUTERS & GEOSCIENCES;Kim J, 2016, ;Kolosnjaji B, 2016, LECTURE NOTES IN COMPUTER SCIENCE;Revathi S, 2013, ;Alom M, 2015, ;Bontemps L, 2016, LECTURE NOTES IN COMPUTER SCIENCE;Pervez M, 2014, ;Kokila R, 2014, ;Bolón‐Canedo V, 2010, EXPERT SYSTEMS WITH APPLICATIONS;Staudemeyer R, 2015, SOUTH AFRICAN COMPUTER JOURNAL;Milenkoski A, 2015, ACM COMPUTING SURVEYS;Bonillo M, 2016, JOURNAL OF BUSINESS RESEARCH;Moon D, 2015, THE JOURNAL OF SUPERCOMPUTING;Gao N, 2014, ;Coelho I, 2017, APPLIED ENERGY;Žliobaitė I, 2014, MACHINE LEARNING;Viegas E, 2016, IEEE TRANSACTIONS ON COMPUTERS;Meng W, 2015, SECURITY AND COMMUNICATION NETWORKS;Ding Y, 2016, ;Saxena H, 2014, INTERNATIONAL JOURNAL OF COMPUTER APPLICATIONS;Xie M, 2014, ;Modi C, 2016, THE JOURNAL OF SUPERCOMPUTING;Xie M, 2014, LECTURE NOTES IN COMPUTER SCIENCE;Kotpalliwar M, 2015, ;Relan N, 2015, ;Sharma R, 2015, SMART INNOVATION, SYSTEMS AND TECHNOLOGIES;Ammar A, 2015, JOURNAL OF COMPUTER AND COMMUNICATIONS;Nadeem M, 2016, DIGITALCOMMONS - KENNESAW STATE UNIVERSITY (KENNESAW STATE UNIVERSITY);Chandrasekhar A, 2014, ;Puthran S, 2016, COMMUNICATIONS IN COMPUTER AND INFORMATION SCIENCE;Tan Q, 2016, ;Shao Z, 2005, JISUANJI YINGYONG YANJIU;Azad C, 2015, INTERNATIONAL JOURNAL OF COMPUTER NETWORK AND INFORMATION SECURITY;Jo S, 2015, JOURNAL OF THE KOREA SOCIETY OF DIGITAL INDUSTRY AND INFORMATION MANAGEMENT;Man-fu Y, 2011, IFIP INTERNATIONAL FEDERATION FOR INFORMATION PROCESSING/IFIP;Soni M, 2015, ;Modinat M, 2015, INTERNATIONAL JOURNAL OF COMPUTER APPLICATIONS;Ha H, 2020, JOURNAL OF CONVERGENCE INFORMATION TECHNOLOGY;Selvi R, 2014, ADVANCES IN INTELLIGENT SYSTEMS AND COMPUTING;Kim G, 2016, ARXIV (CORNELL UNIVERSITY);LeCun Y, 2015, NATURE;Deng L, 2014, FOUNDATIONS AND TRENDS® IN SIGNAL PROCESSING;Yin C, 2017, IEEE ACCESS;Wang W, 2017, ;Wang W, 2017, ;Kwon D, 2017, CLUSTER 
COMPUTING;Alrawashdeh K, 2016, ;Agarap A, 2018, ;Λουρίδας Π, 2016, IEEE SOFTWARE;Yu Y, 2017, SECURITY AND COMMUNICATION NETWORKS;Meena G, 2017, ;Zhao G, 2017, ;Ingre B, 2017, SMART INNOVATION, SYSTEMS AND TECHNOLOGIES;Le T, 2017, ;Ergen T, 2017, IEEE TRANSACTIONS ON NEURAL NETWORKS AND LEARNING SYSTEMS;Malik A, 2017, CLUSTER COMPUTING;Kolosnjaji B, 2017, ;Shapoorifard H, 2017, INTERNATIONAL JOURNAL OF COMPUTER APPLICATIONS;Aftergood S, 2017, NATURE;Rao B, 2017, INDIAN JOURNAL OF SCIENCE AND TECHNOLOGY;Smith R, 2017, AI MAGAZINE;Rao V, 2017, INDIAN JOURNAL OF SCIENCE AND TECHNOLOGY;Vishwakarma S, 2017, INTERNATIONAL JOURNAL OF COMPUTER APPLICATIONS;Bu S, 2017, LECTURE NOTES IN COMPUTER SCIENCE;Sahoo D, 2017, ARXIV (CORNELL UNIVERSITY);Saxe J, 2017, ARXIV (CORNELL UNIVERSITY);Saxe J, 2017, ARXIV (CORNELL UNIVERSITY)",,,OPENALEX,"Xin Y, 2018, IEEE ACCESS","Xin Y, 2018, IEEE ACCESS" +https://openalex.org/W3047863327,10.1016/j.ejor.2020.07.063,Machine learning for combinatorial optimization: A methodological tour d'horizon,2021,en,article,1332,ARCHIVIO ISTITUZIONALE DELLA RICERCA (ALMA MATER STUDIORUM UNIVERSITÀ DI BOLOGNA),Archivio istituzionale della ricerca (Alma Mater Studiorum Università di Bologna),Yoshua Bengio;Andrea Lodi;Antoine Prouvost,Bengio Y.;Lodi A.;Prouvost A.,"Université de Montréal, Département d’Informatique et de Recherche Opérationelle, Pavillon André-Aisenstadt 2920, Chemin de la TourMontreal, Qc, H3T 1J4 Canada;Mila, Institut Québecois d’Intelligence Artificielle, Pavillon André-Aisenstadt 2920, Chemin de la TourMontreal, Qc, H3T 1J4 Canada;Mila, Institut Québecois d’Intelligence Artificielle, Pavillon André-Aisenstadt 2920, Chemin de la TourMontreal, Qc, H3T 1J4 Canada;Canada Excellence Research Chair in Data Science for Decision Making, École Polytechnique de Montréal, Pavillon André-Aisenstadt 2920, Chemin de la TourMontreal, Qc, H3T 1J4 Canada;Canada Excellence Research Chair in Data Science for Decision Making, École Polytechnique de 
Montréal, Pavillon André-Aisenstadt 2920, Chemin de la TourMontreal, Qc, H3T 1J4 Canada;Mila, Institut Québecois d’Intelligence Artificielle, Pavillon André-Aisenstadt 2920, Chemin de la TourMontreal, Qc, H3T 1J4 Canada","Lodi A. (corresponding author), Mila, Institut Québecois d’Intelligence Artificielle, Pavillon André-Aisenstadt 2920, Chemin de la TourMontreal, Qc, H3T 1J4 Canada; Canada Excellence Research Chair in Data Science for Decision Making, École Polytechnique de Montréal, Pavillon André-Aisenstadt 2920, Chemin de la TourMontreal, Qc, H3T 1J4 Canada","This paper surveys the recent attempts, both from the machine learning and operations research communities, at leveraging machine learning to solve combinatorial optimization problems. Given the hard nature of these problems, state-of-the-art algorithms rely on handcrafted heuristics for making decisions that are otherwise too expensive to compute or mathematically not well defined. Thus, machine learning looks like a natural candidate to make such decisions in a more principled and optimized way. We advocate for pushing further the integration of machine learning and combinatorial optimization and detail a methodology to do so. 
A main point of the paper is seeing generic optimization problems as data points and inquiring what is the relevant distribution of problems to use for learning on a given task.",,,,,Computer science;Heuristics;Artificial intelligence;Machine learning;Combinatorial optimization;Point (geometry);Task (project management);Optimization problem;Mathematical optimization;Online machine learning;Active learning (machine learning);Mathematics;Algorithm,CA,"S S, 2005, IEEE TRANSACTIONS ON NEURAL NETWORKS;Nasrabadi N, 2007, JOURNAL OF ELECTRONIC IMAGING;Silver D, 2016, NATURE;Flach P, 2015, ;Teymur O, 2016, ;Murphy K, 2012, ;Goodfellow I, 2016, MIT PRESS EBOOKS;Johnson J, 2000, NEUROCOMPUTING;Glover F, 2003, INTERNATIONAL SERIES IN MANAGEMENT SCIENCE/OPERATIONS RESEARCH/INTERNATIONAL SERIES IN OPERATIONS RESEARCH & MANAGEMENT SCIENCE;Nemhauser G, 1972, REPEC: RESEARCH PAPERS IN ECONOMICS;, 2007, CHOICE REVIEWS ONLINE;McCormick G, 1976, MATHEMATICAL PROGRAMMING;, 2007, CHOICE REVIEWS ONLINE;Vinyals O, 2015, NEURAL INFORMATION PROCESSING SYSTEMS;Thrun S, 1998, ;Hussein A, 2017, ACM COMPUTING SURVEYS;, 1982, JOURNAL OF CRIMINAL JUSTICE;Chrpa L, 2016, ;Hochreiter S, 2001, LECTURE NOTES IN COMPUTER SCIENCE;Michael J, 2009, ;Smith‐Miles K, 1999, INFORMS JOURNAL ON COMPUTING;Schmidhuber J, 1992, NEURAL COMPUTATION;Khalil E, 2016, PROCEEDINGS OF THE AAAI CONFERENCE ON ARTIFICIAL INTELLIGENCE;Bengio Y, 1991, ;Bischl B, 2016, ARTIFICIAL INTELLIGENCE;Hoos H, 2011, ;Dorffner G, 2001, LECTURE NOTES IN COMPUTER SCIENCE;Alvarez A, 2017, INFORMS JOURNAL ON COMPUTING;Lodi A, 2017, TOP;He H, 2014, ;Özcan E, 2010, INTERNATIONAL JOURNAL OF APPLIED METAHEURISTIC COMPUTING;Chan T, 2014, OPERATIONS RESEARCH;Wierstra D, 2009, LOGIC JOURNAL OF IGPL;Kruber M, 2017, LECTURE NOTES IN COMPUTER SCIENCE;Gass S, 1982, TRANSPORTATION RESEARCH PART A GENERAL;Fortun M, 1993, SOCIAL STUDIES OF SCIENCE;Smith‐Miles K, 2015, COMPUTERS & OPERATIONS RESEARCH;Mascia F, 2014, COMPUTERS & OPERATIONS RESEARCH;Liberto 
G, 2015, EUROPEAN JOURNAL OF OPERATIONAL RESEARCH;Karapetyan D, 2017, EUROPEAN JOURNAL OF OPERATIONAL RESEARCH;Alvarez A, 2014, ;Ansótegui C, 2017, PROCEEDINGS OF THE AAAI CONFERENCE ON ARTIFICIAL INTELLIGENCE;Georgatzis K, 2016, ;Alvarez A, 2016, ;Fitzgerald T, 2021, PROCEEDINGS OF THE INTERNATIONAL SYMPOSIUM ON COMBINATORIAL SEARCH;Malitsky Y, 2016, LECTURE NOTES IN COMPUTER SCIENCE;, 2007, ;Bello I, 2016, ARXIV (CORNELL UNIVERSITY);Fischetti M, 2011, WILEY ENCYCLOPEDIA OF OPERATIONS RESEARCH AND MANAGEMENT SCIENCE;Li K, 2017, ARXIV (CORNELL UNIVERSITY);Vaswani A, 2025, ;Creswell A, 2018, IEEE SIGNAL PROCESSING MAGAZINE;Cohen W, 2006, ;Ravi S, 2017, INTERNATIONAL CONFERENCE ON LEARNING REPRESENTATIONS;Simonotto E, 2006, ;Ahuja R, 2001, OPERATIONS RESEARCH;Giunchiglia F, 2017, INTERNATIONAL JOINT CONFERENCE ON ARTIFICIAL INTELLIGENCE;Conforti M, 2014, GRADUATE TEXTS IN MATHEMATICS;Khalil E, 2017, ;Bonami P, 2018, LECTURE NOTES IN COMPUTER SCIENCE;Delft C, 2018, LECTURE NOTES IN COMPUTER SCIENCE;Kool W, 2018, ARXIV (CORNELL UNIVERSITY);Lindauer M, 2018, PROCEEDINGS OF THE AAAI CONFERENCE ON ARTIFICIAL INTELLIGENCE;Özcan E, 2012, IGI GLOBAL EBOOKS;Larsen E, 2018, ;Ansótegui C, 2018, LECTURE NOTES IN COMPUTER SCIENCE;Nair V, 2018, UNCERTAINTY IN ARTIFICIAL INTELLIGENCE;Lombardi M, 2017, LECTURE NOTES IN COMPUTER SCIENCE;Hottung A, 2019, COMPUTERS & OPERATIONS RESEARCH;Sergeyev Y, 2017, LECTURE NOTES IN COMPUTER SCIENCE;Kalamata L, 2018, LECTURE NOTES IN COMPUTER SCIENCE;Gilmer J, 2017, ARXIV (CORNELL UNIVERSITY);Finn C, 2017, ARXIV (CORNELL UNIVERSITY);Lang J, 2019, HAL (LE CENTRE POUR LA COMMUNICATION SCIENTIFIQUE DIRECTE);Gasse M, 2019, ARXIV (CORNELL UNIVERSITY);Selsam D, 2018, ARXIV (CORNELL UNIVERSITY);Dai H, 2016, ARXIV (CORNELL UNIVERSITY);Dai H, 2017, ARXIV (CORNELL UNIVERSITY);Nowak A, 2017, ARXIV (CORNELL UNIVERSITY);Mahmood R, 2018, ARXIV (CORNELL UNIVERSITY);Emami P, 2018, ARXIV (CORNELL UNIVERSITY);Nagarajan P, 2018, ARXIV (CORNELL 
UNIVERSITY)",,,OPENALEX,"Bengio Y, 2021, ARCHIVIO ISTITUZIONALE DELLA RICERCA (ALMA MATER STUDIORUM UNIVERSITÀ DI BOLOGNA)","Bengio Y, 2021, ARCHIVIO ISTITUZIONALE DELLA RICERCA (ALMA MATER STUDIORUM UNIVERSITÀ DI BOLOGNA)" +https://openalex.org/W2742835787,10.1016/j.jmat.2017.08.002,Materials discovery and design using machine learning,2017,en,article,1229,JOURNAL OF MATERIOMICS,Journal of Materiomics,Yue Liu;Tianlu Zhao;Wangwei Ju;Siqi Shi,Yue Liu;Tianlu Zhao;Wangwei Ju;Siqi Shi,"School of Computer Engineering and Science, Shanghai University, Shanghai 200444, China;School of Computer Engineering and Science, Shanghai University, Shanghai 200444, China;School of Computer Engineering and Science, Shanghai University, Shanghai 200444, China;Materials Genome Institute, Shanghai University, Shanghai 200444, China;School of Materials Science and Engineering, Shanghai University, Shanghai 200444, China","Siqi Shi (corresponding author), Materials Genome Institute, Shanghai University, Shanghai 200444, China; School of Materials Science and Engineering, Shanghai University, Shanghai 200444, China","The screening of novel materials with good performance and the modelling of quantitative structure-activity relationships (QSARs), among other issues, are hot topics in the field of materials science. Traditional experiments and computational modelling often consume tremendous time and resources and are limited by their experimental conditions and theoretical foundations. Thus, it is imperative to develop a new method of accelerating the discovery and design process for novel materials. Recently, materials discovery and design using machine learning have been receiving increasing attention and have achieved great improvements in both time efficiency and prediction accuracy. In this review, we first outline the typical mode of and basic procedures for applying machine learning in materials science, and we classify and compare the main algorithms. 
Then, the current research status is reviewed with regard to applications of machine learning in material property prediction, in new materials discovery and for other purposes. Finally, we discuss problems related to machine learning in materials science, propose possible solutions, and forecast potential directions of future research. By directly combining computational studies with experiments, we hope to provide insight into the parameters that affect the properties of materials, thereby enabling more efficient and target-oriented research on materials discovery and design. Machine learning provides a new means of screening novel materials with good performance, developing quantitative structure-activity relationships (QSARs) and other models, predicting the properties of materials, discovering new materials and performing other materials-relateds studies. • The typical mode of and basic procedures for applying machine learning in materials science are summarized and discussed. • For various points of application, the machine learning methods used for different purposes are comprehensively reviewed. 
• Existing problems are discussed, possible solutions are proposed and potential directions of future research are suggested.",3,3,159,177,Materials science;Nanotechnology;Systems engineering;Construction engineering;Engineering,CN,"Friedman J, 2001, THE ANNALS OF STATISTICS;Nasrabadi N, 2007, JOURNAL OF ELECTRONIC IMAGING;Jain A, 2013, APL MATERIALS;Rosenblatt F, 1958, PSYCHOLOGICAL REVIEW;Allen F, 2002, ACTA CRYSTALLOGRAPHICA SECTION B STRUCTURAL SCIENCE;Murphy K, 2012, ;Alder B, 1959, THE JOURNAL OF CHEMICAL PHYSICS;Mitchell T, 1999, COMMUNICATIONS OF THE ACM;Snyder J, 2012, PHYSICAL REVIEW LETTERS;Hansen K, 2013, JOURNAL OF CHEMICAL THEORY AND COMPUTATION;Hautier G, 2010, CHEMISTRY OF MATERIALS;Schütt K, 2014, PHYSICAL REVIEW B;Fischer C, 2006, NATURE MATERIALS;Maddox J, 1988, NATURE;Curtarolo S, 2003, PHYSICAL REVIEW LETTERS;Hautier G, 2010, INORGANIC CHEMISTRY;Klass V, 2014, JOURNAL OF POWER SOURCES;Olson G, 2000, SCIENCE;Isarankura‐Na‐Ayudhya C, 2009, PUBMED;Peck R, 2000, ;Topçu İ, 2007, COMPUTATIONAL MATERIALS SCIENCE;Hutchinson J, 2009, JOURNAL OF THERMAL ANALYSIS AND CALORIMETRY;Fleischer C, 2013, JOURNAL OF POWER ELECTRONICS;Javed S, 2006, COMPUTATIONAL MATERIALS SCIENCE;Chen B, 2008, COMPUTATIONAL MATERIALS SCIENCE;Fang S, 2008, MATERIALS & DESIGN (1980-2015);Zhu Q, 2003, ACTA MATERIALIA;Alzghoul A, 2014, JOURNAL OF CHEMICAL INFORMATION AND MODELING;Carrera G, 2008, TETRAHEDRON;Liu X, 2009, COMPUTATIONAL MATERIALS SCIENCE;Chen X, 2007, INTERNATIONAL JOURNAL OF INTELLIGENT SYSTEMS;Yu X, 2008, JOURNAL OF THEORETICAL AND COMPUTATIONAL CHEMISTRY;Martínez R, 2014, COMPUTATIONAL MATERIALS SCIENCE;Castin N, 2014, COMPUTATIONAL MATERIALS SCIENCE;Abbod M, 2002, MATERIALS SCIENCE AND ENGINEERING A;Farrusseng D, 2008, COMPUTATIONAL MATERIALS SCIENCE;Zhou Z, 2004, JOURNAL OF COMPUTER SCIENCE AND TECHNOLOGY;Zhang X, 2007, MATERIALS & DESIGN (1980-2015);Camacho-Zúñiga C, 2003, INDUSTRIAL & ENGINEERING CHEMISTRY RESEARCH;Majid A, 2011, COMPUTATIONAL MATERIALS 
SCIENCE;Sivasankaran S, 2009, COMPUTATIONAL MATERIALS SCIENCE;Mohn C, 2008, COMPUTATIONAL MATERIALS SCIENCE;Pei J, 2012, MACROMOLECULAR THEORY AND SIMULATIONS;Han Y, 2010, COMPUTATIONAL MATERIALS SCIENCE;Paszkowicz W, 2008, COMPUTATIONAL MATERIALS SCIENCE;Hachmann J, 2011, BULLETIN OF THE AMERICAN PHYSICAL SOCIETY;Cavaliere P, 2006, COMPUTATIONAL MATERIALS SCIENCE;Joze H, 2010, ;1943- B, 2009, ;Vapnik V, 1995, ;Cortes C, 1995, MACHINE LEARNING;Kumar D, 1995, CHOICE REVIEWS ONLINE;De’ath G, 2000, ECOLOGY;Chen L, 2002, ANNUAL REVIEW OF MATERIALS RESEARCH;Rahman A, 1964, PHYSICAL REVIEW;Boettinger W, 2002, ANNUAL REVIEW OF MATERIALS RESEARCH;Belsky A, 2002, ACTA CRYSTALLOGRAPHICA SECTION B STRUCTURAL SCIENCE;Steinbach I, 2009, MODELLING AND SIMULATION IN MATERIALS SCIENCE AND ENGINEERING;Ghiringhelli L, 2015, PHYSICAL REVIEW LETTERS;Larrañaga P, 2006, BRIEFINGS IN BIOINFORMATICS;Pilania G, 2013, SCIENTIFIC REPORTS;Charkhgard M, 2010, IEEE TRANSACTIONS ON INDUSTRIAL ELECTRONICS;Hachmann J, 2011, THE JOURNAL OF PHYSICAL CHEMISTRY LETTERS;Mylopoulos J, 1999, COMMUNICATIONS OF THE ACM;Fernández M, 2014, THE JOURNAL OF PHYSICAL CHEMISTRY LETTERS;Kusne A, 2014, SCIENTIFIC REPORTS;Fujimura K, 2013, ADVANCED ENERGY MATERIALS;Lilienfeld O, 2015, INTERNATIONAL JOURNAL OF QUANTUM CHEMISTRY;Olsson F, 2009, KTH PUBLICATION DATABASE DIVA (KTH ROYAL INSTITUTE OF TECHNOLOGY);Hautier G, 2012, JOURNAL OF MATERIALS SCIENCE;Sundararaghavan V, 2004, COMPUTATIONAL MATERIALS SCIENCE;Altun F, 2007, COMPUTATIONAL MATERIALS SCIENCE;Wu S, 2013, RELIABILITY ENGINEERING & SYSTEM SAFETY;Fang S, 2008, COMPUTATIONAL MATERIALS SCIENCE;Scott D, 2007, JOURNAL OF THE EUROPEAN CERAMIC SOCIETY;Gajewski J, 2013, COMPUTATIONAL MATERIALS SCIENCE;Beran G, 2014, ANGEWANDTE CHEMIE INTERNATIONAL EDITION;Guo Z, 2004, COMPUTATIONAL MATERIALS SCIENCE;Majid A, 2010, COMPUTATIONAL MATERIALS SCIENCE;Salahinejad M, 2012, JOURNAL OF CHEMICAL INFORMATION AND MODELING;Addin O, 2006, MATERIALS & DESIGN (1980-2015);Lee D, 
2007, IEEE TRANSACTIONS ON VEHICULAR TECHNOLOGY;Rao H, 1996, COMPUTATIONAL MATERIALS SCIENCE;Phillips C, 2013, SOFT MATTER;Ceder G, 2006, MRS BULLETIN;Li C, 2003, JOURNAL OF PHYSICS AND CHEMISTRY OF SOLIDS;Raj R, 2008, COMPUTATIONAL MATERIALS SCIENCE;Ning X, 2011, JOURNAL OF CHEMICAL INFORMATION AND MODELING;Gharagheizi F, 2012, CHEMICAL ENGINEERING SCIENCE;Reich Y, 1995, MATERIALS & DESIGN (1980-2015);Eminağaoğlu M, 2010, ;Pei J, 2012, JOURNAL OF THEORETICAL AND COMPUTATIONAL CHEMISTRY;Bertinetto‬ ‪, 2008, JOURNAL OF MOLECULAR GRAPHICS AND MODELLING;Li G, 2009, ;Yu K, 2006, TALANTA;Vahed A, 2003, ;Li C, 1996, JOURNAL OF PHYSICS AND CHEMISTRY OF SOLIDS;Liu Y, 2004, IEEE INTERNATIONAL CONFERENCE ON COGNITIVE INFORMATICS;Yang L, 2005, JOURNAL OF CHEMICAL INFORMATION AND MODELING;Kohn W, 1965, PHYSICAL REVIEW;Cortes C, 1995, MACHINE LEARNING;Vapnik V, 2000, ;GuyonIsabelle, 2003, JOURNAL OF MACHINE LEARNING RESEARCH;Buckland S, 1994, BIOMETRICS;, 2007, CHOICE REVIEWS ONLINE;Kirklin S, 2015, NPJ COMPUTATIONAL MATERIALS;Raccuglia P, 2016, NATURE;Ward L, 2016, NPJ COMPUTATIONAL MATERIALS;Agrawal A, 2016, APL MATERIALS;Gómez‐Bombarelli R, 2016, NATURE MATERIALS;Meredig B, 2014, PHYSICAL REVIEW B;Marriott P, 1995, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES A (STATISTICS IN SOCIETY);Lufaso M, 2001, ACTA CRYSTALLOGRAPHICA SECTION B STRUCTURAL SCIENCE;Isayev O, 2017, NATURE COMMUNICATIONS;, 1998, MACHINE LEARNING;Shi S, 2016, CHINESE PHYSICS B;Sendek A, 2016, ENERGY & ENVIRONMENTAL SCIENCE;Ward C, 2012, 23RD ADVANCED AEROSPACE MATERIALS AND PROCESSES (AEROMAT) CONFERENCE AND EXPOSITION;Blaiszik B, 2016, JOM;Heumann C, 2016, ;Binder K, 1995, TOPICS IN APPLIED PHYSICS;Jain A, 2016, JOURNAL OF MATERIALS RESEARCH/PRATT'S GUIDE TO VENTURE CAPITAL SOURCES;Helma C, 2004, JOURNAL OF CHEMICAL INFORMATION AND COMPUTER SCIENCES;Liu R, 2015, SCIENTIFIC REPORTS;Hill J, 2016, MRS BULLETIN;Ward L, 2016, CURRENT OPINION IN SOLID STATE AND MATERIALS SCIENCE;Rajan K, 2005, STATISTICAL 
ANALYSIS AND DATA MINING THE ASA DATA SCIENCE JOURNAL;Bolstad W, 2004, ;Häse F, 2016, CHEMICAL SCIENCE;Saldana D, 2012, ENERGY & FUELS;Puchala B, 2016, JOM;Balachandran P, 2015, SCIENTIFIC REPORTS;Wang X, 2017, JOURNAL OF MATERIOMICS;Liu R, 2015, INTEGRATING MATERIALS AND MANUFACTURING INNOVATION;Chen C, 2017, SCIENTIFIC REPORTS;Roekeghem A, 2016, PHYSICAL REVIEW X;Wu H, 2017, COMPUTATIONAL MATERIALS SCIENCE;Kalidindi S, 2016, JOM;Sumpter B, 2015, NPJ COMPUTATIONAL MATERIALS;Liu R, 2017, INTEGRATING MATERIALS AND MANUFACTURING INNOVATION;Chen L, 2015, NPJ COMPUTATIONAL MATERIALS;Liu Y, 2004, ;Mueller T, 2016, REVIEWS IN COMPUTATIONAL CHEMISTRY",,,OPENALEX,"Liu Y, 2017, JOURNAL OF MATERIOMICS","Liu Y, 2017, JOURNAL OF MATERIOMICS" +https://openalex.org/W2943491685,10.1016/s1470-2045(19)30149-4,Big data and machine learning algorithms for health-care delivery,2019,en,review,1424,THE LANCET ONCOLOGY,The Lancet Oncology,Kee Yuan Ngiam;Ing Wei Khor,Kee Yuan Ngiam;Ing Wei Khor,"Department of Surgery, National University of Singapore, Singapore Division of General Surgery (Thyroid and Endocrine Surgery), University Surgical Cluster, National University Hospital, Singapore National University Health System Corporate Office, Singapore. Electronic address: kee_yuan_ngiam@nuhs.edu.sg;National University Health System Corporate Office, Singapore;Department of Surgery, National University of Singapore, Singapore;Division of General Surgery (Thyroid and Endocrine Surgery), University Surgical Cluster, National University Hospital, Singapore;Department of Medicine, Yong Loo Lin School of Medicine, National University of Singapore, Singapore","Kee Yuan Ngiam (corresponding author), Department of Surgery, National University of Singapore, Singapore; Division of General Surgery (Thyroid and Endocrine Surgery), University Surgical Cluster, National University Hospital, Singapore; National University Health System Corporate Office, Singapore. 
Electronic address: kee_yuan_ngiam@nuhs.edu.sg; National University Health System Corporate Office, Singapore; Department of Surgery, National University of Singapore, Singapore; Division of General Surgery (Thyroid and Endocrine Surgery), University Surgical Cluster, National University Hospital, Singapore",,20,5,e262,e273,Machine learning;Artificial intelligence;Computer science;Big data;Health care;Scalability;Flexibility (engineering);Data science;Data mining;Database,SG,"LeCun Y, 2015, NATURE;Esteva A, 2017, NATURE;Topol E, 2018, NATURE MEDICINE;Jiang F, 2017, STROKE AND VASCULAR NEUROLOGY;Samuel A, 1959, IBM JOURNAL OF RESEARCH AND DEVELOPMENT;Bodenreider O, 2003, NUCLEIC ACIDS RESEARCH;Concato J, 2000, NEW ENGLAND JOURNAL OF MEDICINE;Deo R, 2015, CIRCULATION;Greenspan H, 2016, IEEE TRANSACTIONS ON MEDICAL IMAGING;Abràmoff M, 2018, NPJ DIGITAL MEDICINE;Rajpurkar P, 2018, PLOS MEDICINE;Laranjo L, 2018, JOURNAL OF THE AMERICAN MEDICAL INFORMATICS ASSOCIATION;Bates D, 2014, HEALTH AFFAIRS;Ekeland A, 2010, INTERNATIONAL JOURNAL OF MEDICAL INFORMATICS;Mobadersany P, 2018, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Faust O, 2018, COMPUTER METHODS AND PROGRAMS IN BIOMEDICINE;Tuckson R, 2017, NEW ENGLAND JOURNAL OF MEDICINE;, 2005, ;Jha S, 2016, JAMA;Wainberg M, 2018, NATURE BIOTECHNOLOGY;Henry K, 2015, SCIENCE TRANSLATIONAL MEDICINE;Nam J, 2018, RADIOLOGY;Titano J, 2018, NATURE MEDICINE;Grinsven M, 2016, IEEE TRANSACTIONS ON MEDICAL IMAGING;Kuo C, 2016, JOURNAL OF VISUAL COMMUNICATION AND IMAGE REPRESENTATION;Haendel M, 2018, NEW ENGLAND JOURNAL OF MEDICINE;Luxton D, 2014, ARTIFICIAL INTELLIGENCE IN MEDICINE;Loh E, 2018, BMJ LEADER;Lin H, 2018, PLOS MEDICINE;Zarrinpar A, 2016, SCIENCE TRANSLATIONAL MEDICINE;Pantuck A, 2018, ADVANCED THERAPEUTICS;Kantarjian H, 2015, JAMA ONCOLOGY;Gawehn E, 2018, EXPERT OPINION ON DRUG DISCOVERY;Kowatsch T, 2017, ALEXANDRIA (UNISG) (UNIVERSITY OF ST.GALLEN);Kerlikowske K, 2018, ANNALS OF INTERNAL MEDICINE;Wang H, 2018, 
QUANTITATIVE IMAGING IN MEDICINE AND SURGERY;Hainc N, 2017, FRONTIERS IN NEUROLOGY;Azizi S, 2017, INTERNATIONAL JOURNAL OF COMPUTER ASSISTED RADIOLOGY AND SURGERY;Zheng K, 2017, ;Silver D, 2011, LECTURE NOTES IN COMPUTER SCIENCE;Mazzanti M, 2018, CURRENT CARDIOLOGY REPORTS;Gelhaus P, 2011, JOURNAL OF EVALUATION IN CLINICAL PRACTICE;Voelker R, 2018, JAMA;Marcus G, 2018, ARXIV (CORNELL UNIVERSITY)",,,OPENALEX,"Ngiam K, 2019, THE LANCET ONCOLOGY","Ngiam K, 2019, THE LANCET ONCOLOGY" +https://openalex.org/W2967663220,10.11989/jest.1674-862x.80904120,Hyperparameter Optimization for Machine Learning Models Based on Bayesian Optimization,2019,en,article,1413,HAL (LE CENTRE POUR LA COMMUNICATION SCIENTIFIQUE DIRECTE),HAL (Le Centre pour la Communication Scientifique Directe),Jia Wu;Xiu Yun Chen;Hao Zhang;Li Dong Xiong;Hang Lei;Sihao Deng,Jia Wu;Xiu Yun Chen;Hao Zhang;Li Dong Xiong;Hang Lei;Sihao Deng,"School of Information and Software Engineering,University of Electronic Science and technology of China,Chengdu 610054,China);School of Information and Software Engineering,University of Electronic Science and technology of China,Chengdu 610054,China);School of Information and Software Engineering,University of Electronic Science and technology of China,Chengdu 610054,China);School of Information and Software Engineering,University of Electronic Science and technology of China,Chengdu 610054,China);School of Information and Software Engineering,University of Electronic Science and technology of China,Chengdu 610054,China);Universite de Technologie de Belfort-Montbeliard, Belfort, 90010, France",,,17,1,26,40,Hyperparameter;Bayesian optimization;Machine learning;Computer science;Artificial intelligence;Hyperparameter optimization;Gaussian process;Random forest;Artificial neural network;Gaussian;Support vector machine,CN;FR,,,,OPENALEX,"Wu J, 2019, HAL (LE CENTRE POUR LA COMMUNICATION SCIENTIFIQUE DIRECTE)","Wu J, 2019, HAL (LE CENTRE POUR LA COMMUNICATION SCIENTIFIQUE DIRECTE)" 
+https://openalex.org/W2096118443,10.1109/surv.2008.080406,A survey of techniques for internet traffic classification using machine learning,2008,en,article,1634,IEEE COMMUNICATIONS SURVEYS & TUTORIALS,IEEE Communications Surveys & Tutorials,Thuy Nguyen Thi Thu;Grenville Armitage,Thuy T.T. Nguyen;Grenville Armitage,"Centre for Advanced Internet Architectures, Swinburne University of Technology, Melbourne, Australia;Swinburne Univ. of Technology, Melbourne, Australia#TAB#;Centre for Advanced Internet Architectures, Swinburne University of Technology, Melbourne, Australia;Swinburne Univ. of Technology, Melbourne, Australia#TAB#",,"The research community has begun looking for IP traffic classification techniques that do not rely on `well known' TCP or UDP port numbers, or interpreting the contents of packet payloads. New work is emerging on the use of statistical traffic characteristics to assist in the identification and classification process. This survey paper looks at emerging research into the application of Machine Learning (ML) techniques to IP traffic classification - an inter-disciplinary blend of IP networking and data mining techniques. We provide context and motivation for the application of ML techniques to IP traffic classification, and review 18 significant works that cover the dominant period from 2004 to early 2007. These works are categorized and reviewed according to their choice of ML strategies and primary contributions to the literature. We also discuss a number of key requirements for the employment of ML-based traffic classifiers in operational IP networks, and qualitatively critique the extent to which the reviewed works meet these requirements. 
Open issues and challenges in the field are also discussed.",10,4,56,76,Computer science;Traffic classification;Deep packet inspection;Context (archaeology);The Internet;Identification (biology);Internet traffic;Open research;Statistical classification;Process (computing);Port (circuit theory);Network packet;Data science;Machine learning;Artificial intelligence;Data mining;World Wide Web;Computer network,AU,"Dempster A, 1977, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Witten I, 2011, ELSEVIER EBOOKS;Goldberg D, 1988, ;Kohavi R, 1997, ARTIFICIAL INTELLIGENCE;Rand W, 1971, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Blake S, 1998, ;Witten I, 2002, ACM SIGMOD RECORD;Paxson V, 1999, COMPUTER NETWORKS;Duda R, 2000, WILEY-INTERSCIENCE EBOOKS;Hall M, 2003, IEEE TRANSACTIONS ON KNOWLEDGE AND DATA ENGINEERING;Moore A, 2005, ;Karagiannis T, 2005, ;Cheeseman P, 1996, ;Sen S, 2004, ;Moore A, 2005, LECTURE NOTES IN COMPUTER SCIENCE;Erman J, 2006, ;Williams N, 2006, ACM SIGCOMM COMPUTER COMMUNICATION REVIEW;Paxson V, 1994, IEEE/ACM TRANSACTIONS ON NETWORKING;Bernaille L, 2006, ACM SIGCOMM COMPUTER COMMUNICATION REVIEW;McGregor A, 2004, LECTURE NOTES IN COMPUTER SCIENCE;Zander S, 2005, ;Karagiannis T, 2005, ;Auld T, 2007, IEEE TRANSACTIONS ON NEURAL NETWORKS;Halkidi M, 2002, ACM SIGMOD RECORD;Haffner P, 2005, ;Bonfiglio D, 2007, ;Madhukar A, 2006, ;Erman J, 2007, ;Ma J, 2006, ;Simon H, 1983, MACHINE LEARNING;Fisher D, 1991, ;Dewes C, 2003, ;Erman J, 2007, ;Lang T, 2004, ;Park J, 2006, ;Erman J, 2007, ;Winston P, 1984, ADDISON-WESLEY LONGMAN PUBLISHING CO., INC. 
EBOOKS;REICH Y, 1991, ELSEVIER EBOOKS;Armitage G, 2000, SWINBURNE RESEARCH BANK (SWINBURNE UNIVERSITY OF TECHNOLOGY);Shi Z, 1992, INTERNATIONAL ACADEMIC PUBLISHERS EBOOKS;Burgstahler L, 2003, ;Stewart L, 2005, ;Michie D, 1990, KNOWLEDGE-BASED SYSTEMS;Silver B, 1990, ;Schulzrinne H, 2003, ;Braden R, 1994, ;Armitage G, 2003, SWINBURNE RESEARCH BANK (SWINBURNE UNIVERSITY OF TECHNOLOGY);Baker F, 2004, ;T. N, 2006, SWINBURNE RESEARCH BANK (SWINBURNE UNIVERSITY OF TECHNOLOGY);Williams N, 2006, SWINBURNE RESEARCH BANK (SWINBURNE UNIVERSITY OF TECHNOLOGY);, 1989, CHOICE REVIEWS ONLINE;Witten I, 2008, ;Xu R, 2005, IEEE TRANSACTIONS ON NEURAL NETWORKS;Chiang L, 2001, ADVANCED TEXTBOOKS IN CONTROL AND SIGNAL PROCESSING;Rand W, 1971, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Halkidi M, 2002, ACM SIGMOD RECORD;Roughan M, 2004, ;Crotti M, 2007, ACM SIGCOMM COMPUTER COMMUNICATION REVIEW;Thu T, 2006, CONFERENCE ON LOCAL COMPUTER NETWORKS;Park J, 2006, INFORMATION HIDING;Carmichael O, 2004, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Park J, 2006, ;But J, 2006, MURDOCH RESEARCH REPOSITORY (MURDOCH UNIVERSITY);Blake S, 1998, ;Group A, 1996, ",,,OPENALEX,"Thu T, 2008, IEEE COMMUNICATIONS SURVEYS & TUTORIALS","Thu T, 2008, IEEE COMMUNICATIONS SURVEYS & TUTORIALS" +https://openalex.org/W1728842521,,API design for machine learning software: experiences from the scikit-learn project,2013,en,preprint,1801,ARXIV (CORNELL UNIVERSITY),arXiv (Cornell University),"Lars Buitinck;Gilles Louppe;Mathieu Blondel;Fabián Pedregosa;Andreas Mueller;Olivier Grisel;Vlad Niculae;Peter Prettenhofer;Alexandre Gramfort;Jaques Grobler;Robert Layton;Jake Vanderplas;Arnaud Joly;Brian Holt;Varoquaux, Ga\""el","Buitinck, Lars;Louppe, Gilles;Blondel, Mathieu;Pedregosa, Fabian;Mueller, Andreas;Grisel, Olivier;Niculae, Vlad;Prettenhofer, Peter;Gramfort, Alexandre;Grobler, Jaques;Layton, Robert;Vanderplas, Jake;Joly, Arnaud;Holt, Brian;Varoquaux, Ga\""el","ILPS - Information and Language 
Processing Systems (ISLA, University of Amsterdam, Science Park 904, 1098 XH Amsterdam - Netherlands);Systems and Modeling Research Unit (Institute Montefiore (B28, P32) Grande Traverse, 10 Sart-Tilman B-4000 Liège, Belgium. - Belgium);Kobe University (Japan);PARIETAL - Modelling brain structure, function and variability based on high-field MRI data (Neurospin, CEA Saclay, Bâtiment 145, 91191 Gif-sur-Yvette Cedex - France);Autonomous Intelligent Systems Group (Rheinische Friedrich-Wilhelms-Universität Bonn Institut für Informatik VI Friedrich-Ebert-Allee 144 53113 Bonn - Germany);Chercheur indépendant (France);Computational Linguistics (Romania);Ciuvo GMBH (Austria);PARIETAL - Modelling brain structure, function and variability based on high-field MRI data (Neurospin, CEA Saclay, Bâtiment 145, 91191 Gif-sur-Yvette Cedex - France);LTCI - Laboratoire Traitement et Communication de l'Information (46 rue Barrault F-75634 Paris Cedex 13 - France);PARIETAL - Modelling brain structure, function and variability based on high-field MRI data (Neurospin, CEA Saclay, Bâtiment 145, 91191 Gif-sur-Yvette Cedex - France);Science Information Technology and Engineering (Suite 9, Greenhill Enterprise Centre Ballarat 3350 - Australia);University of Washington [Seattle] (Seattle, Washington 98105 - United States);Systems and Modeling Research Unit (Institute Montefiore (B28, P32) Grande Traverse, 10 Sart-Tilman B-4000 Liège, Belgium. - Belgium);Samsung Electronics Research Institute (Communications House, South Street Staines, Middlesex TW18 4QE - United Kingdom);PARIETAL - Modelling brain structure, function and variability based on high-field MRI data (Neurospin, CEA Saclay, Bâtiment 145, 91191 Gif-sur-Yvette Cedex - France)",,"Scikit-learn is an increasingly popular machine learning li- brary. Written in Python, it is designed to be simple and efficient, accessible to non-experts, and reusable in various contexts. 
In this paper, we present and discuss our design choices for the application programming interface (API) of the project. In particular, we describe the simple and elegant interface shared by all learning and processing units in the library and then discuss its advantages in terms of composition and reusability. The paper also comments on implementation details specific to the Python ecosystem and analyzes obstacles faced by users and developers of the library.",,,,,Python (programming language);Computer science;Reusability;Software engineering;Application programming interface;Software;Programming language;Interface (matter);Operating system,NL;BE;JP;FR;DE;RO;US;GB,"Chang C, 2011, ACM TRANSACTIONS ON INTELLIGENT SYSTEMS AND TECHNOLOGY;Hunter J, 2007, COMPUTING IN SCIENCE & ENGINEERING;Hall M, 2009, ACM SIGKDD EXPLORATIONS NEWSLETTER;Walt S, 2011, COMPUTING IN SCIENCE & ENGINEERING;Bergstra J, 2012, ;Pérez F, 2007, COMPUTING IN SCIENCE & ENGINEERING;Rek R, 2010, ;Dagum L, 1998, IEEE COMPUTATIONAL SCIENCE AND ENGINEERING;Behnel S, 2010, COMPUTING IN SCIENCE & ENGINEERING;Gansner E, 2000, SOFTWARE PRACTICE AND EXPERIENCE;Weinberger K, 2009, ;Demšar J, 2004, LECTURE NOTES IN COMPUTER SCIENCE;VanderPlas J, 2012, ;Guazzelli A, 2009, THE R JOURNAL;Blondel M, 2013, MACHINE LEARNING;Seibel P, 2009, ;Pedregosa F, 2012, ARXIV (CORNELL UNIVERSITY)",,,OPENALEX,"Buitinck L, 2013, ARXIV (CORNELL UNIVERSITY)","Buitinck L, 2013, ARXIV (CORNELL UNIVERSITY)" +https://openalex.org/W2165698076,10.1109/tkde.2009.191,A Survey on Transfer Learning,2009,en,article,23129,IEEE TRANSACTIONS ON KNOWLEDGE AND DATA ENGINEERING,IEEE Transactions on Knowledge and Data Engineering,Sinno Jialin Pan;Qiang Yang,Sinno Jialin Pan;Qiang Yang,"Department of Computer Science and Engineering, Hong Kong University of Science and Technology, Hong Kong, China;Department of Computer Science and Engineering, Hong Kong University of Science and Technology, Hong Kong, China",,"A major assumption in many machine 
learning and data mining algorithms is that the training and future data must be in the same feature space and have the same distribution. However, in many real-world applications, this assumption may not hold. For example, we sometimes have a classification task in one domain of interest, but we only have sufficient training data in another domain of interest, where the latter data may be in a different feature space or follow a different data distribution. In such cases, knowledge transfer, if done successfully, would greatly improve the performance of learning by avoiding much expensive data-labeling efforts. In recent years, transfer learning has emerged as a new learning framework to address this problem. This survey focuses on categorizing and reviewing the current progress on transfer learning for classification, regression, and clustering problems. In this survey, we discuss the relationship between transfer learning and other related machine learning techniques such as domain adaptation, multitask learning and sample selection bias, as well as covariate shift. 
We also explore some potential future issues in transfer learning research.",22,10,1345,1359,Transfer of learning;Computer science;Inductive transfer;Machine learning;Artificial intelligence;Multi-task learning;Cluster analysis;Semi-supervised learning;Feature (linguistics);Domain (mathematical analysis);Instance-based learning;Online machine learning;Feature vector;Task (project management);Robot learning,HK,"Blum A, 1998, ;Nigam K, 2000, MACHINE LEARNING;Joachims T, 1999, ;Richardson M, 2006, MACHINE LEARNING;Lee H, 2007, THE MIT PRESS EBOOKS;Dai W, 2007, ;Shimodaira H, 2000, JOURNAL OF STATISTICAL PLANNING AND INFERENCE;Huang J, 2007, THE MIT PRESS EBOOKS;Raina R, 2007, ;Yang Q, 2006, INTERNATIONAL JOURNAL OF INFORMATION TECHNOLOGY & DECISION MAKING;Bonilla E, 2007, EDINBURGH RESEARCH EXPLORER;Daumé H, 2006, JOURNAL OF ARTIFICIAL INTELLIGENCE RESEARCH;Zadrozny B, 2004, ;Jiang J, 2007, INSTITUTIONAL KNOWLEDGE (INK) - INSTITUTIONAL KNOWLEDGE AT SINGAPORE MANAGEMENT UNIVERSITY (SINGAPORE MANAGEMENT UNIVERSITY);Sugiyama M, 2007, ;Pan S, 2008, RARE & SPECIAL E-ZONE (THE HONG KONG UNIVERSITY OF SCIENCE AND TECHNOLOGY);Ben-David S, 2003, LECTURE NOTES IN COMPUTER SCIENCE;Bickel S, 2007, ;Blitzer J, 2007, ;Li B, 2009, ;Li B, 2009, ;Lawrence N, 2004, ;Dai W, 2007, ;Gao J, 2008, ;Dai W, 2007, ;Wu P, 2004, ;Dai W, 2008, ;Jebara T, 2004, ;Arnold A, 2007, ;Liao X, 2005, ;Xue G, 2008, ;Yin J, 2005, ;Kuncheva L, 2007, IEEE TRANSACTIONS ON KNOWLEDGE AND DATA ENGINEERING;Pan S, 2010, IEEE TRANSACTIONS ON NEURAL NETWORKS;Shi X, 2008, LECTURE NOTES IN COMPUTER SCIENCE;Xiao L, 2008, ;Xiao L, 2008, ;Pan S, 2007, NATIONAL CONFERENCE ON ARTIFICIAL INTELLIGENCE;Yang Q, 2008, IEEE INTELLIGENT SYSTEMS;Mihalkova L, 2008, ;Wang Z, 2008, LECTURE NOTES IN COMPUTER SCIENCE;Yin X, 2006, IEEE TRANSACTIONS ON KNOWLEDGE AND DATA ENGINEERING;Argyriou A, 2008, LECTURE NOTES IN COMPUTER SCIENCE;Ramon J, 2007, LECTURE NOTES IN COMPUTER SCIENCE;Eaton E, 2008, LECTURE NOTES IN COMPUTER SCIENCE;Pan S, 
2008, ;Zhu X, 2006, IEEE TRANSACTIONS ON KNOWLEDGE AND DATA ENGINEERING;Zhuo H, 2008, LECTURE NOTES IN COMPUTER SCIENCE;Daumé H, 2009, ARXIV.ORG;Wu Y, 1999, TECHNOMETRICS;Caruana R, 1997, MACHINE LEARNING;Wu X, 2007, KNOWLEDGE AND INFORMATION SYSTEMS;Zhu X, 2005, MINDS AT UW (UNIVERSITY OF WISCONSIN);Blitzer J, 2007, ;Ben-David S, 2007, THE MIT PRESS EBOOKS;Blitzer J, 2006, ;, 2008, THE MIT PRESS EBOOKS;Evgeniou T, 2004, ;Argyriou A, 2007, THE MIT PRESS EBOOKS;Caruana R, 1998, ;Daum H, 2007, ;Huang J, 2007, ANU OPEN RESEARCH (AUSTRALIAN NATIONAL UNIVERSITY);Rosenstein M, 2005, ;Raina R, 2006, ;Mihalkova L, 2007, ;Wang C, 2008, ;Davis J, 2009, ;Fung G, 2006, IEEE TRANSACTIONS ON KNOWLEDGE AND DATA ENGINEERING;Ando R, 2005, ;Argyriou A, 2007, ;Taylor M, 2007, ;Arnold A, 2007, ;Schwaighofer A, 2004, ;Lee S, 2007, ;Raykar V, 2008, ;Zheng V, 2008, ;Zheng V, 2008, ;Mahmud M, 2007, ILLINOIS DIGITAL ENVIRONMENT FOR ACCESS TO LEARNING AND SCHOLARSHIP (UNIVERSITY OF ILLINOIS AT URBANA-CHAMPAIGN);Baralis E, 2008, IEEE TRANSACTIONS ON KNOWLEDGE AND DATA ENGINEERING;Al-Mubaid H, 2006, IEEE TRANSACTIONS ON KNOWLEDGE AND DATA ENGINEERING;Pan S, 2008, NATIONAL CONFERENCE ON ARTIFICIAL INTELLIGENCE;Kuhlmann G, 2007, LECTURE NOTES IN COMPUTER SCIENCE;Yang Q, 2006, IEEE TRANSACTIONS ON KNOWLEDGE AND DATA ENGINEERING;Sarinnapakorn K, 2007, IEEE TRANSACTIONS ON KNOWLEDGE AND DATA ENGINEERING;Fan W, 2006, ;Xing D, 2007, LECTURE NOTES IN COMPUTER SCIENCE;Rückert U, 2008, LECTURE NOTES IN COMPUTER SCIENCE;Ramachandran S, 1998, ;Fabio E, 2004, ITALICA",,,OPENALEX,"Pan S, 2009, IEEE TRANSACTIONS ON KNOWLEDGE AND DATA ENGINEERING","Pan S, 2009, IEEE TRANSACTIONS ON KNOWLEDGE AND DATA ENGINEERING" +https://openalex.org/W2125908420,10.1007/s10994-010-5188-5,The security of machine learning,2010,en,article,842,MACHINE LEARNING,Machine Learning,Marco Barreno;Blaine Nelson;Anthony D. Joseph;J. D. Tygar,Marco Barreno;Blaine Nelson;Anthony D. Joseph;J. D. 
Tygar,"Computer Science Division, University of California, Berkeley, CA, 94720-1776, USA;Computer Science Division, University of California, Berkeley, USA 94720-1776#TAB#;Computer Science Division, University of California, Berkeley, CA, 94720-1776, USA;Computer Science Division, University of California, Berkeley, USA 94720-1776#TAB#;Computer Science Division, University of California, Berkeley, CA, 94720-1776, USA;Computer Science Division, University of California, Berkeley, USA 94720-1776#TAB#;Computer Science Division, University of California, Berkeley, CA, 94720-1776, USA;Computer Science Division, University of California, Berkeley, USA 94720-1776#TAB#","Marco Barreno (corresponding author), Computer Science Division, University of California, Berkeley, CA, 94720-1776, USA; Computer Science Division, University of California, Berkeley, USA 94720-1776#TAB#","Machine learning’s ability to rapidly evolve to changing and complex situations has helped it become a fundamental tool for computer security. That adaptability is also a vulnerability: attackers can exploit machine learning systems. We present a taxonomy identifying and analyzing attacks against machine learning systems. We show how these classes influence the costs for the attacker and defender, and we give a formal structure defining their interaction. We use our framework to survey and analyze the literature of attacks against machine learning systems. We also illustrate our taxonomy by showing how it can guide attacks against SpamBayes, a popular statistical spam filter. 
Finally, we discuss how our taxonomy suggests new lines of defenses.",81,2,121,148,Computer science;Adaptability;Exploit;Taxonomy (biology);Adversarial machine learning;Machine learning;Vulnerability (computing);Artificial intelligence;Computer security;Deep learning,US,"Shannon C, 1948, BELL SYSTEM TECHNICAL JOURNAL;Huber P, 1981, WILEY SERIES IN PROBABILITY AND STATISTICS;Valiant L, 1984, ;Ruppert D, 1987, TECHNOMETRICS;, 1947, THE AMERICAN STATISTICIAN;Valiant L, 1984, COMMUNICATIONS OF THE ACM;Cesa‐Bianchi N, 2006, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Jennison C, 1987, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES A (GENERAL);Maronna R, 2006, ;Maronna R, 2006, WILEY SERIES IN PROBABILITY AND STATISTICS;Dalvi N, 2004, ;Barreno M, 2006, ;Moore D, 2006, ACM TRANSACTIONS ON COMPUTER SYSTEMS;Lowd D, 2005, ;Wagner D, 2002, ;Newsome J, 2005, ;Klimt B, 2004, CONFERENCE ON EMAIL AND ANTI-SPAM;Mahoney M, 2003, LECTURE NOTES IN COMPUTER SCIENCE;Wagner D, 2004, ;Kearns M, 1993, SIAM JOURNAL ON COMPUTING;Nelson B, 2008, EDINBURGH RESEARCH EXPLORER (UNIVERSITY OF EDINBURGH);Globerson A, 2006, ;Wang K, 2006, LECTURE NOTES IN COMPUTER SCIENCE;Lowd D, 2005, CONFERENCE ON EMAIL AND ANTI-SPAM;Newsome J, 2006, LECTURE NOTES IN COMPUTER SCIENCE;Wittel G, 2004, ;Valiant L, 1985, ;Mosteller F, 1948, THE AMERICAN STATISTICIAN;Fogla P, 2006, ;Christmann A, 2003, TECHNISCHE UNIVERSITÄT DORTMUND ELDORADO (TECHNISCHE UNIVERSITÄT DORTMUND);Tan K, 2002, LECTURE NOTES IN COMPUTER SCIENCE;Chung S, 2006, LECTURE NOTES IN COMPUTER SCIENCE;Tan K, 2002, LECTURE NOTES IN COMPUTER SCIENCE;Dredze M, 2007, CONFERENCE ON EMAIL AND ANTI-SPAM;Tan K, 2005, ;Meyer T, 2004, CONFERENCE ON EMAIL AND ANTI-SPAM;Robinson G, 2003, LINUX JOURNAL;Wang Z, 2007, ;Sculley D, 2006, ;Cormack G, 2005, ;Kim H, 2004, ;Chung S, 2007, LECTURE NOTES IN COMPUTER SCIENCE;Kolmogorov A, 2010, KLUWER ACADEMIC EBOOKS;Shiryayev A, 2010, ;Abu‐Mostafa Y, 1988, ;Yao A, 1998, ;Moore D, 2001, ",,,OPENALEX,"Barreno M, 2010, MACHINE 
LEARNING","Barreno M, 2010, MACHINE LEARNING" +https://openalex.org/W1566376227,10.1109/anziis.1994.396988,WEKA: a machine learning workbench,2002,en,article,936,,,Geoffrey Holmes;A. Donkin;Ian H. Witten,G. Holmes;A. Donkin;I.H. Witten,"Department of Computer Science, University of Waikato, Hamilton, New Zealand;Department of Computer Science, Waikato University, Hamilton, New Zealand;Department of Computer Science, University of Waikato, Hamilton, New Zealand;Department of Computer Science, Waikato University, Hamilton, New Zealand;Department of Computer Science, University of Waikato, Hamilton, New Zealand;Department of Computer Science, Waikato University, Hamilton, New Zealand",,"WEKA is a workbench for machine learning that is intended to aid in the application of machine learning techniques to a variety of real-world problems, in particular, those arising from agricultural and horticultural domains. Unlike other machine learning projects, the emphasis is on providing a working environment for the domain specialist rather than the machine learning expert. 
Lessons learned include the necessity of providing a wealth of interactive tools for data manipulation, result visualization, database linkage, and cross-validation and comparison of rule sets, to complement the basic machine learning tools.< >",,,357,361,Workbench;Computer science;Machine learning;Complement (music);Artificial intelligence;Domain (mathematical analysis);Visualization;Linkage (software);Variety (cybernetics);Software engineering,NZ,"Quinlan J, 1992, ;Salzberg S, 1994, ;Fisher D, 1987, MACHINE LEARNING;Fisher D, 1987, MACHINE LEARNING;Cendrowska J, 1987, INTERNATIONAL JOURNAL OF MAN-MACHINE STUDIES;Quinlan J, 1993, LECTURE NOTES IN COMPUTER SCIENCE;Cheeseman P, 1988, ELSEVIER EBOOKS;Tierney L, 1990, WILEY SERIES IN PROBABILITY AND STATISTICS;Kohavi R, 2002, ;Murthy S, 1993, ;Porter B, 1990, ARTIFICIAL INTELLIGENCE;Ousterhout J, 1994, ADDISON-WESLEY EBOOKS;Mooney R, 1995, MACHINE LEARNING;Mooney R, 1995, MACHINE LEARNING;Monk T, 1994, ;Nilsson N, 1995, ",,,OPENALEX,"Holmes G, 2002, ","Holmes G, 2002, " +https://openalex.org/W2098740506,10.1093/bib/bbk007,Machine learning in bioinformatics,2006,en,review,865,BRIEFINGS IN BIOINFORMATICS,Briefings in Bioinformatics,Pedro Larrañaga;Borja Calvo;Roberto Santana;Concha Bielza;Josu Galdiano;Iñaki Inza;José A. Lozano;Rubén Armañanzas;Guzmán Santafé;Aritz Pérez;Vı́ctor Robles,Pedro Larrañaga;Borja Calvo;Roberto Santana;Concha Bielza;Josu Galdiano;Iñaki Inza;José A. Lozano;Rubén Armañanzas;Guzmán Santafé;Aritz Pérez;Victor Robles,"Borja Calvo received MS in Biochemistry in 1999 and Bachelor degree in Computer Science in 2004, both from the University of the Basque Country. Currently he is a PhD student at the University of the Basque Country and a member of the Intelligent Systems Group. His research interests include machine learning methods applied to bioinformatics;Roberto Santana received PhD in Mathematics from the University of Havana in 2005. 
At present, he is at the University of the Basque Country as a member of the Intelligent Systems Group. His research interests include estimation of distribution algorithms and bioinformatics;Concha Bielza received her MS degree in Mathematics in 1989 from Complutense University, Madrid, and PhD in Computer Science in 1996 from Technical University of Madrid, Madrid. She is an Associate Professor of Statistics and Operation Research in the School of Computer Science at Madrid Technical University. Her research interests are primarily in the areas of probabilistic graphical models, dec;Josu Galdiano is currently doing his MS in Computer Science at the University of the Basque Country. His research interests include machine learning methods applied to bioinformatics;Iñaki Inza is a Lecturer at the Intelligent Systems Group of the University of the Basque Country. His research interests include data mining and search heuristics in general, with special focus on probabilistic graphical models and bioinformatic applications;José A. Lozano received his BS degrees in Mathematics and Computer Science and the PhD degree from the University of the Basque Country, Spain in 1991, 1992 and 1998, respectively. Since 1999, he has been an Associate Professor of Computer Science at the University of the Basque Country. He has edited three books and has published over 25 refereed journal papers. His main research interests are e;Rubén Armañanzas received his MS in Computer Science from the University of the Basque Country in 2004. At present, he is a PhD student and member of the Intelligent Systems Group. His research interests include feature selection, computational biology and bioinformatics;Guzmán Santafé received his MS in Computer Science from the University of the Basque Country in 2002. At present, he is a PhD student at the University of the Basque Country and member of the Intelligent Systems Group. 
His research interests include machine learning techniques applied to bioinformatics;Aritz Pérez received her Computer Science degree from the University of the Basque Country. He is currently pursuing PhD in Computer Science in the Department of Computer Science and Artificial Intelligence. His research interests include machine learning, data mining and bioinformatics. Currently, he is working on supervised classification using Bayesian networks, variable selection and density;Victor Robles received the MS degree in Computer Engineering and PhD from the Universidad Politécnica de Madrid, in 1998 and 2003, respectively. During 2004, he was a postdoctoral researcher at Harvard Medical School. He is currently an associate professor in the Department of Computer Systems Architecture and Technology at the Universidad Politécnica de Madrid. His research interests include bio;Pedro Larrañaga is Professor of Computer Science and Artificial Intelligence at the University of the Basque Country. He received MS degree in mathematics from the University of Valladolid in 1981, and PhD in computer science from the University of the Basque Country in 1995. He has published over 40 refereed journal papers. His main research interests are in the areas of evolutionary computation",,"This article reviews machine learning methods for bioinformatics. It presents modelling methods, such as supervised classification, clustering and probabilistic graphical models for knowledge discovery, as well as deterministic and stochastic heuristics for optimization. 
Applications in genomics, proteomics, systems biology, evolution and text mining are also shown.",7,1,86,112,Computer science;Heuristics;Cluster analysis;Graphical model;Artificial intelligence;Machine learning;Probabilistic logic;Genomics;Bioinformatics;Data mining;Biology;Genome,ES;US;FR;PR,"Rumelhart D, 1986, NATURE;Hastie T, 2001, SPRINGER SERIES IN STATISTICS;Goldberg D, 1988, ;Koza J, 1992, MEDICAL ENTOMOLOGY AND ZOOLOGY;Wolpert D, 1992, NEURAL NETWORKS;Gersho A, 1992, ;Schölkopf B, 1999, ;Salzberg S, 1994, ;Devroye L, 1996, STOCHASTIC MODELLING AND APPLIED PROBABILITY;Vapnik V, 1995, MEDICAL ENTOMOLOGY AND ZOOLOGY;Liu H, 1998, ;Forgy E, 1965, BIOMETRICS;Larraanaga P, 2001, KLUWER ACADEMIC PUBLISHERS EBOOKS;Cowell R, 1999, INFORMATION SCIENCE AND STATISTICS;Huber P, 1997, SPRINGER SERIES IN STATISTICS;Dawid A, 1979, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Narendra, 1977, IEEE TRANSACTIONS ON COMPUTERS;Vert J, 2004, THE MIT PRESS EBOOKS;Xing E, 2001, ;Pevzner P, 2000, THE MIT PRESS EBOOKS;, 2001, THE MIT PRESS EBOOKS;Ananiadou S, 2005, ;Sahami M, 1996, ;Murphy K, 2006, ;Spirtes P, 2018, FIGSHARE;Pazzani M, 1996, LECTURE NOTES IN STATISTICS;Fogel G, 2007, JOURNAL OF INTELLIGENT & FUZZY SYSTEMS;Jiang T, 2002, ;Husmeier D, 2005, ;Ben-Bassat M, 1982, HANDBOOK OF STATISTICS;Istrail S, 2000, MEDICAL ENTOMOLOGY AND ZOOLOGY;Inza I, 2002, JOURNAL OF INTELLIGENT & FUZZY SYSTEMS;Hwang K, 2002, ;Smith J, 2006, STUDIES IN FUZZINESS AND SOFT COMPUTING;Fogel G, 2003, ELSEVIER EBOOKS;Hong Y, 2003, BMC BIOINFORMATICS;1967- S, 2005, STUDIES IN FUZZINESS AND SOFT COMPUTING;, 2002, BIOTECH SOFTWARE & INTERNET REPORT;Nguyen H, 2002, PUBMED;Santana R, 2004, LECTURE NOTES IN COMPUTER SCIENCE;Ando S, 2003, ;Frasconi P, 2003, MEDICAL ENTOMOLOGY AND ZOOLOGY;Lamont G, 2003, ELSEVIER EBOOKS;Baldi P, 1998, ;Sheng Q, 2005, ;Larrañaga P, 2005, ;Bhandarkar S, 2003, ;Husmeier D, 2005, ADVANCED INFORMATION AND KNOWLEDGE PROCESSING;Shi W, 2005, ;Dempster A, 
1977, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Kirkpatrick S, 1983, SCIENCE;Freund Y, 1997, JOURNAL OF COMPUTER AND SYSTEM SCIENCES;McCulloch W, 1943, BULLETIN OF MATHEMATICAL BIOLOGY;Fisher R, 1936, ANNALS OF EUGENICS;Green D, 1966, ;Kohavi R, 1997, ARTIFICIAL INTELLIGENCE;Friedman N, 1997, MACHINE LEARNING;Jensen F, 2007, INFORMATION SCIENCE AND STATISTICS;Durbin R, 1998, ;Pudil P, 1994, PATTERN RECOGNITION LETTERS;Dudoit S, 2002, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Fix E, 1989, INTERNATIONAL STATISTICAL REVIEW;Fiser A, 2000, PROTEIN SCIENCE;Efron B, 1983, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Cooper G, 1990, ARTIFICIAL INTELLIGENCE;White B, 1963, THE AMERICAN JOURNAL OF PSYCHOLOGY;Orbach J, 1962, ARCHIVES OF GENERAL PSYCHIATRY;Leunissen J, 2002, BRIEFINGS IN BIOINFORMATICS;Wang Y, 2005, COMPUTATIONAL BIOLOGY AND CHEMISTRY;Vagis A, 2002, JOURNAL OF AUTOMATION AND INFORMATION SCIENCES;Shachter R, 1989, MANAGEMENT SCIENCE;Inza I, 2000, ARTIFICIAL INTELLIGENCE;Ritchie M, 2003, BMC BIOINFORMATICS;Tufféry P, 1991, JOURNAL OF BIOMOLECULAR STRUCTURE AND DYNAMICS;Liang F, 2001, THE JOURNAL OF CHEMICAL PHYSICS;Wang R, 2005, COMPUTER APPLICATIONS IN THE BIOSCIENCES;Vision T, 2000, GENETICS;Meyer I, 2004, NUCLEIC ACIDS RESEARCH;Kumar S, 1996, MOLECULAR BIOLOGY AND EVOLUTION;Hsu H, 2003, PHYSICAL REVIEW. 
E, STATISTICAL PHYSICS, PLASMAS, FLUIDS, AND RELATED INTERDISCIPLINARY TOPICS;Ando S, 2002, INFORMATION SCIENCES;Böhning D, 2003, COMPUTATIONAL STATISTICS & DATA ANALYSIS;Sherlock G, 2001, BRIEFINGS IN BIOINFORMATICS;Cawley S, 2003, BIOINFORMATICS;Liew A, 2005, ;Kim K, 2004, NEUROCOMPUTING;Riaz T, 2004, ASIA-PACIFIC BIOINFORMATICS CONFERENCE;Greenspan G, 2004, BIOINFORMATICS;Christof T, 1997, JOURNAL OF COMPUTATIONAL BIOLOGY;Błażewicz J, 2005, ARTIFICIAL INTELLIGENCE IN MEDICINE;Błażewicz J, 2004, COMPUTATIONAL BIOLOGY AND CHEMISTRY;Liu Z, 2002, PROTEINS STRUCTURE FUNCTION AND BIOINFORMATICS;Koehl P, 1998, THE JOURNAL OF CHEMICAL PHYSICS;Moreira A, 2004, THEORETICAL COMPUTER SCIENCE;, 2011, SPRINGERREFERENCE;Wren J, 2004, DNA AND CELL BIOLOGY;Błażewicz J, 2005, LECTURE NOTES IN COMPUTER SCIENCE;Metropolis N, 1953, THE JOURNAL OF CHEMICAL PHYSICS;Guindon S, 2003, SYSTEMATIC BIOLOGY;Spellman P, 1998, MOLECULAR BIOLOGY OF THE CELL;Thompson M, 1974, LEONARDO;McNemar Q, 1947, PSYCHOMETRIKA;Glover F, 1986, COMPUTERS & OPERATIONS RESEARCH;Brown M, 2000, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Baldi P, 2000, BIOINFORMATICS;Krogh A, 1994, JOURNAL OF MOLECULAR BIOLOGY;Ambroise C, 2002, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Sammut C, 2017, ENCYCLOPEDIA OF MACHINE LEARNING AND DATA MINING;Michiels S, 2005, THE LANCET;Braga-Neto U, 2004, BIOINFORMATICS;Pe’er D, 2001, BIOINFORMATICS;Husmeier D, 2003, BIOINFORMATICS;Hartemink A, 2000, ;Yeang C, 2001, BIOINFORMATICS;Kimura S, 2004, BIOINFORMATICS;Bockhorst J, 2003, BIOINFORMATICS;Salamov A, 1995, JOURNAL OF MOLECULAR BIOLOGY;Looger L, 2001, JOURNAL OF MOLECULAR BIOLOGY;Bao L, 2005, COMPUTER APPLICATIONS IN THE BIOSCIENCES;Valafar F, 2002, ANNALS OF THE NEW YORK ACADEMY OF SCIENCES;Yan C, 2004, BIOINFORMATICS;Kleinbaum D, 1982, COMMUNICATION IN STATISTICS- THEORY AND METHODS;Sebban M, 2002, BIOINFORMATICS;Nariai N, 2003, ;Hautaniemi S, 2005, COMPUTER APPLICATIONS IN THE BIOSCIENCES;Saeys Y, 2004, BMC 
BIOINFORMATICS;Wit E, 2005, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES C (APPLIED STATISTICS);Yi T, 1993, JOURNAL OF MOLECULAR BIOLOGY;Koza J, 2000, ;Ellrott K, 2002, BIOINFORMATICS;Won K, 2004, BIOINFORMATICS;Błażewicz J, 2005, COMPUTER APPLICATIONS IN THE BIOSCIENCES;Durbin B, 2004, BIOINFORMATICS;Schneider T, 1996, DISCRETE APPLIED MATHEMATICS;Krishnapuram B, 2004, JOURNAL OF COMPUTATIONAL BIOLOGY;Neuwald A, 2004, BMC BIOINFORMATICS;Yang J, 2002, PROTEIN SCIENCE;Stapley B, 2001, ;Fogel G, 2002, NUCLEIC ACIDS RESEARCH;Kuncheva L, 1993, INFORMATION PROCESSING LETTERS;Gilman A, 1995, BIOPHYSICAL JOURNAL;Jung H, 2002, BIOINFORMATICS;Li H, 2005, COMPUTER APPLICATIONS IN THE BIOSCIENCES;Robles J, 2004, BIOINFORMATICS;Percus A, 1999, SYMPOSIUM ON DISCRETE ALGORITHMS;Brown D, 2000, ;Ling C, 2005, IEEE/ACM TRANSACTIONS ON COMPUTATIONAL BIOLOGY AND BIOINFORMATICS;Quinlan J, 1992, ;Efron B, 1979, THE ANNALS OF STATISTICS;Stone M, 1974, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Linde Y, 1980, IEEE TRANSACTIONS ON COMMUNICATIONS;Bagui S, 2005, TECHNOMETRICS;Tamayo P, 1999, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Hawkins D, 1993, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Friedman N, 2004, SCIENCE;Nadeau C, 2003, MACHINE LEARNING;Statnikov A, 2004, BIOINFORMATICS;Schäfer J, 2004, BIOINFORMATICS;Yeung K, 2001, BIOINFORMATICS;Li L, 2001, BIOINFORMATICS;Pan W, 2002, BIOINFORMATICS;Wu B, 2003, BIOINFORMATICS;Inza I, 2004, ARTIFICIAL INTELLIGENCE IN MEDICINE;Tan A, 2003, PUBMED;Knudsen S, 1999, BIOINFORMATICS;Troyanskaya O, 2002, BIOINFORMATICS;López-Bigas N, 2004, NUCLEIC ACIDS RESEARCH;Huang Y, 2003, BIOINFORMATICS;Steffen M, 2002, BMC BIOINFORMATICS;Sakamoto E, 2002, ;Perner P, 2002, ARTIFICIAL INTELLIGENCE IN MEDICINE;Tamada Y, 2003, BIOINFORMATICS;Krallinger M, 2005, DRUG DISCOVERY TODAY;Hirosawa M, 1995, COMPUTER APPLICATIONS IN THE BIOSCIENCES;Degroeve S, 2002, BIOINFORMATICS;Cai D, 2000, BIOINFORMATICS;Bryan K, 2005, 
;Middendorf M, 2004, BIOINFORMATICS;Peña J, 2005, BIOINFORMATICS;Satten G, 2004, BIOINFORMATICS;Salzberg S, 1995, JOURNAL OF COMPUTATIONAL BIOLOGY;Wu J, 2004, BIOINFORMATICS;Husmeier D, 2003, BIOCHEMICAL SOCIETY TRANSACTIONS;Selbig J, 1999, BIOINFORMATICS;Pavlović V, 2002, BIOINFORMATICS;Ishikawa M, 1993, COMPUTER APPLICATIONS IN THE BIOSCIENCES;Glick M, 2002, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Wu X, 2003, ;Endo T, 2004, BIOINFORMATICS;Lee P, 2005, COMPUTER APPLICATIONS IN THE BIOSCIENCES;Aerts S, 2004, BIOINFORMATICS;Kim S, 2003, BIOINFORMATICS;Smith P, 1998, ;Olshen A, 2002, BIOINFORMATICS;Huang J, 2003, BIOINFORMATICS;Larrañaga P, 2004, ARTIFICIAL INTELLIGENCE IN MEDICINE;Allen J, 2004, GENOME RESEARCH;Vapnik V, 1995, ;Pearl J, 1988, ;Wolpert D, 1997, IEEE TRANSACTIONS ON EVOLUTIONARY COMPUTATION;Zuker M, 2003, NUCLEIC ACIDS RESEARCH;Fukunaga K, 1990, ELSEVIER EBOOKS;Bradley A, 1997, PATTERN RECOGNITION;Dietterich T, 1998, NEURAL COMPUTATION;Chow C, 1968, IEEE TRANSACTIONS ON INFORMATION THEORY;Herrero J, 2001, BIOINFORMATICS;Mathé C, 2002, NUCLEIC ACIDS RESEARCH;Geiger D, 1994, ELSEVIER EBOOKS;Alpaydm E, 1999, NEURAL COMPUTATION;Kikuchi S, 2003, BIOINFORMATICS;Lee J, 2004, COMPUTATIONAL STATISTICS & DATA ANALYSIS;Ooi C, 2002, BIOINFORMATICS;Segal E, 2001, BIOINFORMATICS;Sheng Q, 2003, BIOINFORMATICS;Kittler J, 1978, ;Jarvis R, 2004, BIOINFORMATICS;Lesh N, 2003, ;Barker D, 2004, BIOINFORMATICS;Shmulevich I, 2002, BIOINFORMATICS;Ong I, 2002, BIOINFORMATICS;Nachman I, 2004, BIOINFORMATICS;Carter R, 2001, NUCLEIC ACIDS RESEARCH;Smet F, 2002, BIOINFORMATICS;Lukashin A, 2001, BIOINFORMATICS;Pollastri G, 2002, BIOINFORMATICS;Hsu H, 2002, THE JOURNAL OF CHEMICAL PHYSICS;Fu W, 2005, COMPUTER APPLICATIONS IN THE BIOSCIENCES;Li L, 2004, BIOINFORMATICS;Zhou G, 2005, BMC BIOINFORMATICS;Imoto S, 2004, ;Baumgärtner C, 2004, BIOINFORMATICS;Noman N, 2005, ;Saeys Y, 2003, BIOINFORMATICS;MacCallum R, 2004, BIOINFORMATICS;Jacob E, 2004, COMPUTER APPLICATIONS IN THE 
BIOSCIENCES;Sima C, 2004, BIOINFORMATICS;Keith J, 2002, BIOINFORMATICS;Imoto S, 2002, PROCEEDINGS GENOME INFORMATICS WORKSHOP/GENOME INFORMATICS;Castelo R, 2004, BIOINFORMATICS;Raval A, 2002, BIOINFORMATICS;Ribeiro C, 2005, INTERNATIONAL TRANSACTIONS IN OPERATIONAL RESEARCH;Jojic V, 2004, BIOINFORMATICS;Sugimoto N, 2004, PUBMED;Hoon M, 2004, BIOINFORMATICS;Moreau Y, 2002, PROCEEDINGS OF THE IEEE;Leone M, 2004, BIOINFORMATICS;Kim J, 1996, COMPUTER APPLICATIONS IN THE BIOSCIENCES;Breiman L, 2001, MACHINE LEARNING;, 1989, CHOICE REVIEWS ONLINE;Seel N, 2012, ;Breiman L, 1996, MACHINE LEARNING;Breiman L, 1996, MACHINE LEARNING;Duda R, 1973, ;, 2002, THE MIT PRESS EBOOKS;Ingleby J, 1967, JOURNAL OF SOUND AND VIBRATION;Hitchcock C, 2001, THE PHILOSOPHICAL REVIEW;Abe S, 2001, ;Friedman N, 2000, JOURNAL OF COMPUTATIONAL BIOLOGY;Durbin R, 1998, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Kuncheva L, 2004, ;, 2000, CHOICE REVIEWS ONLINE;Lauritzen S, 1996, ;, 2001, ;Jensen F, 2001, ;Webb A, 2002, ;Pedro L, 2002, GENETIC ALGORITHMS AND EVOLUTIONARY COMPUTATION;McLachlan G, 1992, WILEY SERIES IN PROBABILITY AND STATISTICS;Ben‐Dor A, 2000, JOURNAL OF COMPUTATIONAL BIOLOGY;Jardine N, 1974, JOURNAL OF MARKETING RESEARCH;Granada P, 2002, LECTURE NOTES IN COMPUTER SCIENCE;Larrañaga P, 1996, IEEE TRANSACTIONS ON SYSTEMS MAN AND CYBERNETICS - PART A SYSTEMS AND HUMANS;Chen T, 2001, JOURNAL OF COMPUTATIONAL BIOLOGY;Geary D, 1989, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES A (STATISTICS IN SOCIETY);, 2003, ELSEVIER EBOOKS;Lee C, 1991, JOURNAL OF MOLECULAR BIOLOGY;Park L, 1997, MEDICAL & BIOLOGICAL ENGINEERING & COMPUTING;Au T, 2003, ;Fávero L, 2023, ELSEVIER EBOOKS;Maeyer M, 2003, HUMANA PRESS EBOOKS;Chang J, 2005, KLUWER ACADEMIC PUBLISHERS EBOOKS;Ritchie M, 2019, CAROLINA DIGITAL REPOSITORY (UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL);, 2000, ;Falkenauer E, 2003, ELSEVIER EBOOKS;Fix E, 1951, PSYCEXTRA DATASET;Chickering D, 2013, ARXIV (CORNELL UNIVERSITY);Ashlock D, 2003, ELSEVIER 
EBOOKS",,,OPENALEX,"Larrañaga P, 2006, BRIEFINGS IN BIOINFORMATICS","Larrañaga P, 2006, BRIEFINGS IN BIOINFORMATICS" +https://openalex.org/W4232533539,10.1007/978-981-15-1967-3,Machine Learning,2021,en,book,767,,,Zhi‐Hua Zhou,Zhi-Hua Zhou,"Nanjing University, Nanjing, China","Zhi-Hua Zhou (corresponding author), Nanjing University, Nanjing, China",,,,,,Unsupervised learning;Computer science;Artificial intelligence;Semi-supervised learning;Machine learning;Active learning (machine learning);Reinforcement learning;Algorithmic learning theory;Supervised learning;Error-driven learning;Artificial neural network,CN,"Zhou Z, 2021, MACHINE LEARNING;Zhou Z, 2021, MACHINE LEARNING",,,OPENALEX,"Zhou Z, 2021, ","Zhou Z, 2021, " +https://openalex.org/W3025104221,10.1038/s41586-020-2242-8,Accelerated discovery of CO2 electrocatalysts using active machine learning,2020,en,article,1498,NATURE,Nature,Miao Zhong;Kevin Tran;Yimeng Min;Chuanhao Wang;Ziyun Wang;Cao‐Thang Dinh;Phil De Luna;Zongqian Yu;Armin Sedighian Rasouli;Peter Brodersen;Song Sun;Oleksandr Voznyy;Chih‐Shan Tan;Mikhail Askerka;Fanglin Che;Min Liu;Ali Seifitokaldani;Yuanjie Pang;Shen-Chuan Lo;Alexander H. Ip;Zachary W. Ulissi;Edward H. Sargent,Miao Zhong;Kevin Tran;Yimeng Min;Chuanhao Wang;Ziyun Wang;Cao-Thang Dinh;Phil De Luna;Zongqian Yu;Armin Sedighian Rasouli;Peter Brodersen;Song Sun;Oleksandr Voznyy;Chih-Shan Tan;Mikhail Askerka;Fanglin Che;Min Liu;Ali Seifitokaldani;Yuanjie Pang;Shen-Chuan Lo;Alexander Ip;Zachary Ulissi;Edward H. 
Sargent,"College of Engineering and Applied Sciences, National Laboratory of Solid State Microstructures, Collaborative Innovation Center of Advanced Microstructure, Nanjing University, Nanjing, China;Department of Electrical and Computer Engineering, University of Toronto, Toronto, Ontario, Canada;Chemical Engineering, Carnegie Mellon University, Pittsburgh, PA, USA;Department of Electrical and Computer Engineering, University of Toronto, Toronto, Ontario, Canada;Department of Electrical and Computer Engineering, University of Toronto, Toronto, Ontario, Canada;Department of Electrical and Computer Engineering, University of Toronto, Toronto, Ontario, Canada;Department of Electrical and Computer Engineering, University of Toronto, Toronto, Ontario, Canada;Materials Science Engineering, University of Toronto, Toronto, Ontario, Canada;National Research Council of Canada, Ottawa, Ontario, Canada;Chemical Engineering, Carnegie Mellon University, Pittsburgh, PA, USA;Department of Electrical and Computer Engineering, University of Toronto, Toronto, Ontario, Canada;Ontario Centre for Characterization of Advanced Materials (OCCAM), University of Toronto, Toronto, Ontario, Canada;National Synchrotron Radiation Laboratory, University of Science and Technology of China, Hefei, China;Department of Electrical and Computer Engineering, University of Toronto, Toronto, Ontario, Canada;Department of Electrical and Computer Engineering, University of Toronto, Toronto, Ontario, Canada;Department of Electrical and Computer Engineering, University of Toronto, Toronto, Ontario, Canada;Department of Electrical and Computer Engineering, University of Toronto, Toronto, Ontario, Canada;Department of Electrical and Computer Engineering, University of Toronto, Toronto, Ontario, Canada;Department of Electrical and Computer Engineering, University of Toronto, Toronto, Ontario, Canada;Department of Electrical and Computer Engineering, University of Toronto, Toronto, Ontario, Canada;Industrial 
Technology Research Institute, Material and Chemical Research Laboratories, Hsinchu, Taiwan;Department of Electrical and Computer Engineering, University of Toronto, Toronto, Ontario, Canada;Chemical Engineering, Carnegie Mellon University, Pittsburgh, PA, USA. zulissi@andrew.cmu.edu;Chemical Engineering, Carnegie Mellon University, Pittsburgh, PA, USA;Department of Electrical and Computer Engineering, University of Toronto, Toronto, Ontario, Canada. ted.sargent@utoronto.ca;Department of Electrical and Computer Engineering, University of Toronto, Toronto, Ontario, Canada","Edward H. Sargent (corresponding author), Department of Electrical and Computer Engineering, University of Toronto, Toronto, Ontario, Canada. ted.sargent@utoronto.ca; Department of Electrical and Computer Engineering, University of Toronto, Toronto, Ontario, Canada",,581,7807,178,183,Faraday efficiency;Renewable energy;Electrocatalyst;Materials science;Ethylene glycol;Electrochemistry;Fossil fuel;Ethylene;Electrode;Nanotechnology;Chemical engineering;Chemistry;Catalysis;Electrical engineering;Organic chemistry,CA;CN;US;TW,"Jain A, 2013, APL MATERIALS;Seh Z, 2017, SCIENCE;Nørskov J, 2005, JOURNAL OF THE ELECTROCHEMICAL SOCIETY;Peterson A, 2010, ENERGY & ENVIRONMENTAL SCIENCE;Luna P, 2019, SCIENCE;Lin S, 2015, SCIENCE;Dinh C, 2018, SCIENCE;Maaten L, 2014, ;Kortlever R, 2015, THE JOURNAL OF PHYSICAL CHEMISTRY LETTERS;Liu M, 2016, NATURE;Gao S, 2016, NATURE;Li C, 2014, NATURE;Lu Q, 2014, NATURE COMMUNICATIONS;Whipple D, 2010, THE JOURNAL OF PHYSICAL CHEMISTRY LETTERS;Mistry H, 2016, NATURE COMMUNICATIONS;Luna P, 2018, NATURE CATALYSIS;Tran K, 2018, NATURE CATALYSIS;Liu X, 2017, NATURE COMMUNICATIONS;Hori Y, 2001, THE JOURNAL OF PHYSICAL CHEMISTRY B;Xiao H, 2017, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Schreier M, 2017, NATURE ENERGY;Wang L, 2018, ACS CATALYSIS;Li Y, 2017, NANO LETTERS;Xiao H, 2016, JOURNAL OF THE AMERICAN CHEMICAL SOCIETY;Lum Y, 2017, THE JOURNAL OF PHYSICAL CHEMISTRY C;Li Y, 
2016, ACS ENERGY LETTERS;Zeng Z, 2017, NATURE ENERGY;Jeanty P, 2018, JOURNAL OF CO2 UTILIZATION;Larrazábal G, 2017, THE JOURNAL OF PHYSICAL CHEMISTRY LETTERS;Xiao Q, 2017, PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES;Yano H, 2003, JOURNAL OF ELECTROANALYTICAL CHEMISTRY;Montoya J, 2017, NPJ COMPUTATIONAL MATERIALS;Persson K, 2012, PHYSICAL REVIEW B",,,OPENALEX,"Zhong M, 2020, NATURE","Zhong M, 2020, NATURE" +https://openalex.org/W3118299338,10.38094/jastt1457,A Review on Linear Regression Comprehensive in Machine Learning,2020,en,review,1210,JOURNAL OF APPLIED SCIENCE AND TECHNOLOGY TRENDS,Journal of Applied Science and Technology Trends,Dastan Hussen Maulud;Adnan Mohsin Abdulazeez,Dastan Maulud;Adnan M. Abdulazeez,"Duhok Polytechnic University, Duhok, Kurdistan Region, Iraq;Presidency of Duhok Polytechnic University, Duhok, Kurdistan Region, Iraq",,"Perhaps one of the most common and comprehensive statistical and machine learning algorithms are linear regression. Linear regression is used to find a linear relationship between one or more predictors. The linear regression has two types: simple regression and multiple regression (MLR). This paper discusses various works by different researchers on linear regression and polynomial regression and compares their performance using the best approach to optimize prediction and precision. 
Almost all of the articles analyzed in this review is focused on datasets; in order to determine a model's efficiency, it must be correlated with the actual values obtained for the explanatory variables.",1,2,140,147,Proper linear model;Linear regression;Polynomial regression;Regression diagnostic;Regression analysis;Simple linear regression;Linear predictor function;Regression;Bayesian multivariate linear regression;Computer science;Linear model;Machine learning;Local regression;Multivariate adaptive regression splines;Statistics;Artificial intelligence;Cross-sectional regression;Mathematics,IQ,"Murphy K, 2012, ;Domingos P, 2012, COMMUNICATIONS OF THE ACM;Shalev‐Shwartz S, 2015, ;F S, 2013, ;Ober P, 2013, JOURNAL OF APPLIED STATISTICS;Kılıç S, 2013, JOURNAL OF MOOD DISORDERS;Xin Y, 2009, WORLD SCIENTIFIC PUBLISHING CO. PTE. LTD. EBOOKS;Montgomery D, 2007, WILEY-INTERSCIENCE EBOOKS;Wolberg J, 2005, CERN DOCUMENT SERVER (EUROPEAN ORGANIZATION FOR NUCLEAR RESEARCH);Kavitha S, 2016, ;Finlayson G, 2015, IEEE TRANSACTIONS ON IMAGE PROCESSING;Acharya M, 2019, ;Zeebaree D, 2019, ;Xue J, 2011, IEEE TRANSACTIONS ON IMAGE PROCESSING;Roopa H, 2019, IEEE ACCESS;Sreehari E, 2018, ;Kafazi I, 2017, ;Niu H, 2018, ;Zhang Z, 2019, ;Gopalakrishnan T, 2018, ;Mohammed N, 2017, ;Bakibayev T, 2018, ;Yang Y, 2018, ;Mohammed N, 2017, ;Lim H, 2019, ;Roziqin M, 2016, ;Akgün B, 2015, ;Luminto, 2017, ;Feng Q, 2015, ;Sarkar M, 2015, ;Chi H, 2015, ;Dehghan M, 2015, ;Chen Y, 2018, ;Sabnis N, 2019, ;Harimurti R, 2018, 2018 INTERNATIONAL CONFERENCE ON INFORMATION AND COMMUNICATIONS TECHNOLOGY (ICOIACT);Feng X, 2017, ;Fujita Y, 2019, IEEE TRANSACTIONS ON MAGNETICS;Peng Z, 2018, ;Wang D, 2017, ;Prasad A, 2015, ;Wang X, 2016, ;Xiong C, 2019, MATHEMATICAL MODELLING AND ENGINEERING PROBLEMS;Kwon S, 2019, ;Maeda T, 2015, ;Wei D, 2018, ;Nnachi G, 2017, ;Zhang R, 2016, ;Jie H, 2019, ;Jackson E, 2018, ;Zebari D, 2020, IEEE ACCESS;Bargarai F, 2020, INTERNATIONAL JOURNAL OF INTERACTIVE MOBILE TECHNOLOGIES 
(IJIM);Sulaiman M, 2020, INTERNATIONAL JOURNAL OF SUSTAINABLE CONSTRUCTION ENGINEERING AND TECHNOLOGY (UNIVERSITI TUN HUSSEIN ONN MALAYSIA);Abdulazeez A, 2020, INTERNATIONAL JOURNAL OF INTERACTIVE MOBILE TECHNOLOGIES (IJIM);Wu J, 2019, ;Al-Imam A, 2020, MODERN APPLIED SCIENCE;Grondin F, 2020, ",,,OPENALEX,"Maulud D, 2020, JOURNAL OF APPLIED SCIENCE AND TECHNOLOGY TRENDS","Maulud D, 2020, JOURNAL OF APPLIED SCIENCE AND TECHNOLOGY TRENDS" +https://openalex.org/W2335322260,10.1016/0022-2496(83)90037-8,Machine learning: An artificial intelligence approach,1983,en,article,1375,JOURNAL OF MATHEMATICAL PSYCHOLOGY,Journal of Mathematical Psychology,Earl Hunt,Earl Hunt,,"Earl Hunt (corresponding author), ",,27,4,456,460,Artificial intelligence;Computer science;Machine learning,,,,,OPENALEX,"Hunt E, 1983, JOURNAL OF MATHEMATICAL PSYCHOLOGY","Hunt E, 1983, JOURNAL OF MATHEMATICAL PSYCHOLOGY" +https://openalex.org/W2096352448,10.1007/s10462-007-9052-3,Machine learning: a review of classification and combining techniques,2006,en,review,1542,ARTIFICIAL INTELLIGENCE REVIEW,Artificial Intelligence Review,Sotiris Kotsiantis;Ioannis D. Zaharakis;Panagiotis Pintelas,S. B. Kotsiantis;I. D. Zaharakis;P. E. Pintelas,"Department of Computer Science and Technology, University of Peloponnese, Peloponnese, Greece;Educational Software Development Laboratory, Department of Mathematics, University of Patras, P. O. Box 1399, Patras, Greece;Computer Technology Institute, Patras, Greece;Department of Computer Science and Technology, University of Peloponnese, Peloponnese, Greece;Educational Software Development Laboratory, Department of Mathematics, University of Patras, P. O. 
Box 1399, Patras, Greece",,,26,3,159,190,Computer science;Artificial intelligence;Machine learning;Classifier (UML);One-class classification;Class (philosophy);Multilayer perceptron;Perceptron;Naive Bayes classifier;Supervised learning;Pattern recognition (psychology);Artificial neural network;Support vector machine,GR,"Witten I, 2011, ELSEVIER EBOOKS;Cristianini N, 2000, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Friedman N, 1997, MACHINE LEARNING;Cristianini N, 2000, EPRINTS SOTON (UNIVERSITY OF SOUTHAMPTON);Cohen W, 1995, ELSEVIER EBOOKS;Japkowicz N, 2002, INTELLIGENT DATA ANALYSIS;Dietterich T, 2000, MACHINE LEARNING;Jordan M, 1998, ;Frank E, 1998, RESEARCH COMMONS (THE UNIVERSITY OF WAIKATO);Quinlan J, 1996, NATIONAL CONFERENCE ON ARTIFICIAL INTELLIGENCE;Murthy S, 1998, DATA MINING AND KNOWLEDGE DISCOVERY;Wettschereck D, 1997, ARTIFICIAL INTELLIGENCE REVIEW;Veropoulos K, 1999, ;Crammer K, 2002, MACHINE LEARNING;Genton M, 2002, ;Reeves C, 2002, ;Ting K, 1999, JOURNAL OF ARTIFICIAL INTELLIGENCE RESEARCH;Fürnkranz J, 1999, ARTIFICIAL INTELLIGENCE REVIEW;Gehrke J, 2000, DATA MINING AND KNOWLEDGE DISCOVERY;Cheng J, 2001, LECTURE NOTES IN COMPUTER SCIENCE;Zheng Z, 2000, MACHINE LEARNING;Liu H, 2001, ;Todorovski L, 2003, MACHINE LEARNING;Roli F, 2001, LECTURE NOTES IN COMPUTER SCIENCE;Setiono R, 2000, APPLIED INTELLIGENCE;Elomaa T, 1999, MACHINE LEARNING;Maclin R, 1995, ;Srivastava A, 1999, DATA MINING AND KNOWLEDGE DISCOVERY;Siddique M, 2002, ;Klusch M, 2003, LECTURE NOTES IN COMPUTER SCIENCE;Yang Y, 2003, LECTURE NOTES IN COMPUTER SCIENCE;Xie Z, 2002, LECTURE NOTES IN COMPUTER SCIENCE;Srivastava A, 2006, KLUWER ACADEMIC PUBLISHERS EBOOKS;Neocleous C, 2002, LECTURE NOTES IN COMPUTER SCIENCE;Bonarini A, 2000, LECTURE NOTES IN COMPUTER SCIENCE;An A, 1999, LECTURE NOTES IN COMPUTER SCIENCE;Elomaa T, 1999, LECTURE NOTES IN COMPUTER SCIENCE;Fürnkranz J, 2001, ;Saitta L, 1998, MACHINE LEARNING;Zheng Z, 2000, MACHINE LEARNING;An A, 2000, LECTURE NOTES IN COMPUTER SCIENCE;Lindgren 
T, 2004, LECTURE NOTES IN COMPUTER SCIENCE;Kuncheva L, 2001, LECTURE NOTES IN COMPUTER SCIENCE;Baik S, 2004, LECTURE NOTES IN COMPUTER SCIENCE;Wang L, 2004, LECTURE NOTES IN COMPUTER SCIENCE;Lazkano E, 2003, LECTURE NOTES IN COMPUTER SCIENCE;, 2007, ;Shavlik J, 1998, ;Cowell R, 2013, ARXIV (CORNELL UNIVERSITY);Quinlan J, 1992, ;Freund Y, 1997, JOURNAL OF COMPUTER AND SYSTEM SCIENCES;Jain A, 1999, ACM COMPUTING SURVEYS;Blake C, 1998, MEDICAL ENTOMOLOGY AND ZOOLOGY;Ho T, 1998, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Opitz D, 1999, JOURNAL OF ARTIFICIAL INTELLIGENCE RESEARCH;Guo G, 2003, LECTURE NOTES IN COMPUTER SCIENCE;Littlestone N, 1994, INFORMATION AND COMPUTATION;Zhang G, 2000, IEEE TRANSACTIONS ON SYSTEMS MAN AND CYBERNETICS PART C (APPLICATIONS AND REVIEWS);Muggleton S, 1995, NEW GENERATION COMPUTING;Wilson D, 2000, MACHINE LEARNING;Freund Y, 1998, ;Nadeau C, 2003, MACHINE LEARNING;Batista G, 2003, APPLIED ARTIFICIAL INTELLIGENCE;Cheng J, 2002, ARTIFICIAL INTELLIGENCE;Dantsin E, 2001, ACM COMPUTING SURVEYS;Webb G, 2000, MACHINE LEARNING;Zhang S, 2003, APPLIED ARTIFICIAL INTELLIGENCE;Džeroski S, 2006, ;Utgoff P, 1997, MACHINE LEARNING;Breslow L, 1997, THE KNOWLEDGE ENGINEERING REVIEW;Raedt L, 1996, ;Castellano G, 1997, IEEE TRANSACTIONS ON NEURAL NETWORKS;MTW, 2000, JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION;Platt J, 1998, ;Melville P, 2003, ;Parekh R, 2000, IEEE TRANSACTIONS ON NEURAL NETWORKS;Blum A, 1997, MACHINE LEARNING;Quinlan J, 1995, NEW GENERATION COMPUTING;Ratanamahatana C, 2003, APPLIED ARTIFICIAL INTELLIGENCE;Reinartz T, 2002, DATA MINING AND KNOWLEDGE DISCOVERY;Schapire R, 1998, ;Markovitch S, 2002, MACHINE LEARNING;Zhou Z, 2002, KNOWLEDGE-BASED SYSTEMS;Fürnkranz J, 1997, MACHINE LEARNING;Acid S, 2003, JOURNAL OF ARTIFICIAL INTELLIGENCE RESEARCH;Muggleton S, 1999, ARTIFICIAL INTELLIGENCE;Mántaras R, 1998, DATA & KNOWLEDGE ENGINEERING;Kalousis A, 2004, MACHINE LEARNING;McSherry D, 1999, KNOWLEDGE-BASED SYSTEMS;Zhou Z, 
2004, JOURNAL OF COMPUTER SCIENCE AND TECHNOLOGY;Howlett R, 2001, ;Wall R, 2003, ARTIFICIAL INTELLIGENCE IN MEDICINE;Kon M, 2000, NEURAL NETWORKS;Vivarelli F, 2001, NEURAL NETWORKS;Okamoto S, 2003, THEORETICAL COMPUTER SCIENCE;Zheng Z, 1998, KNOWLEDGE-BASED SYSTEMS;Widman L, 1997, ARTIFICIAL INTELLIGENCE IN MEDICINE;Sánchez J, 2002, LECTURE NOTES IN COMPUTER SCIENCE;Breiman L, 1996, MACHINE LEARNING;Burges C, 1998, DATA MINING AND KNOWLEDGE DISCOVERY;Goldberg D, 2002, ;Witten I, 2008, ;Dietterich T, 1998, NEURAL COMPUTATION;Hodge J, 2004, ARTIFICIAL INTELLIGENCE REVIEW;C.J.C. B, 1998, ;Heckerman D, 1995, MACHINE LEARNING;Domingos P, 1997, MACHINE LEARNING;Bauer E, 1999, MACHINE LEARNING;Merz C, 1996, MEDICAL ENTOMOLOGY AND ZOOLOGY;Yu L, 2004, ;Amendola G, 2017, INTERNATIONAL JOINT CONFERENCE ON ARTIFICIAL INTELLIGENCE;Lim T, 2000, MACHINE LEARNING;Vilalta R, 2002, ARTIFICIAL INTELLIGENCE REVIEW;Friedman N, 2003, MACHINE LEARNING;, 1998, THE MIT PRESS EBOOKS;Freund Y, 1999, MACHINE LEARNING;Brighton H, 2002, DATA MINING AND KNOWLEDGE DISCOVERY;Ting K, 1999, ;Brazdil P, 2003, MACHINE LEARNING;Reeves C, 2002, OPERATIONS RESEARCH, COMPUTER SCIENCE. 
INTERFACE SERIES;, 2001, ;Keerthi S, 2002, MACHINE LEARNING;Gama J, 2000, MACHINE LEARNING;Heckerman D, 2006, STUDIES IN FUZZINESS AND SOFT COMPUTING;Aha D, 1997, ;Bouckaert R, 2003, ;Neil M, 2006, ;Dězeroski S, 2001, SPRINGER EBOOKS;Yam J, 2001, IEEE TRANSACTIONS ON NEURAL NETWORKS;Domeniconi C, 2002, THE MIT PRESS EBOOKS;Dutton D, 1997, THE KNOWLEDGE ENGINEERING REVIEW;Bouckaert R, 2004, LECTURE NOTES IN COMPUTER SCIENCE;Gama J, 1999, INTELLIGENT DATA ANALYSIS;Camargo L, 2001, NEURAL COMPUTATION;Yen G, 2002, ;Roy A, 2000, IEEE TRANSACTIONS ON FUZZY SYSTEMS;Basak J, 2004, NEURAL COMPUTATION;Gama J, 1999, INTELLIGENT DATA ANALYSIS;Flach P, 2000, ;Kivinen J, 2003, LECTURE NOTES IN COMPUTER SCIENCE;, 2019, ;Bengio Y, 1999, ÉRUDIT DOCUMENTS AND DATA REPOSITORY (ÉRUDIT CONSORTIUM, UNIVERSITY OF MONTREAL);Aha D, 1997, ",,,OPENALEX,"Kotsiantis S, 2006, ARTIFICIAL INTELLIGENCE REVIEW","Kotsiantis S, 2006, ARTIFICIAL INTELLIGENCE REVIEW" +https://openalex.org/W753012316,,Torch7: A Matlab-like Environment for Machine Learning,2011,en,article,1262,INFOSCIENCE (ECOLE POLYTECHNIQUE FÉDÉRALE DE LAUSANNE),Infoscience (Ecole Polytechnique Fédérale de Lausanne),Ronan Collobert;Koray Kavukcuoglu;Clément Farabet,Ronan Collobert;Koray Kavukcuoglu;Clément Farabet,"Princeton University, Princeton, United States;New York University, New York, United States;New York University, New York, United States",,"Torch7 is a versatile numeric computing framework and machine learning library that extends Lua. Its goal is to provide a flexible environment to design and train learning machines. Flexibility is obtained via Lua, an extremely lightweight scripting language. High performance is obtained via efficient OpenMP/SSE and CUDA implementations of low-level numeric routines. Torch7 can easily be in-terfaced to third-party software thanks to Lua’s light interface. 
1 Torch7 Overview With Torch7, we aim at providing a framework with three main advantages: (1) it should ease the development of numerical algorithms, (2) it should be easily extended (including the use of other libraries), and (3) it should be fast. We found that a scripting (interpreted) language with a good C API appears as a convenient solu-tion to “satisfy ” the constraint (2). First, a high-level language makes the process of developing a program simpler and more understandable than a low-level language. Second, if the programming language is interpreted, it becomes also easier to quickly try various ideas in an interactive manner.",,,,,Computer science;Scripting language;Implementation;Flexibility (engineering);MATLAB;Interface (matter);CUDA;Software;Computer architecture;Programming language;Operating system,US,"Bottou L, 1988, ",,,OPENALEX,"Collobert R, 2011, INFOSCIENCE (ECOLE POLYTECHNIQUE FÉDÉRALE DE LAUSANNE)","Collobert R, 2011, INFOSCIENCE (ECOLE POLYTECHNIQUE FÉDÉRALE DE LAUSANNE)" +https://openalex.org/W2162630660,10.1023/a:1009953814988,Automating the Construction of Internet Portals with Machine Learning,2000,en,article,1265,INFORMATION RETRIEVAL,Information Retrieval,Andrew Kachites McCallum;Kamal Nigam;Jason D. M. 
Rennie;Kristie Seymore,Andrew Kachites McCallum;Kamal Nigam;Jason Rennie;Kristie Seymore,"Just Research and Carnegie Mellon University, The Netherlands;Carnegie Mellon University, The Netherlands;Massachusetts Institute of Technology, USA;Carnegie Mellon University, The Netherlands",,,3,2,127,163,Computer science;World Wide Web;The Internet;Popularity;Domain (mathematical analysis);Identification (biology);Population;Information retrieval;Information extraction;Data science,US,"Dempster A, 1977, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B (STATISTICAL METHODOLOGY);Rabiner L, 1989, PROCEEDINGS OF THE IEEE;Cios K, 1997, NEUROCOMPUTING;Bellman R, 1966, SCIENCE;Kleinberg J, 1999, JOURNAL OF THE ACM;Kaelbling L, 1996, JOURNAL OF ARTIFICIAL INTELLIGENCE RESEARCH;Viterbi A, 1967, IEEE TRANSACTIONS ON INFORMATION THEORY;Blum A, 1998, ;Dempster A, 1977, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES A (STATISTICS IN SOCIETY);Sutton R, 1988, MACHINE LEARNING;McCallum A, 1998, ;Nigam K, 2000, MACHINE LEARNING;Yarowsky D, 1995, ;Lewis D, 1998, LECTURE NOTES IN COMPUTER SCIENCE;Chen S, 1999, COMPUTER SPEECH & LANGUAGE;Chakrabarti S, 1999, COMPUTER NETWORKS;Baum L, 1972, MEDICAL ENTOMOLOGY AND ZOOLOGY;Giles C, 1998, ;Cho J, 1998, COMPUTER NETWORKS AND ISDN SYSTEMS;Bikel D, 1997, ;Riloff E, 1999, ;Joachims T, 1997, ;Craven M, 1998, ;Lawrence S, 1999, COMPUTER;Ney H, 1994, COMPUTER SPEECH & LANGUAGE;Mérialdo B, 1994, ;Kupiec J, 1992, COMPUTER SPEECH & LANGUAGE;Bollacker K, 1998, ;Freitag D, 2000, ;Rennie J, 1999, ;Freitag D, 1999, ;Kearns M, 1999, ;Yamron J, 2002, ;Boyan J, 1999, ;Mulbregt P, 1998, ;Torgo L, 1997, INTELLIGENT DATA ANALYSIS;Hofmann T, 1998, DSPACE@MIT (MASSACHUSETTS INSTITUTE OF TECHNOLOGY);Torgo L, 1997, INTELLIGENT DATA ANALYSIS;Menczer F, 1997, INTERNATIONAL CONFERENCE ON MACHINE LEARNING;Cohen W, 1998, ;Stolcke A, 1998, ERA;Franz A, 1997, AI MAGAZINE;Cohen W, 1999, COMPUTER NETWORKS;Menczer F, 1997, INTERNATIONAL CONFERENCE ON MACHINE LEARNING;Witten I, 
1998, COMMUNICATIONS OF THE ACM;McCallum A, 2022, ;Tesauro G, 2025, ARXIV (CORNELL UNIVERSITY)",,,OPENALEX,"McCallum A, 2000, INFORMATION RETRIEVAL","McCallum A, 2000, INFORMATION RETRIEVAL" +https://openalex.org/W4296886862,10.1177/117693510600200030,Applications of Machine Learning in Cancer Prediction and Prognosis,2006,en,article,1098,CANCER INFORMATICS,Cancer Informatics,Joseph A. Cruz;David S. Wishart,Joseph A. Cruz;David S. Wishart,"Departments of Biological Science and Computing Science, University of Alberta Edmonton, AB, Canada T6G 2E8;Departments of Biological Science and Computing Science, University of Alberta Edmonton, AB, Canada T6G 2E8",,"Machine learning is a branch of artificial intelligence that employs a variety of statistical, probabilistic and optimization techniques that allows computers to “learn” from past examples and to detect hard-to-discern patterns from large, noisy or complex data sets. This capability is particularly well-suited to medical applications, especially those that depend on complex proteomic and genomic measurements. As a result, machine learning is frequently used in cancer diagnosis and detection. More recently machine learning has been applied to cancer prognosis and prediction. This latter approach is particularly interesting as it is part of a growing trend towards personalized, predictive medicine. In assembling this review we conducted a broad survey of the different types of machine learning methods being used, the types of data being integrated and the performance of these methods in cancer prediction and prognosis. A number of trends are noted, including a growing dependence on protein biomarkers and microarray data, a strong bias towards applications in prostate and breast cancer, and a heavy reliance on “older” technologies such artificial neural networks (ANNs) instead of more recently developed or more easily interpretable machine learning methods. 
A number of published studies also appear to lack an appropriate level of validation or testing. Among the better designed and validated studies it is clear that machine learning methods can be used to substantially (15–25%) improve the accuracy of predicting cancer susceptibility, recurrence and mortality. At a more fundamental level, it is also evident that machine learning is also helping to improve our basic understanding of cancer development and progression.",2,,117693510600200,117693510600200,Machine learning;Artificial intelligence;Computer science;Artificial neural network;Probabilistic logic,CA,"Holland J, 1992, THE MIT PRESS EBOOKS;Rumelhart D, 1986, NATURE;McCulloch W, 1943, BULLETIN OF MATHEMATICAL BIOLOGY;Kohonen T, 1982, BIOLOGICAL CYBERNETICS;Delen D, 2004, ARTIFICIAL INTELLIGENCE IN MEDICINE;Weston A, 2004, JOURNAL OF PROTEOME RESEARCH;Dumitrescu R, 2005, JOURNAL OF CELLULAR AND MOLECULAR MEDICINE;Petricoin E, 2004, CURRENT OPINION IN BIOTECHNOLOGY;Somorjai R, 2003, BIOINFORMATICS;Baldus S, 2004, CRITICAL REVIEWS IN CLINICAL LABORATORY SCIENCES;Patrick E, 1970, INFORMATION AND CONTROL;Lundin J, 1999, ONCOLOGY;McCarthy J, 2004, ANNALS OF THE NEW YORK ACADEMY OF SCIENCES;Zupan B, 2000, ARTIFICIAL INTELLIGENCE IN MEDICINE;Maclin P, 1991, JOURNAL OF MEDICAL SYSTEMS;Crawford E, 2000, CANCER;Piccart M, 2001, ONCOLOGY;O’Neill M, 2003, BMC BIOINFORMATICS;Jefferson M, 1997, CANCER;Gulliford S, 2004, RADIOTHERAPY AND ONCOLOGY;Duffy M, 2001, CLINICAL BIOCHEMISTRY;Sato F, 2005, CANCER;Bollschweiler E, 2004, ANNALS OF SURGICAL ONCOLOGY;Cicchetti D, 1992, CLINICAL CHEMISTRY;Ehlers J, 2005, CLINICAL CANCER RESEARCH;Ripley R, 2004, STATISTICS IN MEDICINE;Nagata T, 2005, INTERNATIONAL JOURNAL OF ORAL AND MAXILLOFACIAL SURGERY;Fielding L, 1992, CANCER;Rodvold D, 2001, THE PROSTATE;Snow P, 2001, CANCER;Ando T, 2002, JAPANESE JOURNAL OF CANCER RESEARCH;Leenhouts H, 1999, RADIATION AND ENVIRONMENTAL BIOPHYSICS;Kaiserman I, 2005, OPHTHALMOLOGY;Hanai T, 2003, CANCER 
SCIENCE;Kim Y, 2003, BIOMEDICINE & PHARMACOTHERAPY;Rodriguez‐Luna H, 2005, TRANSPLANTATION;Han M, 2000, UROLOGY;Claus E, 2001, FAMILIAL CANCER;Ochi T, 2002, INTERNATIONAL JOURNAL OF CLINICAL ONCOLOGY;Fujikawa K, 2003, INTERNATIONAL JOURNAL OF UROLOGY;́n F, 2004, EUROPEAN JOURNAL OF CANCER PREVENTION;Mašić N, 1998, AMERICAN JOURNAL OF HEMATOLOGY;Naguib R, 1998, BRITISH JOURNAL OF CANCER;Mattfeldt, 1999, BRITISH JOURNAL OF UROLOGY;Ando T, 2003, CANCER SCIENCE;Porter C, 2005, UROLOGY;Murphy G, 2000, THE PROSTATE;Savage K, 2004, INTERNATIONAL JOURNAL OF HEMATOLOGY;Marvin N, 1999, ARTIFICIAL INTELLIGENCE IN MEDICINE;Mántaras R, 2013, ARXIV (CORNELL UNIVERSITY);Shipp M, 2002, NATURE MEDICINE;Bach P, 2003, JNCI JOURNAL OF THE NATIONAL CANCER INSTITUTE;Dettling M, 2004, BIOINFORMATICS;Iizuka N, 2003, THE LANCET;Hagerty R, 2005, ANNALS OF ONCOLOGY;Shavlik J, 1998, ;Aha D, 1992, INTERNATIONAL JOURNAL OF MAN-MACHINE STUDIES;Burke H, 1997, CANCER;Dai H, 2005, CANCER RESEARCH;Jerez J, 2002, ARTIFICIAL INTELLIGENCE IN MEDICINE;Bottaci L, 1997, THE LANCET;Listgarten J, 2004, CLINICAL CANCER RESEARCH;Duffy M, 2005, CLINICAL CHEMISTRY;Lisböa P, 2003, ARTIFICIAL INTELLIGENCE IN MEDICINE;Ohira M, 2005, CANCER CELL;Domchek S, 2003, JOURNAL OF CLINICAL ONCOLOGY;Kattan M, 2003, THE JOURNAL OF UROLOGY;Man T, 2005, CANCER RESEARCH;Zhou X, 2004, JOURNAL OF BIOMEDICAL INFORMATICS;Şeker H, 2003, IEEE TRANSACTIONS ON INFORMATION TECHNOLOGY IN BIOMEDICINE;Michael A, 2005, CLINICAL CANCER RESEARCH;Pass H, 2004, CLINICAL CANCER RESEARCH;Mian S, 2005, JOURNAL OF CLINICAL ONCOLOGY;Bocchi L, 2004, MEDICAL ENGINEERING & PHYSICS;Simes R, 1985, JOURNAL OF CHRONIC DISEASES;Mariani L, 1997, BREAST CANCER RESEARCH AND TREATMENT;Su M, 2005, MEDICAL PHYSICS;Santos‐García G, 2003, ARTIFICIAL INTELLIGENCE IN MEDICINE;Poulakis V, 2004, UROLOGY;Cochran A, 1997, PIGMENT CELL RESEARCH;Colozza M, 2005, CLINICAL BREAST CANCER;Bryce T, 1998, INTERNATIONAL JOURNAL OF RADIATION ONCOLOGY*BIOLOGY*PHYSICS;Hayashida Y, 
2005, CLINICAL CANCER RESEARCH;Taktak A, 2003, PHYSICS IN MEDICINE AND BIOLOGY;Hamamoto I, 1995, COMPUTERS IN BIOLOGY AND MEDICINE;Spyridonos P, 2002, MEDICAL INFORMATICS AND THE INTERNET IN MEDICINE;Mattfeldt T, 2004, MEDICAL & BIOLOGICAL ENGINEERING & COMPUTING;Mian S, 2003, PROTEOMICS;Kan T, 2004, ANNALS OF SURGICAL ONCOLOGY;Lee Y, 2000, DIMACS SERIES IN DISCRETE MATHEMATICS AND THEORETICAL COMPUTER SCIENCE;Tewari A, 2001, MOLECULAR UROLOGY;Grumett S, 2003, CLINICAL COLORECTAL CANCER;Naguib R, 1999, IEEE TRANSACTIONS ON INFORMATION TECHNOLOGY IN BIOMEDICINE;Hsia T, 2003, JOURNAL OF CLINICAL LABORATORY ANALYSIS;Potter S, 1999, UROLOGY;Ziada A, 2001, CANCER;Ji W, 2003, IEEE TRANSACTIONS ON INFORMATION TECHNOLOGY IN BIOMEDICINE;Drago G, 2002, IEEE TRANSACTIONS ON BIOMEDICAL ENGINEERING;Seiwerth S, 2000, JOURNAL OF CHEMICAL INFORMATION AND COMPUTER SCIENCES;Vendrell E, 2004, CANCER LETTERS;Cortes C, 1995, MACHINE LEARNING;Quinlan J, 1986, MACHINE LEARNING;Russell E, 2000, ADVANCES IN INDUSTRIAL CONTROL;Bellman R, 1961, PRINCETON UNIVERSITY PRESS EBOOKS;Shoesmith E, 1984, JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES D (THE STATISTICIAN);Wei J, 2004, CANCER RESEARCH;Poulakis V, 2004, EUROPEAN UROLOGY",,,OPENALEX,"Cruz J, 2006, CANCER INFORMATICS","Cruz J, 2006, CANCER INFORMATICS" +https://openalex.org/W4394857110,10.1136/bmj-2023-078378,TRIPOD+AI statement: updated guidance for reporting clinical prediction models that use regression or machine learning methods,2024,en,article,2137,BMJ,BMJ,Gary S. Collins;Karel G.M. Moons;Paula Dhiman;Richard D Riley;Andrew L. Beam;Ben Van Calster;Marzyeh Ghassemi;Xiaoxuan Liu;Johannes B. Reitsma;Maarten van Smeden;Anne‐Laure Boulesteix;Jennifer Camaradou;Leo Anthony Celi;Spiros Denaxas;Alastair K. Denniston;Ben Glocker;Robert Golub;Hugh Harvey;Georg Heinze;Michael M. Hoffman;André Pascal Kengne;Emily Lam;Naomi Lee;Elizabeth Loder;Lena Maier‐Hein;Bilal A. Mateen;Melissa D. 
McCradden;Lauren Oakden‐Rayner;Johan Ordish;Richard Parnell;Sherri Rose;Karandeep Singh;Laure Wynants;Patrícia Logullo,Gary S Collins;Karel G M Moons;Paula Dhiman;Richard D Riley;Andrew L Beam;Ben Van Calster;Marzyeh Ghassemi;Xiaoxuan Liu;Johannes B Reitsma;Maarten van Smeden;Anne-Laure Boulesteix;Jennifer Catherine Camaradou;Leo Anthony Celi;Spiros Denaxas;Alastair K Denniston;Ben Glocker;Robert M Golub;Hugh Harvey;Georg Heinze;Michael M Hoffman;André Pascal Kengne;Emily Lam;Naomi Lee;Elizabeth W Loder;Lena Maier-Hein;Bilal A Mateen;Melissa D McCradden;Lauren Oakden-Rayner;Johan Ordish;Richard Parnell;Sherri Rose;Karandeep Singh;Laure Wynants;Patricia Logullo,"Centre for Statistics in Medicine, UK EQUATOR Centre, Nuffield Department of Orthopaedics, Rheumatology, and Musculoskeletal Sciences, University of Oxford, Oxford OX3 7LD, UK;Julius Centre for Health Sciences and Primary Care, University Medical Centre Utrecht, Utrecht University, Utrecht, Netherlands;Centre for Statistics in Medicine, UK EQUATOR Centre, Nuffield Department of Orthopaedics, Rheumatology, and Musculoskeletal Sciences, University of Oxford, Oxford OX3 7LD, UK;Institute of Applied Health Research, College of Medical and Dental Sciences, University of Birmingham, Birmingham, UK;National Institute for Health and Care Research (NIHR) Birmingham Biomedical Research Centre, Birmingham, UK;Department of Epidemiology, Harvard T H Chan School of Public Health, Boston, MA, USA;Department of Biomedical Data Science, Leiden University Medical Centre, Leiden, Netherlands;Department of Development and Regeneration, KU Leuven, Leuven, Belgium;Department of Electrical Engineering and Computer Science, Institute for Medical Engineering and Science, Massachusetts Institute of Technology, Cambridge, MA, USA;Institute of Inflammation and Ageing, College of Medical and Dental Sciences, University of Birmingham, Birmingham, UK;University Hospitals Birmingham NHS Foundation Trust, Birmingham, UK;Julius Centre for 
Health Sciences and Primary Care, University Medical Centre Utrecht, Utrecht University, Utrecht, Netherlands;Julius Centre for Health Sciences and Primary Care, University Medical Centre Utrecht, Utrecht University, Utrecht, Netherlands;Institute for Medical Information Processing, Biometry and Epidemiology, Faculty of Medicine, Ludwig-Maximilians-University of Munich and Munich Centre of Machine Learning, Germany;Department of Medical Information Processing, Biometry and Epidemiology, Ludwig-Maximilians-University of Munich, Munich, Germany;Patient representative, Health Data Research UK patient and public involvement and engagement group;Patient representative, University of East Anglia, Faculty of Health Sciences, Norwich Research Park, Norwich, UK;Beth Israel Deaconess Medical Center, Boston, MA, USA;Department of Biostatistics, Harvard T H Chan School of Public Health, Boston, MA, USA;Laboratory for Computational Physiology, Massachusetts Institute of Technology, Cambridge, MA, USA;British Heart Foundation Data Science Centre, London, UK;Institute of Health Informatics, University College London, London, UK;Institute of Inflammation and Ageing, College of Medical and Dental Sciences, University of Birmingham, Birmingham, UK;National Institute for Health and Care Research (NIHR) Birmingham Biomedical Research Centre, Birmingham, UK;Department of Computing, Imperial College London, London, UK;Northwestern University Feinberg School of Medicine, Chicago, IL, USA;Hardian Health, Haywards Heath, UK;Section for Clinical Biometrics, Centre for Medical Data Science, Medical University of Vienna, Vienna, Austria;Department of Computer Science, University of Toronto, Toronto, ON, Canada;Department of Medical Biophysics, University of Toronto, Toronto, ON, Canada;Princess Margaret Cancer Centre, University Health Network, Toronto, ON, Canada;Vector Institute for Artificial Intelligence, Toronto, ON, Canada;Department of Medicine, University of Cape Town, Cape Town, 
South Africa;Patient representative, Health Data Research UK patient and public involvement and engagement group;National Institute for Health and Care Excellence, London, UK;Department of Neurology, Brigham and Women's Hospital, Harvard Medical School, Boston, MA, USA;The BMJ, London, UK;Department of Intelligent Medical Systems, German Cancer Research Centre, Heidelberg, Germany;Alan Turing Institute, London, UK;Institute of Health Informatics, University College London, London, UK;Wellcome Trust, London, UK;Department of Bioethics, Hospital for Sick Children Toronto, ON, Canada;Genetics and Genome Biology, SickKids Research Institute, Toronto, ON, Canada;Australian Institute for Machine Learning, University of Adelaide, Adelaide, SA, Australia;Medicines and Healthcare products Regulatory Agency, London, UK;Patient representative, Health Data Research UK patient and public involvement and engagement group;Department of Health Policy and Center for Health Policy, Stanford University, Stanford, CA, USA;Australian Institute for Machine Learning, University of Adelaide, Adelaide, SA, Australia;Department of Epidemiology, CAPHRI Care and Public Health Research Institute, Maastricht University, Maastricht, Netherlands;Department of Health Policy and Center for Health Policy, Stanford University, Stanford, CA, USA;Department of Epidemiology, CAPHRI Care and Public Health Research Institute, Maastricht University, Maastricht, Netherlands;Centre for Statistics in Medicine, UK EQUATOR Centre, Nuffield Department of Orthopaedics, Rheumatology, and Musculoskeletal Sciences, University of Oxford, Oxford OX3 7LD, UK","Gary S Collins (corresponding author), Centre for Statistics in Medicine, UK EQUATOR Centre, Nuffield Department of Orthopaedics, Rheumatology, and Musculoskeletal Sciences, University of Oxford, Oxford OX3 7LD, UK","The TRIPOD (Transparent Reporting of a multivariable prediction model for Individual Prognosis Or Diagnosis) statement was published in 2015 to 
provide the minimum reporting recommendations for studies developing or evaluating the performance of a prediction model. Methodological advances in the field of prediction have since included the widespread use of artificial intelligence (AI) powered by machine learning methods to develop prediction models. An update to the TRIPOD statement is thus needed. TRIPOD+AI provides harmonised guidance for reporting prediction model studies, irrespective of whether regression modelling or machine learning methods have been used. The new checklist supersedes the TRIPOD 2015 checklist, which should no longer be used. This article describes the development of TRIPOD+AI and presents the expanded 27 item checklist with more detailed explanation of each reporting recommendation, and the TRIPOD+AI for Abstracts checklist. TRIPOD+AI aims to promote the complete, accurate, and transparent reporting of studies that develop a prediction model or evaluate its performance. Complete reporting will facilitate study appraisal, model evaluation, and model implementation.",385,,e078378,e078378,Tripod (photography);Checklist;Machine learning;Computer science;Artificial intelligence;Engineering;Psychology,GB;NL;US;BE;DE;AT;CA;ZA;AU,"D’Agostino R, 2008, CIRCULATION;Moons K, 2015, ANNALS OF INTERNAL MEDICINE;Gail M, 1989, JNCI JOURNAL OF THE NATIONAL CANCER INSTITUTE;Wynants L, 2020, BMJ;Collins G, 2015, ANNALS OF INTERNAL MEDICINE;Nashef S, 2012, EUROPEAN JOURNAL OF CARDIO-THORACIC SURGERY;Wolff R, 2018, ANNALS OF INTERNAL MEDICINE;Riley R, 2020, BMJ;Christodoulou E, 2019, JOURNAL OF CLINICAL EPIDEMIOLOGY;Staniszewska S, 2017, RESEARCH INVOLVEMENT AND ENGAGEMENT;Moons K, 2018, ANNALS OF INTERNAL MEDICINE;Moher D, 2010, PLOS MEDICINE;Steyerberg E, 2008, PLOS MEDICINE;Glasziou P, 2014, THE LANCET;Mongan J, 2020, RADIOLOGY ARTIFICIAL INTELLIGENCE;Kanis J, 2007, OSTEOPOROSIS INTERNATIONAL;Riley R, 2018, STATISTICS IN MEDICINE;Damen J, 2016, BMJ;Smeden M, 2018, STATISTICAL METHODS IN MEDICAL 
RESEARCH;Collins G, 2014, BMC MEDICAL RESEARCH METHODOLOGY;Collins G, 2019, THE LANCET;Bouwmeester W, 2012, PLOS MEDICINE;Liu X, 2020, BMJ;Norgeot B, 2020, NATURE MEDICINE;Smeden M, 2016, BMC MEDICAL RESEARCH METHODOLOGY;Collins G, 2011, BMC MEDICINE;Chen I, 2021, ANNUAL REVIEW OF BIOMEDICAL DATA SCIENCE;Hernandez‐Boussard T, 2020, JOURNAL OF THE AMERICAN MEDICAL INFORMATICS ASSOCIATION;Rivera S, 2020, BMJ;Schwendicke F, 2021, JOURNAL OF DENTISTRY;Riley R, 2018, STATISTICS IN MEDICINE;Ibrahim H, 2021, THE LANCET DIGITAL HEALTH;Stevens L, 2020, CIRCULATION CARDIOVASCULAR QUALITY AND OUTCOMES;McCradden M, 2020, THE LANCET DIGITAL HEALTH;McDermott M, 2021, SCIENCE TRANSLATIONAL MEDICINE;Smeden M, 2021, JOURNAL OF CLINICAL EPIDEMIOLOGY;Sendak M, 2020, NPJ DIGITAL MEDICINE;Altman D, 2008, PUBMED;Mallett S, 2010, BMC MEDICINE;Bellou V, 2019, BMJ;Wang W, 2020, PLOS ONE;Archer L, 2020, STATISTICS IN MEDICINE;Scott I, 2021, BMJ HEALTH & CARE INFORMATICS;Snell K, 2021, JOURNAL OF CLINICAL EPIDEMIOLOGY;Yusuf M, 2020, BMJ OPEN;Miles J, 2020, DIAGNOSTIC AND PROGNOSTIC RESEARCH;Groot O, 2021, JOURNAL OF ORTHOPAEDIC RESEARCH®;Groot O, 2021, ACTA ORTHOPAEDICA;Heus P, 2020, ANNALS OF INTERNAL MEDICINE;Kadakia K, 2021, NEW ENGLAND JOURNAL OF MEDICINE;Staniszewska S, 2017, BMJ;Collins G, 2021, BMJ OPEN;Gattrell W, 2024, PLOS MEDICINE;Collins G, 2024, BMJ;Vasey B, 2022, NATURE MEDICINE;Hond A, 2022, NPJ DIGITAL MEDICINE;Navarro C, 2021, BMJ;Riley R, 2021, STATISTICS IN MEDICINE;Altman D, 2008, THE LANCET;Sounderajah V, 2021, BMJ OPEN;Riley R, 2024, BMJ;Calster B, 2023, BMC MEDICINE;Snell K, 2023, BMJ;Riley R, 2024, BMJ;Dhiman P, 2022, BMC MEDICAL RESEARCH METHODOLOGY;Olczak J, 2021, ACTA ORTHOPAEDICA;Cacciamani G, 2023, NATURE MEDICINE;Navarro C, 2022, BMC MEDICAL RESEARCH METHODOLOGY;Li B, 2022, NPJ DIGITAL MEDICINE;, 2021, ;Dhiman P, 2021, JOURNAL OF CLINICAL EPIDEMIOLOGY;Riley R, 2021, STATISTICS IN MEDICINE;Gichoya J, 2021, BMJ HEALTH & CARE INFORMATICS;Navarro C, 2022, JOURNAL OF 
CLINICAL EPIDEMIOLOGY;Ganapathi S, 2022, NATURE MEDICINE;Wessler B, 2021, CIRCULATION CARDIOVASCULAR QUALITY AND OUTCOMES;Debray T, 2023, BMJ;Kee O, 2023, CARDIOVASCULAR DIABETOLOGY;Kwong J, 2021, EUROPEAN UROLOGY FOCUS;Dhiman P, 2022, DIAGNOSTIC AND PROGNOSTIC RESEARCH;Navarro C, 2023, JOURNAL OF CLINICAL EPIDEMIOLOGY;Thibault R, 2023, PLOS BIOLOGY;Debray T, 2023, BMJ;Dhiman P, 2023, JOURNAL OF CLINICAL EPIDEMIOLOGY;Araújo A, 2023, ORAL ONCOLOGY;Yang Q, 2022, ACTA OBSTETRICIA ET GYNECOLOGICA SCANDINAVICA;Sheehy J, 2023, ARTIFICIAL INTELLIGENCE IN MEDICINE;McCradden M, 2023, ;Finlayson S, 2023, JAMA PEDIATRICS;Collins G, 2023, JOURNAL OF CLINICAL EPIDEMIOLOGY;Song Z, 2022, FRONTIERS IN CARDIOVASCULAR MEDICINE;Munguía‐Realpozo P, 2023, AUTOIMMUNITY REVIEWS;Dhiman P, 2023, NATURE MACHINE INTELLIGENCE;Camaradou J, 2023, ADVANCES IN THERAPY;Lans A, 2022, JOURNAL OF EVALUATION IN CLINICAL PRACTICE;Rech M, 2023, BRAIN SCIENCES;Hawksworth C, 2023, MEDRXIV",,,OPENALEX,"Collins G, 2024, BMJ","Collins G, 2024, BMJ" +https://openalex.org/W2218047931,10.1016/j.gsf.2015.07.003,Machine learning in geosciences and remote sensing,2015,en,article,1099,GEOSCIENCE FRONTIERS,Geoscience Frontiers,David J. Lary;Amir H. Alavi;Amir H. Gandomi;Annette L. Walker,David J. Lary;Amir H. Alavi;Amir H. Gandomi;Annette L. Walker,"Hanson Center for Space Science, University of Texas at Dallas, Richardson, TX 75080, USA;Department of Civil and Environmental Engineering, Michigan State University, East Lansing, MI 48824, USA;BEACON Center for the Study of Evolution in Action, Michigan State University, East Lansing, MI 48824, USA;Aerosol and Radiation Section, Naval Research Laboratory, 7 Grace Hopper Ave., Stop 2, Monterey, CA 93943-5502, USA","Amir H. Alavi (corresponding author), Department of Civil and Environmental Engineering, Michigan State University, East Lansing, MI 48824, USA","Learning incorporates a broad range of complex procedures. 
Machine learning (ML) is a subdivision of artificial intelligence based on the biological learning process. The ML approach deals with the design of algorithms to learn from machine readable data. ML covers main domains such as data mining, difficult-to-program applications, and software applications. It is a collection of a variety of algorithms (e.g. neural networks, support vector machines, self-organizing map, decision trees, random forests, case-based reasoning, genetic programming, etc.) that can provide multivariate, nonlinear, nonparametric regression or classification. The modeling capabilities of the ML-based methods have resulted in their extensive applications in science and engineering. Herein, the role of ML as an effective approach for solving problems in geosciences and remote sensing will be highlighted. The unique features of some of the ML techniques will be outlined with a specific attention to genetic programming paradigm. Furthermore, nonparametric regression and classification illustrative examples are presented to demonstrate the efficiency of ML for tackling the geosciences and remote sensing problems.",7,1,3,10,Computer science;Machine learning;Genetic programming;Artificial intelligence;Process (computing);Artificial neural network;Support vector machine;Variety (cybernetics);Decision tree;Range (aeronautics);Random forest;Software;Nonparametric statistics;Data mining,US,"Koza J, 1992, MEDICAL ENTOMOLOGY AND ZOOLOGY;Kohonen T, 1982, BIOLOGICAL CYBERNETICS;Prospero J, 2002, REVIEWS OF GEOPHYSICS;Ginoux P, 2001, JOURNAL OF GEOPHYSICAL RESEARCH ATMOSPHERES;Tegen I, 1996, NATURE;Pope C, 2009, CIRCULATION;Allen M, 2000, NATURE;Lee H, 2011, ATMOSPHERIC CHEMISTRY AND PHYSICS;Shahin M, 2001, ;Alavi A, 2011, ENGINEERING COMPUTATIONS;Yi J, 1996, ENVIRONMENTAL POLLUTION;Samui P, 2007, COMPUTERS AND GEOTECHNICS;Samui P, 2008, ENVIRONMENTAL GEOLOGY;Alavi A, 2010, CONSTRUCTION AND BUILDING MATERIALS;Engel‐Cox J, 2004, JOURNAL OF THE AIR & WASTE 
MANAGEMENT ASSOCIATION;Alavi A, 2009, ENGINEERING WITH COMPUTERS;Javadi A, 2006, COMPUTERS AND GEOTECHNICS;Das S, 2008, ENGINEERING GEOLOGY;Azamathulla H, 2011, APPLIED SOFT COMPUTING;Boldo E, 2010, ENVIRONMENT INTERNATIONAL;Lary D, 2014, GEOSPATIAL HEALTH;Garg A, 2014, RAPID PROTOTYPING JOURNAL;Garg A, 2014, ENGINEERING APPLICATIONS OF ARTIFICIAL INTELLIGENCE;Ginoux P, 2003, JOURNAL OF GEOPHYSICAL RESEARCH ATMOSPHERES;Chuang M, 2011, ATMOSPHERIC ENVIRONMENT;Alavi A, 2011, GEOSCIENCE FRONTIERS;Makkeasorn A, 2006, WATER RESOURCES RESEARCH;Liu Y, 2012, ENVIRONMENT INTERNATIONAL;Zahabiyoun B, 2012, CLEAN - SOIL AIR WATER;Azamathulla H, 2011, OCEAN ENGINEERING;Makkeasorn A, 2008, JOURNAL OF ENVIRONMENTAL MANAGEMENT;Shahin M, 2005, COMPUTERS AND GEOTECHNICS;Çabalar A, 2009, COMPUTERS & GEOSCIENCES;Ayala A, 2011, AIR QUALITY ATMOSPHERE & HEALTH;Karakus M, 2010, COMPUTERS & GEOSCIENCES;Liu Y, 2011, ATMOSPHERIC ENVIRONMENT;Garg A, 2014, MECCANICA;Madadi M, 2014, EARTH SCIENCE INFORMATICS;Li W, 2007, INTERNATIONAL JOURNAL OF ROCK MECHANICS AND MINING SCIENCES;Zhai S, 2006, JOURNAL OF CHINA UNIVERSITY OF GEOSCIENCES;Alavi A, 2011, JOURNAL OF EARTHQUAKE ENGINEERING;Samui P, 2010, INTERNATIONAL JOURNAL FOR NUMERICAL AND ANALYTICAL METHODS IN GEOMECHANICS;Ravandi E, 2013, INTERNATIONAL JOURNAL OF MINING SCIENCE AND TECHNOLOGY;John D, 2010, INTECH EBOOKS;Santos J, 2010, ;Nikravesh M, 2007, STUDIES IN FUZZINESS AND SOFT COMPUTING;Chen L, 2003, INTERNATIONAL JOURNAL OF REMOTE SENSING;Lewkowski C, 2010, EGU GENERAL ASSEMBLY CONFERENCE ABSTRACTS;Chen L, 2011, HYDROLOGICAL PROCESSES;Chuang M, 2012, ATMOSPHERIC ENVIRONMENT;Change I, 2014, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Dockery D, 1993, NEW ENGLAND JOURNAL OF MEDICINE;Stocker R, 2013, ;Pope C, 2006, JOURNAL OF THE AIR & WASTE MANAGEMENT ASSOCIATION;Brook R, 2010, CIRCULATION;Foody G, 2002, REMOTE SENSING OF ENVIRONMENT;Grell G, 2005, ATMOSPHERIC ENVIRONMENT;Kohonen T, 1982, BIOLOGICAL CYBERNETICS;Schauer J, 1996, ATMOSPHERIC 
ENVIRONMENT;Atkinson P, 1997, INTERNATIONAL JOURNAL OF REMOTE SENSING;Hansen J, 1988, JOURNAL OF GEOPHYSICAL RESEARCH ATMOSPHERES;Tegen I, 1996, JOURNAL OF GEOPHYSICAL RESEARCH ATMOSPHERES;Engel‐Cox J, 2004, ATMOSPHERIC ENVIRONMENT;Baykasoğlu A, 2007, EXPERT SYSTEMS WITH APPLICATIONS;Detto M, 2006, WATER RESOURCES RESEARCH;Liu Y, 2006, REMOTE SENSING OF ENVIRONMENT;Schaap M, 2009, ATMOSPHERIC CHEMISTRY AND PHYSICS;Gandomi A, 2011, INFORMATION SCIENCES;Azamathulla H, 2010, APPLIED SOFT COMPUTING;Gandomi A, 2011, ENGINEERING APPLICATIONS OF ARTIFICIAL INTELLIGENCE;Risacher F, 1991, CHEMICAL GEOLOGY;Walker A, 2009, JOURNAL OF GEOPHYSICAL RESEARCH ATMOSPHERES;Brown M, 2008, INTERNATIONAL JOURNAL OF REMOTE SENSING;Carpenter G, 1997, IEEE TRANSACTIONS ON GEOSCIENCE AND REMOTE SENSING;Lary D, 2009, IEEE GEOSCIENCE AND REMOTE SENSING LETTERS;Risacher F, 1991, GEOCHIMICA ET COSMOCHIMICA ACTA;Beiki M, 2010, INTERNATIONAL JOURNAL OF ROCK MECHANICS AND MINING SCIENCES;Özbek A, 2013, JOURNAL OF ROCK MECHANICS AND GEOTECHNICAL ENGINEERING;Imperatore P, 2010, INTECH EBOOKS;Rajeev K, 2008, JOURNAL OF GEOPHYSICAL RESEARCH ATMOSPHERES;Gandomi A, 2013, INTERNATIONAL JOURNAL OF EARTHQUAKE ENGINEERING AND HAZARD MITIGATION (IREHM);Lary D, 2004, ATMOSPHERIC CHEMISTRY AND PHYSICS;Rosin P, 2002, ;Prospero J, 2003, GEOCHIMICA ET COSMOCHIMICA ACTA SUPPLEMENT;Lary D, 2007, GEOPHYSICAL RESEARCH LETTERS;Lee H, 2010, EPIDEMIOLOGY;, 2002, ",,,OPENALEX,"Lary D, 2015, GEOSCIENCE FRONTIERS","Lary D, 2015, GEOSCIENCE FRONTIERS" +https://openalex.org/W2576683119,10.1016/j.neucom.2017.01.026,Machine learning on big data: Opportunities and challenges,2017,en,article,1198,NEUROCOMPUTING,Neurocomputing,Lina Zhou;Shimei Pan;Jianwu Wang;Athanasios V. Vasilakos,Lina Zhou;Shimei Pan;Jianwu Wang;Athanasios V. 
Vasilakos,"Information Systems Department, UMBC, Baltimore, MD 21250, United States;Information Systems Department, UMBC, Baltimore, MD 21250, United States;Information Systems Department, UMBC, Baltimore, MD 21250, United States;Department of Computer Science, Electrical and Space Engineering, Luleå University of Technology, SE-931 87 Skellefteå, Sweden","Lina Zhou (corresponding author), Information Systems Department, UMBC, Baltimore, MD 21250, United States",,237,,350,361,Big data;Computer science;Scalability;Data science;Preprocessor;Identification (biology);Domain (mathematical analysis);Data pre-processing;Range (aeronautics);Machine learning;Artificial intelligence;Data mining;Database,US;SE,"Jordan M, 2015, SCIENCE;Bottou L, 2010, ;LeCun Y, 2010, ;Chen Y, 2014, ;Collobert R, 2011, INFOSCIENCE (ECOLE POLYTECHNIQUE FÉDÉRALE DE LAUSANNE);Chen X, 2014, IEEE ACCESS;Amershi S, 2014, AI MAGAZINE;Tsai C, 2015, JOURNAL OF BIG DATA;Japkowicz N, 2011, ;Gemulla R, 2011, ;, 2007, THE MIT PRESS EBOOKS;Liou C, 2014, NEUROCOMPUTING;Breiman L, 1999, MACHINE LEARNING;Owen S, 2011, ;Zhai Y, 2014, IEEE COMPUTATIONAL INTELLIGENCE MAGAZINE;Dong J, 2005, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Triguero I, 2014, NEUROCOMPUTING;Rakthanmanon T, 2013, ACM TRANSACTIONS ON KNOWLEDGE DISCOVERY FROM DATA;Peteiro-Barral D, 2012, PROGRESS IN ARTIFICIAL INTELLIGENCE;Bolón‐Canedo V, 2015, APPLIED SOFT COMPUTING;Vaidya J, 2007, KNOWLEDGE AND INFORMATION SYSTEMS;Domingos P, 2001, ;Zhou S, 2013, NEUROCOMPUTING;Sun S, 2015, INFORMATION FUSION;Morales G, 2013, ;Xu K, 2015, ;Al-Jarrah O, 2014, ;Popescu A, 2013, PROCEEDINGS OF THE VLDB ENDOWMENT;Markl V, 2014, PROCEEDINGS OF THE VLDB ENDOWMENT;Bortnikov E, 2012, USENIX CONFERENCE ON HOT TOPICS IN CLOUD CCOMPUTING;Wang J, 2014, ;Mirchevska V, 2013, EXPERT SYSTEMS;Thuraisingham B, 2015, ;Li L, 2013, ;Parker C, 2012, ;Cao L, 2015, ;Luo D, 2012, ;Chen Q, 2015, ;Yui M, 2013, ;You Y, 2014, JOURNAL OF PARALLEL AND DISTRIBUTED 
COMPUTING;Hsu D, 2011, CAMBRIDGE UNIVERSITY PRESS EBOOKS;Armes T, 2013, ;Su Y, 2014, CLUSTER COMPUTING;Cervantes J, 2006, LECTURE NOTES IN COMPUTER SCIENCE;Nguyen-Dinh L, 2013, ;Yang Q, 2013, ;Luu K, 2014, ;Yang T, 2015, ;Xu W, 2011, ARXIV (CORNELL UNIVERSITY);Kashyap H, 2015, ARXIV (CORNELL UNIVERSITY);Bengio Y, 2013, IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE;Jia Y, 2014, ;Socher R, 2013, ;Vincent P, 2010, ;Zaharia M, 2010, ;Gandomi A, 2014, INTERNATIONAL JOURNAL OF INFORMATION MANAGEMENT;Lake B, 2015, SCIENCE;Dean J, 2012, ;Najafabadi M, 2015, JOURNAL OF BIG DATA;Guo Y, 2015, NEUROCOMPUTING;Erhan D, 2010, ;Low Y, 2012, PROCEEDINGS OF THE VLDB ENDOWMENT;Raina R, 2007, ;Chu C, 2007, THE MIT PRESS EBOOKS;Zhao Z, 2007, ;Ngiam J, 2011, INTERNATIONAL CONFERENCE ON MACHINE LEARNING;Landset S, 2015, JOURNAL OF BIG DATA;Singh D, 2014, JOURNAL OF BIG DATA;Cai X, 2013, INTERNATIONAL JOINT CONFERENCE ON ARTIFICIAL INTELLIGENCE;Collobert R, 2006, ;Ghoting A, 2011, ;Kraska T, 2013, ;Panda B, 2009, PROCEEDINGS OF THE VLDB ENDOWMENT;Xiao T, 2014, ;Wang Z, 2012, ;Azar A, 2014, SOFT COMPUTING;Mozafari B, 2014, PROCEEDINGS OF THE VLDB ENDOWMENT;Mahajan D, 2016, ;Zeng N, 2016, COGNITIVE COMPUTATION;Ramírez‐Gallego S, 2015, WILEY INTERDISCIPLINARY REVIEWS DATA MINING AND KNOWLEDGE DISCOVERY;Lu J, 2016, INSTITUTIONAL KNOWLEDGE (INK) - INSTITUTIONAL KNOWLEDGE AT SINGAPORE MANAGEMENT UNIVERSITY (SINGAPORE MANAGEMENT UNIVERSITY);Tan M, 2014, DR-NTU (NANYANG TECHNOLOGICAL UNIVERSITY);Cavallaro G, 2015, IEEE JOURNAL OF SELECTED TOPICS IN APPLIED EARTH OBSERVATIONS AND REMOTE SENSING;Jiang X, 2015, NEUROCOMPUTING;Yoo J, 2014, ;Ganjisaffar Y, 2011, ;Borkar V, 2012, ;Kumar A, 2018, FIGSHARE;Suzuki J, 2011, ;Pfeiffer J, 2015, ;Dekel O, 2008, NEURAL INFORMATION PROCESSING SYSTEMS;Hefeeda M, 2012, ;Yu T, 2007, ;Thuraisingham B, 2017, AUERBACH PUBLICATIONS EBOOKS;Çatak F, 2015, SOFT COMPUTING;Zhang G, 2015, INTERNATIONAL JOURNAL OF COMPUTERS IN HEALTHCARE;Xu J, 2013, ;Mason 
J, 2016, ;Mikolov T, 2013, ARXIV (CORNELL UNIVERSITY);Abadi M, 2016, ARXIV (CORNELL UNIVERSITY);Nilsson N, 1996, ARTIFICIAL INTELLIGENCE;Goodfellow I, 2016, MIT PRESS EBOOKS;VincentPascal, 2010, JOURNAL OF MACHINE LEARNING RESEARCH;Bengio Y, 2007, ;Xing E, 2015, IEEE TRANSACTIONS ON BIG DATA;Zhu J, 2017, NATIONAL SCIENCE REVIEW;Nelson B, 2016, ;Yue K, 2016, NEUROCOMPUTING;Sankar K, 2013, CERN DOCUMENT SERVER (EUROPEAN ORGANIZATION FOR NUCLEAR RESEARCH);Zhang Y, 2014, ;Mason J, 2016, ",,,OPENALEX,"Zhou L, 2017, NEUROCOMPUTING","Zhou L, 2017, NEUROCOMPUTING" +https://openalex.org/W4234556776,10.1017/cbo9780511973000,Machine Learning,2012,en,book,730,CAMBRIDGE UNIVERSITY PRESS EBOOKS,Cambridge University Press eBooks,Peter Flach,Peter Flach,University of Bristol,"Peter Flach (corresponding author), University of Bristol","As one of the most comprehensive machine learning texts around, this book does justice to the field's incredible richness, but without losing sight of the unifying principles. Peter Flach's clear, example-based approach begins by discussing how a spam filter works, which gives an immediate introduction to machine learning in action, with a minimum of technical fuss. Flach provides case studies of increasing complexity and variety with well-chosen examples and illustrations throughout. He covers a wide range of logical, geometric and statistical models and state-of-the-art topics such as matrix factorisation and ROC analysis. Particular attention is paid to the central role played by features. The use of established terminology is balanced with the introduction of new and useful concepts, and summaries of relevant background material are provided with pointers for revision if necessary. 
These features ensure Machine Learning will set a new standard as an introductory textbook.",,,,,Terminology;Variety (cybernetics);Computer science;Artificial intelligence;Set (abstract data type);Action (physics);Filter (signal processing);Field (mathematics);Machine learning;Mathematics;Linguistics;Programming language;Philosophy,GB,,,,OPENALEX,"Flach P, 2012, CAMBRIDGE UNIVERSITY PRESS EBOOKS","Flach P, 2012, CAMBRIDGE UNIVERSITY PRESS EBOOKS" diff --git a/test_openalex_200.xlsx b/test_openalex_200.xlsx new file mode 100644 index 000000000..2506eeff4 Binary files /dev/null and b/test_openalex_200.xlsx differ diff --git a/test_pubmed_200.csv b/test_pubmed_200.csv new file mode 100644 index 000000000..3f78580d6 --- /dev/null +++ b/test_pubmed_200.csv @@ -0,0 +1,201 @@ +UT,TI,SO,JI,PY,VL,IS,LA,DT,RP,AU,AF,DI,PMID,BP,EP,CR,AB,C1,AU_CO,DE,ID,TC,DB,SR,SR_FULL +42273607,"Development, validation and use of artificial-intelligence-related technologies to assess basic motor skills in children: a scoping review.",F1000Research,F1000Res,2023,12,,eng,citation,Bazo-Alvarez JC,Figueroa-Quiñones J;Ipanaque-Neyra J;Gómez Hurtado H;Bazo-Alvarez O;Bazo-Alvarez JC,Figueroa-Quiñones J;Ipanaque-Neyra J;Gómez Hurtado H;Bazo-Alvarez O;Bazo-Alvarez JC,10.12688/f1000research.138616.3,42273607,1598,,,,,,,,0,PUBMED,"Figueroa-Quiñones J, 2023, F1000RES","Figueroa-Quiñones J, 2023, F1000RES" +42094871,Craving for a Robust Methodology: A Systematic Review of Machine Learning Algorithms on Substance-Use Disorders Treatment Outcomes.,International journal of mental health and addiction,Int J Ment Health Addict,2026,24,2,eng,citation,Grassi-Oliveira R,de Mattos BP;Mattjie C;Ravazio R;Barros RC;Grassi-Oliveira R,de Mattos BP;Mattjie C;Ravazio R;Barros RC;Grassi-Oliveira R,10.1007/s11469-024-01403-z,42094871,1090,1117,,,,,,,0,PUBMED,"de Mattos B, 2026, INT J MENT HEALTH ADDICT","de Mattos B, 2026, INT J MENT HEALTH ADDICT" +42063297,Imputation of Missing Continuous Glucose Monitor Data.,Journal of 
diabetes science and technology,J Diabetes Sci Technol,2026,20,3,eng,citation,Scholtens D,Kuang A;Yu Y;Siddique J;Scholtens D,Kuang A;Yu Y;Siddique J;Scholtens D,10.1177/19322968241308217,42063297,815,824,,,,,,,0,PUBMED,"Kuang A, 2026, J DIABETES SCI TECHNOL","Kuang A, 2026, J DIABETES SCI TECHNOL" +42038533,A Machine Learning Approach to Quantitative Analysis of Enamel Microstructure from Scanning Electron Microscopy Images.,Small structures,Small Struct,2025,6,5,eng,citation,Arola D,Marsico C;Renteria C;Grimm JR;Fernandez-Arteaga J;Guillen D;Arola D,Marsico C;Renteria C;Grimm JR;Fernandez-Arteaga J;Guillen D;Arola D,10.1002/sstr.202400510,42038533,,,,,,,,,0,PUBMED,"Marsico C, 2025, SMALL STRUCT","Marsico C, 2025, SMALL STRUCT" +41798400,Physics-informed data-driven discovery of constitutive models with application to strain-rate-sensitive soft materials.,Computational mechanics,Comput Mech,2026,77,2,eng,citation,Ramesh KT,Upadhyay K;Fuhg JN;Bouklas N;Ramesh KT,Upadhyay K;Fuhg JN;Bouklas N;Ramesh KT,10.1007/s00466-024-02497-x,41798400,357,386,,,,,,,0,PUBMED,"Upadhyay K, 2026, COMPUT MECH","Upadhyay K, 2026, COMPUT MECH" +41726542,Improving electronic health record processing of large language models via retrieval-augmented generation: A case study on dietary supplements.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Zhang R,Zhan Z;Zhou S;Deng J;Zhang R,Zhan Z;Zhou S;Deng J;Zhang R,,41726542,1511,1518,,,,,,,0,PUBMED,"Zhan Z, 2024, AMIA ANNU SYMP PROC","Zhan Z, 2024, AMIA ANNU SYMP PROC" +41726540,KERAP: A Knowledge-Enhanced Reasoning Approach for Accurate Zero-shot Diagnosis Prediction Using Multi-agent LLMs.,AMIA ... Annual Symposium proceedings. 
AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Yang C,Xie Y;Cui H;Zhang Z;Lu J;Shu K;Nahab F;Hu X;Yang C,Xie Y;Cui H;Zhang Z;Lu J;Shu K;Nahab F;Hu X;Yang C,,41726540,1394,1403,,,,,,,0,PUBMED,"Xie Y, 2024, AMIA ANNU SYMP PROC","Xie Y, 2024, AMIA ANNU SYMP PROC" +41726539,Automating Adjudication of Cardiovascular Events Using Large Language Models.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Jiang M,Sivarajkumar S;Ameri K;Li C;Wang Y;Jiang M,Sivarajkumar S;Ameri K;Li C;Wang Y;Jiang M,,41726539,1219,1228,,,,,,,0,PUBMED,"Sivarajkumar S, 2024, AMIA ANNU SYMP PROC","Sivarajkumar S, 2024, AMIA ANNU SYMP PROC" +41726535,Developing Large Language Model-based Pipeline for Identification of Disease Diagnosis: A Case Study on Identifying Newly Diagnosed Multiple Myeloma and its Precursor Disease in Veterans Health Administration Electronic Health Records.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Chang SH,Wang M;Kuan YH;Alba PR;Gan Q;Schoen MW;Thomas TS;Li JS;Chang SH,Wang M;Kuan YH;Alba PR;Gan Q;Schoen MW;Thomas TS;Li JS;Chang SH,,41726535,1325,1334,,,,,,,0,PUBMED,"Wang M, 2024, AMIA ANNU SYMP PROC","Wang M, 2024, AMIA ANNU SYMP PROC" +41726531,Addressing Generalizability in Clinical Named Entity Recognition: Federated Learning or Large Language Models?: A Case Study on Visual Acuity Extraction from US and UK Eye Institutes.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Wang SY,Nguyen QN;Wu H;Pontikos N;Wang SY,Nguyen QN;Wu H;Pontikos N;Wang SY,,41726531,949,958,,,,,,,0,PUBMED,"Nguyen Q, 2024, AMIA ANNU SYMP PROC","Nguyen Q, 2024, AMIA ANNU SYMP PROC" +41726530,Adaptive Constraint Relaxation in Personalized Nutrition Recommendations: An LLM-Driven Knowledge Graph Retrieval Approach.,AMIA ... Annual Symposium proceedings. 
AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Rahmani AM,Zhang P;Fnu M;Song Y;Seneviratne O;Yang Z;Azimi I;Rahmani AM,Zhang P;Fnu M;Song Y;Seneviratne O;Yang Z;Azimi I;Rahmani AM,,41726530,1529,1538,,,,,,,0,PUBMED,"Zhang P, 2024, AMIA ANNU SYMP PROC","Zhang P, 2024, AMIA ANNU SYMP PROC" +41726528,Temporal Harmonization: Improved Detection of Mild Cognitive Impairment from Temporal Language Markers using Subject-invariant Learning.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Zhou J,Hoang B;Liang S;Pang Y;Dodge H;Zhou J,Hoang B;Liang S;Pang Y;Dodge H;Zhou J,,41726528,461,470,,,,,,,0,PUBMED,"Hoang B, 2024, AMIA ANNU SYMP PROC","Hoang B, 2024, AMIA ANNU SYMP PROC" +41726525,CDR-Agent: Intelligent Selection and Execution of Clinical Decision Rules Using Large Language Model Agents.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Yu B,Xiang Z;Hsu AR;Zane AV;Kornblith AE;Lin-Martore MJ;Kaur JC;Dokiparthi VM;Li B;Yu B,Xiang Z;Hsu AR;Zane AV;Kornblith AE;Lin-Martore MJ;Kaur JC;Dokiparthi VM;Li B;Yu B,,41726525,1374,1383,,,,,,,0,PUBMED,"Xiang Z, 2024, AMIA ANNU SYMP PROC","Xiang Z, 2024, AMIA ANNU SYMP PROC" +41726521,Mining Social Media Data for Influenza Vaccine Effectiveness Using a Large Language Model and Chain-of-Thought Prompting.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Gonzalez-Hernandez G,Xu D;García GL;O'Connor K;Holston H;Klein AZ;Amaro IF;Scotch M;Gonzalez-Hernandez G,Xu D;García GL;O'Connor K;Holston H;Klein AZ;Amaro IF;Scotch M;Gonzalez-Hernandez G,,41726521,1404,1413,medRxiv. 2025 Mar 27:2025.03.26.25324701. doi: 10.1101/2025.03.26.25324701.,,,,,,0,PUBMED,"Xu D, 2024, AMIA ANNU SYMP PROC","Xu D, 2024, AMIA ANNU SYMP PROC" +41726520,Detecting Reference Errors in Scientific Literature with Large Language Models.,AMIA ... Annual Symposium proceedings. 
AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Abernethy NF,Zhang TM;Abernethy NF,Zhang TM;Abernethy NF,,41726520,1549,1556,,,,,,,0,PUBMED,"Zhang T, 2024, AMIA ANNU SYMP PROC","Zhang T, 2024, AMIA ANNU SYMP PROC" +41726519,Opportunistic Screening for Pancreatic Cancer using Computed Tomography Imaging and Radiology Reports.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Banerjee I,Le D;Correa-Medero R;Tariq A;Patel B;Yano M;Banerjee I,Le D;Correa-Medero R;Tariq A;Patel B;Yano M;Banerjee I,,41726519,663,672,ArXiv. 2025 Mar 31:arXiv:2504.00232v1.,,,,,,0,PUBMED,"Le D, 2024, AMIA ANNU SYMP PROC","Le D, 2024, AMIA ANNU SYMP PROC" +41726518,ProtoBERT-LoRA: Parameter-Efficient Prototypical Finetuning for Immunotherapy Study Identification.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Kharrazi H,Zhang S;Ding X;Ding K;Zhang J;Galinsky K;Wang M;Mayers RP;Wang Z;Kharrazi H,Zhang S;Ding X;Ding K;Zhang J;Galinsky K;Wang M;Mayers RP;Wang Z;Kharrazi H,,41726518,1539,1548,,,,,,,0,PUBMED,"Zhang S, 2024, AMIA ANNU SYMP PROC","Zhang S, 2024, AMIA ANNU SYMP PROC" +41726516,Enhancing Long-Term Care Efficiency: Embedded LLMs for Clinical Report Summarization and Caregiver Support.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Schumacher MI,Michelet A;Manzo G;Ritz A;Delgado P;Celi LA;Schumacher MI,Michelet A;Manzo G;Ritz A;Delgado P;Celi LA;Schumacher MI,,41726516,889,898,,,,,,,0,PUBMED,"Michelet A, 2024, AMIA ANNU SYMP PROC","Michelet A, 2024, AMIA ANNU SYMP PROC" +41726512,"Towards Interpretable, Sequential Multiple Instance Learning: An Application to Clinical Imaging.",AMIA ... Annual Symposium proceedings. 
AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Li ML,Luo X;Wang HS;Li ML,Luo X;Wang HS;Li ML,,41726512,804,813,,,,,,,0,PUBMED,"Luo X, 2024, AMIA ANNU SYMP PROC","Luo X, 2024, AMIA ANNU SYMP PROC" +41726506,Analyzing and Mitigating Model Drift in Acute Kidney Injury Prediction for Hospitalized Patients.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Liu M,Xu Z;Li D;Xu Q;Chan HY;Yu ASL;Liu M,Xu Z;Li D;Xu Q;Chan HY;Yu ASL;Liu M,,41726506,1414,1423,,,,,,,0,PUBMED,"Xu Z, 2024, AMIA ANNU SYMP PROC","Xu Z, 2024, AMIA ANNU SYMP PROC" +41726504,Identifying Missing IS-A Relations in SNOMED CT with Fine-Tuned Pre-trained Language Models and Non-lattice Subgraphs.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Cui L,Hao X;Abeysinghe R;Shi J;Zhang GQ;Cui L,Hao X;Abeysinghe R;Shi J;Zhang GQ;Cui L,,41726504,433,442,,,,,,,0,PUBMED,"Hao X, 2024, AMIA ANNU SYMP PROC","Hao X, 2024, AMIA ANNU SYMP PROC" +41726501,Predicting Early-Onset Colorectal Cancer with Large Language Models.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Nanduri J,Lau W;Kim Y;Parasa S;Haque ME;Oka A;Nanduri J,Lau W;Kim Y;Parasa S;Haque ME;Oka A;Nanduri J,,41726501,653,662,,,,,,,0,PUBMED,"Lau W, 2024, AMIA ANNU SYMP PROC","Lau W, 2024, AMIA ANNU SYMP PROC" +41726500,To what Degree can LLMs Support Medical Informatics Research? Examining the Interplay of Research Support LLMs with LLM Critics.,AMIA ... Annual Symposium proceedings. 
AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Geller J,Khatwani N;Wang L;Geller J,Khatwani N;Wang L;Geller J,,41726500,595,604,,,,,,,0,PUBMED,"Khatwani N, 2024, AMIA ANNU SYMP PROC","Khatwani N, 2024, AMIA ANNU SYMP PROC" +41726496,Large Language Model-Powered Conversational Agent Delivering Problem-Solving Therapy (PST) for Family Caregivers: Enhancing Empathy and Therapeutic Alliance Using In-Context Learning.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Yuwen W,Wang L;Carrington D;Filienko D;El Jazmi C;Xie SJ;De Cock M;Iribarren S;Yuwen W,Wang L;Carrington D;Filienko D;El Jazmi C;Xie SJ;De Cock M;Iribarren S;Yuwen W,,41726496,1315,1324,,,,,,,0,PUBMED,"Wang L, 2024, AMIA ANNU SYMP PROC","Wang L, 2024, AMIA ANNU SYMP PROC" +41726495,A Machine-Assisted Framework for Ontology Development and Standardization: Case Study in Digital Health Technologies.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Liu H,Chen F;Harrison TB;Fu S;He L;Yue Z;Lu S;Wang L;Ruan X;Liu H,Chen F;Harrison TB;Fu S;He L;Yue Z;Lu S;Wang L;Ruan X;Liu H,,41726495,238,247,,,,,,,0,PUBMED,"Chen F, 2024, AMIA ANNU SYMP PROC","Chen F, 2024, AMIA ANNU SYMP PROC" +41726493,Relation Extraction with Instance-Adapted Predicate Descriptions.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Kavuluru R,Jiang Y;Kavuluru R,Jiang Y;Kavuluru R,,41726493,546,555,,,,,,,0,PUBMED,"Jiang Y, 2024, AMIA ANNU SYMP PROC","Jiang Y, 2024, AMIA ANNU SYMP PROC" +41726492,Leveraging Large Language Models for Cancer Vaccine Adjuvant Name Extraction from Biomedical Literature.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Hur J,Rehana H;Zheng J;Yeh FY;Bansal B;Çam NB;Jemiyo C;McGregor B;Özgür A;He Y;Hur J,Rehana H;Zheng J;Yeh FY;Bansal B;Çam NB;Jemiyo C;McGregor B;Özgür A;He Y;Hur J,,41726492,1071,1080,ArXiv. 
2025 Feb 12:arXiv:2502.09659v1.,,,,,,0,PUBMED,"Rehana H, 2024, AMIA ANNU SYMP PROC","Rehana H, 2024, AMIA ANNU SYMP PROC" +41726491,An LLM-Powered Clinical Calculator Chatbot Backed by Verifiable Clinical Calculators and their Metadata.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Flynn AJ,Kumar N;Seifi F;Conte M;Flynn AJ,Kumar N;Seifi F;Conte M;Flynn AJ,,41726491,643,652,,,,,,,0,PUBMED,"Kumar N, 2024, AMIA ANNU SYMP PROC","Kumar N, 2024, AMIA ANNU SYMP PROC" +41726490,Leveraging multi-source data to resolve inconsistency across pharmacogenomic datasets in drug sensitivity prediction.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Zong N,Li X;Das T;Bhattarai K;Rajaganapathy S;Buchner VC;Wang Y;Su C;Sun L;Wang L;Cerhan JR;Zong N,Li X;Das T;Bhattarai K;Rajaganapathy S;Buchner VC;Wang Y;Su C;Sun L;Wang L;Cerhan JR;Zong N,,41726490,744,753,medRxiv. 2023 Jun 05:2023.05.25.23290546. doi: 10.1101/2023.05.25.23290546.,,,,,,0,PUBMED,"Li X, 2024, AMIA ANNU SYMP PROC","Li X, 2024, AMIA ANNU SYMP PROC" +41726484,"Automating Patient Safety Workflows: The Development and Implementation of LLaMPS, a Secure Large Language Model Application.",AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Ngufor C,Schaeferle GM;Zhou M;Patel S;Kuanar S;Lamers J;Abbas M;Devkaran S;Nienow J;Nagel JJ;Ramar K;Enayati M;Dowdy SC;Ngufor C,Schaeferle GM;Zhou M;Patel S;Kuanar S;Lamers J;Abbas M;Devkaran S;Nienow J;Nagel JJ;Ramar K;Enayati M;Dowdy SC;Ngufor C,,41726484,1140,1149,,,,,,,0,PUBMED,"Schaeferle G, 2024, AMIA ANNU SYMP PROC","Schaeferle G, 2024, AMIA ANNU SYMP PROC" +41726483,Facilitating Clinical Information Extraction with Synthetic Data and Ontology using Large Language Models.,AMIA ... Annual Symposium proceedings. 
AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Xu H,Hu Y;He H;Chen Q;Jiang X;Roberts K;Xu H,Hu Y;He H;Chen Q;Jiang X;Roberts K;Xu H,,41726483,500,505,,,,,,,0,PUBMED,"Hu Y, 2024, AMIA ANNU SYMP PROC","Hu Y, 2024, AMIA ANNU SYMP PROC" +41726481,Toward Integrating Machine Learning-powered Polysocial Risk Scores into Electronic Health Record Workflows.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Bian J,He X;Huang Y;Hu Y;Pappa M;Miller N;Gregory ME;Guo JS;Bian J,He X;Huang Y;Hu Y;Pappa M;Miller N;Gregory ME;Guo JS;Bian J,,41726481,451,460,,,,,,,0,PUBMED,"He X, 2024, AMIA ANNU SYMP PROC","He X, 2024, AMIA ANNU SYMP PROC" +41726480,RAG vs Reddit: Decoding Autism Conversations on Reddit with LLMs and Topic Modeling.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Shyu CR,Wattegama D;Black B;Moen M;Shyu CR,Wattegama D;Black B;Moen M;Shyu CR,,41726480,1345,1354,,,,,,,0,PUBMED,"Wattegama D, 2024, AMIA ANNU SYMP PROC","Wattegama D, 2024, AMIA ANNU SYMP PROC" +41726479,A Reinforcement Learning (RL)-Motivated Simulation Framework for Evaluating Vancomycin Dosing Strategies.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Zhi D,Mao B;Xie Z;Rasmy L;Nigo M;Zhi D,Mao B;Xie Z;Rasmy L;Nigo M;Zhi D,,41726479,834,843,,,,,,,0,PUBMED,"Mao B, 2024, AMIA ANNU SYMP PROC","Mao B, 2024, AMIA ANNU SYMP PROC" +41726477,Is Tree-of-Thought Prompting Strategy Better than Chain-of-Thought? Vaping Cessation Analysis Using Large Language Models.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Huang M,Aust L;Fu A;Huang M,Aust L;Fu A;Huang M,,41726477,167,176,,,,,,,0,PUBMED,"Aust L, 2024, AMIA ANNU SYMP PROC","Aust L, 2024, AMIA ANNU SYMP PROC" +41726476,Trustworthy and Uncertainty-Aware AI for Predicting Respiratory Complications Following Total Hip and Knee Arthroplasty.,AMIA ... 
Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Tafti AP,Rezvani F;Towsen K;Menezes Z;Einhorn A;Davis J;Gupta P;Plate JF;Fox C;Myers N;Tafti AP,Rezvani F;Towsen K;Menezes Z;Einhorn A;Davis J;Gupta P;Plate JF;Fox C;Myers N;Tafti AP,,41726476,1089,1099,,,,,,,0,PUBMED,"Rezvani F, 2024, AMIA ANNU SYMP PROC","Rezvani F, 2024, AMIA ANNU SYMP PROC" +41726475,Benchmarking Waitlist Mortality Prediction in Heart Transplantation Through Time-to-Event Modeling using New Longitudinal UNOS Dataset.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Padman R,Luo Y;Skandari R;Martinez C;Kilic A;Padman R,Luo Y;Skandari R;Martinez C;Kilic A;Padman R,,41726475,814,823,,,,,,,0,PUBMED,"Luo Y, 2024, AMIA ANNU SYMP PROC","Luo Y, 2024, AMIA ANNU SYMP PROC" +41726474,Cultural Prompting Improves the Empathy and Cultural Responsiveness of GPT-Generated Therapy Responses.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Yuwen W,Xie SJ;Zhai S;Liang Y;Li J;Fan X;Cohen T;Yuwen W,Xie SJ;Zhai S;Liang Y;Li J;Fan X;Cohen T;Yuwen W,,41726474,1384,1393,,,,,,,0,PUBMED,"Xie S, 2024, AMIA ANNU SYMP PROC","Xie S, 2024, AMIA ANNU SYMP PROC" +41726472,Contextual Phenotyping of Pediatric Sepsis Cohort Using Large Language Models.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Kamaleswaran R,Nagori A;Gautam A;Wiens MO;Nguyen V;Mugisha NK;Kabakyenga J;Kissoon N;Ansermino JM;Kamaleswaran R,Nagori A;Gautam A;Wiens MO;Nguyen V;Mugisha NK;Kabakyenga J;Kissoon N;Ansermino JM;Kamaleswaran R,,41726472,929,938,,,,,,,0,PUBMED,"Nagori A, 2024, AMIA ANNU SYMP PROC","Nagori A, 2024, AMIA ANNU SYMP PROC" +41726465,Data-Driven Evidence-Based Patient-Centered Optimal Initiation Time for Dialysis Treatment.,AMIA ... Annual Symposium proceedings. 
AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Hoffman J,Lee EK;Liu D;Hoffman J,Lee EK;Liu D;Hoffman J,,41726465,683,692,,,,,,,0,PUBMED,"Lee E, 2024, AMIA ANNU SYMP PROC","Lee E, 2024, AMIA ANNU SYMP PROC" +41726462,Cryptogenic Stroke and Migraine: Using Probabilistic Independence and Machine Learning to Uncover Latent Sources of Disease from the Electronic Health Record.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Lasko TA,Betts JW;Still JM;Lasko TA,Betts JW;Still JM;Lasko TA,,41726462,202,211,ArXiv. 2025 Jul 9:arXiv:2505.04631v2.,,,,,,0,PUBMED,"Betts J, 2024, AMIA ANNU SYMP PROC","Betts J, 2024, AMIA ANNU SYMP PROC" +41726458,Towards Inpatient Discharge Summary Automation via Large Language Models: A Multidimensional Evaluation with a HIPAA-Compliant Instance of GPT-4o and Clinical Expert Assessment.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Wong R,Osborne T;Abbasi S;Hong S;Sexton R;Ambut J;Patel NJ;Rosenthal RN;Ung L;Wang F;Wong R,Osborne T;Abbasi S;Hong S;Sexton R;Ambut J;Patel NJ;Rosenthal RN;Ung L;Wang F;Wong R,,41726458,959,968,,,,,,,0,PUBMED,"Osborne T, 2024, AMIA ANNU SYMP PROC","Osborne T, 2024, AMIA ANNU SYMP PROC" +41726453,FCFNets: A Factual and Counterfactual Learning Framework for Enhanced Hepatic Fibrosis Prediction in Young Adults with T2D.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Yin R,Yang Q;Sharma A;Calin D;de Crecy C;Inampudi R;Yin R,Yang Q;Sharma A;Calin D;de Crecy C;Inampudi R;Yin R,,41726453,1434,1443,,,,,,,0,PUBMED,"Yang Q, 2024, AMIA ANNU SYMP PROC","Yang Q, 2024, AMIA ANNU SYMP PROC" +41726452,Exploring the Implementation Experience and Use of CONCERN Early Warning System in a Rural Community Hospital: A Mixed Method Convergent Approach.,AMIA ... Annual Symposium proceedings. 
AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Dykes PC,Lee Y;Kang MJ;Baris VK;Lowenthal G;Rossetti SC;Cato KD;Lee RY;Kramer J;Huffam R;Dykes PC,Lee Y;Kang MJ;Baris VK;Lowenthal G;Rossetti SC;Cato KD;Lee RY;Kramer J;Huffam R;Dykes PC,,41726452,724,733,,,,,,,0,PUBMED,"Lee Y, 2024, AMIA ANNU SYMP PROC","Lee Y, 2024, AMIA ANNU SYMP PROC" +41726450,Predicting Chemotherapy-Related Symptom Deterioration Using Hybrid Deep Learning Architecture.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Mooney K,Finkelstein J;Smiley A;Echeverria C;Mooney K,Finkelstein J;Smiley A;Echeverria C;Mooney K,,41726450,362,368,,,,,,,0,PUBMED,"Finkelstein J, 2024, AMIA ANNU SYMP PROC","Finkelstein J, 2024, AMIA ANNU SYMP PROC" +41726449,Knowledge Engineering for Medical Vocabularies Using Large Language Models.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Hripcsak G,Chen HY;Ostropolets A;Weng C;Hripcsak G,Chen HY;Ostropolets A;Weng C;Hripcsak G,,41726449,248,256,,,,,,,0,PUBMED,"Chen H, 2024, AMIA ANNU SYMP PROC","Chen H, 2024, AMIA ANNU SYMP PROC" +41726448,Adjusting Covariate Misclassification in Electronic Health Records-Based Machine Learning Prediction Models.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Guo Y,Yang S;Wu Y;Liu M;Bian J;Liang M;Guo Y,Yang S;Wu Y;Liu M;Bian J;Liang M;Guo Y,,41726448,1444,1453,,,,,,,0,PUBMED,"Yang S, 2024, AMIA ANNU SYMP PROC","Yang S, 2024, AMIA ANNU SYMP PROC" +41726447,Explainable Suicide Phenotyping from Initial Psychiatric Evaluation Notes Using Reasoning Large Language Models.,AMIA ... Annual Symposium proceedings. 
AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Huang M,Li Z;Wang W;Shahani L;Vieira RM;Selek S;Soares J;Liu H;Huang M,Li Z;Wang W;Shahani L;Vieira RM;Selek S;Soares J;Liu H;Huang M,,41726447,774,784,,,,,,,0,PUBMED,"Li Z, 2024, AMIA ANNU SYMP PROC","Li Z, 2024, AMIA ANNU SYMP PROC" +41726444,Intimate Partner Homicide Among Women of Childbearing Age: Identifying Multilevel Risk Factors with Machine Learning.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Xiao R,Peddireddy S;Yan S;Kim S;Xiao R,Peddireddy S;Yan S;Kim S;Xiao R,,41726444,1003,1012,,,,,,,0,PUBMED,"Peddireddy S, 2024, AMIA ANNU SYMP PROC","Peddireddy S, 2024, AMIA ANNU SYMP PROC" +41726443,Time-series Machine Learning Models to Support Emergency Department Operational Planning.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Bhattacharya BS,Munia TTK;Marshall K;Kim K;Misra D;DeLong G;Durrani A;Manikowski J;Vawdrey DK;Bhattacharya BS,Munia TTK;Marshall K;Kim K;Misra D;DeLong G;Durrani A;Manikowski J;Vawdrey DK;Bhattacharya BS,,41726443,919,928,,,,,,,0,PUBMED,"Munia T, 2024, AMIA ANNU SYMP PROC","Munia T, 2024, AMIA ANNU SYMP PROC" +41726439,Machine Learning for Predicting Drug Release Behavior of PLGA Microspheres.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Yuan K,Catapano AF;Zheng L;Yuan X;Yuan K,Catapano AF;Zheng L;Yuan X;Yuan K,,41726439,212,217,,,,,,,0,PUBMED,"Catapano A, 2024, AMIA ANNU SYMP PROC","Catapano A, 2024, AMIA ANNU SYMP PROC" +41726437,No Black Box Anymore: Demystifying Clinical Predictive Modeling with Temporal-Feature Cross Attention Mechanism.,AMIA ... Annual Symposium proceedings. 
AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Padman R,Li Y;Yao X;Padman R,Li Y;Yao X;Padman R,,41726437,764,773,,,,,,,0,PUBMED,"Li Y, 2024, AMIA ANNU SYMP PROC","Li Y, 2024, AMIA ANNU SYMP PROC" +41726436,Implementation and Assessment of Machine Learning Models for Forecasting Suspected Opioid Overdoses in Emergency Medical Services Data.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Cody Bumgardner VK,Mullen AD;Harris DR;Rock P;Thompson K;Slavova S;Talbert J;Cody Bumgardner VK,Mullen AD;Harris DR;Rock P;Thompson K;Slavova S;Talbert J;Cody Bumgardner VK,,41726436,909,918,,,,,,,0,PUBMED,"Mullen A, 2024, AMIA ANNU SYMP PROC","Mullen A, 2024, AMIA ANNU SYMP PROC" +41726433,Humans and Large Language Models in Clinical Decision Support: A Study with Medical Calculators.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Lu Z,Wan NC;Jin Q;Chan J;Xiong G;Applebaum S;Gilson A;McMurry R;Andrew Taylor R;Zhang A;Chen Q;Lu Z,Wan NC;Jin Q;Chan J;Xiong G;Applebaum S;Gilson A;McMurry R;Andrew Taylor R;Zhang A;Chen Q;Lu Z,,41726433,1305,1314,ArXiv. 2025 Mar 21:arXiv:2411.05897v2.,,,,,,0,PUBMED,"Wan N, 2024, AMIA ANNU SYMP PROC","Wan N, 2024, AMIA ANNU SYMP PROC" +41726432,Detection of Youth Suicide Interventions in Clinical Record Text using an Open-Source Language Model.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Saha A,Edgcomb JB;Klomhaus A;Lee J;Ponce CG;Tascione E;Saha A,Edgcomb JB;Klomhaus A;Lee J;Ponce CG;Tascione E;Saha A,,41726432,352,361,,,,,,,0,PUBMED,"Edgcomb J, 2024, AMIA ANNU SYMP PROC","Edgcomb J, 2024, AMIA ANNU SYMP PROC" +41726431,Automating Lung-RADS Categorization And Follow-Up Recommendations Using In-Context Learning With Large Language Models.,AMIA ... Annual Symposium proceedings. 
AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Bian J,Zhou T;Chen A;Hu Y;Lou X;He X;Huang Y;Hochhegger B;Mehta H;Prosperi M;Guo Y;Bian J,Zhou T;Chen A;Hu Y;Lou X;He X;Huang Y;Hochhegger B;Mehta H;Prosperi M;Guo Y;Bian J,,41726431,1567,1576,,,,,,,0,PUBMED,"Zhou T, 2024, AMIA ANNU SYMP PROC","Zhou T, 2024, AMIA ANNU SYMP PROC" +41726430,Multi-Adversarial Debiasing in Clinical Artificial Intelligence.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Washington P,Zawad MRS;Chen IY;Washington P,Zawad MRS;Chen IY;Washington P,,41726430,1492,1501,,,,,,,0,PUBMED,"Zawad M, 2024, AMIA ANNU SYMP PROC","Zawad M, 2024, AMIA ANNU SYMP PROC" +41726429,Recommending Clinical Trials for Online Patient Cases using Artificial Intelligence.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Lu Z,Chan J;Jin Q;Wan N;Floudas CS;Xue E;Lu Z,Chan J;Jin Q;Wan N;Floudas CS;Xue E;Lu Z,,41726429,228,237,ArXiv. 2025 Apr 15:arXiv:2504.20059v1.,,,,,,0,PUBMED,"Chan J, 2024, AMIA ANNU SYMP PROC","Chan J, 2024, AMIA ANNU SYMP PROC" +41726427,Bias Evaluation and Mitigation in Retrieval-Augmented Medical Question-Answering Systems.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Wang Y,Ji Y;Zhang H;Wang Y,Ji Y;Zhang H;Wang Y,,41726427,526,535,,,,,,,0,PUBMED,"Ji Y, 2024, AMIA ANNU SYMP PROC","Ji Y, 2024, AMIA ANNU SYMP PROC" +41726424,Interpretable Machine Learning to Identify Risk Factors for Recidivism in Intimate Partner Violence.,AMIA ... Annual Symposium proceedings. 
AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Karakurt G,Ogğuztüzün Ç;Koyutürk M;Karakurt G,Ogğuztüzün Ç;Koyutürk M;Karakurt G,,41726424,1285,1294,,,,,,,0,PUBMED,"Ogğuztüzün Ç, 2024, AMIA ANNU SYMP PROC","Ogğuztüzün Ç, 2024, AMIA ANNU SYMP PROC" +41726422,"APEA: A Type 1 Diabetes Self-Management Ambient-AI Assistance Tool that Bridges Trajectory Prediction, Interactive Explanation, and Just-in-Time Adaptive Intervention Action.",AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Shyu CR,Chen KY;Tallon EM;Shyu CR,Chen KY;Tallon EM;Shyu CR,,41726422,257,266,,,,,,,0,PUBMED,"Chen K, 2024, AMIA ANNU SYMP PROC","Chen K, 2024, AMIA ANNU SYMP PROC" +41726421,A Framework for an Intelligent Social Engagement Support System: Identifying and Addressing Challenges at Multiple Levels to Reduce Health Disparities.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Stockwell I,Reynolds TL;Algrain H;de Leon L;Parker B;Stockwell I,Reynolds TL;Algrain H;de Leon L;Parker B;Stockwell I,,41726421,1081,1088,,,,,,,0,PUBMED,"Reynolds T, 2024, AMIA ANNU SYMP PROC","Reynolds T, 2024, AMIA ANNU SYMP PROC" +41726420,VaxKG: Integrating The Vaccine Ontology And VIOLIN For Advanced Vaccine Queries And LLM-Powered Chat Systems.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,He YO,Yeh FL;Asato M;Zheng J;He YO,Yeh FL;Asato M;Zheng J;He YO,,41726420,1464,1473,,,,,,,0,PUBMED,"Yeh F, 2024, AMIA ANNU SYMP PROC","Yeh F, 2024, AMIA ANNU SYMP PROC" +41726417,10-Year Risk Prediction of Higher-Grade AV Block in Patients with First-Degree AV Block.,AMIA ... Annual Symposium proceedings. 
AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Yoon D,Kim DW;Kwon H;Park JW;Park HN;Kwon OS;Han C;Kim Y;Yoon D,Kim DW;Kwon H;Park JW;Park HN;Kwon OS;Han C;Kim Y;Yoon D,,41726417,615,624,,,,,,,0,PUBMED,"Kim D, 2024, AMIA ANNU SYMP PROC","Kim D, 2024, AMIA ANNU SYMP PROC" +41726415,"Measuring Accuracy of ConsultBot, Hybrid AI Tool, in Interpreting Blood Gas Results.",AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Liu Q,Meka P;Silvers CT;Gunapati B;Liu Q,Meka P;Silvers CT;Gunapati B;Liu Q,,41726415,881,888,,,,,,,0,PUBMED,"Meka P, 2024, AMIA ANNU SYMP PROC","Meka P, 2024, AMIA ANNU SYMP PROC" +41726412,A Treatment Selection Model for Opioid Use Disorder Using Electronic Health Record and ZIP-Level Data.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Walsh CG,Tang LA;Kast KA;Walsh CG,Tang LA;Kast KA;Walsh CG,,41726412,1239,1248,,,,,,,0,PUBMED,"Tang L, 2024, AMIA ANNU SYMP PROC","Tang L, 2024, AMIA ANNU SYMP PROC" +41726411,Towards Safe AI Clinicians: A Comprehensive Study on Large Language Model Jailbreaking in Healthcare.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Wang Y,Zhang H;Lou Q;Wang Y,Zhang H;Lou Q;Wang Y,,41726411,1519,1528,,,,,,,0,PUBMED,"Zhang H, 2024, AMIA ANNU SYMP PROC","Zhang H, 2024, AMIA ANNU SYMP PROC" +41726410,Crowdsourcing-Based Knowledge Graph Construction for Drug Side Effects Using Large Language Models with an Application on Semaglutide.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Li L,Duan Z;Wei K;Xue Z;Zhou J;Yang S;Ma S;Jin J;Li L,Duan Z;Wei K;Xue Z;Zhou J;Yang S;Ma S;Jin J;Li L,,41726410,332,341,,,,,,,0,PUBMED,"Duan Z, 2024, AMIA ANNU SYMP PROC","Duan Z, 2024, AMIA ANNU SYMP PROC" +41726409,PHEONA: An Evaluation Framework for Large Language Model-based Approaches to Computational Phenotyping.,AMIA ... 
Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Subbian V,Pungitore SA;Yadav S;Subbian V,Pungitore SA;Yadav S;Subbian V,,41726409,1041,1050,,,,,,,0,PUBMED,"Pungitore S, 2024, AMIA ANNU SYMP PROC","Pungitore S, 2024, AMIA ANNU SYMP PROC" +41726407,HIBERT: A Hybrid Clustering BERT for Interpretable Opioid Overdose Risk Prediction.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Wang F,Ding Z;Dong X;Liu Y;Ma T;Zhao X;Wong R;Rosenthal RN;Wang F,Ding Z;Dong X;Liu Y;Ma T;Zhao X;Wong R;Rosenthal RN;Wang F,,41726407,303,312,,,,,,,0,PUBMED,"Ding Z, 2024, AMIA ANNU SYMP PROC","Ding Z, 2024, AMIA ANNU SYMP PROC" +41726406,Leveraging Large Language Models for Thyroid Nodule Information Extraction and Matching Across Medical Reports.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Speier W,Lee D;Amara D;Beon C;Swee S;Radhachandran A;Athreya S;Ivezic V;Arnold C;Speier W,Lee D;Amara D;Beon C;Swee S;Radhachandran A;Athreya S;Ivezic V;Arnold C;Speier W,,41726406,673,682,,,,,,,0,PUBMED,"Lee D, 2024, AMIA ANNU SYMP PROC","Lee D, 2024, AMIA ANNU SYMP PROC" +41726405,DGSurv: Dynamic Graph-Based Multimodal Learning for Interpretable Cancer Survival Prediction.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Liu Y,Shahabi S;Cui Z;Liu R;Carlson J;Liu Y,Shahabi S;Cui Z;Liu R;Carlson J;Liu Y,,41726405,1160,1169,,,,,,,0,PUBMED,"Shahabi S, 2024, AMIA ANNU SYMP PROC","Shahabi S, 2024, AMIA ANNU SYMP PROC" +41726403,AKI-Detector: A Multi-Agent Framework by Integrating Machine Learning and Large Language Models for Early Prediction of Acute Kidney Injury in ICU.,AMIA ... Annual Symposium proceedings. 
AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Kong G,Shi T;Xiao M;Xu H;Zhao H;Kong G,Shi T;Xiao M;Xu H;Zhao H;Kong G,,41726403,1190,1199,,,,,,,0,PUBMED,"Shi T, 2024, AMIA ANNU SYMP PROC","Shi T, 2024, AMIA ANNU SYMP PROC" +41726400,Survivorship Navigator: Personalized Survivorship Care Plan Generation using Large Language Models.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Sun J,Pradeepkumar J;Kumar SP;Reamer CB;Dreyer M;Patel J;Liebovitz D;Sun J,Pradeepkumar J;Kumar SP;Reamer CB;Dreyer M;Patel J;Liebovitz D;Sun J,,41726400,1031,1040,,,,,,,0,PUBMED,"Pradeepkumar J, 2024, AMIA ANNU SYMP PROC","Pradeepkumar J, 2024, AMIA ANNU SYMP PROC" +41726397,"A Multi-Phase Analysis of Blood Culture Stewardship: Machine Learning Prediction, Expert Recommendation Assessment, and LLM Automation.",AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Chen JH,Amrollahi F;Marshall N;Haredasht FN;Black KC;Zahedivash A;Maddali MV;Ma SP;Chang A;Deresinski SC;Goldstein MK;Asch SM;Banaei N;Chen JH,Amrollahi F;Marshall N;Haredasht FN;Black KC;Zahedivash A;Maddali MV;Ma SP;Chang A;Deresinski SC;Goldstein MK;Asch SM;Banaei N;Chen JH,,41726397,147,156,,,,,,,0,PUBMED,"Amrollahi F, 2024, AMIA ANNU SYMP PROC","Amrollahi F, 2024, AMIA ANNU SYMP PROC" +41726396,Machine Learning-Based Prediction of Antimicrobial Susceptibility: A Step Towards Precision Antimicrobial Stewardship.,AMIA ... Annual Symposium proceedings. 
AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Chen JH,Amrollahi F;Haredasht FN;Vansomphone A;Marshall N;Maddali MV;Ma SP;Chang A;Deresinski SC;Goldstein MK;Kanjilal S;Medford RJ;Cooper LN;Asch SM;Banaei N;Chen JH,Amrollahi F;Haredasht FN;Vansomphone A;Marshall N;Maddali MV;Ma SP;Chang A;Deresinski SC;Goldstein MK;Kanjilal S;Medford RJ;Cooper LN;Asch SM;Banaei N;Chen JH,,41726396,138,146,,,,,,,0,PUBMED,"Amrollahi F, 2024, AMIA ANNU SYMP PROC-a","Amrollahi F, 2024, AMIA ANNU SYMP PROC" +41726395,Beyond Random Splitting: Evaluating the Impact of Data Partitioning Strategies on Ventilator-Associated Pneumonia Prediction Using EHRs.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,C Ho J,Asare-Baiden M;Zhang W;Stover Hertzberg V;C Ho J,Asare-Baiden M;Zhang W;Stover Hertzberg V;C Ho J,,41726395,157,166,,,,,,,0,PUBMED,"Asare-Baiden M, 2024, AMIA ANNU SYMP PROC","Asare-Baiden M, 2024, AMIA ANNU SYMP PROC" +41726394,Failure Modes of Time Series Interpretability Algorithms for Critical Care Applications and Potential Solutions.,AMIA ... Annual Symposium proceedings. AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Subbian V,Yadav S;Subbian V,Yadav S;Subbian V,,41726394,1424,1433,,,,,,,0,PUBMED,"Yadav S, 2024, AMIA ANNU SYMP PROC","Yadav S, 2024, AMIA ANNU SYMP PROC" +41726393,Using LLMs to Interpret Arterial Blood Gases: Comparison of a Novel Math Scratchpad with Different Prompting Methods in a Three-Arm Trial.,AMIA ... Annual Symposium proceedings. 
AMIA Symposium,AMIA Annu Symp Proc,2024,2024,,eng,citation,Liu Q,Meka P;Silvers CT;Gunapati B;Liu Q,Meka P;Silvers CT;Gunapati B;Liu Q,,41726393,871,880,,,,,,,0,PUBMED,"Meka P, 2024, AMIA ANNU SYMP PROC-a","Meka P, 2024, AMIA ANNU SYMP PROC" +41660580,Deep video anomaly detection in automated laboratory setting.,Expert systems with applications,Expert Syst Appl,2025,271,,eng,citation,Xu M,Dabouei A;Shibu JP;Dalal V;Cao C;MacWilliams A;Kangas J;Xu M,Dabouei A;Shibu JP;Dalal V;Cao C;MacWilliams A;Kangas J;Xu M,10.1016/j.eswa.2025.126581,41660580,,,,,,,,,0,PUBMED,"Dabouei A, 2025, EXPERT SYST APPL","Dabouei A, 2025, EXPERT SYST APPL" +41631197,Multi-layer process control in selective laser melting: a reinforcement learning approach.,Journal of intelligent manufacturing,J Intell Manuf,2026,37,1,eng,citation,Panoutsos G,Vagenas S;Al-Saadi T;Panoutsos G,Vagenas S;Al-Saadi T;Panoutsos G,10.1007/s10845-024-02548-3,41631197,281,298,,,,,,,0,PUBMED,"Vagenas S, 2026, J INTELL MANUF","Vagenas S, 2026, J INTELL MANUF" +41584369,Epigenetic germline variants predict cancer prognosis and risk and distribute uniquely in topologically associating domains.,F1000Research,F1000Res,2023,12,,eng,citation,Carter H,Goudarzi S;Pagadala M;Klie A;Talwar JV;Carter H,Goudarzi S;Pagadala M;Klie A;Talwar JV;Carter H,10.12688/f1000research.139476.2,41584369,1083,,bioRxiv. 2023 Jul 07:2023.07.04.547722. 
doi: 10.1101/2023.07.04.547722.,,,,,,0,PUBMED,"Goudarzi S, 2023, F1000RES","Goudarzi S, 2023, F1000RES" +41462535,Frontal lobes dysfunction across clinical clusters of acute schizophrenia.,Spanish journal of psychiatry and mental health,Span J Psychiatry Ment Health,2025,18,4,eng,citation,Chekhonin VP,Corponi F;Zorkina Y;Stahl D;Murru A;Vieta E;Serretti A;Morozova А;Reznik A;Kostyuk G;Chekhonin VP,Corponi F;Zorkina Y;Stahl D;Murru A;Vieta E;Serretti A;Morozova А;Reznik A;Kostyuk G;Chekhonin VP,10.1016/j.rpsm.2021.12.002,41462535,234,240,,,,,,,0,PUBMED,"Corponi F, 2025, SPAN J PSYCHIATRY MENT HEALTH","Corponi F, 2025, SPAN J PSYCHIATRY MENT HEALTH" +41459404,Leveraging Machine Learning for Predicting Circadian Transcription in mRNAs and lncRNAs.,Proceedings. IEEE International Conference on Bioinformatics and Biomedicine,Proceedings (IEEE Int Conf Bioinformatics Biomed),2024,2024,,eng,citation,Kojima S,Miao L;Dhulipalla KV;Kundu S;Zheng B;Li S;Kojima S,Miao L;Dhulipalla KV;Kundu S;Zheng B;Li S;Kojima S,10.1109/BIBM62325.2024.10822684,41459404,6044,6048,,,,,,,0,PUBMED,"Miao L, 2024, PROCEEDINGS (IEEE INT CONF BIOINFORMATICS BIOMED)","Miao L, 2024, PROCEEDINGS (IEEE INT CONF BIOINFORMATICS BIOMED)" +41404518,Categorizing E-cigarette-related tweets using BERT topic modeling.,"Emerging trends in drugs, addictions, and health",Emerg Trends Drugs Addict Health,2024,4,,eng,citation,Wilkinson AV,Murthy D;Keshari S;Arora S;Yang Q;Loukas A;Schwartz SJ;Harrell MB;Hébert ET;Wilkinson AV,Murthy D;Keshari S;Arora S;Yang Q;Loukas A;Schwartz SJ;Harrell MB;Hébert ET;Wilkinson AV,10.1016/j.etdah.2024.100160,41404518,,,,,,,,,0,PUBMED,"Murthy D, 2024, EMERG TRENDS DRUGS ADDICT HEALTH","Murthy D, 2024, EMERG TRENDS DRUGS ADDICT HEALTH" +41367997,Wearable Signals for Diagnosing Attention-Deficit/Hyperactivity Disorder in Adolescents: A Feasibility Study.,JAACAP open,JAACAP Open,2025,3,4,eng,citation,Wong ICK,Jiang Z;Chan AYL;Lum D;Wong KHTW;Leung JCN;Ip P;Coghill D;Wong RS;Ngai ECH;Wong 
ICK,Jiang Z;Chan AYL;Lum D;Wong KHTW;Leung JCN;Ip P;Coghill D;Wong RS;Ngai ECH;Wong ICK,10.1016/j.jaacop.2024.11.003,41367997,875,889,,,,,,,0,PUBMED,"Jiang Z, 2025, JAACAP OPEN","Jiang Z, 2025, JAACAP OPEN" +41360591,Comparing deep learning models for tuberculosis detection: A retrospective study of digital vs. analog chest radiographs.,The Indian journal of tuberculosis,Indian J Tuberc,2025,72 Suppl 2,,eng,citation,Putha P,Chattoraj S;Reddy B;Tadepalli M;Putha P,Chattoraj S;Reddy B;Tadepalli M;Putha P,10.1016/j.ijtb.2024.05.008,41360591,S43,S46,,,,,,,0,PUBMED,"Chattoraj S, 2025, INDIAN J TUBERC","Chattoraj S, 2025, INDIAN J TUBERC" +41268011,Beyond algorithms: Ethical implications of AI in healthcare.,"Medical journal, Armed Forces India",Med J Armed Forces India,2025,81,6,eng,citation,Pathni RK,Pathni RK,Pathni RK,10.1016/j.mjafi.2024.10.014,41268011,630,636,,,,,,,0,PUBMED,"Pathni R, 2025, MED J ARMED FORCES INDIA","Pathni R, 2025, MED J ARMED FORCES INDIA" +41230249,"Predicting outcomes of smoking cessation interventions in novel scenarios using ontology-informed, interpretable machine learning.",Wellcome open research,Wellcome Open Res,2023,8,,eng,citation,Michie S,Hastings J;Glauer M;West R;Kleinau A;Thomas J;Wright AJ;Michie S,Hastings J;Glauer M;West R;Kleinau A;Thomas J;Wright AJ;Michie S,10.12688/wellcomeopenres.20012.2,41230249,503,,,,,,,,0,PUBMED,"Hastings J, 2023, WELLCOME OPEN RES","Hastings J, 2023, WELLCOME OPEN RES" +41212044,Bark frequency cepstral coefficient based sadness emotion level recognition system.,Computer methods in biomechanics and biomedical engineering,Comput Methods Biomech Biomed Engin,2025,28,15,eng,citation,Syauqy D,Prasetio BH;Lazzuardhy DA;Widasari ER;Syauqy D,Prasetio BH;Lazzuardhy DA;Widasari ER;Syauqy D,10.1080/10255842.2024.2366524,41212044,2290,2301,,,,,,,0,PUBMED,"Prasetio B, 2025, COMPUT METHODS BIOMECH BIOMED ENGIN","Prasetio B, 2025, COMPUT METHODS BIOMECH BIOMED ENGIN" +41200664,Advances in Leukemia detection and 
classification: A Systematic review of AI and image processing techniques.,F1000Research,F1000Res,2024,13,,eng,citation,Moutaouakkil F,Achir A;Debbarh I;Zoubir N;Battas I;Medromi H;Moutaouakkil F,Achir A;Debbarh I;Zoubir N;Battas I;Medromi H;Moutaouakkil F,10.12688/f1000research.159318.2,41200664,1536,,,,,,,,0,PUBMED,"Achir A, 2024, F1000RES","Achir A, 2024, F1000RES" +41200131,Task-Agnostic Machine-Learning-Assisted Inference.,Advances in neural information processing systems,Adv Neural Inf Process Syst,2024,2024,,eng,citation,Lu Q,Miao J;Lu Q,Miao J;Lu Q,10.52202/079017-3368,41200131,106162,106189,,,,,,,0,PUBMED,"Miao J, 2024, ADV NEURAL INF PROCESS SYST","Miao J, 2024, ADV NEURAL INF PROCESS SYST" +41140420,Radiomic Applications in Skull Base Pathology: A Systematic Review of Potential Clinical Uses.,"Journal of neurological surgery. Part B, Skull base",J Neurol Surg B Skull Base,2025,86,6,eng,citation,Karsy M,Tenhoeve SA;Lefler S;Brown J;Owens MR;Rawson C;Tabachnick DR;Shaik K;Karsy M,Tenhoeve SA;Lefler S;Brown J;Owens MR;Rawson C;Tabachnick DR;Shaik K;Karsy M,10.1055/a-2436-8444,41140420,673,687,,,,,,,0,PUBMED,"Tenhoeve S, 2025, J NEUROL SURG B SKULL BASE","Tenhoeve S, 2025, J NEUROL SURG B SKULL BASE" +41127551,LOCALIZING MOMENTS OF ACTIONS IN UNTRIMMED VIDEOS OF INFANTS WITH AUTISM SPECTRUM DISORDER.,Proceedings. 
International Conference on Image Processing,Proc Int Conf Image Proc,2024,2024,,eng,citation,Ozonoff S,Helvaci HI;Cheung SS;Chuah CN;Ozonoff S,Helvaci HI;Cheung SS;Chuah CN;Ozonoff S,10.1109/icip51287.2024.10648046,41127551,3841,3847,,,,,,,0,PUBMED,"Helvaci H, 2024, PROC INT CONF IMAGE PROC","Helvaci H, 2024, PROC INT CONF IMAGE PROC" +41122661,Infants Sucking Pattern Identification Using Machine-Learned Computational Modeling.,Journal of engineering and science in medical diagnostics and therapy,J Eng Sci Med Diagn Ther,2025,8,3,eng,citation,Hassanipour F,Olapojoye A;Singh A;Nishi E;Fei B;Nostratinia A;Hassanipour F,Olapojoye A;Singh A;Nishi E;Fei B;Nostratinia A;Hassanipour F,10.1115/1.4066459,41122661,,,,,,,,,0,PUBMED,"Olapojoye A, 2025, J ENG SCI MED DIAGN THER","Olapojoye A, 2025, J ENG SCI MED DIAGN THER" +41092369,Sparse dimensionality reduction for analyzing single-cell-resolved interactions.,"Bioinformatics (Oxford, England)",Bioinformatics,2024,5,1,eng,citation,Binder H,Brunn N;Hackenberg M;Fullio CL;Vogel T;Binder H,Brunn N;Hackenberg M;Fullio CL;Vogel T;Binder H,10.1093/bioadv/vbaf230,41092369,,,doi: 10.1093/bioadv/vbag055,,,,,,0,PUBMED,"Brunn N, 2024, BIOINFORMATICS","Brunn N, 2024, BIOINFORMATICS" +41084572,Identifying and Analyzing Factors Contributing to Blood Donation Reluctance among University Students: A Quantitative Approach.,Indian journal of hematology & blood transfusion : an official journal of Indian Society of Hematology and Blood Transfusion,Indian J Hematol Blood Transfus,2025,41,4,eng,citation,Mishra AD,Thusoo S;Mishra AD,Thusoo S;Mishra AD,10.1007/s12288-024-01923-7,41084572,908,915,,,,,,,0,PUBMED,"Thusoo S, 2025, INDIAN J HEMATOL BLOOD TRANSFUS","Thusoo S, 2025, INDIAN J HEMATOL BLOOD TRANSFUS" +41079146,Multi-agent Reinforcement Learning for the Control of Three-Dimensional Rayleigh-Bénard Convection.,"Flow, turbulence and combustion",Flow Turbul Combust,2025,115,3,eng,citation,Vinuesa R,Vasanth J;Rabault J;Alcántara-Ávila 
F;Mortensen M;Vinuesa R,Vasanth J;Rabault J;Alcántara-Ávila F;Mortensen M;Vinuesa R,10.1007/s10494-024-00619-2,41079146,1319,1355,,,,,,,0,PUBMED,"Vasanth J, 2025, FLOW TURBUL COMBUST","Vasanth J, 2025, FLOW TURBUL COMBUST" +41059255,MRAnnotator: multi-anatomy and many-sequence MRI segmentation of 44 structures.,Radiology advances,Radiol Adv,2025,2,1,eng,citation,Mei X,Zhou A;Liu Z;Tieu A;Patel N;Sun S;Yang A;Choi P;Lee HC;Tordjman M;Deyer L;Mei Y;Fauveau V;Soultanidis G;Taouli B;Huang M;Doshi A;Fayad ZA;Deyer T;Mei X,Zhou A;Liu Z;Tieu A;Patel N;Sun S;Yang A;Choi P;Lee HC;Tordjman M;Deyer L;Mei Y;Fauveau V;Soultanidis G;Taouli B;Huang M;Doshi A;Fayad ZA;Deyer T;Mei X,10.1093/radadv/umae035,41059255,umae035,,,,,,,,0,PUBMED,"Zhou A, 2025, RADIOL ADV","Zhou A, 2025, RADIOL ADV" +41048995,Role of Mixup in Topological Persistence Based Knowledge Distillation for Wearable Sensor Data.,IEEE sensors journal,IEEE Sens J,2025,25,3,eng,citation,Turaga P,Jeon ES;Choi H;Buman MP;Turaga P,Jeon ES;Choi H;Buman MP;Turaga P,10.1109/jsen.2024.3517653,41048995,5853,5865,,,,,,,0,PUBMED,"Jeon E, 2025, IEEE SENS J","Jeon E, 2025, IEEE SENS J" +41041032,"Artificial Intelligence in Clinical Diagnosis and Treatment of Dry Eye: Workflows, Effectiveness, and Evaluation.",Journal of current ophthalmology,J Curr Ophthalmol,2024,36,4,eng,citation,Xing Y,Lu M;Yang K;Deng X;Fan T;Zhang H;Yang W;Xing Y,Lu M;Yang K;Deng X;Fan T;Zhang H;Yang W;Xing Y,10.4103/joco.joco_172_24,41041032,315,324,,,,,,,0,PUBMED,"Lu M, 2024, J CURR OPHTHALMOL","Lu M, 2024, J CURR OPHTHALMOL" +41031031,Rapid prediction of thermodynamically destabilizing tyrosine phosphorylations in cancers.,bioRxiv : the preprint server for biology,bioRxiv,2024,,,eng,citation,Chandrasekaran S,Woodard J;Liu Z;Malemir Chegini A;Tian J;Bhowmick R;Pennathur S;Mashaghi A;Brender J;Chandrasekaran S,Woodard J;Liu Z;Malemir Chegini A;Tian J;Bhowmick R;Pennathur S;Mashaghi A;Brender J;Chandrasekaran S,10.1101/2024.09.26.614998,41031031,,,Cell Rep 
Methods. 2025 Sep 155(9):101169. doi: 10.1016/j.crmeth.2025.101169.,,,,,,0,PUBMED,"Woodard J, 2024, BIORXIV","Woodard J, 2024, BIORXIV" +41018878,Mayer-Homology Learning Prediction of Protein-Ligand Binding Affinities.,Journal of computational biophysics and chemistry,J Comput Biophys Chem,2025,24,2,eng,citation,Wei GW,Feng H;Shen L;Liu J;Wei GW,Feng H;Shen L;Liu J;Wei GW,10.1142/s2737416524500613,41018878,253,266,,,,,,,0,PUBMED,"Feng H, 2025, J COMPUT BIOPHYS CHEM","Feng H, 2025, J COMPUT BIOPHYS CHEM" +41018491,A Scoping Review of Methodological Approaches to Detect Bias in the Electronic Health Record.,Stigma and health,Stigma Health,2025,10,3,eng,citation,Hughto JMW,Kelly PJA;Snyder AM;Agénor M;Navalta CR;Misquith C;Rich JD;Hughto JMW,Kelly PJA;Snyder AM;Agénor M;Navalta CR;Misquith C;Rich JD;Hughto JMW,10.1037/sah0000497,41018491,393,405,,,,,,,0,PUBMED,"Kelly P, 2025, STIGMA HEALTH","Kelly P, 2025, STIGMA HEALTH" +41001083,Integrating post-event very high resolution SAR imagery and machine learning for building-level earthquake damage assessment.,Bulletin of earthquake engineering,Bull Earthq Eng,2025,23,12,eng,citation,Whitworth MRZ,Macchiarulo V;Giardina G;Milillo P;Aktas YD;Whitworth MRZ,Macchiarulo V;Giardina G;Milillo P;Aktas YD;Whitworth MRZ,10.1007/s10518-024-01877-1,41001083,5021,5047,,,,,,,0,PUBMED,"Macchiarulo V, 2025, BULL EARTHQ ENG","Macchiarulo V, 2025, BULL EARTHQ ENG" +41000274,Machine Learning Estimation of Myocardial Ischemia Severity Using Body Surface ECG.,Computing in cardiology,Comput Cardiol (2010),2024,51,,eng,citation,Tasdizen T,Jin R;Bergquist JA;Dade D;Zenger B;Ye X;Ranjan R;MacLeod RS;Steinberg BA;Tasdizen T,Jin R;Bergquist JA;Dade D;Zenger B;Ye X;Ranjan R;MacLeod RS;Steinberg BA;Tasdizen T,10.22489/cinc.2024.144,41000274,,,,,,,,,0,PUBMED,"Jin R, 2024, COMPUT CARDIOL (2010)","Jin R, 2024, COMPUT CARDIOL (2010)" +40995405,Persistent Laplacian-enhanced algorithm for scarcely labeled data classification.,Machine learning,Mach 
Learn,2024,113,10,eng,citation,Wei GW,Bhusal G;Merkurjev E;Wei GW,Bhusal G;Merkurjev E;Wei GW,10.1007/s10994-024-06616-w,40995405,7267,7292,,,,,,,0,PUBMED,"Bhusal G, 2024, MACH LEARN","Bhusal G, 2024, MACH LEARN" +40994707,Toward a framework for risk mitigation of potential misuse of artificial intelligence in biomedical research.,Nature machine intelligence,Nat Mach Intell,2024,6,12,eng,citation,Magnus D,Trotsyuk AA;Waeiss Q;Bhatia RT;Aponte BJ;Heffernan IML;Madgavkar D;Felder RM;Lehmann LS;Palmer MJ;Greely H;Wald R;Goetz L;Trengove M;Vandersluis R;Lin H;Cho MK;Altman RB;Endy D;Relman DA;Levi M;Satz D;Magnus D,Trotsyuk AA;Waeiss Q;Bhatia RT;Aponte BJ;Heffernan IML;Madgavkar D;Felder RM;Lehmann LS;Palmer MJ;Greely H;Wald R;Goetz L;Trengove M;Vandersluis R;Lin H;Cho MK;Altman RB;Endy D;Relman DA;Levi M;Satz D;Magnus D,10.1038/s42256-024-00926-3,40994707,1435,1442,,,,,,,0,PUBMED,"Trotsyuk A, 2024, NAT MACH INTELL","Trotsyuk A, 2024, NAT MACH INTELL" +40993936,Assessing heterogeneous causal effects across clusters in partially nested designs.,Psychological methods,Psychol Methods,2024,,,eng,citation,Liu X,Liu X,Liu X,10.1037/met0000723,40993936,,,,,,,,,0,PUBMED,"Liu X, 2024, PSYCHOL METHODS","Liu X, 2024, PSYCHOL METHODS" +40988722,Neural Networks or Linguistic Features? 
- Comparing Different Machine-Learning Approaches for Automated Assessment of Text Quality Traits Among L1- and L2-Learners' Argumentative Essays.,International journal of artificial intelligence in education,Int J Artif Intell Educ,2025,35,3,eng,citation,Horbach A,Lohmann JF;Junge F;Möller J;Fleckenstein J;Trüb R;Keller S;Jansen T;Horbach A,Lohmann JF;Junge F;Möller J;Fleckenstein J;Trüb R;Keller S;Jansen T;Horbach A,10.1007/s40593-024-00426-w,40988722,1178,1217,,,,,,,0,PUBMED,"Lohmann J, 2025, INT J ARTIF INTELL EDUC","Lohmann J, 2025, INT J ARTIF INTELL EDUC" +40980489,Identifying clinical feature clusters toward predicting stroke in patients with asymptomatic carotid stenosis.,International journal of data science and analytics,Int J Data Sci Anal,2025,20,3,eng,citation,Luo X,Xu D;Matinmehr S;Sawchuk A;Luo X,Xu D;Matinmehr S;Sawchuk A;Luo X,10.1007/s41060-024-00597-8,40980489,2511,2524,,,,,,,0,PUBMED,"Xu D, 2025, INT J DATA SCI ANAL","Xu D, 2025, INT J DATA SCI ANAL" +40979733,"Beyond the income-achievement gap: The role of individual, family, and environmental factors in cognitive resilience among low-income youth.",JCPP advances,JCPP Adv,2025,5,3,eng,citation,McLaughlin KA,Rakesh D;Sadikova E;McLaughlin KA,Rakesh D;Sadikova E;McLaughlin KA,10.1002/jcv2.12297,40979733,e12297,,,,,,,,0,PUBMED,"Rakesh D, 2025, JCPP ADV","Rakesh D, 2025, JCPP ADV" +40979399,Editorial: AI/ML in pharmacovigilance and pharmacoepidemiology.,Frontiers in drug safety and regulation,Front Drug Saf Regul,2024,4,,eng,citation,Gottlieb A,Zou W;Natsiavas P;Gottlieb A,Zou W;Natsiavas P;Gottlieb A,10.3389/fdsfr.2024.1517365,40979399,1517365,,Editorial on the Research Topic AI/ML in pharmacovigilance and pharmacoepidemiology,,,,,,0,PUBMED,"Zou W, 2024, FRONT DRUG SAF REGUL","Zou W, 2024, FRONT DRUG SAF REGUL" +40978518,Large language models in orthopedics: An exploratory research trend analysis and machine learning classification.,Journal of orthopaedics,J Orthop,2025,66,,eng,citation,Radice 
F,Velasquez Garcia A;Minami M;Mejia-Rodríguez M;Ortíz-Morales JR;Radice F,Velasquez Garcia A;Minami M;Mejia-Rodríguez M;Ortíz-Morales JR;Radice F,10.1016/j.jor.2024.12.039,40978518,110,118,,,,,,,0,PUBMED,"Velasquez Garcia A, 2025, J ORTHOP","Velasquez Garcia A, 2025, J ORTHOP" +40973412,Machine learning and molecular modeling based design of nanobodies targeting human serotonin transporter and receptor.,Advances in protein chemistry and structural biology,Adv Protein Chem Struct Biol,2025,147,,eng,citation,Xue W,Xu B;Liu J;Xue W,Xu B;Liu J;Xue W,10.1016/bs.apcsb.2024.12.004,40973412,535,558,,,,,,,0,PUBMED,"Xu B, 2025, ADV PROTEIN CHEM STRUCT BIOL","Xu B, 2025, ADV PROTEIN CHEM STRUCT BIOL" +40973396,How to accurately predict nanobody structure: Classical physics-based simulations or deep learning approaches.,Advances in protein chemistry and structural biology,Adv Protein Chem Struct Biol,2025,147,,eng,citation,Xue W,Yu H;Xu B;Zhan F;Xue W,Yu H;Xu B;Zhan F;Xue W,10.1016/bs.apcsb.2024.12.001,40973396,129,150,,,,,,,0,PUBMED,"Yu H, 2025, ADV PROTEIN CHEM STRUCT BIOL","Yu H, 2025, ADV PROTEIN CHEM STRUCT BIOL" +40970099,A Novel Hybrid Ordinal Learning Model with Health Care Application.,IEEE transactions on automation science and engineering : a publication of the IEEE Robotics and Automation Society,IEEE Trans Autom Sci Eng,2025,22,,eng,citation,Li J,Wang L;Wang H;Su Y;Lure F;Li J,Wang L;Wang H;Su Y;Lure F;Li J,10.1109/tase.2024.3350894,40970099,339,352,,,,,,,0,PUBMED,"Wang L, 2025, IEEE TRANS AUTOM SCI ENG","Wang L, 2025, IEEE TRANS AUTOM SCI ENG" +40964624,A Meta-Learner Framework to Estimate Individualized Treatment Effects for Survival Outcomes.,Journal of data science : JDS,J Data Sci,2024,22,4,eng,citation,Ding Y,Bo N;Wei Y;Zeng L;Kang C;Ding Y,Bo N;Wei Y;Zeng L;Kang C;Ding Y,10.6339/24-jds1119,40964624,505,523,,,,,,,0,PUBMED,"Bo N, 2024, J DATA SCI","Bo N, 2024, J DATA SCI" +40933079,Analyzing Multimodal Features of Spontaneous Voice Assistant Commands for Mild 
Cognitive Impairment Detection.,Interspeech,Interspeech,2024,2024,,eng,citation,Summerour C,Lin N;Zhu Y;Liang X;Batsis JA;Summerour C,Lin N;Zhu Y;Liang X;Batsis JA;Summerour C,10.21437/interspeech.2024-2288,40933079,3030,3034,,,,,,,0,PUBMED,"Lin N, 2024, INTERSPEECH","Lin N, 2024, INTERSPEECH" +40922766,Electronic Health Record-Integrated Legal Documentation to Measure Involuntary Mental Health Detention of Children.,JAACAP open,JAACAP Open,2025,3,3,eng,citation,Zima BT,Edgcomb JB;Tseng CH;Klomhaus AM;Seroussi A;Heldt JP;Ponce CG;Perez L;Lee JJ;Zima BT,Edgcomb JB;Tseng CH;Klomhaus AM;Seroussi A;Heldt JP;Ponce CG;Perez L;Lee JJ;Zima BT,10.1016/j.jaacop.2024.09.001,40922766,689,700,,,,,,,0,PUBMED,"Edgcomb J, 2025, JAACAP OPEN","Edgcomb J, 2025, JAACAP OPEN" +40919447,Trends in Smart Restaurant Research: Bibliometric Review and Research Agenda.,F1000Research,F1000Res,2024,13,,eng,citation,Benjumea-Arias M,Valencia-Arias A;Cardona-Acevedo S;Martínez Rojas E;Ramírez Dávila J;Rodriguez-Correa P;Palacios-Moya L;Teodori de la Puente R;Agudelo-Ceballos E;Benjumea-Arias M,Valencia-Arias A;Cardona-Acevedo S;Martínez Rojas E;Ramírez Dávila J;Rodriguez-Correa P;Palacios-Moya L;Teodori de la Puente R;Agudelo-Ceballos E;Benjumea-Arias M,10.12688/f1000research.158066.4,40919447,1505,,,,,,,,0,PUBMED,"Valencia-Arias A, 2024, F1000RES","Valencia-Arias A, 2024, F1000RES" +40917581,Enhancing Genetic Risk Prediction through Federated Semi-Supervised Transfer Learning with Inaccurate Electronic Health Record Data.,Statistics in biosciences,Stat Biosci,2024,,,eng,citation,Duan R,Lu Y;Gu T;Duan R,Lu Y;Gu T;Duan R,10.1007/s12561-024-09449-2,40917581,,,,,,,,,0,PUBMED,"Lu Y, 2024, STAT BIOSCI","Lu Y, 2024, STAT BIOSCI" +40894111,A physics-informed neural network approach for determining spatially varying arterial stiffness using ultrasound imaging: Finite Difference simulation and experimental plaque phantom validation.,Proceedings of the ... 
IEEE International Symposium on Applications of Ferroelectrics. IEEE International Symposium on Applications of Ferroelectrics,Proc IEEE Int Symp Appl Ferroelectr,2024,2024,,eng,citation,Konofagou EE,Roy T;Kemper P;Mobadersany N;Konofagou EE,Roy T;Kemper P;Mobadersany N;Konofagou EE,10.1109/uffc-js60046.2024.10794027,40894111,,,,,,,,,0,PUBMED,"Roy T, 2024, PROC IEEE INT SYMP APPL FERROELECTR","Roy T, 2024, PROC IEEE INT SYMP APPL FERROELECTR" +40893871,Knowledge-Informed Machine Learning for Cancer Diagnosis and Prognosis: A Review.,IEEE transactions on automation science and engineering : a publication of the IEEE Robotics and Automation Society,IEEE Trans Autom Sci Eng,2025,22,,eng,citation,Li J,Mao L;Wang H;Hu LS;Tran NL;Canoll PD;Swanson KR;Li J,Mao L;Wang H;Hu LS;Tran NL;Canoll PD;Swanson KR;Li J,10.1109/tase.2024.3515839,40893871,10008,10028,,,,,,,0,PUBMED,"Mao L, 2025, IEEE TRANS AUTOM SCI ENG","Mao L, 2025, IEEE TRANS AUTOM SCI ENG" +40893146,AI and mental health: evaluating supervised machine learning models trained on diagnostic classifications.,AI & society,AI Soc,2025,40,6,eng,citation,van Oosterzee A,van Oosterzee A,van Oosterzee A,10.1007/s00146-024-02012-z,40893146,5077,5086,,,,,,,0,PUBMED,"van Oosterzee A, 2025, AI SOC","van Oosterzee A, 2025, AI SOC" +40881607,Transcriptomic and Multi-scale Network Analyses Reveal Key Drivers of Cardiovascular Disease.,"IEEE transactions on molecular, biological, and multi-scale communications",IEEE Trans Mol Biol Multiscale Commun,2025,11,1,eng,citation,Bae Y,Tumenbayar BI;Pham K;Biber JC;Drewes R;Bae Y,Tumenbayar BI;Pham K;Biber JC;Drewes R;Bae Y,10.1109/tmbmc.2024.3501576,40881607,78,90,bioRxiv. 2024 Sep 16:2024.09.11.612437. 
doi: 10.1101/2024.09.11.612437.,,,,,,0,PUBMED,"Tumenbayar B, 2025, IEEE TRANS MOL BIOL MULTISCALE COMMUN","Tumenbayar B, 2025, IEEE TRANS MOL BIOL MULTISCALE COMMUN" +40881194,Application of machine learning techniques to profile smoking behavior of adolescent girls in Ghana.,Gates open research,Gates Open Res,2024,8,,eng,citation,Smith J,Flanagan SV;Vargas A;Smith J,Flanagan SV;Vargas A;Smith J,10.12688/gatesopenres.14991.2,40881194,2,,,,,,,,0,PUBMED,"Flanagan S, 2024, GATES OPEN RES","Flanagan S, 2024, GATES OPEN RES" +40873697,Registration by Regression (RbR): a framework for interpretable and flexible atlas registration.,"Biomedical image registration, ... proceedings. WBIR (Workshop : 2006- )",Biomed Image Regist Proc,2024,15249,,eng,citation,Iglesias JE,Gopinath K;Hu X;Hoffmann M;Puonti O;Iglesias JE,Gopinath K;Hu X;Hoffmann M;Puonti O;Iglesias JE,10.1007/978-3-031-73480-9_16,40873697,205,215,,,,,,,0,PUBMED,"Gopinath K, 2024, BIOMED IMAGE REGIST PROC","Gopinath K, 2024, BIOMED IMAGE REGIST PROC" +40857441,Ranking and Combining Latent Structured Predictive Scores without Labeled Data.,IISE transactions,IISE Trans,2024,,,eng,citation,Lin Y,Afshar S;Chen Y;Han S;Lin Y,Afshar S;Chen Y;Han S;Lin Y,10.1080/24725854.2024.2417258,40857441,,,,,,,,,0,PUBMED,"Afshar S, 2024, IISE TRANS","Afshar S, 2024, IISE TRANS" +40860259,Intelligence-led policing in the 21(st) Century: How increased mobility requires new paradigms of information sharing.,The police journal,Police J,2025,98,3,eng,citation,Kirby S,Phythian R;Kirby S,Phythian R;Kirby S,10.1177/0032258X241309479,40860259,601,617,,,,,,,0,PUBMED,"Phythian R, 2025, POLICE J","Phythian R, 2025, POLICE J" +40843036,Identifying the dynamics of interacting objects with applications to scene understanding and video temporal manipulation.,IFAC-PapersOnLine,IFAC Pap OnLine,2024,58,15,eng,citation,Sznaier M,Comas A;Fernandez C;Ghimire S;Li H;Camps O;Sznaier M,Comas A;Fernandez C;Ghimire S;Li H;Camps O;Sznaier 
M,10.1016/j.ifacol.2024.08.545,40843036,301,306,,,,,,,0,PUBMED,"Comas A, 2024, IFAC PAP ONLINE","Comas A, 2024, IFAC PAP ONLINE" +40841021,Machine learning unveils the impact of anthropogenic emission changes on urban PM(2.5) and O(3): A case study in Wuhu.,Journal of environmental sciences (China),J Environ Sci (China),2025,158,,eng,citation,Wu T,Xu H;Ruan Z;Fang H;Jia Q;Li F;Li J;Ye M;Wu T,Xu H;Ruan Z;Fang H;Jia Q;Li F;Li J;Ye M;Wu T,10.1016/j.jes.2024.10.028,40841021,395,404,,,,,,,0,PUBMED,"Xu H, 2025, J ENVIRON SCI (CHINA)","Xu H, 2025, J ENVIRON SCI (CHINA)" +40840996,An hourly and localized optimization method for soil fugitive dust emission inventory based on machine learning.,Journal of environmental sciences (China),J Environ Sci (China),2025,158,,eng,citation,Feng Y,Song L;Li Z;Zhang J;Li H;Wang C;Bi X;Dai Q;Feng Y,Song L;Li Z;Zhang J;Li H;Wang C;Bi X;Dai Q;Feng Y,10.1016/j.jes.2024.12.016,40840996,1,12,,,,,,,0,PUBMED,"Song L, 2025, J ENVIRON SCI (CHINA)","Song L, 2025, J ENVIRON SCI (CHINA)" +40838098,Artificial Intelligence and Machine Learning for Materials.,Current opinion in solid state & materials science,Curr Opin Solid State Mater Sci,2025,34,,eng,citation,Zheng Y,Zheng Y,Zheng Y,10.1016/j.cossms.2024.101202,40838098,,,,,,,,,0,PUBMED,"Zheng Y, 2025, CURR OPIN SOLID STATE MATER SCI","Zheng Y, 2025, CURR OPIN SOLID STATE MATER SCI" +40837245,Nested deep transfer learning for modeling of multilayer thin films.,Advanced photonics,Adv Photonics,2024,6,5,eng,citation,Zheng Y,Unni R;Yao K;Zheng Y,Unni R;Yao K;Zheng Y,10.1117/1.ap.6.5.056006,40837245,,,,,,,,,0,PUBMED,"Unni R, 2024, ADV PHOTONICS","Unni R, 2024, ADV PHOTONICS" +40837055,Wearable Single-Electrode Capacitive Sensor with Large Penetration Depth for Intelligent Deep Tissue and Hemorrhage Monitoring.,Advanced sensor research,Adv Sens Res,2025,4,2,eng,citation,Chung JH,Cheng YJ;Kim S;White N;Wang X;Ringgold K;Neidig L;Kwon Y;Chung JH,Cheng YJ;Kim S;White N;Wang X;Ringgold K;Neidig L;Kwon Y;Chung 
JH,10.1002/adsr.202400143,40837055,,,,,,,,,0,PUBMED,"Cheng Y, 2025, ADV SENS RES","Cheng Y, 2025, ADV SENS RES" +40837108,Preeclampsia prediction via machine learning: a systematic literature review.,"Health systems (Basingstoke, England)",Health Syst (Basingstoke),2025,14,3,eng,citation,Peker S,Özcan M;Peker S,Özcan M;Peker S,10.1080/20476965.2024.2435845,40837108,208,222,,,,,,,0,PUBMED,"Özcan M, 2025, HEALTH SYST (BASINGSTOKE)","Özcan M, 2025, HEALTH SYST (BASINGSTOKE)" +40832452,Use of Predictive Models to Determine Transplant Eligibility.,Current transplantation reports,Curr Transplant Rep,2024,11,4,eng,citation,McElroy LM,Berchuck SI;Bhavsar N;Schappe T;Zaribafzadeh H;Matsouaka R;McElroy LM,Berchuck SI;Bhavsar N;Schappe T;Zaribafzadeh H;Matsouaka R;McElroy LM,10.1007/s40472-024-00454-4,40832452,243,250,,,,,,,0,PUBMED,"Berchuck S, 2024, CURR TRANSPLANT REP","Berchuck S, 2024, CURR TRANSPLANT REP" +40823261,The integration of machine learning into traditional Chinese medicine.,Journal of pharmaceutical analysis,J Pharm Anal,2025,15,8,eng,citation,Xie T,Hong Y;Zhu S;Liu Y;Tian C;Xu H;Chen G;Tao L;Xie T,Hong Y;Zhu S;Liu Y;Tian C;Xu H;Chen G;Tao L;Xie T,10.1016/j.jpha.2024.101157,40823261,101157,,,,,,,,0,PUBMED,"Hong Y, 2025, J PHARM ANAL","Hong Y, 2025, J PHARM ANAL" +40822446,A Principled Framework to Assess the Information-Theoretic Fitness of Brain Functional Sub-Circuits.,"Mathematics (Basel, Switzerland)",Mathematics (Basel),2024,12,19,eng,citation,Goñi J,Duong-Tran D;Nguyen N;Mu S;Chen J;Bao J;Xu FH;Garai S;Cadena-Pico J;Kaplan AD;Chen T;Zhao Y;Shen L;Goñi J,Duong-Tran D;Nguyen N;Mu S;Chen J;Bao J;Xu FH;Garai S;Cadena-Pico J;Kaplan AD;Chen T;Zhao Y;Shen L;Goñi J,10.3390/math12192967,40822446,,,ArXiv. 
2024 Jul 23:arXiv:2406.18531v2.,,,,,,0,PUBMED,"Duong-Tran D, 2024, MATHEMATICS (BASEL)","Duong-Tran D, 2024, MATHEMATICS (BASEL)" +40809147,The rise of scientific machine learning: a perspective on combining mechanistic modelling with machine learning for systems biology.,Frontiers in systems biology,Front Syst Biol,2024,4,,eng,citation,Smith RW,Noordijk B;Garcia Gomez ML;Ten Tusscher KHWJ;de Ridder D;van Dijk ADJ;Smith RW,Noordijk B;Garcia Gomez ML;Ten Tusscher KHWJ;de Ridder D;van Dijk ADJ;Smith RW,10.3389/fsysb.2024.1407994,40809147,1407994,,,,,,,,0,PUBMED,"Noordijk B, 2024, FRONT SYST BIOL","Noordijk B, 2024, FRONT SYST BIOL" +40809131,Building virtual patients using simulation-based inference.,Frontiers in systems biology,Front Syst Biol,2024,4,,eng,citation,Rehberg M,Paul N;Karamitsou V;Giegerich C;Sadeghi A;Lücke M;Wagenhuber B;Kister A;Rehberg M,Paul N;Karamitsou V;Giegerich C;Sadeghi A;Lücke M;Wagenhuber B;Kister A;Rehberg M,10.3389/fsysb.2024.1444912,40809131,1444912,,,,,,,,0,PUBMED,"Paul N, 2024, FRONT SYST BIOL","Paul N, 2024, FRONT SYST BIOL" +40800522,Challenges in multi-task learning for fMRI-based diagnosis: Benefits for psychiatric conditions and CNVs would likely require thousands of patients.,"Imaging neuroscience (Cambridge, Mass.)",Imaging Neurosci (Camb),2024,2,,eng,citation,Bellec P,Harvey A;Moreau CA;Kumar K;Huguet G;Urchs SGW;Sharmarke H;Jizi K;Martin CO;Younis N;Tamer P;Martineau JL;Orban P;Silva AI;Hall J;van den Bree MBM;Owen MJ;Linden DEJ;Lippé S;Bearden CE;Dumas G;Jacquemont S;Bellec P,Harvey A;Moreau CA;Kumar K;Huguet G;Urchs SGW;Sharmarke H;Jizi K;Martin CO;Younis N;Tamer P;Martineau JL;Orban P;Silva AI;Hall J;van den Bree MBM;Owen MJ;Linden DEJ;Lippé S;Bearden CE;Dumas G;Jacquemont S;Bellec P,10.1162/imag_a_00222,40800522,,,,,,,,,0,PUBMED,"Harvey A, 2024, IMAGING NEUROSCI (CAMB)","Harvey A, 2024, IMAGING NEUROSCI (CAMB)" +40800405,A fronto-insular network underlies individual variations in anger expression and control.,"Imaging 
neuroscience (Cambridge, Mass.)",Imaging Neurosci (Camb),2024,2,,eng,citation,Messina I,Grecucci A;Graci F;Munari E;Yi X;Salvato G;Messina I,Grecucci A;Graci F;Munari E;Yi X;Salvato G;Messina I,10.1162/imag_a_00348,40800405,,,,,,,,,0,PUBMED,"Grecucci A, 2024, IMAGING NEUROSCI (CAMB)","Grecucci A, 2024, IMAGING NEUROSCI (CAMB)" +40800368,Assessing the consistency and sensitivity of the neural correlates of narrative stimuli using functional near-infrared spectroscopy.,"Imaging neuroscience (Cambridge, Mass.)",Imaging Neurosci (Camb),2024,2,,eng,citation,Owen AM,Kolisnyk M;Novi S;Abdalmalak A;Ardakani RM;Kazazian K;Laforge G;Debicki DB;Owen AM,Kolisnyk M;Novi S;Abdalmalak A;Ardakani RM;Kazazian K;Laforge G;Debicki DB;Owen AM,10.1162/imag_a_00331,40800368,,,,,,,,,0,PUBMED,"Kolisnyk M, 2024, IMAGING NEUROSCI (CAMB)","Kolisnyk M, 2024, IMAGING NEUROSCI (CAMB)" +40800355,BrainQCNet: A Deep Learning attention-based model for the automated detection of artifacts in brain structural MRI scans.,"Imaging neuroscience (Cambridge, Mass.)",Imaging Neurosci (Camb),2024,2,,eng,citation,Kelly C,Garcia M;Dosenbach N;Kelly C,Garcia M;Dosenbach N;Kelly C,10.1162/imag_a_00300,40800355,,,,,,,,,0,PUBMED,"Garcia M, 2024, IMAGING NEUROSCI (CAMB)","Garcia M, 2024, IMAGING NEUROSCI (CAMB)" +40800326,Cross-modal decoding of emotional expressions in fMRI-Cross-session and cross-sample replication.,"Imaging neuroscience (Cambridge, Mass.)",Imaging Neurosci (Camb),2024,2,,eng,citation,Mier D,Wallenwein LA;Schmidt SNL;Hass J;Mier D,Wallenwein LA;Schmidt SNL;Hass J;Mier D,10.1162/imag_a_00289,40800326,,,,,,,,,0,PUBMED,"Wallenwein L, 2024, IMAGING NEUROSCI (CAMB)","Wallenwein L, 2024, IMAGING NEUROSCI (CAMB)" +40800257,Translating phenotypic prediction models from big to small anatomical MRI data using meta-matching.,"Imaging neuroscience (Cambridge, Mass.)",Imaging Neurosci (Camb),2024,2,,eng,citation,Yeo BTT,Wulan N;An L;Zhang C;Kong R;Chen P;Bzdok D;Eickhoff SB;Holmes AJ;Yeo BTT,Wulan N;An 
L;Zhang C;Kong R;Chen P;Bzdok D;Eickhoff SB;Holmes AJ;Yeo BTT,10.1162/imag_a_00251,40800257,,,bioRxiv. 2024 Jan 02:2023.12.31.573801. doi: 10.1101/2023.12.31.573801.,,,,,,0,PUBMED,"Wulan N, 2024, IMAGING NEUROSCI (CAMB)","Wulan N, 2024, IMAGING NEUROSCI (CAMB)" +40787210,Robust causal inference for point exposures with missing confounders.,The Canadian journal of statistics = Revue canadienne de statistique,Can J Stat,2025,53,2,eng,citation,Haneuse S,Levis AW;Mukherjee R;Wang R;Haneuse S,Levis AW;Mukherjee R;Wang R;Haneuse S,10.1002/cjs.11832,40787210,,,,,,,,,0,PUBMED,"Levis A, 2025, CAN J STAT","Levis A, 2025, CAN J STAT" +40786642,An ontological framework for organising and describing behaviours: The Human Behaviour Ontology.,Wellcome open research,Wellcome Open Res,2024,9,,eng,citation,Michie S,Schenk PM;West R;Castro O;Hayes E;Hastings J;Johnston M;Marques MM;Corker E;Wright AJ;Stuart G;Zhang L;Santilli M;Michie S,Schenk PM;West R;Castro O;Hayes E;Hastings J;Johnston M;Marques MM;Corker E;Wright AJ;Stuart G;Zhang L;Santilli M;Michie S,10.12688/wellcomeopenres.21252.2,40786642,237,,,,,,,,0,PUBMED,"Schenk P, 2024, WELLCOME OPEN RES","Schenk P, 2024, WELLCOME OPEN RES" +40786641,Systems Policy Analysis for Antimicrobial Resistance Targeted Action (SPAARTA): A Research Protocol.,Wellcome open research,Wellcome Open Res,2024,9,,eng,citation,Atun R,Ahmad R;Zhu N;Jain R;Joshi J;Mpundu M;Gutierrez PA;Holmes A;Weyde T;Atun R,Ahmad R;Zhu N;Jain R;Joshi J;Mpundu M;Gutierrez PA;Holmes A;Weyde T;Atun R,10.12688/wellcomeopenres.22923.2,40786641,700,,,,,,,,0,PUBMED,"Ahmad R, 2024, WELLCOME OPEN RES","Ahmad R, 2024, WELLCOME OPEN RES" +40786616,FaceFinder: A machine learning tool for identification of facial images from heterogenous datasets.,AJO international,AJO Int,2024,1,4,eng,citation,Tran AQ,Nahass GR;Peterson JC;Heinze K;Choudhary A;Khandwala N;Purnell CA;Setabutr P;Tran AQ,Nahass GR;Peterson JC;Heinze K;Choudhary A;Khandwala N;Purnell CA;Setabutr P;Tran 
AQ,10.1016/j.ajoint.2024.100083,40786616,,,,,,,,,0,PUBMED,"Nahass G, 2024, AJO INT","Nahass G, 2024, AJO INT" +40786095,Deep learning neural network development for the classification of bacteriocin sequences produced by lactic acid bacteria.,F1000Research,F1000Res,2024,13,,eng,citation,Cruz-Varela J,González LL;Arias-Serrano I;Villalba-Meneses F;Navas-Boada P;Cruz-Varela J,González LL;Arias-Serrano I;Villalba-Meneses F;Navas-Boada P;Cruz-Varela J,10.12688/f1000research.154432.2,40786095,981,,,,,,,,0,PUBMED,"González L, 2024, F1000RES","González L, 2024, F1000RES" +40783914,[Immunological mechanism of non-obstructive azoospermia: An exploration based on bioinformatics and machine learning].,Zhonghua nan ke xue = National journal of andrology,Zhonghua Nan Ke Xue,2024,30,12,chi,citation,Yan QX,Huang SQ;Li ZH;Tan CY;Chen MQ;Yuan XJ;Chen WR;Yang LY;Feng XN;Chen CR;Yan QX,Huang SQ;Li ZH;Tan CY;Chen MQ;Yuan XJ;Chen WR;Yang LY;Feng XN;Chen CR;Yan QX,,40783914,1059,1067,,,,,,,0,PUBMED,"Huang S, 2024, ZHONGHUA NAN KE XUE","Huang S, 2024, ZHONGHUA NAN KE XUE" +40779305,Ethical Challenges to the Adoption of AI in Healthcare: A Review.,The New bioethics : a multidisciplinary journal of biotechnology and the body,New Bioeth,2024,30,4,eng,citation,Pruski M,Pruski M,Pruski M,10.1080/20502877.2025.2541438,40779305,251,267,,,,,,,0,PUBMED,"Pruski M, 2024, NEW BIOETH","Pruski M, 2024, NEW BIOETH" +40778194,Integration of Nanoengineering with Artificial Intelligence and Machine Learning in Surface-Enhanced Raman Spectroscopy (SERS) for the Development of Advanced Biosensing Platforms.,Advanced sensor research,Adv Sens Res,2025,4,2,eng,citation,Dellinger K,Ebrahimi F;Kumari A;Dellinger K,Ebrahimi F;Kumari A;Dellinger K,10.1002/adsr.202400155,40778194,,,,,,,,,0,PUBMED,"Ebrahimi F, 2025, ADV SENS RES","Ebrahimi F, 2025, ADV SENS RES" +40777999,"Ultra low-power, wearable, accelerated shallow-learning fall detection for elderly at-risk persons.","Smart health (Amsterdam, 
Netherlands)",Smart Health (Amst),2024,33,,eng,citation,Paolini C,Tian J;Mercier P;Paolini C,Tian J;Mercier P;Paolini C,10.1016/j.smhl.2024.100498,40777999,,,,,,,,,0,PUBMED,"Tian J, 2024, SMART HEALTH (AMST)","Tian J, 2024, SMART HEALTH (AMST)" +40771879,Evaluating Machine Learning for Predicting Youth Suicidal Behavior Up to 1 Year After Contact With Mental-Health Specialty Care.,Clinical psychological science : a journal of the Association for Psychological Science,Clin Psychol Sci,2025,13,3,eng,citation,D'Onofrio BM,O'Reilly LM;Fazel S;Rickert ME;Kuja-Halkola R;Cederlof M;Hellner C;Larsson H;Lichtenstein P;D'Onofrio BM,O'Reilly LM;Fazel S;Rickert ME;Kuja-Halkola R;Cederlof M;Hellner C;Larsson H;Lichtenstein P;D'Onofrio BM,10.1177/21677026241301298,40771879,614,631,,,,,,,0,PUBMED,"O'Reilly L, 2025, CLIN PSYCHOL SCI","O'Reilly L, 2025, CLIN PSYCHOL SCI" +40771425,DeepQR: single-molecule QR codes for optical gene-expression analysis.,"Nanophotonics (Berlin, Germany)",Nanophotonics,2025,14,15,eng,citation,Ebenstein Y,Jeffet J;Hadad B;Froim S;Kaboub K;Rabinowitz KM;Deek J;Margalit S;Dotan I;Bahabad A;Ebenstein Y,Jeffet J;Hadad B;Froim S;Kaboub K;Rabinowitz KM;Deek J;Margalit S;Dotan I;Bahabad A;Ebenstein Y,10.1515/nanoph-2024-0236,40771425,2549,2561,,,,,,,0,PUBMED,"Jeffet J, 2025, NANOPHOTONICS","Jeffet J, 2025, NANOPHOTONICS" +40766768,[Pulmonary Digital Twins].,Open respiratory archives,Open Respir Arch,2024,6,Suppl 2,spa,citation,Eguzkitza B,Fernández-Tena A;Arnedo C;Houzeaux G;Eguzkitza B,Fernández-Tena A;Arnedo C;Houzeaux G;Eguzkitza B,10.1016/j.opresp.2024.100394,40766768,100394,,,,,,,,0,PUBMED,"Fernández-Tena A, 2024, OPEN RESPIR ARCH","Fernández-Tena A, 2024, OPEN RESPIR ARCH" +40766175,"Estimating Higher-Order Mixed Memberships via the ℓ (2,∞) Tensor Perturbation Bound.",Journal of the American Statistical Association,J Am Stat Assoc,2025,120,,eng,citation,Zhang AR,Agterberg J;Zhang AR,Agterberg J;Zhang 
AR,10.1080/01621459.2024.2404265,40766175,1214,1224,,,,,,,0,PUBMED,"Agterberg J, 2025, J AM STAT ASSOC","Agterberg J, 2025, J AM STAT ASSOC" +40765626,Meta-Learning for Fast Adaptation in Intent Inferral on a Robotic Hand Orthosis for Stroke.,Proceedings of the ... IEEE/RSJ International Conference on Intelligent Robots and Systems. IEEE/RSJ International Conference on Intelligent Robots and Systems,Rep U S,2024,2024,,eng,citation,Ciocarlie M,Leandro La Rotta P;Xu J;Chen A;Winterbottom L;Chen W;Nilsen D;Stein J;Ciocarlie M,Leandro La Rotta P;Xu J;Chen A;Winterbottom L;Chen W;Nilsen D;Stein J;Ciocarlie M,10.1109/iros58592.2024.10801596,40765626,4693,4700,,,,,,,0,PUBMED,"Leandro La Rotta P, 2024, REP U S","Leandro La Rotta P, 2024, REP U S" +40761553,SRBench++ : principled benchmarking of symbolic regression with domain-expert interpretation.,IEEE transactions on evolutionary computation : a publication of the IEEE Neural Networks Council,IEEE Trans Evol Comput,2025,29,4,eng,citation,La Cava WG,de Franca FO;Virgolin M;Kommenda M;Majumder MS;Cranmer M;Espada G;Ingelse L;Fonseca A;Landajuela M;Petersen B;Glatt R;Mundhenk N;Lee CS;Hochhalter JD;Randall DL;Kamienny P;Zhang H;Dick G;Simon A;Burlacu B;Kasak J;Machado M;Wilstrup C;La Cava WG,de Franca FO;Virgolin M;Kommenda M;Majumder MS;Cranmer M;Espada G;Ingelse L;Fonseca A;Landajuela M;Petersen B;Glatt R;Mundhenk N;Lee CS;Hochhalter JD;Randall DL;Kamienny P;Zhang H;Dick G;Simon A;Burlacu B;Kasak J;Machado M;Wilstrup C;La Cava WG,10.1109/tevc.2024.3423681,40761553,1127,1134,,,,,,,0,PUBMED,"de Franca F, 2025, IEEE TRANS EVOL COMPUT","de Franca F, 2025, IEEE TRANS EVOL COMPUT" +40757725,Risk Factors for Acute Kidney Injury in Patients Undergoing Total Joint Arthroplasty.,Reports (MDPI),Reports (MDPI),2024,7,4,eng,citation,Hayriye Kocaoglu M,Kılıc HN;Cakar Turhan KS;Karadag Erkoc S;Hayriye Kocaoglu M,Kılıc HN;Cakar Turhan KS;Karadag Erkoc S;Hayriye Kocaoglu M,10.3390/reports7040088,40757725,,,,,,,,,0,PUBMED,"Kılıc H, 2024, 
REPORTS (MDPI)","Kılıc H, 2024, REPORTS (MDPI)" +40757598,Hip Muscle Strength Ratios Predicting Groin Injury in Male Soccer Players Using Machine Learning and Multivariate Analysis-A Prospective Cohort Study.,"Muscles (Basel, Switzerland)",Muscles,2024,3,3,eng,citation,Kellis E,Kekelekis A;Musa RM;Nikolaidis PT;Clemente FM;Kellis E,Kekelekis A;Musa RM;Nikolaidis PT;Clemente FM;Kellis E,10.3390/muscles3030026,40757598,297,309,,,,,,,0,PUBMED,"Kekelekis A, 2024, MUSCLES","Kekelekis A, 2024, MUSCLES" +40757183,Weakly-Supervised Transfer Learning with Application in Precision Medicine.,IEEE transactions on automation science and engineering : a publication of the IEEE Robotics and Automation Society,IEEE Trans Autom Sci Eng,2024,21,4,eng,citation,Li J,Mao L;Wang L;Hu LS;Eschbacher JM;Leon G;Singleton KW;Curtin LA;Urcuyo J;Sereduk C;Tran NL;Hawkins-Daarud A;Swanson KR;Li J,Mao L;Wang L;Hu LS;Eschbacher JM;Leon G;Singleton KW;Curtin LA;Urcuyo J;Sereduk C;Tran NL;Hawkins-Daarud A;Swanson KR;Li J,10.1109/tase.2023.3323773,40757183,6250,6264,,,,,,,0,PUBMED,"Mao L, 2024, IEEE TRANS AUTOM SCI ENG","Mao L, 2024, IEEE TRANS AUTOM SCI ENG" +40757142,Preparing Wearable Data for AI-Powered Mood and Compliance Prediction in HCT Patients and Caregivers.,Proceedings : ... IEEE International Conference on Big Data. 
IEEE International Conference on Big Data,Proc IEEE Int Conf Big Data,2024,2024,,eng,citation,Choi SW,Ziegenbein CB Jr;Ortiz BL;Gupta V;Choi SW,Ziegenbein CB Jr;Ortiz BL;Gupta V;Choi SW,10.1109/bigdata62323.2024.10825132,40757142,4996,5005,,,,,,,0,PUBMED,"Jr Z, 2024, PROC IEEE INT CONF BIG DATA","Jr Z, 2024, PROC IEEE INT CONF BIG DATA" +40756674,"mRNA medicine: Recent progresses in chemical modification, design, and engineering.",Nano research,Nano Res,2024,17,10,eng,citation,Xiao Y,Hou X;Shi J;Xiao Y,Hou X;Shi J;Xiao Y,10.1007/s12274-024-6978-6,40756674,9015,9030,,,,,,,0,PUBMED,"Hou X, 2024, NANO RES","Hou X, 2024, NANO RES" +40756565,Int-HRL: towards intention-based hierarchical reinforcement learning.,Neural computing & applications,Neural Comput Appl,2025,37,23,eng,citation,Bulling A,Penzkofer A;Schaefer S;Strohm F;Bâce M;Leutenegger S;Bulling A,Penzkofer A;Schaefer S;Strohm F;Bâce M;Leutenegger S;Bulling A,10.1007/s00521-024-10596-2,40756565,18823,18834,,,,,,,0,PUBMED,"Penzkofer A, 2025, NEURAL COMPUT APPL","Penzkofer A, 2025, NEURAL COMPUT APPL" +40740289,Leveraging long short term memory in air pollution prediction in Nairobi.,International journal of statistics and applied mathematics,Int J Stat Appl Math,2024,9,5 Pt B,eng,citation,Mwaniki JI,Masinde AW;Mwaniki PM;Mwaniki JI,Masinde AW;Mwaniki PM;Mwaniki JI,10.22271/maths.2024.v9.i5b.1856,40740289,160,164,,,,,,,0,PUBMED,"Masinde A, 2024, INT J STAT APPL MATH","Masinde A, 2024, INT J STAT APPL MATH" +40735056,Machine learning assisted identification of antibiotic-resistant Staphylococcus aureus strains using a paper-based ratiometric sensor array.,Microchemical journal : devoted to the application of microtechniques in all branches of science,Microchem J,2024,206,,eng,citation,Mohs AM,Laliwala A;Gupta R;Svechkarev D;Bayles KW;Sadykov MR;Mohs AM,Laliwala A;Gupta R;Svechkarev D;Bayles KW;Sadykov MR;Mohs AM,10.1016/j.microc.2024.111395,40735056,,,,,,,,,0,PUBMED,"Laliwala A, 2024, MICROCHEM J","Laliwala A, 
2024, MICROCHEM J" +40726957,SuperResNET: Model-Free Single-Molecule Network Analysis Software Achieves Molecular Resolution of Nup96.,"Advanced intelligent systems (Weinheim an der Bergstrasse, Germany)",Adv Intell Syst,2025,7,3,eng,citation,Nabi IR,Li YL;Khater IM;Hallgrimson C;Cardoen B;Wong TH;Hamarneh G;Nabi IR,Li YL;Khater IM;Hallgrimson C;Cardoen B;Wong TH;Hamarneh G;Nabi IR,10.1002/aisy.202400521,40726957,2400521,,,,,,,,0,PUBMED,"Li Y, 2025, ADV INTELL SYST","Li Y, 2025, ADV INTELL SYST" +40703983,"Halted medical education in Korea amid Nobel Prizes in deep learning and machine learning research, tribute to a leader of Ewha Womans University College of Medicine, and highlights from this issue.",Ewha medical journal,Ewha Med J,2024,47,4,eng,citation,Huh S,Huh S,Huh S,10.12771/emj.2024.e71,40703983,e71,,,,,,,,0,PUBMED,"Huh S, 2024, EWHA MED J","Huh S, 2024, EWHA MED J" +40697699,Chatbot in anatomy learning and recapitulation: Prototype and review.,"Medical journal, Armed Forces India",Med J Armed Forces India,2025,81,4,eng,citation,Waghray A,Waghray NJ;Rajasundaram A;Johnson WMS;Waghray A,Waghray NJ;Rajasundaram A;Johnson WMS;Waghray A,10.1016/j.mjafi.2023.10.009,40697699,386,390,,,,,,,0,PUBMED,"Waghray N, 2025, MED J ARMED FORCES INDIA","Waghray N, 2025, MED J ARMED FORCES INDIA" +40693693,Doubly robust machine learning-based estimation methods for instrumental variables with an application to surgical care for cholecystitis.,"Journal of the Royal Statistical Society. 
Series A, (Statistics in Society)",J R Stat Soc Ser A Stat Soc,2024,,,eng,citation,Keele L,Takatsu K;Levis AW;Kennedy E;Kelz R;Keele L,Takatsu K;Levis AW;Kennedy E;Kelz R;Keele L,10.1093/jrsssa/qnae089,40693693,,,,,,,,,0,PUBMED,"Takatsu K, 2024, J R STAT SOC SER A STAT SOC","Takatsu K, 2024, J R STAT SOC SER A STAT SOC" +40692702,"SenSet, a novel human lung senescence cell gene signature, identifies cell-specific senescence mechanisms.",bioRxiv : the preprint server for biology,bioRxiv,2024,,,eng,citation,Königshoff M,Hasanaj E;Beaulieu D;Wang C;Hu Q;Bueno M;Sembrat JC;Pineda RH;Melo-Narvaez MC;Cardenes N;Yanwu Z;Yingze Z;Lafyatis R;Morris A;Mora A;Rojas M;Li D;Rahman I;Pryhuber GS;Lehmann M;Alder J;Gurkar A;Finkel T;Ma Q;Póczos B;Bar-Joseph Z;Eickelberg O;Königshoff M,Hasanaj E;Beaulieu D;Wang C;Hu Q;Bueno M;Sembrat JC;Pineda RH;Melo-Narvaez MC;Cardenes N;Yanwu Z;Yingze Z;Lafyatis R;Morris A;Mora A;Rojas M;Li D;Rahman I;Pryhuber GS;Lehmann M;Alder J;Gurkar A;Finkel T;Ma Q;Póczos B;Bar-Joseph Z;Eickelberg O;Königshoff M,10.1101/2024.12.21.629928,40692702,,,,,,,,,0,PUBMED,"Hasanaj E, 2024, BIORXIV","Hasanaj E, 2024, BIORXIV" +40657056,Developing a Vital Signal Detection Electrode for Fabric Substrate Using a High-Performance Conductive Carbon-Based Ink.,IEEE open journal of engineering in medicine and biology,IEEE Open J Eng Med Biol,2025,6,,eng,citation,Thiwawong T,Chansaengsri K;Tunhoo B;Onlaor K;Thiwawong T,Chansaengsri K;Tunhoo B;Onlaor K;Thiwawong T,10.1109/OJEMB.2024.3431030,40657056,390,399,,,,,,,0,PUBMED,"Chansaengsri K, 2025, IEEE OPEN J ENG MED BIOL","Chansaengsri K, 2025, IEEE OPEN J ENG MED BIOL" +40656101,ADHD detection based on human action recognition.,Neuroscience applied,Neurosci Appl,2024,3,,eng,citation,Naqvi SM,Li Y;Nair R;Naqvi SM,Li Y;Nair R;Naqvi SM,10.1016/j.nsa.2024.104093,40656101,104093,,,,,,,,0,PUBMED,"Li Y, 2024, NEUROSCI APPL","Li Y, 2024, NEUROSCI APPL" +40655912,Characterizing research domain criteria symptoms among psychiatric 
inpatients using large language models.,Journal of mood and anxiety disorders,J Mood Anxiety Disord,2024,8,,eng,citation,Perlis RH,McCoy TH;Perlis RH,McCoy TH;Perlis RH,10.1016/j.xjmad.2024.100079,40655912,100079,,,,,,,,0,PUBMED,"McCoy T, 2024, J MOOD ANXIETY DISORD","McCoy T, 2024, J MOOD ANXIETY DISORD" +40655911,Predicting OCD severity from religiosity and personality: A machine learning and neural network approach.,Journal of mood and anxiety disorders,J Mood Anxiety Disord,2024,8,,eng,citation,Muller GN,Zaboski BA;Wilens A;McNamara JPH;Muller GN,Zaboski BA;Wilens A;McNamara JPH;Muller GN,10.1016/j.xjmad.2024.100089,40655911,100089,,,,,,,,0,PUBMED,"Zaboski B, 2024, J MOOD ANXIETY DISORD","Zaboski B, 2024, J MOOD ANXIETY DISORD" +40654593,Towards collaborative data science in mental health research: The ECNP neuroimaging network accessible data repository.,Neuroscience applied,Neurosci Appl,2025,4,,eng,citation,Koutsouleris N,Khuntia A;Buciuman MO;Fanning J;Stolicyn A;Vetter C;Armio RL;From T;Goffi F;Hahn L;Kaufmann T;Laurikainen H;Maggioni E;Martinez-Zalacain I;Ruef A;Dong MS;Schwarz E;Squarcina L;Andreassen O;Bellani M;Brambilla P;Haren NV;Hietala J;Lawrie SM;Soriano-Mas C;Whalley H;Taquet M;Meisenzahl E;Falkai P;Wiegand A;Koutsouleris N;ECNP Neuroimaging Network,Khuntia A;Buciuman MO;Fanning J;Stolicyn A;Vetter C;Armio RL;From T;Goffi F;Hahn L;Kaufmann T;Laurikainen H;Maggioni E;Martinez-Zalacain I;Ruef A;Dong MS;Schwarz E;Squarcina L;Andreassen O;Bellani M;Brambilla P;Haren NV;Hietala J;Lawrie SM;Soriano-Mas C;Whalley H;Taquet M;Meisenzahl E;Falkai P;Wiegand A;Koutsouleris N;ECNP Neuroimaging Network,10.1016/j.nsa.2024.105407,40654593,105407,,,,,,,,0,PUBMED,"Khuntia A, 2025, NEUROSCI APPL","Khuntia A, 2025, NEUROSCI APPL" +40642109,Progress and challenges for the application of machine learning for neglected tropical diseases.,F1000Research,F1000Res,2023,12,,eng,citation,Mohd-Assaad N,Khew C;Akbar R;Mohd-Assaad N,Khew C;Akbar R;Mohd-Assaad 
N,10.12688/f1000research.129064.3,40642109,287,,,,,,,,0,PUBMED,"Khew C, 2023, F1000RES","Khew C, 2023, F1000RES" +40642055,Impact of artificial intelligence and digital technology-based diagnostic tools for communicable and non-communicable diseases in Africa.,African journal of laboratory medicine,Afr J Lab Med,2024,13,1,eng,citation,Mkolo NM,Obi CL;Olowoyo JO;Malevu TD;Mugivhisa LL;Hungwe T;Ogunrombi MO;Mkolo NM,Obi CL;Olowoyo JO;Malevu TD;Mugivhisa LL;Hungwe T;Ogunrombi MO;Mkolo NM,10.4102/ajlm.v13i1.2516,40642055,2516,,,,,,,,0,PUBMED,"Obi C, 2024, AFR J LAB MED","Obi C, 2024, AFR J LAB MED" +40636784,Detection of metabolic signatures implicated in the progression from hepatitis to cirrhosis to hepatocellular carcinoma.,iLIVER,ILIVER,2025,4,1,eng,citation,Wang R,Yu S;Wang S;Li J;Zheng H;Li P;Rong W;Jing J;He T;Sun Y;Wang L;Zhu Z;Ding X;Wang R,Yu S;Wang S;Li J;Zheng H;Li P;Rong W;Jing J;He T;Sun Y;Wang L;Zhu Z;Ding X;Wang R,10.1016/j.iliver.2024.100142,40636784,100142,,,,,,,,0,PUBMED,"Yu S, 2025, ILIVER","Yu S, 2025, ILIVER" +40635874,ACOUSTIC PARAMETER COMBINATIONS UNDERLYING MAPPING OF AUDITORY PSEUDOWORD SOUNDS TO MULTIPLE DOMAINS OF MEANING: A MACHINE LEARNING APPROACH.,bioRxiv : the preprint server for biology,bioRxiv,2024,,,eng,citation,Sathian K,Kumar GV;Lacey S;Dorsi J;Nygaard LC;Sathian K,Kumar GV;Lacey S;Dorsi J;Nygaard LC;Sathian K,10.1101/2024.09.27.615393,40635874,,,J Acoust Soc Am. 2025 Dec 1158(6):4243-4267. 
doi: 10.1121/10.0041768.,,,,,,0,PUBMED,"Kumar G, 2024, BIORXIV","Kumar G, 2024, BIORXIV" +40635857,"WGCNA combined with machine learning to explore potential biomarkers and treatment strategies for acute liver failure, with experimental validation.",iLIVER,ILIVER,2024,3,4,eng,citation,Ye G,Wu X;Zheng X;Ye G,Wu X;Zheng X;Ye G,10.1016/j.iliver.2024.100133,40635857,100133,,,,,,,,0,PUBMED,"Wu X, 2024, ILIVER","Wu X, 2024, ILIVER" +40625561,Unveiling the unexpected sinking and embedding dynamics of surface supported Mo/S clusters on 2D MoS(2) with active machine learning.,Smart molecules : open access,Smart Mol,2025,3,1,eng,citation,Zhao J,Zhao L;Ren Y;Shi X;Liu H;Yu Z;Gao J;Zhao J,Zhao L;Ren Y;Shi X;Liu H;Yu Z;Gao J;Zhao J,10.1002/smo.20240018,40625561,e20240018,,,,,,,,0,PUBMED,"Zhao L, 2025, SMART MOL","Zhao L, 2025, SMART MOL" +40620613,Artificial Intelligence and Postpartum Hemorrhage.,"Maternal-fetal medicine (Wolters Kluwer Health, Inc.)",Matern Fetal Med,2025,7,1,eng,citation,Collins SL,Mathewlynn SJ;Soltaninejad M;Collins SL,Mathewlynn SJ;Soltaninejad M;Collins SL,10.1097/FM9.0000000000000257,40620613,22,28,,,,,,,0,PUBMED,"Mathewlynn S, 2025, MATERN FETAL MED","Mathewlynn S, 2025, MATERN FETAL MED" +40604228,Rapid 3D imaging at cellular resolution for digital cytopathology with a multi-camera array scanner (MCAS).,Npj imaging,Npj Imaging,2024,2,1,eng,citation,Horstmeyer R,Kim K;Chaware A;Cook CB;Xu S;Abdelmalak M;Cooke C;Zhou KC;Harfouche M;Reamey P;Saliu V;Doman J;Dugo C;Horstmeyer G;Davis R;Taylor-Cho I;Foo WC;Kreiss L;Jiang XS;Horstmeyer R,Kim K;Chaware A;Cook CB;Xu S;Abdelmalak M;Cooke C;Zhou KC;Harfouche M;Reamey P;Saliu V;Doman J;Dugo C;Horstmeyer G;Davis R;Taylor-Cho I;Foo WC;Kreiss L;Jiang XS;Horstmeyer R,10.1038/s44303-024-00042-2,40604228,39,,,,,,,,0,PUBMED,"Kim K, 2024, NPJ IMAGING","Kim K, 2024, NPJ IMAGING" +40604097,Advanced ultrasound methods to improve chronic kidney disease diagnosis.,Npj imaging,Npj Imaging,2024,2,1,eng,citation,Kiessling 
F,Fleig S;Magnuska ZA;Koczera P;Salewski J;Djudjaj S;Schmitz G;Kiessling F,Fleig S;Magnuska ZA;Koczera P;Salewski J;Djudjaj S;Schmitz G;Kiessling F,10.1038/s44303-024-00023-5,40604097,22,,,,,,,,0,PUBMED,"Fleig S, 2024, NPJ IMAGING","Fleig S, 2024, NPJ IMAGING" +40603523,Focal cortical dysplasia (type II) detection with multi-modal MRI and a deep-learning framework.,Npj imaging,Npj Imaging,2024,2,1,eng,citation,Barma S,Shankar A;Saikia MJ;Dandapat S;Barma S,Shankar A;Saikia MJ;Dandapat S;Barma S,10.1038/s44303-024-00031-5,40603523,31,,,,,,,,0,PUBMED,"Shankar A, 2024, NPJ IMAGING","Shankar A, 2024, NPJ IMAGING" +40602875,Characterization and recognition of three-dimensional excitation-emission matrix spectra of wastewater from six typical categories.,Journal of environmental sciences (China),J Environ Sci (China),2025,157,,eng,citation,Chen L,Kuang L;Liu R;Jin M;Lan Y;Su Y;Zhao Y;Chen L,Kuang L;Liu R;Jin M;Lan Y;Su Y;Zhao Y;Chen L,10.1016/j.jes.2024.04.026,40602875,206,219,,,,,,,0,PUBMED,"Kuang L, 2025, J ENVIRON SCI (CHINA)","Kuang L, 2025, J ENVIRON SCI (CHINA)" +40585881,Current scenario of machine learning applications to hydrothermal liquefaction via bibliometric analysis.,F1000Research,F1000Res,2024,13,,eng,citation,Tippayawong N,Katongtung T;Sukpancharoen S;Sinthupinyo S;Tippayawong N,Katongtung T;Sukpancharoen S;Sinthupinyo S;Tippayawong N,10.12688/f1000research.156514.3,40585881,1131,,,,,,,,0,PUBMED,"Katongtung T, 2024, F1000RES","Katongtung T, 2024, F1000RES" +40574796,From Biased Selective Labels to Pseudo-Labels: An Expectation-Maximization Framework for Learning from Biased Decisions.,Proceedings of machine learning research,Proc Mach Learn Res,2024,235,,eng,citation,Wiens J,Chang T;Wiens J,Chang T;Wiens J,,40574796,6286,6324,,,,,,,0,PUBMED,"Chang T, 2024, PROC MACH LEARN RES","Chang T, 2024, PROC MACH LEARN RES" +40568571,Comprehensive examination of resting state fMRI connectomics yields new insights into brain function deficits in Gulf War illness 
after accounting for heterogeneity in brain impairment across the ill veteran population.,Neuroimage. Reports,Neuroimage Rep,2024,4,3,eng,citation,Gopinath KS,Yang G;Haley RW;Guo Y;Gopinath KS,Yang G;Haley RW;Guo Y;Gopinath KS,10.1016/j.ynirp.2024.100209,40568571,100209,,,,,,,,0,PUBMED,"Yang G, 2024, NEUROIMAGE REP","Yang G, 2024, NEUROIMAGE REP" +40568361,Quantitative 3D reconstruction of viral vector distribution in rodent and ovine brain following local delivery.,Neuroimage. Reports,Neuroimage Rep,2024,4,4,eng,citation,Kiselyov AS,Poceviciute R;Mitchell K;Nikolakopoulou AM;Cho SK;Ma X;Chen P;Figueroa S;Sarmiento EJ;Singh A;Hartstein O;Loudon WG;Cros F;Kiselyov AS,Poceviciute R;Mitchell K;Nikolakopoulou AM;Cho SK;Ma X;Chen P;Figueroa S;Sarmiento EJ;Singh A;Hartstein O;Loudon WG;Cros F;Kiselyov AS,10.1016/j.ynirp.2024.100218,40568361,100218,,,,,,,,0,PUBMED,"Poceviciute R, 2024, NEUROIMAGE REP","Poceviciute R, 2024, NEUROIMAGE REP" +40552869,Real-Life Physical Activity in Community-Dwelling Older Adults over a Year with Changes in COVID-19 Restrictions in Norway.,Gerontology,Gerontology,2025,71,2,eng,citation,Helbostad JL,Nygård AJ;Taraldsen K;Lie M;Skelton DA;Salvesen ØO;Helbostad JL,Nygård AJ;Taraldsen K;Lie M;Skelton DA;Salvesen ØO;Helbostad JL,10.1159/000541490,40552869,131,142,,,,,,,0,PUBMED,"Nygård A, 2025, GERONTOLOGY","Nygård A, 2025, GERONTOLOGY" +40552318,Innovating beyond electrophysiology through multimodal neural interfaces.,Nature reviews electrical engineering,Nat Rev Electr Eng,2025,2,1,eng,citation,Kuzum D,Ramezani M;Ren Y;Cubukcu E;Kuzum D,Ramezani M;Ren Y;Cubukcu E;Kuzum D,10.1038/s44287-024-00121-x,40552318,42,57,,,,,,,0,PUBMED,"Ramezani M, 2025, NAT REV ELECTR ENG","Ramezani M, 2025, NAT REV ELECTR ENG" +40542698,Emerging Uses of Artificial Intelligence in Chronic Dermatologic Disease: A Scoping Review.,Journal of cutaneous medicine and surgery,J Cutan Med Surg,2025,29,3,eng,citation,Mukovozov I,Hollman D;Doktorchik C;Mukovozov I,Hollman 
D;Doktorchik C;Mukovozov I,10.1177/12034754241308237,40542698,274,281,,,,,,,0,PUBMED,"Hollman D, 2025, J CUTAN MED SURG","Hollman D, 2025, J CUTAN MED SURG" +40539117,Testing a Large Number of Composite Null Hypotheses Using Conditionally Symmetric Multidimensional Gaussian Mixtures in Genome-Wide Studies.,Journal of the American Statistical Association,J Am Stat Assoc,2025,120,550,eng,citation,Lin X,Sun R;McCaw ZR;Lin X,Sun R;McCaw ZR;Lin X,10.1080/01621459.2024.2422124,40539117,605,617,,,,,,,0,PUBMED,"Sun R, 2025, J AM STAT ASSOC","Sun R, 2025, J AM STAT ASSOC" diff --git a/test_pubmed_200.xlsx b/test_pubmed_200.xlsx new file mode 100644 index 000000000..1f2e89983 Binary files /dev/null and b/test_pubmed_200.xlsx differ diff --git a/www/services/__init__.py b/www/services/__init__.py index 28584e105..238f3eb80 100644 --- a/www/services/__init__.py +++ b/www/services/__init__.py @@ -14,4 +14,7 @@ from .tabletag import * from .termextraction import * from .thematicmap import * -from .utils import * \ No newline at end of file +from .utils import * +from .api_retriever import retrieve +from .standardizer import standardize +from .validator import validate \ No newline at end of file diff --git a/www/services/api_retriever.py b/www/services/api_retriever.py new file mode 100644 index 000000000..37a8ec65c --- /dev/null +++ b/www/services/api_retriever.py @@ -0,0 +1,139 @@ +import time +import requests + +def fetch_page(url: str, params: dict, retries: int = 3): + """ + Sends a single HTTP GET request to the given URL with the given params. + Retries up to 3 times if the request fails or returns a 429 error. + Returns the JSON response as a dictionary, or None if all retries fail. + """ + for attempt in range(retries): + response = requests.get(url, params=params) + + if response.status_code == 200: + return response.json() + + elif response.status_code == 429: + print(f"Rate limited. 
Waiting before retry {attempt + 1}...") + time.sleep(2) + + else: + print(f"Error {response.status_code}. Retrying...") + time.sleep(1) + + return None + + +def fetch_openalex(query: str, total_wanted: int = 100, per_page: int = 25) -> list: + """ + Fetches multiple pages of results from the OpenAlex API. + Loops through pages until the desired number of results is reached. + Returns a list of raw paper dictionaries. + """ + url = "https://api.openalex.org/works" + all_results = [] + page = 1 + + while len(all_results) < total_wanted: + params = { + "search": query, + "per-page": per_page, + "page": page + } + data = fetch_page(url, params) + if data is None: + print("Failed to fetch page. Stopping.") + break + all_results.extend(data["results"]) + page += 1 + time.sleep(0.5) + + return all_results[:total_wanted] + + +def fetch_pubmed_ids(query: str, total_wanted: int = 100, mindate: str = None, maxdate: str = None) -> list: + """ + Searches PubMed for a query and returns a list of PubMed IDs (PMIDs). + PubMed requires two steps: first get IDs, then fetch paper details. + If mindate/maxdate are provided, restricts the search to that + publication-date range (format: "YYYY"), so results are spread + across multiple years instead of defaulting to the most recent ones. + Returns a list of PMID strings. + """ + url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi" + params = { + "db": "pubmed", + "term": query, + "retmax": total_wanted, + "retmode": "json" + } + if mindate and maxdate: + params["datetype"] = "pdat" + params["mindate"] = mindate + params["maxdate"] = maxdate + + data = fetch_page(url, params) + if data is None: + return [] + return data["esearchresult"]["idlist"] + + +def fetch_pubmed(query: str, total_wanted: int = 100, mindate: str = None, maxdate: str = None) -> list: + """ + Fetches paper details from PubMed for a given query. + First retrieves PMIDs via fetch_pubmed_ids(), then fetches + paper summaries in batches of 20. 
+ If mindate/maxdate are provided (format: "YYYY"), restricts results + to that publication-date range. + Returns a list of raw paper dictionaries. + """ + ids = fetch_pubmed_ids(query=query, total_wanted=total_wanted, mindate=mindate, maxdate=maxdate) + if not ids: + print("No PubMed IDs found. Stopping.") + return [] + + url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi" + all_results = [] + batch_size = 20 + + for i in range(0, len(ids), batch_size): + batch = ids[i:i + batch_size] + params = { + "db": "pubmed", + "id": ",".join(batch), + "retmode": "json" + } + data = fetch_page(url, params) + if data is None: + print("Failed to fetch batch. Skipping.") + continue + + for pmid in batch: + if pmid in data["result"]: + all_results.append(data["result"][pmid]) + + time.sleep(0.5) + + return all_results[:total_wanted] + + +def retrieve(query: str, platform: str = "openalex", total: int = 100, mindate: str = None, maxdate: str = None) -> list: + """ + Main entry point for the API retriever. + Takes a search query and platform selection from the user. + Returns a list of raw paper dictionaries ready for standardizer.py. + + mindate/maxdate (format: "YYYY") are currently only applied to the + "pubmed" platform, to spread results across a publication-year + range instead of defaulting to the most recent ones. + + Supported platforms: "openalex", "pubmed" + """ + if platform == "openalex": + return fetch_openalex(query=query, total_wanted=total) + + elif platform == "pubmed": + return fetch_pubmed(query=query, total_wanted=total, mindate=mindate, maxdate=maxdate) + + else: + raise ValueError(f"Unsupported platform: {platform}. 
Choose 'openalex' or 'pubmed'.") \ No newline at end of file diff --git a/www/services/biblionetwork.py b/www/services/biblionetwork.py index 7e65b4880..e403b076a 100644 --- a/www/services/biblionetwork.py +++ b/www/services/biblionetwork.py @@ -2,111 +2,304 @@ from .cocmatrix import * -def biblionetwork(M, analysis="coupling", network="authors", n=None, sep=";", short=False, shortlabel=True, remove_terms=None, synonyms=None): - +def biblionetwork( + M, + analysis="coupling", + network="authors", + n=None, + sep=";", + short=False, + shortlabel=True, + remove_terms=None, + synonyms=None +): + def crossprod(A, B): - return A.T @ B # Moltiplicazione matriciale per ottenere il prodotto incrociato + return A.T @ B NetMatrix = None + # SAFETY CHECK + if M is None: + print("Input object is None") + return None + + # ---------------- COUPLING ---------------- # + if analysis == "coupling": + # PATCH: CR-based coupling is not viable for OpenAlex or PubMed because + # CR contains raw URLs (OpenAlex) or empty lists (PubMed) instead of + # formatted WoS reference strings. Each URL is unique so cocMatrix builds + # a massive sparse matrix that crashes with OOM. Skip early. 
+ db_name = "" + if "DB" in M.columns and not M["DB"].empty: + db_name = str(M["DB"].iloc[0]).lower() + + if network == "authors": + WA = cocMatrix(M, Field="AU", type="sparse", n=n, sep=sep, short=short) WCR = cocMatrix(M, Field="CR", type="sparse", n=n, sep=sep, short=short) + + if WA is None or WCR is None: + return None + CRA = crossprod(WCR, WA) NetMatrix = crossprod(CRA, CRA) + elif network == "references": - WCR = cocMatrix(M, Field="CR", type="sparse", n=n, sep=sep, short=short).T + + WCR = cocMatrix(M, Field="CR", type="sparse", n=n, sep=sep, short=short) + + if WCR is None: + return None + + WCR = WCR.T NetMatrix = crossprod(WCR, WCR) + elif network == "sources": + WSO = cocMatrix(M, Field="SO", type="sparse", n=n, sep=sep, short=short) WCR = cocMatrix(M, Field="CR", type="sparse", n=n, sep=sep, short=short) + + if WSO is None or WCR is None: + return None + CRSO = crossprod(WCR, WSO) NetMatrix = crossprod(CRSO, CRSO) + elif network == "countries": + WCO = cocMatrix(M, Field="AU_CO", type="sparse", n=n, sep=sep, short=short) WCR = cocMatrix(M, Field="CR", type="sparse", n=n, sep=sep, short=short) + + if WCO is None or WCR is None: + return None + CRCO = crossprod(WCR, WCO) NetMatrix = crossprod(CRCO, CRCO) + # ---------------- CO-OCCURRENCES ---------------- # + elif analysis == "co-occurrences": + if network == "authors": + WA = cocMatrix(M, Field="AU", type="sparse", n=n, sep=sep, short=short) + elif network == "keywords": - WA = cocMatrix(M, Field="ID", type="sparse", n=n, sep=sep, short=short, remove_terms=remove_terms, synonyms=synonyms) + + WA = cocMatrix( + M, + Field="ID", + type="sparse", + n=n, + sep=sep, + short=short, + remove_terms=remove_terms, + synonyms=synonyms + ) + elif network == "author_keywords": - WA = cocMatrix(M, Field="DE", type="sparse", n=n, sep=sep, short=short, remove_terms=remove_terms, synonyms=synonyms) + + WA = cocMatrix( + M, + Field="DE", + type="sparse", + n=n, + sep=sep, + short=short, + remove_terms=remove_terms, + 
synonyms=synonyms + ) + elif network == "titles": - WA = cocMatrix(M, Field="TI_TM", type="sparse", n=n, sep=sep, short=short, remove_terms=remove_terms, synonyms=synonyms) + + WA = cocMatrix( + M, + Field="TI_TM", + type="sparse", + n=n, + sep=sep, + short=short, + remove_terms=remove_terms, + synonyms=synonyms + ) + elif network == "abstracts": - WA = cocMatrix(M, Field="AB_TM", type="sparse", n=n, sep=sep, short=short, remove_terms=remove_terms, synonyms=synonyms) + + WA = cocMatrix( + M, + Field="AB_TM", + type="sparse", + n=n, + sep=sep, + short=short, + remove_terms=remove_terms, + synonyms=synonyms + ) + elif network == "sources": + WA = cocMatrix(M, Field="SO", type="sparse", n=n, sep=sep, short=short) + + else: + print("Invalid co-occurrence network") + return None + + if WA is None: + return None + NetMatrix = crossprod(WA, WA) + # ---------------- CO-CITATION ---------------- # + elif analysis == "co-citation": + if network == "authors": + WA = cocMatrix(M, Field="CR_AU", type="sparse", n=n, sep=sep, short=short) + elif network == "references": + WA = cocMatrix(M, Field="CR", type="sparse", n=n, sep=sep, short=short) + elif network == "sources": + WA = cocMatrix(M, Field="CR_SO", type="sparse", n=n, sep=sep, short=short) + + else: + print("Invalid co-citation network") + return None + + if WA is None: + return None + NetMatrix = crossprod(WA, WA) + # ---------------- COLLABORATION ---------------- # + elif analysis == "collaboration": + if network == "authors": + WA = cocMatrix(M, Field="AU", type="sparse", n=n, sep=sep, short=short) + elif network == "universities": + WA = cocMatrix(M, Field="AU_UN", type="sparse", n=n, sep=sep, short=short) + elif network == "countries": + WA = cocMatrix(M, Field="AU_CO", type="sparse", n=n, sep=sep, short=short) + + else: + print("Invalid collaboration network") + return None + + if WA is None: + return None + NetMatrix = crossprod(WA, WA) - # Verifica che NetMatrix non sia None prima di procedere + # ---------------- 
FINAL CLEANUP ---------------- # + if NetMatrix is not None: - NetMatrix = pd.DataFrame(NetMatrix) # Converti in DataFrame se necessario - # Eliminazione delle colonne e righe vuote - filtered_columns = [col for col in NetMatrix.columns if str(col).strip()] - filtered_index = [idx for idx in NetMatrix.index if str(idx).strip()] + NetMatrix = pd.DataFrame(NetMatrix) + + filtered_columns = [ + col for col in NetMatrix.columns + if str(col).strip() + ] + + filtered_index = [ + idx for idx in NetMatrix.index + if str(idx).strip() + ] + NetMatrix = NetMatrix.loc[filtered_index, filtered_columns] - M = M.get() # Estrai il dizionario se M è un oggetto + # PATCH: M may already be a plain DataFrame after term_extraction + M = M.get() if hasattr(M, 'get') and callable(M.get) and not isinstance(M, pd.DataFrame) else M + + # SAFETY CHECK + if M is None or M.empty: + return NetMatrix + + # SAFE DB HANDLING + # PATCH: default changed from hardcoded "web_of_science" to "" so that + # unknown sources don't silently get treated as WoS. + db_name = "" + + if "DB" in M.columns and not M["DB"].empty: + db_name = str(M["DB"].iloc[0]).lower() - db_name = M["DB"].iloc[0] print(f"db_name: {db_name}") - if network == "references" and db_name == "SCOPUS": - ind = [i for i, col in enumerate(NetMatrix.columns) if str(col)[0].isalpha()] + + # PATCH: the Scopus-specific reference filter now checks for "scopus" + # (lowercase) to match the normalized db_name above. + if network == "references" and db_name == "scopus": + + ind = [ + i for i, col in enumerate(NetMatrix.columns) + if str(col) and str(col)[0].isalpha() + ] + NetMatrix = NetMatrix.iloc[ind, ind] + # PATCH: shortlabel now supported for openalex and pubmed. + # Both use the same label format as WoS (author + year), + # so they are routed to the same branch in label_short(). 
if network == "references" and shortlabel: - LABEL = label_short(NetMatrix, db=db_name.lower()) + + LABEL = label_short(NetMatrix, db=db_name) LABEL = remove_duplicated_labels(LABEL) - NetMatrix.columns = NetMatrix.index = LABEL + + NetMatrix.columns = LABEL + NetMatrix.index = LABEL return NetMatrix -def label_short(NET, db="isi"): +def label_short(NET, db=""): + LABEL = pd.Series(NET.columns) + YEAR = LABEL.str.extract(r'(\d{4})')[0].fillna("") - if db == "web_of_science": + # PATCH: added "openalex" and "pubmed" to the WoS branch since both + # sources produce SR strings in the same "Author, Year, Journal" format. + if db in ("web_of_science", "openalex", "pubmed"): + AU = LABEL.str.split(" ").str[:2].str.join(" ") LABEL = AU + " " + YEAR + elif db == "scopus": + AU = LABEL.str.split(". ").str[0] LABEL = AU + ". " + YEAR + + # For unknown sources, return labels unchanged to avoid crashes. + return LABEL.tolist() def remove_duplicated_labels(LABEL): + LABEL = pd.Series(LABEL) + counts = LABEL.value_counts() + duplicates = counts[counts > 1].index for dup in duplicates: + dup_indices = LABEL[LABEL == dup].index - LABEL.iloc[dup_indices] = [f"{dup}-{i+1}" for i in range(len(dup_indices))] + + LABEL.iloc[dup_indices] = [ + f"{dup}-{i+1}" + for i in range(len(dup_indices)) + ] return LABEL.tolist() diff --git a/www/services/cocmatrix.py b/www/services/cocmatrix.py index f523aed67..343c4d3c5 100644 --- a/www/services/cocmatrix.py +++ b/www/services/cocmatrix.py @@ -1,64 +1,134 @@ from .utils import * -def cocMatrix(df, Field="AU", type="sparse", n=None, sep=";", binary=True, short=False, remove_terms=None, synonyms=None): +def cocMatrix( + df, + Field="AU", + type="sparse", + n=None, + sep=";", + binary=True, + short=False, + remove_terms=None, + synonyms=None +): """ Computes occurrences between elements of a Tag Field from a bibliographic data frame. - - Args: - M: A DataFrame obtained by the converting function. 
It is a data matrix with cases corresponding to articles and variables to Field Tag in the original WoS or SCOPUS file. - Field: A string indicating one of the field tags of the standard ISI WoS Field Tag codify. - type: Indicates the output format of co-occurrences ("matrix" or "sparse"). - n: An integer indicating the number of items to select. If None, all items are selected. - sep: The field separator character. - binary: A boolean. If True each cell contains a 0/1. If False each cell contains the frequency. - short: A boolean. If True all items with frequency < 2 are deleted to reduce the matrix size. - remove_terms: A list of additional terms to delete from the documents before term extraction. - synonyms: A list of synonyms that will be merged into a single term. - - Returns: - A bipartite network matrix with cases corresponding to manuscripts and variables to the objects extracted from the Tag Field. """ - M = df.get() + # PATCH: df may be a Shiny reactive Value or a plain DataFrame + M = df.get() if hasattr(df, 'get') and callable(df.get) and not isinstance(df, pd.DataFrame) else df + + # SAFETY CHECK + if M is None or M.empty: + print("Input dataframe is empty") + return None + + # SAFETY CHECK FOR SR if "LABEL" not in M.columns: + if "SR" not in M.columns: + print("SR column missing") + return None + M.index = M["SR"] print("Processing field: " + Field + "\n") + RowNames = M.index # REMOVE TERMS AND MERGE SYNONYMS if Field in ["ID", "DE", "TI", "TI_TM", "AB", "AB_TM"]: - Fi = M[Field].fillna("").apply(lambda x: x if isinstance(x, list) else [i.strip() for i in x.split(sep)]) - TERMS = pd.DataFrame({"item": [item.upper() for sublist in Fi for item in sublist], "SR": M.index.repeat(Fi.str.len())}) + + if Field not in M.columns: + print(f"{Field} column missing") + return None + + Fi = M[Field].fillna("").apply( + lambda x: x if isinstance(x, list) + else [i.strip() for i in str(x).split(sep)] + ) + + TERMS = pd.DataFrame({ + "item": [ + item.upper() + for 
sublist in Fi + for item in sublist + ], + "SR": M.index.repeat(Fi.str.len()) + }) # Merge synonyms if synonyms: - synonyms_dict = {syn.split(";")[0].strip().upper(): [s.strip().upper() for s in syn.split(";")[1:]] for syn in synonyms} + synonyms_dict = { + syn.split(";")[0].strip().upper(): + [s.strip().upper() for s in syn.split(";")[1:]] + for syn in synonyms + } + for key, values in synonyms_dict.items(): TERMS["item"] = TERMS["item"].replace(values, key) # Remove terms if remove_terms: - TERMS = TERMS[~TERMS["item"].str.upper().isin([term.strip().upper() for term in remove_terms])] + TERMS = TERMS[ + ~TERMS["item"].str.upper().isin( + [term.strip().upper() for term in remove_terms] + ) + ] + + TERMS = TERMS.groupby("SR")["item"].apply( + lambda x: ";".join(x) + ).reset_index() + + M = ( + M.drop(columns=[Field, "SR"], errors="ignore") + .merge(TERMS, on="SR", how="left") + .rename(columns={"item": Field}) + ) - TERMS = TERMS.groupby("SR")["item"].apply(lambda x: ";".join(x)).reset_index() - M = M.drop(columns=[Field, 'SR']).merge(TERMS, on="SR", how="left").rename(columns={"item": Field}) M.index = RowNames + # SAFETY CHECK FOR CR if Field == "CR": - M["CR"] = M["CR"].apply(lambda x: [ref.replace("DOI;", "DOI ") for ref in x] if isinstance(x, list) else x) + if "CR" not in M.columns: + print("CR column missing") + return None + + M["CR"] = M["CR"].apply( + lambda x: [ + ref.replace("DOI;", "DOI ") + for ref in x + ] if isinstance(x, list) else x + ) + + # FIELD EXISTENCE CHECK if Field in M.columns: - Fi = M[Field].fillna("").apply(lambda x: x if isinstance(x, list) else [i.strip() for i in x.split(sep)]) + + Fi = M[Field].fillna("").apply( + lambda x: x if isinstance(x, list) + else [i.strip() for i in str(x).split(sep)] + ) + else: print(f"Field {Field} is not a column name of input data frame") - return + return None - Fi = Fi.apply(lambda x: [i.strip() for i in x]) # Equivalent to trim.leading in R - if Field == "CR": - Fi = Fi.apply(lambda x: [i for i in 
x if len(i) > 10]) # Delete not congruent references + Fi = Fi.apply(lambda x: [i.strip() for i in x]) - allField = [item for sublist in Fi for item in sublist if item] + # DELETE INVALID REFERENCES + if Field == "CR": + Fi = Fi.apply( + lambda x: [i for i in x if len(i) > 10] + ) + + allField = [ + item + for sublist in Fi + for item in sublist + if item + ] + + # REDUCE REFERENCES if Field == "CR": allField = reduceRefs(allField) Fi = Fi.apply(reduceRefs) @@ -68,6 +138,7 @@ def cocMatrix(df, Field="AU", type="sparse", n=None, sep=";", binary=True, short if n: uniqueField = uniqueField[:n] + elif short: uniqueField = tabField[tabField > 1].index.tolist() @@ -75,36 +146,63 @@ def cocMatrix(df, Field="AU", type="sparse", n=None, sep=";", binary=True, short print("Matrix is empty!!") return None + # MATRIX CREATION if type == "matrix" or not binary: WF = np.zeros((M.shape[0], len(uniqueField))) + elif type == "sparse": WF = lil_matrix((M.shape[0], len(uniqueField))) + else: print("Error in type argument") - return + return None - col_idx = {term: idx for idx, term in enumerate(uniqueField)} - row_idx = {sr: idx for idx, sr in enumerate(M.index)} + col_idx = { + term: idx + for idx, term in enumerate(uniqueField) + } + row_idx = { + sr: idx + for idx, sr in enumerate(M.index) + } + + # BUILD MATRIX for i, terms in Fi.items(): + if terms: + if binary: - indices = [col_idx[term] for term in set(terms) if term in col_idx] + + indices = [ + col_idx[term] + for term in set(terms) + if term in col_idx + ] + WF[row_idx[i], indices] = 1 + else: + term_counts = pd.Series(terms).value_counts() + for term, count in term_counts.items(): + if term in col_idx: WF[row_idx[i], col_idx[term]] = count if type == "sparse" and not binary: WF = lil_matrix(WF) - # Convert the sparse matrix to a DataFrame for better readability - WF_df = pd.DataFrame(WF.toarray(), index=M.index, columns=uniqueField) + # CONVERT TO DATAFRAME + WF_df = pd.DataFrame( + WF.toarray(), + index=M.index, + 
columns=uniqueField + ) + if binary: - WF_df = WF_df.astype(int) # Ensure binary values are 0 and 1 - # print(WF_df) + WF_df = WF_df.astype(int) return WF_df @@ -112,25 +210,27 @@ def cocMatrix(df, Field="AU", type="sparse", n=None, sep=";", binary=True, short def reduceRefs(refs): """ Remove everything after "V" followed by a digit and "DOI " from references. - - Args: - refs: A list of references to reduce. - - Returns: - A list of reduced references. """ + reduced_refs = [] + for ref in refs: - # Remove everything after "V" followed by a digit + + if not isinstance(ref, str): + continue + + # Remove everything after V followed by digit v_match = re.search(r"V\d", ref) + if v_match: ref = ref[:v_match.start()] - - # Remove everything after "DOI " + + # Remove everything after DOI doi_match = re.search(r"DOI ", ref) + if doi_match: ref = ref[:doi_match.start()] - + reduced_refs.append(ref.strip()) return reduced_refs diff --git a/www/services/couplingmap.py b/www/services/couplingmap.py index a2b3628d7..451a0f6ff 100644 --- a/www/services/couplingmap.py +++ b/www/services/couplingmap.py @@ -15,59 +15,64 @@ def couplingMap(df, analysis="documents", field="CR", n=500, minfreq=5, print('\nanalysis argument is incorrect.\n\nPlease select one of the following choices: "documents", "authors", "sources"\n\n') return None - df = metaTagExtraction(df, "SR") # serve questo per avere il merging perfetto per uniformare la colonna SR - M = df.get() + _df_check = df.get() if hasattr(df, 'get') and callable(df.get) and not isinstance(df, pd.DataFrame) else df + if 'SR' not in _df_check.columns or _df_check['SR'].eq('').all(): + df = metaTagExtraction(df, "SR") + M = df.get() if hasattr(df, 'get') and callable(df.get) and not isinstance(df, pd.DataFrame) else df ngrams = int(ngrams) minfreq = max(0, int(minfreq * len(M) // 1000)) Net = network(df, analysis=analysis, field=field, stemming=stemming, n=n, community_repulsion=community_repulsion, cluster=clustering) + + # PATCH: 
network() returns None when the matrix is empty (e.g. OpenAlex URL-based references) + if Net is None: + print("Network is empty — cannot build coupling map.") + return None + net = Net['graph'] NCS = normalizeCitationScore(df, field=analysis, impact_measure=impact_measure) + # PATCH: normalizeCitationScore may return None if localCitations fails + if NCS is None: + print("NCS is None — cannot build coupling map.") + return None + if impact_measure == "global": NCS['MNLCS'] = NCS['MNGCS'] NCS['LC'] = NCS['TC'] - # Converte la prima colonna di NCS in maiuscolo NCS.iloc[:, 0] = NCS.iloc[:, 0].str.upper() - # Label dei nodi del grafo label = pd.Series(net.vs['name']) - # Creazione del DataFrame L per il merge con NCS L = pd.DataFrame({'id': label.str.upper()}) - L.columns = [analysis] # Rinominare la colonna per corrispondere a `analysis` + L.columns = [analysis] - # Garantire che i tipi di dato e il formato siano compatibili NCS[analysis] = NCS[analysis].astype(str).str.upper() L[analysis] = L[analysis].astype(str).str.upper() - # Merge tra L e NCS (simile a left_join in R) + D = L.merge(NCS, left_on=analysis, right_on=analysis, how='left', copy=True) - # Get vertex names and create initial dataframes label = pd.Series(net.vs['name']) - # First merge with NCS L = pd.DataFrame({'id': label.str.upper()}) L.columns = [analysis] D = L.merge(NCS, on=analysis, how='left', copy=True) - # Second merge with cluster results L = pd.DataFrame({'id': label.str.lower()}) L.columns = [analysis] Net['cluster_res'] = Net['cluster_res'].rename(columns={'vertex': analysis}) + Net['cluster_res'][analysis] = Net['cluster_res'][analysis].astype(str).str.lower() + L[analysis] = L[analysis].astype(str).str.lower() C = L.merge(Net['cluster_res'], on=analysis, how='left', copy=True) - # Get group membership and colors group = Net['cluster_obj'].membership color = net.vs['color'] - # Convert colors to hex and handle NaN values color = [to_hex(c) if pd.notna(c) else "#D3D3D3" for c in color] - 
# color[pd.isna(color)] = "#B3B3B3" # Colore grigio chiaro in formato RGBA D['group'] = group D['color'] = color @@ -75,10 +80,8 @@ def couplingMap(df, analysis="documents", field="CR", n=500, minfreq=5, DC = pd.concat([D, C.iloc[:, 1:]], axis=1) DC['name'] = DC.iloc[:, 0] - # Resetta l'indice per evitare ambiguità DC = DC.reset_index(drop=True) - # Raggruppa senza ambiguità df_lab = DC.groupby('group', as_index=False).apply(lambda x: x.assign( MNLCS2=x['MNLCS'].where(x['MNLCS'] >= 1), MNLCS=round(x['MNLCS'], 2), @@ -103,6 +106,12 @@ def couplingMap(df, analysis="documents", field="CR", n=500, minfreq=5, meancentr = df['rcentrality'].mean() df = df[df['freq'] >= minfreq] + # PATCH: if df is empty after frequency filter, return None + + if df.empty: + print("No clusters passed the frequency filter.") + return None + df_lab = df_lab[df_lab['group'].isin(df['group'])] df_lab = df_lab.iloc[:, [0, 6, 14, 7, 3]] df_lab.columns = [analysis, "Cluster", "ClusterFrequency", "ClusterColor", "NormalizedLocalCitationScore"] @@ -113,57 +122,46 @@ def couplingMap(df, analysis="documents", field="CR", n=500, minfreq=5, if label_term is None: label_term = "null" - if label_term in ["DE", "ID", "TI", "AB"]: + db_val = M['DB'].iloc[0] if 'DB' in M.columns and not M.empty else '' + if label_term in ["DE", "ID", "TI", "AB"] and str(db_val).upper() not in ("OPENALEX", "PUBMED"): w = labeling(M, df_lab, term=label_term, n=n, n_labels=n_labels, analysis=analysis, ngrams=ngrams) df['label'] = w df['log_freq'] = np.log(df['freq']) df['adjusted_color'] = df['color'].apply(lambda x: adjust_color(x, alpha=0.5)) - ################## FIGURE ################## - # Calculate range for bubble sizes based on size parameter x_max = df['rcentrality'].max() x_range = np.ptp(df['rcentrality']) y_min = df['rimpact'].min() y_range = np.ptp(df['rimpact']) - # Calcola x e y (aggiungiamo +0.5 a entrambi gli estremi come in R) x1 = x_max - 0.02 - (x_range * 0.125) + 0.5 x2 = x_max - 0.02 + 0.5 y1 = y_min y2 = 
y_min + (y_range * 0.125) - # Format hover text for Plotly with proper line breaks - # We need to replace newlines with HTML breaks for the hover text in Plotly - - # Function to limit to first 10 items def limit_to_first(text): if pd.isna(text): return "" lines = text.split('\n') if len(lines) > 10: lines = lines[:10] - lines.append('...') # Add ellipsis to show there are more items + lines.append('...') return '\n'.join(lines) - # Apply the function to limit each entry to 20 items df['words'] = df['words'].apply(limit_to_first) - - # Replace newlines with HTML breaks for Plotly hover display df['words_daccapo'] = df['words'].str.replace('\n', '
') - # Ensure words column is properly formatted for hover display for i, row in df.iterrows(): if pd.isna(row['words_daccapo']): df.at[i, 'words_daccapo'] = "" - # Crea il grafico di base fig = px.scatter( df, x='rcentrality', y='rimpact', - size=df['log_freq'] * 15, # Multiply log_freq by 15 - color_discrete_sequence=df['adjusted_color'], # Use pre-adjusted colors + size=df['log_freq'] * 15, + color_discrete_sequence=df['adjusted_color'], hover_name='words_daccapo', labels={'rcentrality': 'Centrality', 'rimpact': 'Impact'}, ) @@ -175,49 +173,37 @@ def limit_to_first(text): margin=dict(l=0, r=0, t=0, b=0) ) - # Create custom hover template instead of using words_daccapo column fig.update_traces( hovertemplate='%{hovertext}', hovertext=[words.replace('\n', '
') for words in df['words']] ) - # Remove the words_daccapo column as it's no longer needed if 'words_daccapo' in df.columns: df = df.drop('words_daccapo', axis=1) - # Aggiungi linee orizzontali e verticali fig.add_hline(y=meandens, line_dash="dash", line_color="rgba(0,0,0,0.7)") fig.add_vline(x=meancentr, line_dash="dash", line_color="rgba(0,0,0,0.7)") - # Aggiorna le proprietà dei marker per replicare R min_size = 10 * (1 + size) max_size = 30 * (1 + size) - - # Calculate size reference for correct scaling sizeref = 2.0 * max(df['log_freq']) / (max_size**2) fig.update_traces( marker=dict( - color=df['adjusted_color'], # Use adjusted color with transparency + color=df['adjusted_color'], symbol='circle', sizemode='area', sizemin=min_size, - sizeref=sizeref, # Dynamic sizing based on log_freq range - line=dict(width=10) # Border for points + sizeref=sizeref, + line=dict(width=10) ) - ) - # Aggiunge le etichette se size > 0 if size > 0: - # Replace \n with
for Plotly and only show labels for freq > 1 labels = df['label'].where(df['freq'] > 1, '').str.lower().str.replace('\n', '
') text_size = 3 * (1 + size) - # Implementa repel se richiesto if repel: - # In Plotly non esiste un vero repel, ma possiamo aggiustare il posizionamento - # Per una simulazione migliore si potrebbe implementare un algoritmo di repulsione fig.add_trace(go.Scatter( x=df['rcentrality'], y=df['rimpact'], @@ -238,14 +224,12 @@ def limit_to_first(text): showlegend=False )) - # Calcola i limiti degli assi come in R rangex = max(meancentr - df['rcentrality'].min(), df['rcentrality'].max() - meancentr) rangey = max(meandens - df['rimpact'].min(), df['rimpact'].max() - meandens) xlimits = [meancentr - rangex - 0.5, meancentr + rangex + 0.5] ylimits = [meandens - rangey - 0.5, meandens + rangey + 0.5] - # Aggiorna il layout del grafico per match con il tema di R fig.update_layout( showlegend=False, plot_bgcolor='white', @@ -270,8 +254,8 @@ def limit_to_first(text): range=ylimits ), autosize=True, - width=None, # Let container control width - height=None, # Let container control height if needed + width=None, + height=None, ) g = fig @@ -305,29 +289,29 @@ def limit_to_first(text): return results -#### FUNCTION DA METTERE IN SERVICES??? 
-# Normalizzazione del punteggio di citazione def normalizeCitationScore(df, field="documents", impact_measure="local"): if field not in ["documents", "authors", "sources"]: print('\nfield argument is incorrect.\n\nPlease select one of the following choices: "documents", "authors", "sources"\n\n') return None - # Applica localCitations se richiesto if impact_measure == "local": - df = localCitations(df, fast_search=False, sep=";")['M'] + lc = localCitations(df, fast_search=False, sep=";") + # PATCH: localCitations may return None if histNetwork finds no citations + if lc is None: + return None + df = lc['M'] else: + # PATCH: df may be reactive here + df = df.get() if hasattr(df, 'get') and callable(df.get) and not isinstance(df, pd.DataFrame) else df df['LCS'] = 0 - # Converte colonne in numerico df['TC'] = df['TC'].astype(float, errors='ignore') df['PY'] = df['PY'].astype(float, errors='ignore') - # Rimpiazza LCS=0 con 1 e calcola NGCS/NLCS per anno df['LCS'] = df['LCS'].replace(0, 1) df['NGCS'] = df.groupby('PY')['TC'].transform(lambda x: x / x.mean(skipna=True)) df['NLCS'] = df.groupby('PY')['LCS'].transform(lambda x: x / x.mean(skipna=True)) - # Suddivisione per tipo di campo richiesto if field == "documents": NCS = df[['SR', 'PY', 'NGCS', 'NLCS', 'TC', 'LCS']].rename(columns={ 'NGCS': 'MNGCS', @@ -337,8 +321,12 @@ def normalizeCitationScore(df, field="documents", impact_measure="local"): }) elif field == "authors": - df['AU'] = df['AU'].fillna('').str.split(';') # Divide gli autori - exploded = df.explode('AU').assign(AU=lambda x: x['AU'].str.strip()) # Espande e rimuove spazi extra + df['AU'] = df['AU'].apply( + lambda x: x if isinstance(x, list) + else [i.strip() for i in str(x).split(';')] if pd.notna(x) and x != '' + else [] + ) + exploded = df.explode('AU').assign(AU=lambda x: x['AU'].str.strip()) NCS = ( exploded.groupby('AU').agg( @@ -365,7 +353,6 @@ def normalizeCitationScore(df, field="documents", impact_measure="local"): .rename(columns={'SO': 
'sources'}) ) - # Gestione impatto globale if impact_measure == "global": NCS.drop(columns=['MNLCS', 'LC'], errors='ignore', inplace=True) else: @@ -374,51 +361,43 @@ def normalizeCitationScore(df, field="documents", impact_measure="local"): return NCS -# Network def network(df, analysis, field, stemming, n, cluster, community_repulsion): - NetMatrix = None # Inizializza la matrice della rete + NetMatrix = None + + # PATCH: extract plain DataFrame before passing to term_extraction or biblionetwork + df_plain = df.get() if hasattr(df, 'get') and callable(df.get) and not isinstance(df, pd.DataFrame) else df if analysis == "documents": if field == "CR": - NetMatrix = biblionetwork(df, analysis="coupling", network="references", short=True, shortlabel=False, sep=";") + NetMatrix = biblionetwork(df_plain, analysis="coupling", network="references", short=True, shortlabel=False, sep=";") else: if field in ["TI", "AB"]: - df = term_extraction(df, field=field, verbose=False, stemming=stemming) - if field == "TI": - NetMatrix = biblionetwork(df, analysis="coupling", network="references", short=True, shortlabel=False, sep=";") - else: - NetMatrix = biblionetwork(df, analysis="coupling", network="references", short=True, shortlabel=False, sep=";") + df_plain = term_extraction(df_plain, field=field, verbose=False, stemming=stemming) + NetMatrix = biblionetwork(df_plain, analysis="coupling", network="references", short=True, shortlabel=False, sep=";") elif analysis == "authors": if field == "CR": - NetMatrix = biblionetwork(df, analysis="coupling", network="authors", short=True) + NetMatrix = biblionetwork(df_plain, analysis="coupling", network="authors", short=True) else: if field in ["TI", "AB"]: - df = term_extraction(df, field=field, verbose=False, stemming=stemming) - # NetMatrix = coupling(df, field, analysis="authors") + df_plain = term_extraction(df_plain, field=field, verbose=False, stemming=stemming) elif analysis == "sources": if field == "CR": - NetMatrix = 
biblionetwork(df, analysis="coupling", network="sources", short=True) + NetMatrix = biblionetwork(df_plain, analysis="coupling", network="sources", short=True) else: if field in ["TI", "AB"]: - df = term_extraction(df, field=field, verbose=False, stemming=stemming) - # NetMatrix = coupling(df, field, analysis="sources") + df_plain = term_extraction(df_plain, field=field, verbose=False, stemming=stemming) - # Controllo se la matrice è None (caso di errore o input non valido) if NetMatrix is None: print("\n\nNetwork matrix is empty or analysis type is incorrect!\nThe analysis cannot be performed\n\n") return None - - # Converti in DataFrame se non lo è già if not isinstance(NetMatrix, pd.DataFrame): NetMatrix = pd.DataFrame(NetMatrix) - # Rimuovi colonne e righe con nomi vuoti NetMatrix = NetMatrix.loc[:, NetMatrix.columns.str.strip() != ""].loc[NetMatrix.index.str.strip() != ""] - if NetMatrix.shape[0] > 0: Net = network_plot(NetMatrix, normalize="salton", n=n, Title=f"Coupling network of {analysis} using {field}", type="auto", @@ -433,18 +412,17 @@ def network(df, analysis, field, stemming, n, cluster, community_repulsion): def labeling(df, df_lab, term, n, n_labels, analysis, ngrams): - # Se il termine è TI o AB, estrai termini if term in ["TI", "AB"]: - df = term_extraction(reactive.Value(df), field=term, ngrams=ngrams, verbose=False) - df = df.get() + # PATCH: df is already a plain DataFrame here — no need to wrap in reactive + df = term_extraction(df, field=term, ngrams=ngrams, verbose=False) term = f"{term}_TM" - # Normalizzazione delle stringhe per evitare errori di merge df_lab = df_lab.apply(lambda x: x.astype(str).str.upper().str.strip()) - df = df.apply(lambda x: x.astype(str).str.upper().str.strip()) + df = df.apply(lambda x: x.str.upper().str.strip() if x.dtype == object and not x.apply(lambda v: isinstance(v, list)).any() else x) - # Analisi specifica if analysis == "documents": + df['SR'] = df['SR'].astype(str) + df_lab[analysis] = 
df_lab[analysis].astype(str) df = df_lab.merge(df, left_on="documents", right_on="SR", how="left") elif analysis == "authors": @@ -454,9 +432,8 @@ def labeling(df, df_lab, term, n, n_labels, analysis, ngrams): if WA.shape[1] != WF.shape[0]: raise ValueError("Dimensioni non allineate tra WA e WF") - AF = WA.T @ WF # Prodotto matriciale + AF = WA.T @ WF - # Creazione della mappa autore -> termini concatenati A = { author: ';'.join( [name for name, count in zip(WF.columns, AF[i].toarray().flatten()) if count > 0] @@ -470,11 +447,9 @@ def labeling(df, df_lab, term, n, n_labels, analysis, ngrams): elif analysis == "sources": df = df_lab.merge(df, left_on="sources", right_on="SO", how="inner") - # Se 'SR' non esiste, usa la prima colonna del DataFrame if 'SR' not in df.columns: df['SR'] = df.iloc[:, 0] - # Creazione della tabella globale delle etichette df['SR'] = df.iloc[:, 0] tab_global = table_tag(df, term) tab_global = pd.DataFrame({ @@ -483,41 +458,41 @@ def labeling(df, df_lab, term, n, n_labels, analysis, ngrams): 'n': len(df) }) - # Assegnazione delle etichette migliori ai cluster df['w'] = df.groupby('Cluster').apply(lambda x: best_lab(x, tab_global, n_labels, term)).explode().reset_index(drop=True) return df['w'] def best_lab(df, tab_global, n_labels, term): - # Creazione della tabella locale con le etichette tab = table_tag(df, term) tab = pd.DataFrame(list(tab.items()), columns=['label', 'value']) - # Espandi le liste di parole nei singoli termini tab = tab.explode('label') - # Merge con la tabella globale tab = tab.merge(tab_global, on='label', how="left") if tab.empty: return "" - # Evita errori di divisione per zero tab['conf'] = round(tab['value'] / tab['tot'] * 100, 1).fillna(0) tab['supp'] = round(tab['tot'] / tab_global['n'].iloc[0] * 100, 1).fillna(0) tab['relevance'] = round(tab['conf'] * tab['supp'] / 100, 1) - # Ordina per rilevanza e seleziona le migliori etichette tab = tab.sort_values(by='relevance', ascending=False).head(n_labels) - # Ritorna 
la stringa con etichette e confidence return '\n'.join(f"{label} - conf {conf}%" for label, conf in zip(tab['label'], tab['conf'])).lower() def localCitations(df, fast_search=False, sep=";"): - df = metaTagExtraction(df, "SR") - M = df.get() + _df_check = df.get() if hasattr(df, 'get') and callable(df.get) and not isinstance(df, pd.DataFrame) else df + if 'SR' not in _df_check.columns or _df_check['SR'].eq('').all(): + df = metaTagExtraction(df, "SR") + M = df.get() if hasattr(df, 'get') and callable(df.get) and not isinstance(df, pd.DataFrame) else df + + # PATCH: safety check + if M is None or M.empty: + return None + M['TC'] = M['TC'].fillna(0) if fast_search: loccit = M['TC'].quantile(0.75) @@ -525,14 +500,21 @@ def localCitations(df, fast_search=False, sep=";"): loccit = 1 H = histNetwork(df, min_citations=loccit, sep=sep, network=False) + + # PATCH: histNetwork may return None + if H is None: + return None + LCS = H['histData'] M = H['M'] + + # PATCH: if all LCS are 0, return None to avoid empty result propagation + if 'LCS' not in M.columns or M['LCS'].sum() == 0: + return None - # Split authors and repeat local citations AU = M['AU'].explode() n = AU.groupby(level=0).size() - # Create DataFrame for authors and local citations df_authors = pd.DataFrame({'AU': AU, 'LCS': M['LCS'].repeat(n).values}) author_counts = df_authors.groupby('AU')['LCS'].sum().reset_index() author_counts.columns = ["Authors", "N. of Local Citations"] @@ -558,40 +540,23 @@ def localCitations(df, fast_search=False, sep=";"): def adjust_color(color, alpha=0.5): - """ - Adjust the color by changing its alpha value. 
- """ + """Adjust the color by changing its alpha value.""" + rgba = mcolors.to_rgba(color) + adjusted_rgba = (rgba[0], rgba[1], rgba[2], alpha) + return mcolors.to_hex(adjusted_rgba) - # Convert color to RGBA - rgba = mcolors.to_rgba(color) - # Adjust the alpha value - adjusted_rgba = (rgba[0], rgba[1], rgba[2], alpha) - # Convert back to hex - return mcolors.to_hex(adjusted_rgba) def avoid_net_overlaps(coords, labels, sizes, threshold=0.10): - """Function to avoid label overlapping - Args: - coords: numpy array of x,y coordinates - labels: list of node labels - sizes: list of node sizes (dotSizes) - threshold: distance threshold for overlap detection - Returns: - list of labels to remove to avoid overlap - """ - - # Create dataframe of nodes with labels + """Function to avoid label overlapping.""" df = pd.DataFrame({ 'x': coords[:, 0], - 'y': coords[:, 1] / 2, # Normalize y coordinates + 'y': coords[:, 1] / 2, 'label': labels, 'size': sizes }) - # Calculate pairwise manhattan distances distances = squareform(pdist(df[['x', 'y']], metric='cityblock')) - # Create dataframe of overlapping pairs overlaps = [] n = len(labels) for i in range(n): @@ -608,7 +573,6 @@ def avoid_net_overlaps(coords, labels, sizes, threshold=0.10): if not overlaps: return [] - # Convert to dataframe overlaps_df = pd.DataFrame(overlaps) labels_to_remove = [] @@ -625,11 +589,10 @@ def avoid_net_overlaps(coords, labels, sizes, threshold=0.10): overlaps_df = overlaps_df.iloc[1:] continue - # Remove rows containing this label overlaps_df = overlaps_df[ (overlaps_df['from'] != label) & (overlaps_df['to'] != label) ] labels_to_remove.append(label) - return labels_to_remove + return labels_to_remove \ No newline at end of file diff --git a/www/services/format_functions.py b/www/services/format_functions.py index 1a8ee7af4..6f40caff9 100644 --- a/www/services/format_functions.py +++ b/www/services/format_functions.py @@ -1,11 +1,12 @@ from .utils import * from .parsers import * +from .io_utils 
import load_standardized_csv import zipfile import tempfile import os -def format_ab_column(entry, source, file_type): # Function for AB Column (format--> "Abstract") +def format_ab_column(entry, source, file_type): abstract = '' if source == 'Web_of_Science': if file_type == '.bib': @@ -29,27 +30,22 @@ def format_ab_column(entry, source, file_type): # Function for AB Column elif source == 'Cochrane': if file_type == '.txt': abstract = entry.get('AB', '') - return abstract -def format_af_column(entry, source, file_type): # Function for AF Column (format--> "[Surname, Name]") +def format_af_column(entry, source, file_type): authors = [] if source == 'Web_of_Science': if file_type == '.bib': authors_str = entry.get('author', '').replace('\n', ' ') authors_list = authors_str.split(" and ") - for person in authors_list: parts = person.split(", ") if len(parts) == 2: surname, first_names = parts else: - # Se ci sono più parti, la prima è il cognome, il resto sono nomi propri surname = parts[0] first_names = ' '.join(parts[1:]) - - # author_dict = {'Surname': surname, 'Name': first_names} author_dict = surname + ' ' + first_names authors.append(author_dict) elif file_type == '.txt' or file_type == '.ciw': @@ -59,11 +55,8 @@ def format_af_column(entry, source, file_type): # Function for AF Column if len(parts) == 2: surname, first_names = parts else: - # Se ci sono più parti, la prima è il cognome, il resto sono nomi propri surname = parts[0] first_names = ' '.join(parts[1:]) - - # author_dict = {'Surname': surname, 'Name': first_names} author_dict = surname + ' ' + first_names authors.append(author_dict) elif source == 'PubMed': @@ -71,14 +64,11 @@ def format_af_column(entry, source, file_type): # Function for AF Column for author in entry.get('FAU', '').split(";"): if ', ' in author: surname, first_names = author.split(", ") - #author_dict = {'Surname': surname, 'Name': first_names} author_dict = surname + ' ' + first_names authors.append(author_dict) else: - # Handle 
cases where the author string does not contain a comma and space surname = author first_names = '' - #author_dict = {'Surname': surname, 'Name': first_names} author_dict = surname + ' ' + first_names authors.append(author_dict) elif source == 'Scopus': @@ -88,7 +78,6 @@ def format_af_column(entry, source, file_type): # Function for AF Column if len(parts) == 2: surname, first_names = parts else: - # Se ci sono più parti, la prima è il cognome, il resto sono nomi propri surname = parts[0] first_names = ' '.join(parts[1:]) author_dict = surname + ' ' + first_names @@ -107,7 +96,6 @@ def format_af_column(entry, source, file_type): # Function for AF Column for person in persons: if person.strip() and len(person.split(", ")) == 2: surname, name = person.split(", ") - # author_dict = {'Surname': surname, 'Name': name} author_dict = surname + ' ' + name authors.append(author_dict) elif source == 'The_Lens': @@ -118,7 +106,6 @@ def format_af_column(entry, source, file_type): # Function for AF Column parts = person.split(" ") name = " ".join(parts[:-1]) surname = parts[-1] - # author_dict = {'Surname': surname, 'Name': name} author_dict = surname + ' ' + name authors.append(author_dict) elif source == 'Cochrane': @@ -127,13 +114,12 @@ def format_af_column(entry, source, file_type): # Function for AF Column return authors -def format_au_column(entry, source, file_type): # Function for AU Column (format--> "[Surname, N.]") +def format_au_column(entry, source, file_type): authors = [] if source == 'Web_of_Science': if file_type == '.bib': authors_str = entry.get('author', '').replace('\n', ' ') authors_list = authors_str.split(" and ") - for person in authors_list: parts = person.split(", ") if len(parts) == 2: @@ -141,9 +127,7 @@ def format_au_column(entry, source, file_type): # Function for AU Column else: surname = parts[0] names = ' '.join(parts[1:]) - initials = ''.join([name[0] for name in names.split() if name]) - # author_dict = {'Surname': surname, 'Name Initials': 
initials} author_dict = surname + ' ' + initials authors.append(author_dict) elif file_type == '.txt' or file_type == '.ciw': @@ -155,9 +139,6 @@ def format_au_column(entry, source, file_type): # Function for AU Column else: surname = parts[0] names = ' '.join(parts[1:]) - - # initials = ''.join([name[0] + '.' for name in re.split(r'[ -]', names) if name]) - # author_dict = {'Surname': surname, 'Name Initials': initials} author_dict = surname + ' ' + names authors.append(author_dict) elif source == 'PubMed': @@ -167,16 +148,25 @@ def format_au_column(entry, source, file_type): # Function for AU Column if author: surname, *initials = author.split(" ") initials = ' '.join(initials) - #author_dict = {'Surname': surname, 'Name Initials': initials} author_dict = surname + ' ' + initials authors.append(author_dict) elif source == 'Scopus': if file_type == '.bib': for person in entry.get('author', []).split(" and "): - surname, names = person.split(", ") + # PATCH 3: the original code used `surname, names = person.split(", ")` + # without checking the number of parts — if the string contains no + # comma+space the unpacking crashes with ValueError. + # → guard with len check before unpacking. + parts = person.split(", ") + if len(parts) == 2: + surname, names = parts + else: + surname = parts[0] + names = ' '.join(parts[1:]) if len(parts) > 1 else '' initials = '' for name in names.split(" "): - initials += name[0] + '.' + if name: + initials += name[0] + '.' 
author_dict = surname + ' ' + initials authors.append(author_dict) elif file_type == '.csv': @@ -190,17 +180,14 @@ def format_au_column(entry, source, file_type): # Function for AU Column authors.append(author_dict) elif source == 'Dimensions': if file_type == '.csv' or file_type == '.xlsx': - # Pulizia preliminare dei dati - authors_raw = re.sub(r"\s+", " ", entry['Authors']) # Rimuove spazi multipli - authors_raw = re.sub(r"[()]", "", authors_raw) # Rimuove parentesi - - # Suddivisione degli autori + authors_raw = re.sub(r"\s+", " ", entry['Authors']) + authors_raw = re.sub(r"[()]", "", authors_raw) persons = authors_raw.split("; ") for person in persons: if person.strip() and len(person.split(", ")) == 2: surname, name = person.split(", ") initials = ''.join([part[0] for part in name.split()]) - author_dict = surname + ' ' + initials + author_dict = surname + ' ' + initials authors.append(author_dict) elif person.strip() and len(person.split(" ")) > 1: parts = person.split(" ") @@ -226,17 +213,14 @@ def format_au_column(entry, source, file_type): # Function for AU Column if author: surname, *initials = author.split(" ") if len(initials) >= 2: - #author_dict = {'Surname': initials[0], 'Name Initials': initials[1]} author_dict = initials[0] + ' ' + initials[1] else: - #author_dict = {'Surname': surname, 'Name Initials': initials[0]} - author_dict = surname + ' ' + initials[0] + author_dict = surname + ' ' + (initials[0] if initials else '') authors.append(author_dict) - return authors -def format_au1_un_column(entry, source, file_type): # Function for AU1_UN Column (format--> "University of the First Author") +def format_au1_un_column(entry, source, file_type): university = [] if source == 'Web_of_Science': if file_type == '.bib': @@ -248,16 +232,20 @@ def format_au1_un_column(entry, source, file_type): # Function for AU1_UN Co istituti = entry.get('AD', '').split(";") risultato = [] - if isinstance(istituti[0], str): + # PATCH 5: the original code set `parti` 
inside an `if isinstance` + # block but used it outside — if the condition was False, `parti` + # was never defined and the subsequent `if len(parti)` raised + # NameError. → guard the outer blocks with the same isinstance check. + if istituti and isinstance(istituti[0], str): parti = istituti[0].split(",") - if len(parti) > 1 and any( - keyword in parti[1] for keyword in ["University", "National", "Medical", "Centre", "Electronic"]): - seconda_parte = parti[1].strip().rstrip('.') - risultato.append(seconda_parte) - elif len(parti) > 2 and any( - keyword in parti[2] for keyword in ["University", "National", "Medical", "Centre", "Electronic"]): - terza_parte = parti[2].strip().rstrip('.') - risultato.append(terza_parte) + if len(parti) > 1 and any( + keyword in parti[1] for keyword in ["University", "National", "Medical", "Centre", "Electronic"]): + seconda_parte = parti[1].strip().rstrip('.') + risultato.append(seconda_parte) + elif len(parti) > 2 and any( + keyword in parti[2] for keyword in ["University", "National", "Medical", "Centre", "Electronic"]): + terza_parte = parti[2].strip().rstrip('.') + risultato.append(terza_parte) university = ';'.join(risultato) elif source == 'Scopus': @@ -278,11 +266,10 @@ def format_au1_un_column(entry, source, file_type): # Function for AU1_UN Co elif source == 'Cochrane': if file_type == '.txt': university = '' - return university -def format_au_un_column(entry, source, file_type): # Function for AU_UN Column (format--> [Universities]) +def format_au_un_column(entry, source, file_type): universities = [] if source == 'Web_of_Science': if file_type == '.bib': @@ -297,21 +284,15 @@ def format_au_un_column(entry, source, file_type): # Function for AU_UN Col if isinstance(text, str): istituti = text.split(";") risultato = [] - for istituto in istituti: if any(keyword in istituto for keyword in ["University", "National", "Medical", "Centre", "Electronic"]): - # Dividi la sottostringa usando il delimitatore ',' parti = 
istituto.split(",") if len(parti) > 1 and any(keyword in parti[1] for keyword in ["University", "National", "Medical", "Centre", "Electronic"]): - # Prendi la seconda parte (dopo la prima virgola) e rimuovi eventuali punti alla fine seconda_parte = parti[1].strip().rstrip('.') risultato.append(seconda_parte) elif len(parti) > 2 and any(keyword in parti[2] for keyword in ["University", "National", "Medical", "Centre", "Electronic"]): - # Prendi la terza parte (dopo la seconda virgola) e rimuovi eventuali punti alla fine terza_parte = parti[2].strip().rstrip('.') risultato.append(terza_parte) - - # Unisci le stringhe risultanti in un'unica stringa con ';' come separatore universities.extend(risultato) elif source == 'Scopus': if file_type == '.bib': @@ -332,11 +313,10 @@ def format_au_un_column(entry, source, file_type): # Function for AU_UN Col if file_type == '.txt': university = '' universities.append(university) - return universities -def format_bp_column(entry, source, file_type): # Function for BP Column (format--> Begin Page) +def format_bp_column(entry, source, file_type): begin_page = '' if source == 'Web_of_Science': if file_type == '.bib': @@ -346,10 +326,12 @@ def format_bp_column(entry, source, file_type): # Function for BP Column elif source == 'PubMed': if file_type == '.txt': try: - begin_page = entry.get('PG', '').split("-") - if begin_page[0] < begin_page[1]: - begin_page = begin_page[0] - except: + pages = entry.get('PG', '').split("-") + # PATCH 4: original code compared strings lexicographically + # ("9" < "10" is False in string comparison) — cast to int first. 
+ if len(pages) == 2 and int(pages[0]) < int(pages[1]): + begin_page = pages[0] + except (ValueError, IndexError): begin_page = '' elif source == 'Scopus': if file_type == '.bib': @@ -367,30 +349,28 @@ def format_bp_column(entry, source, file_type): # Function for BP Column elif source == 'Cochrane': if file_type == '.txt': begin_page = '' - return begin_page -def format_c1_column(entry, source, file_type): # Function for C1 Column (format--> [Affiliations]) +def format_c1_column(entry, source, file_type): affiliations = [] if source == 'Web_of_Science': if file_type == '.bib': affiliation_text = entry.get('affiliation', '') if affiliation_text: - affiliation_lines = affiliation_text.strip().split("\n") # Remove leading and trailing whitespaces from the 'affiliation' field and split it into lines. - + affiliation_lines = affiliation_text.strip().split("\n") for line in affiliation_lines: if "(Corresponding Author)" not in line: num_authors = len(line.split("; ")) - if num_authors == 0: # If there is just one author, the affiliation is the string formed by joining the parts from the third part onwards. + if num_authors == 0: parts = line.split(",") affiliation = ", ".join(parts[2:]) - else: # If there are multiple authors, split the last part into subparts using ',' as the separator. The affiliation is the string formed by joining the subparts from the third subpart onwards. + else: parts = line.split(";") last_parts = parts[-1] last_part = last_parts.split(",") affiliation = ", ".join(last_part[2:]) - affiliation = affiliation.strip().rstrip('.') # Remove leading and trailing whitespaces from the affiliation and remove any trailing '.' 
+ affiliation = affiliation.strip().rstrip('.') affiliations.append(affiliation) elif file_type == '.txt' or file_type == '.ciw': author_affiliations = entry.get('C1', '') @@ -414,7 +394,7 @@ def format_c1_column(entry, source, file_type): # Function for C1 Column affiliations.append(affiliation) elif source == 'Dimensions': if file_type == '.csv' or file_type == '.xlsx': - persons = re.findall(r'\((.*?)\)', entry['Authors (Raw Affiliation)']) + persons = re.findall(r'\((.*?)\)', entry['Authors (Raw Affiliation)']) for person in persons: affiliations.append(person) elif source == 'The_Lens': @@ -423,11 +403,10 @@ def format_c1_column(entry, source, file_type): # Function for C1 Column elif source == 'Cochrane': if file_type == '.txt': affiliations = [] - return affiliations -def format_cr_column(entry, source, file_type): # Function for CR Column (format--> "[References]") +def format_cr_column(entry, source, file_type): cited_references = [] if source == 'Web_of_Science': if file_type == '.bib': @@ -454,11 +433,10 @@ def format_cr_column(entry, source, file_type): # Function for CR Column elif source == 'Cochrane': if file_type == '.txt': cited_references = [] - return cited_references -def format_de_column(entry, source, file_type): # Function for DE Column (format--> "[Keywords]") +def format_de_column(entry, source, file_type): author_keywords = [] if source == 'Web_of_Science': if file_type == '.bib': @@ -495,7 +473,6 @@ def format_de_column(entry, source, file_type): # Function for DE Column else: keywords = str(entry['MeSH terms']).split("; ") for keyword in keywords: - # keyword_dict = {'Terms': keyword} author_keywords.append(keyword) elif source == 'The_Lens': if file_type == '.csv': @@ -504,17 +481,15 @@ def format_de_column(entry, source, file_type): # Function for DE Column else: keywords = str(entry['Keywords']).split("; ") for keyword in keywords: - # keyword_dict = {'Terms': keyword} author_keywords.append(keyword) elif source == 'Cochrane': if 
file_type == '.txt': for keyword in entry.get('KY', '').split(";"): author_keywords.append(keyword) - return author_keywords -def format_di_column(entry, source, file_type): # Function for DI Column (format--> "DOI") +def format_di_column(entry, source, file_type): doi = '' if source == 'Web_of_Science': if file_type == '.bib': @@ -538,11 +513,10 @@ def format_di_column(entry, source, file_type): # Function for DI Column elif source == 'Cochrane': if file_type == '.txt': doi = entry.get('DOI', '') - return doi -def format_dt_column(entry, source, file_type): # Function for DT Column ("Document Type") +def format_dt_column(entry, source, file_type): document_type = '' if source == 'Web_of_Science': if file_type == '.bib': @@ -566,11 +540,10 @@ def format_dt_column(entry, source, file_type): # Function for DT Column elif source == 'Cochrane': if file_type == '.txt': document_type = '' - return document_type -def format_em_column(entry, source, file_type): # Function for EM Column (format--> "[Authors E-mail]") +def format_em_column(entry, source, file_type): emails = [] if source == 'Web_of_Science': if file_type == '.bib': @@ -605,29 +578,30 @@ def format_em_column(entry, source, file_type): # Function for EM Column elif source == 'Cochrane': if file_type == '.txt': emails = '' - return emails -def format_ep_column(entry, source, file_type): # Function for EP Column ("End Page") +def format_ep_column(entry, source, file_type): end_page = '' if source == 'Web_of_Science': if file_type == '.bib': try: - end_page = entry.get('pages', '').split("-") - if end_page[0] < end_page[1]: - end_page = end_page[1] - except: + pages = entry.get('pages', '').split("-") + # PATCH 4 (EP): same string comparison issue as format_bp_column. + # → cast to int before comparing to get correct numeric ordering. 
+ if len(pages) == 2 and int(pages[0]) < int(pages[1]): + end_page = pages[1] + except (ValueError, IndexError): end_page = '' elif file_type == '.txt' or file_type == '.ciw': end_page = entry.get('EP', [''])[0] elif source == 'PubMed': if file_type == '.txt': try: - end_page = entry.get('PG', '').split("-") - if end_page[0] < end_page[1]: - end_page = end_page[1] - except: + pages = entry.get('PG', '').split("-") + if len(pages) == 2 and int(pages[0]) < int(pages[1]): + end_page = pages[1] + except (ValueError, IndexError): end_page = '' elif source == 'Scopus': if file_type == '.bib': @@ -648,11 +622,10 @@ def format_ep_column(entry, source, file_type): # Function for EP Column elif source == 'Cochrane': if file_type == '.txt': end_page = '' - return end_page -def format_fu_column(entry, source, file_type): # Function for FU Column ("Funding Details") +def format_fu_column(entry, source, file_type): funding = [] if source == 'Web_of_Science': if file_type == '.bib': @@ -678,11 +651,10 @@ def format_fu_column(entry, source, file_type): # Function for FU Column elif source == 'Cochrane': if file_type == '.txt': funding = '' - return funding -def format_fx_column(entry, source, file_type): # Function for FX Column (format--> "Funding Text") +def format_fx_column(entry, source, file_type): fx = '' if source == 'Web_of_Science': if file_type == '.bib': @@ -705,11 +677,10 @@ def format_fx_column(entry, source, file_type): # Function for FX Column elif source == 'Cochrane': if file_type == '.txt': fx = '' - return fx -def format_id_column(entry, source, file_type): # Function for ID Column (format--> [Index Keywords]) +def format_id_column(entry, source, file_type): index_keywords = [] if source == 'Web_of_Science': if file_type == '.bib': @@ -740,7 +711,6 @@ def format_id_column(entry, source, file_type): # Function for ID Column else: keywords = str(entry['MeSH terms']).split("; ") for keyword in keywords: - # keyword_dict = {'Terms': keyword} 
index_keywords.append(keyword) elif source == 'The_Lens': if file_type == '.csv': @@ -749,17 +719,15 @@ def format_id_column(entry, source, file_type): # Function for ID Column else: keywords = str(entry['Keywords']).split("; ") for keyword in keywords: - # keyword_dict = {'Terms': keyword} index_keywords.append(keyword) elif source == 'Cochrane': if file_type == '.txt': for keyword in entry.get('KY', '').split(";"): index_keywords.append(keyword) - return index_keywords -def format_is_column(entry, source, file_type): # Function for IS Column (format--> "Issue") +def format_is_column(entry, source, file_type): issue = '' if source == 'Web_of_Science': if file_type == '.bib': @@ -784,11 +752,10 @@ def format_is_column(entry, source, file_type): # Function for IS Column elif source == 'Cochrane': if file_type == '.txt': issue = '' - return issue -def format_ji_column(entry, source, file_type): # Function for JI Column (format--> "Abbrev. Journal Name") +def format_ji_column(entry, source, file_type): abbrev_source_title = '' if source == 'Web_of_Science': if file_type == '.bib': @@ -812,11 +779,10 @@ def format_ji_column(entry, source, file_type): # Function for JI Column elif source == 'Cochrane': if file_type == '.txt': abbrev_source_title = entry.get('SO', '') - return abbrev_source_title -def format_la_column(entry, source, file_type): # Function for LA Column (format--> "Language") +def format_la_column(entry, source, file_type): language = '' if source == 'Web_of_Science': if file_type == '.bib': @@ -840,11 +806,10 @@ def format_la_column(entry, source, file_type): # Function for LA Column elif source == 'Cochrane': if file_type == '.txt': language = '' - return language -def format_oa_column(entry, source, file_type): # Function for OA Column (format--> [Open Access]) +def format_oa_column(entry, source, file_type): open_access = [] if source == 'Web_of_Science': if file_type == '.bib': @@ -873,11 +838,10 @@ def format_oa_column(entry, source, file_type): # 
Function for OA Column elif source == 'Cochrane': if file_type == '.txt': open_access = '' - return open_access -def format_oi_column(entry, source, file_type): # Function for OI Column ([Orcid Number]") +def format_oi_column(entry, source, file_type): oi = [] if source == 'Web_of_Science': if file_type == '.bib': @@ -887,7 +851,7 @@ def format_oi_column(entry, source, file_type): # Function for OI Column oi.append(parts[-1].strip()) elif file_type == '.txt' or file_type == '.ciw': orcid_ids = list(entry.get('OI', '')) - if orcid_ids: # If the 'OI' field is not empty, split the string into parts using the delimiter '; ' and extract the ORCID number from each part. + if orcid_ids: for orcid in orcid_ids: orcid_parts = orcid.split("; ") for part in orcid_parts: @@ -916,11 +880,10 @@ def format_oi_column(entry, source, file_type): # Function for OI Column elif source == 'Cochrane': if file_type == '.txt': oi = '' - return oi -def format_pmid_column(entry, source, file_type): # Function for PMID Column (format--> "PubMed ID") +def format_pmid_column(entry, source, file_type): pmid = '' if source == 'Web_of_Science': if file_type == '.bib': @@ -950,11 +913,10 @@ def format_pmid_column(entry, source, file_type): # Function for PMID Colu elif source == 'Cochrane': if file_type == '.txt': pmid = '' - return pmid -def format_pu_column(entry, source, file_type): # Function for PU Column (format--> "Publisher") +def format_pu_column(entry, source, file_type): publisher = '' if source == 'Web_of_Science': if file_type == '.bib': @@ -978,11 +940,10 @@ def format_pu_column(entry, source, file_type): # Function for PU Column elif source == 'Cochrane': if file_type == '.txt': publisher = '' - return publisher -def format_py_column(entry, source, file_type): # Function for PY Column (format--> "Publication Year") +def format_py_column(entry, source, file_type): publication_year = '' if source == 'Web_of_Science': if file_type == '.bib': @@ -1007,11 +968,10 @@ def 
format_py_column(entry, source, file_type): # Function for PY Column elif source == 'Cochrane': if file_type == '.txt': publication_year = entry.get('YR', '') - return publication_year -def format_rp_column(entry, source, file_type): # Function for RP Column (format--> "Correspondence Address") +def format_rp_column(entry, source, file_type): correspondence_address = '' if source == 'Web_of_Science': if file_type == '.bib': @@ -1019,23 +979,19 @@ def format_rp_column(entry, source, file_type): # Function for RP Column first_email = '' affiliation_text = entry.get('affiliation', '') if affiliation_text: - affiliation_lines = affiliation_text.strip().split("\n") # Remove leading and trailing whitespaces from the 'affiliation' field and split it into lines. - + affiliation_lines = affiliation_text.strip().split("\n") for line in affiliation_lines: if "(Corresponding Author)" in line: correspondence_author = line break - emails = entry.get('author-email', '').split("\n") if emails: first_email = emails[0] - correspondence_address = correspondence_author + '; email: ' + first_email elif file_type == '.txt' or file_type == '.ciw': correspondence_author = entry.get('RP', '') if correspondence_author: correspondence_author = correspondence_author[0] - first_email = '' for email in entry.get('EM', ''): emails = email.split("; ") @@ -1059,22 +1015,21 @@ def format_rp_column(entry, source, file_type): # Function for RP Column elif source == 'Cochrane': if file_type == '.txt': correspondence_address = '' - return correspondence_address -def format_sc_column(entry, source, file_type): # Function for SC Column (format--> [Fields of Research]) +def format_sc_column(entry, source, file_type): fields = [] if source == 'Web_of_Science': if file_type == '.bib': fields = entry.get('research-areas', '').split("; ") elif file_type == '.txt' or file_type == '.ciw': original_fields = entry.get('SC', '') - if original_fields: # If the 'SC' field is not empty, split the string into parts 
using the delimiter '; ' and extract the field of research from each part. + if original_fields: for field in original_fields: field_parts = field.split(";") for part in field_parts: - if part.strip(): # This ensures we skip empty parts + if part.strip(): fields.append(part.strip()) else: fields.append('') @@ -1092,11 +1047,10 @@ def format_sc_column(entry, source, file_type): # Function for SC Column elif source == 'Cochrane': if file_type == '.txt': fields = '' - return fields -def format_sn_column(entry, source, file_type): # Function for SN Column (format--> "ISSN") +def format_sn_column(entry, source, file_type): issn = '' if source == 'Web_of_Science': if file_type == '.bib': @@ -1120,11 +1074,10 @@ def format_sn_column(entry, source, file_type): # Function for SN Column elif source == 'Cochrane': if file_type == '.txt': issn = entry.get('SN', '') - return issn -def format_so_column(entry, source, file_type): # Function for SO Column (format--> "Journal") +def format_so_column(entry, source, file_type): journal = '' if source == 'Web_of_Science': if file_type == '.bib': @@ -1154,18 +1107,16 @@ def format_so_column(entry, source, file_type): # Function for SO Column elif source == 'Cochrane': if file_type == '.txt': journal = entry.get('SO', '') - return journal -def format_sr_column(entry, source, file_type): # Function for SR Column (format--> "Author, Publication Year, Journal") +def format_sr_column(entry, source, file_type): sr = '' if source == 'Web_of_Science': if file_type == '.bib': author_dict = {} authors_str = entry.get('author', '').replace('\n', ' ') authors_list = authors_str.split(" and ") - for person in authors_list: parts = person.split(", ") if len(parts) == 2: @@ -1173,10 +1124,8 @@ def format_sr_column(entry, source, file_type): # Function for SR Column (forma else: surname = parts[0] names = ' '.join(parts[1:]) - initials = ''.join([name[0] + '.' 
for name in names.split() if name]) author_dict = {'Surname': surname, 'Name Initials': initials} - publication_year = entry.get('year', '') journal = entry.get('journal', '').replace('\n', ' ') if journal == '': @@ -1190,16 +1139,13 @@ def format_sr_column(entry, source, file_type): # Function for SR Column (forma else: surname = parts[0] names = ' '.join(parts[1:]) - publication_year = entry.get('PY', '') - journal = '' journal_entries = entry.get('SO', '') if journal_entries: for journal_entry in journal_entries: journal += journal_entry + ' ' journal = journal.rstrip() - sr = surname + ' ' + names + ', ' + publication_year[0] + ', ' + journal elif source == 'PubMed': if file_type == '.txt': @@ -1211,10 +1157,13 @@ def format_sr_column(entry, source, file_type): # Function for SR Column (forma elif source == 'Scopus': if file_type == '.bib': author = entry.get('author', '').split(" and ")[0] - surname, names = author.split(", ") - initials = '' - for name in names.split(" "): - initials += name[0] + '.' + parts = author.split(", ") + if len(parts) == 2: + surname, names = parts + else: + surname = parts[0] + names = ' '.join(parts[1:]) if len(parts) > 1 else '' + initials = ''.join([name[0] + '.' 
for name in names.split(" ") if name]) publication_year = entry.get('year', '') ta = entry.get('journal', '') sr = surname + ' ' + initials + ', ' + publication_year + ', ' + ta @@ -1252,11 +1201,10 @@ def format_sr_column(entry, source, file_type): # Function for SR Column (forma publication_year = entry.get('YR', '') ta = entry.get('SO', '') sr = author + ', ' + publication_year + ', ' + ta - return sr -def format_tc_column(entry, source, file_type): # Function for TC Column (format--> "Times Cited") +def format_tc_column(entry, source, file_type): times_cited = 0 if source == 'Web_of_Science': if file_type == '.bib': @@ -1286,11 +1234,10 @@ def format_tc_column(entry, source, file_type): # Function for TC Column (forma elif source == 'Cochrane': if file_type == '.txt': times_cited = 0 - return times_cited -def format_ti_column(entry, source, file_type): # Function for TI Column (format--> "Title") +def format_ti_column(entry, source, file_type): title = '' if source == 'Web_of_Science': if file_type == '.bib': @@ -1318,11 +1265,10 @@ def format_ti_column(entry, source, file_type): # Function for TI Column (forma elif source == 'Cochrane': if file_type == '.txt': title = entry.get('TI', '') - return title -def format_ut_column(entry, source, file_type): # Function for UT Column (format--> "Publication ID") +def format_ut_column(entry, source, file_type): publication_id = '' if source == 'Web_of_Science': if file_type == '.bib': @@ -1353,11 +1299,10 @@ def format_ut_column(entry, source, file_type): # Function for UT Column (forma elif source == 'Cochrane': if file_type == '.txt': publication_id = entry.get('ID', '') - return publication_id -def format_vl_column(entry, source, file_type): # Function for VL Column (format--> "VL: Volume") +def format_vl_column(entry, source, file_type): volume = '' if source == 'Web_of_Science': if file_type == '.bib': @@ -1381,148 +1326,137 @@ def format_vl_column(entry, source, file_type): # Function for VL Column (forma elif 
source == 'Cochrane': if file_type == '.txt': volume = '' - return volume def process_zip_file(zip_path, source, author): """ - Extract and process multiple files from a ZIP archive - + Extract and process multiple files from a ZIP archive. + Args: - zip_path: Path to the ZIP file - source: The source of the data - author: The author format preference - + zip_path: Path to the ZIP file. + source: The source of the data. + author: The author format preference. + Returns: - Combined JSON data from all files in the ZIP + Combined JSON data from all files in the ZIP. """ all_entries = [] processed_files = 0 failed_files = [] - max_files = 50 # Limite massimo di file per evitare sovraccarico - + max_files = 50 + try: with zipfile.ZipFile(zip_path, 'r') as zip_ref: - # Validate ZIP file size and content file_list = zip_ref.namelist() valid_files = [f for f in file_list if not f.startswith('.') and not f.endswith('/')] - + if len(valid_files) > max_files: raise ValueError(f"ZIP archive contains too many files ({len(valid_files)}). 
Maximum allowed: {max_files}") - - # Create a temporary directory to extract files + with tempfile.TemporaryDirectory() as temp_dir: zip_ref.extractall(temp_dir) - - # Get list of extracted files + extracted_files = [] for root, dirs, files in os.walk(temp_dir): for file in files: - if not file.startswith('.'): # Skip hidden files + if not file.startswith('.'): extracted_files.append(os.path.join(root, file)) - - # Process each extracted file + for file_path in extracted_files: try: - # Determine file type from extension file_ext = os.path.splitext(file_path)[1].lower() if file_ext in ['.txt', '.ciw', '.bib', '.csv', '.xlsx']: - # Process the file using the same logic as single files file_entries = process_single_file(file_path, source, file_ext, author) all_entries.extend(file_entries) processed_files += 1 else: print(f"Unsupported file type: {file_ext} for file {os.path.basename(file_path)}") except Exception as e: - # Log error but continue processing other files failed_files.append(os.path.basename(file_path)) print(f"Error processing file {os.path.basename(file_path)}: {str(e)}") continue - + print(f"Successfully processed {processed_files} files from ZIP archive.") if failed_files: print(f"Failed to process {len(failed_files)} files: {', '.join(failed_files)}") - + except zipfile.BadZipFile: raise ValueError("The uploaded file is not a valid ZIP archive.") except Exception as e: raise ValueError(f"Error extracting ZIP file: {str(e)}") - + if not all_entries: raise ValueError("No valid bibliographic data found in the ZIP archive. Please ensure it contains supported file formats (.txt, .csv, .bib, .xlsx).") - - # Convert combined entries to JSON + return json.dumps(all_entries, ensure_ascii=False, indent=4) def process_multiple_files(file_list, source, author): """ - Process multiple files selected by the user - + Process multiple files selected by the user. 
+ Args: - file_list: List of file information dictionaries - source: The source of the data - author: The author format preference - + file_list: List of file information dictionaries. + source: The source of the data. + author: The author format preference. + Returns: - Combined JSON data from all files + Combined JSON data from all files. """ all_entries = [] processed_files = 0 failed_files = [] - + for file_info in file_list: try: file_path = file_info["datapath"] file_name = file_info["name"] - - # Determine if it's a ZIP file or regular file + if file_name.endswith(".zip"): - # Process ZIP file zip_json = process_zip_file(file_path, source, author) zip_entries = json.loads(zip_json) all_entries.extend(zip_entries) else: - # Process regular file file_entries = process_single_file(file_path, source, file_name, author) all_entries.extend(file_entries) - + processed_files += 1 print(f"Successfully processed: {file_name}") - + except Exception as e: failed_files.append(file_info["name"]) print(f"Error processing file {file_info['name']}: {str(e)}") continue - + print(f"Successfully processed {processed_files} files.") if failed_files: print(f"Failed to process {len(failed_files)} files: {', '.join(failed_files)}") - + if not all_entries: raise ValueError("No valid bibliographic data found in the selected files.") - + return json.dumps(all_entries, ensure_ascii=False, indent=4) def process_single_file(data, source, file_type, author): """ - Process a single file and return the list of entries - + Process a single file and return the list of entries. + Args: - data: The path to the input file - source: The source of the data - file_type: The file extension/type - author: The author format preference - + data: The path to the input file. + source: The source of the data. + file_type: The file extension/type. + author: The author format preference. + Returns: - A list of dictionaries containing the formatted data + A list of dictionaries containing the formatted data. 
""" list_bib_data = [] - + if source == "wos": source = "Web_of_Science" + if file_type.endswith("bib"): file_type = ".bib" bib_parser = BibTexParser() @@ -1551,7 +1485,7 @@ def process_single_file(data, source, file_type, author): file_type = ".csv" bib_data = pd.read_csv(data) list_bib_data = bib_data.to_dict(orient='records') - + elif source == "dimensions": source = "Dimensions" if file_type.endswith("xlsx"): @@ -1569,69 +1503,76 @@ def process_single_file(data, source, file_type, author): file_type = ".csv" bib_data = pd.read_csv(data) list_bib_data = bib_data.to_dict(orient='records') - + elif source == "pubmed": source = "PubMed" if file_type.endswith("txt"): file_type = ".txt" list_bib_data = parse_pubmed_data(data) - + elif source == "cochrane": source = "Cochrane" if file_type.endswith("txt"): file_type = ".txt" list_bib_data = parse_cochrane_data(data) - # Extract relevant data and store it in a list of dictionaries entries = [] for entry in list_bib_data: entry_data = { - 'AB': format_ab_column(entry, source, file_type), # Abstract - 'AF': format_af_column(entry, source, file_type), # Authors Full Name - 'AU': format_au_column(entry, source, file_type), # Author/s - 'AU_UN': format_au_un_column(entry, source, file_type), # Authors University - 'AU1_UN': format_au1_un_column(entry, source, file_type), # Authors First University - 'BP': format_bp_column(entry, source, file_type), # Beginning Page - 'EP': format_ep_column(entry, source, file_type), # Ending Page - 'CR': format_cr_column(entry, source, file_type), # Cited References - 'C1': format_c1_column(entry, source, file_type), # Authors Affiliation - 'DB': source, # Database - 'DE': format_de_column(entry, source, file_type), # Author Keywords - 'DI': format_di_column(entry, source, file_type), # DOI - 'DT': format_dt_column(entry, source, file_type), # Document Type - 'EM': format_em_column(entry, source, file_type), # Email - 'FU': format_fu_column(entry, source, file_type), # Funding Details - 
'FX': format_fx_column(entry, source, file_type), # Funding Text - 'IS': format_is_column(entry, source, file_type), # Issue - 'JI': format_ji_column(entry, source, file_type), # Abbreviated Journal Name - 'ID': format_id_column(entry, source, file_type), # Index Keywords - 'LA': format_la_column(entry, source, file_type), # Language - 'OA': format_oa_column(entry, source, file_type), # Open Access - 'OI': format_oi_column(entry, source, file_type), # Orcid ID - 'PMID': format_pmid_column(entry, source, file_type), # PubMed ID - 'PU': format_pu_column(entry, source, file_type), # Publisher - 'PY': format_py_column(entry, source, file_type), # Publication Year - 'RP': format_rp_column(entry, source, file_type), # Correspondence Address - 'SC': format_sc_column(entry, source, file_type), # Fields of Research - 'SN': format_sn_column(entry, source, file_type), # ISSN - 'SO': format_so_column(entry, source, file_type), # Journal - 'SR': format_sr_column(entry, source, file_type), # Author, Publication Year, Journal - 'TC': format_tc_column(entry, source, file_type), # Times Cited - 'TI': format_ti_column(entry, source, file_type), # Title - 'UT': format_ut_column(entry, source, file_type), # Publication ID - 'VL': format_vl_column(entry, source, file_type), # Volume + 'AB': format_ab_column(entry, source, file_type), + 'AF': format_af_column(entry, source, file_type), + 'AU': format_au_column(entry, source, file_type), + 'AU_UN': format_au_un_column(entry, source, file_type), + 'AU1_UN': format_au1_un_column(entry, source, file_type), + 'BP': format_bp_column(entry, source, file_type), + 'EP': format_ep_column(entry, source, file_type), + 'CR': format_cr_column(entry, source, file_type), + 'C1': format_c1_column(entry, source, file_type), + 'DB': source, + 'DE': format_de_column(entry, source, file_type), + 'DI': format_di_column(entry, source, file_type), + 'DT': format_dt_column(entry, source, file_type), + 'EM': format_em_column(entry, source, file_type), + 'FU': 
format_fu_column(entry, source, file_type), + 'FX': format_fx_column(entry, source, file_type), + 'IS': format_is_column(entry, source, file_type), + 'JI': format_ji_column(entry, source, file_type), + 'ID': format_id_column(entry, source, file_type), + 'LA': format_la_column(entry, source, file_type), + 'OA': format_oa_column(entry, source, file_type), + 'OI': format_oi_column(entry, source, file_type), + 'PMID': format_pmid_column(entry, source, file_type), + 'PU': format_pu_column(entry, source, file_type), + 'PY': format_py_column(entry, source, file_type), + 'RP': format_rp_column(entry, source, file_type), + 'SC': format_sc_column(entry, source, file_type), + 'SN': format_sn_column(entry, source, file_type), + 'SO': format_so_column(entry, source, file_type), + 'SR': format_sr_column(entry, source, file_type), + 'TC': format_tc_column(entry, source, file_type), + 'TI': format_ti_column(entry, source, file_type), + 'UT': format_ut_column(entry, source, file_type), + 'VL': format_vl_column(entry, source, file_type), } - # Add other columns from 'columns' - for column in columns: - if column not in entry_data: # Avoid overwriting existing keys - entry_data[column] = entry.get(column, None) - - # Remove the column based on the value of the 'author' field + # PATCH 1: `columns` was referenced without being defined in the local + # scope — this caused NameError unless it was a global imported from + # utils. Added a safe fallback: only iterate if `columns` exists in the + # global scope, otherwise skip the extra-column loop silently. + # PATCH 2: entries from bibtexparser may not support .get() with a + # default — wrapped in try/except to avoid silent KeyError crashes. 
+ extra_columns = globals().get('columns', []) + for column in extra_columns: + if column not in entry_data: + try: + entry_data[column] = entry.get(column, None) + except (AttributeError, TypeError): + entry_data[column] = None + if author == "surname": - entry_data.pop('AF', None) # Remove 'AF' if it exists + entry_data.pop('AF', None) elif author == "fullname": - entry_data.pop('AU', None) # Remove 'AU' if it exists + entry_data.pop('AU', None) entries.append(entry_data) @@ -1640,25 +1581,47 @@ def process_single_file(data, source, file_type, author): def biblio_json(data, source, type, author): """ - Function to format the data from the input file into a JSON format - + Format the data from the input file into JSON format. + + This function supports: + - original bibliographic exports processed by process_single_file() + - ZIP archives + - standardized CSV files produced by the ETL pipeline + Args: - data: The path to the input file - source: The source of the data - type: The type of the input file - author: The author of the data - + data: The path to the input file. + source: The source of the data. + type: The type/name of the input file. + author: The author format preference. + Returns: - A JSON string containing the formatted data + A JSON string containing the formatted data. """ - # Handle ZIP files - extract and process multiple files + if type.endswith("zip"): return process_zip_file(data, source, author) - - # Handle single files - use the new process_single_file function + + # PATCH: support standardized CSV files produced by the ETL pipeline. + # These CSVs already contain WoS-like columns such as TI, AU, PY, SO, SR. + # Therefore they must not be re-parsed with the old WoS/Scopus/PubMed formatters. + # + # Multi-value columns (AU, AF, C1, AU_CO, DE, ID, CR) are stored in these + # CSVs joined by ";" per spec Section 4.2 ("Delimiter Standard"). 
We must + # use load_standardized_csv() to split them back into real lists before + # converting to JSON — otherwise they would be serialized as plain + # ";"-joined strings, and every downstream author/keyword/citation count + # would silently come out as 0. + if type.endswith("csv"): + df_csv = pd.read_csv(data, keep_default_na=False, nrows=0) + + required_standard_cols = {"TI", "AU", "PY", "SO", "SR", "DB"} + + if required_standard_cols.issubset(set(df_csv.columns)): + df_csv = load_standardized_csv(data) + entries = df_csv.to_dict(orient="records") + return json.dumps(entries, ensure_ascii=False, indent=4) + entries = process_single_file(data, source, type, author) - - # Convert the list of dictionaries to JSON json_data = json.dumps(entries, ensure_ascii=False, indent=4) - - return json_data + + return json_data \ No newline at end of file diff --git a/www/services/histnetwork.py b/www/services/histnetwork.py index 7848d9744..c8e7bb22e 100644 --- a/www/services/histnetwork.py +++ b/www/services/histnetwork.py @@ -1,45 +1,94 @@ +"d9k3qp" + from .utils import * from .cocmatrix import * +import ast def histNetwork(df, min_citations=0, sep=";", network=True): - """ - Create a historical network of citations from a DataFrame containing metadata of scientific papers. - - Args: - df (DataFrame): A DataFrame containing metadata of scientific papers. - min_citations (int): Minimum number of citations to include a paper in the analysis. - sep (str): Separator used to separate references in the citation network. - network (bool): If True, a citation network is created. - - Returns: - A dictionary containing the following keys: - - NetMatrix: A DataFrame containing the citation network. - - histData: A DataFrame containing the metadata of the papers. - - M: A DataFrame containing the metadata of the papers with the Local Citation Score (LCS). - - LCS: A list containing the Local Citation Score of each paper. 
- """ - M = df.get() - db = M['DB'][0] - - # Ensure required fields are present - if 'DI' not in M: + + # PATCH: original code called df.get() without arguments, which crashes + # on a pandas DataFrame because pandas .get() requires a column name as argument. + # Fixed by checking isinstance(df, pd.DataFrame) first: + # - if it's a DataFrame → just copy it directly + # - if it's a Shiny reactive object → use .get() to unwrap it + if isinstance(df, pd.DataFrame): + M = df.copy() + else: + M = df.get().copy() + + # SAFETY CHECK + if M is None or M.empty: + print("Input dataframe is empty") + return None + + # SAFE DB HANDLING + if 'DB' not in M.columns or M['DB'].empty: + print("DB column missing") + return None + + db = str(M['DB'].iloc[0]) + + # ENSURE REQUIRED FIELDS + if 'DI' not in M.columns: M['DI'] = "" - M['DI'] = M['DI'].fillna("") - if 'CR' not in M: + M['DI'] = M['DI'].fillna("").astype(str) + + if 'CR' not in M.columns: print("\nYour collection does not contain Cited References metadata (Field CR is missing)\n") return None - # Fill missing values in TC - M['TC'] = M['TC'].fillna(0) + # ENSURE CR IS LIST + def _parse_cr(x, sep): + if isinstance(x, list): + return x + if pd.isna(x): + return [] + s = str(x).strip() + if s.startswith('['): + try: + return ast.literal_eval(s) + except (ValueError, SyntaxError): + pass + return [i.strip() for i in s.split(sep)] + + M['CR'] = M['CR'].apply(lambda x: _parse_cr(x, sep)) + + # SAFE TC HANDLING + if 'TC' not in M.columns: + M['TC'] = 0 + + M['TC'] = pd.to_numeric(M['TC'], errors='coerce').fillna(0) + + # SAFE PY HANDLING + if 'PY' in M.columns: + M['PY'] = pd.to_numeric(M['PY'], errors='coerce') + + # DATABASE ROUTING + # PATCH: added OPENALEX and PUBMED to the wos() branch. + # Both sources produce SR and DI fields in the format expected by wos(), + # so the same matching logic applies. 
Citation accuracy may be lower for + # OpenAlex because CR contains OpenAlex URLs instead of formatted strings, + # but the function will not crash. + if db in ("Web_of_Science", "OPENALEX", "PUBMED"): + results = wos( + M, + min_citations=min_citations, + sep=sep, + network=network + ) - if db == "Web_of_Science": - results = wos(M, min_citations=min_citations, sep=sep, network=network) elif db == "Scopus": - results = scopus(M, min_citations=min_citations, sep=sep, network=network) + results = scopus( + M, + min_citations=min_citations, + sep=sep, + network=network + ) + else: - print("\nDatabase not compatible with direct citation analysis\n") + print(f"\nDatabase '{db}' not recognized. Supported: Web_of_Science, OPENALEX, PUBMED, Scopus\n") return None return results @@ -49,98 +98,217 @@ def wos(M, min_citations, sep, network): print("\nWOS DB:\nSearching local citations (LCS) by reference items (SR) and DOIs...\n") - # Sort data by publication year + # SAFETY CHECK + required_cols = ['PY', 'CR'] + + for col in required_cols: + if col not in M.columns: + print(f"Missing required column: {col}") + return None + + # SORT DATA M = M.sort_values(by="PY").reset_index(drop=True) - # Add unique labels to papers + # UNIQUE LABELS M['Paper'] = np.arange(0, len(M)) M['nLABEL'] = np.arange(0, len(M)) - # Process cited references (CR) + # PROCESS REFERENCES CR = [] + for i, refs in enumerate(M['CR']): + + if not isinstance(refs, list): + continue + for ref in refs: - # Extract DOI + + if not isinstance(ref, str): + continue + + # DOI EXTRACTION doi = "" + if 'DOI' in ref: parts = ref.split('DOI', 1) doi = parts[1].strip() if len(parts) > 1 else "" - # Extract AU, PY, SO + + # REF PARTS ref_parts = ref.split(',') + au = ref_parts[0].replace('.', ' ').strip() if len(ref_parts) > 0 else "" py = ref_parts[1].strip() if len(ref_parts) > 1 else "" so = ref_parts[2].strip() if len(ref_parts) > 2 else "" + sr = f"{au}, {py}, {so}" - CR.append({'ref': ref, 'Paper': i, 'DI': doi, 
'AU': au, 'PY': py, 'SO': so, 'SR': sr}) + + CR.append({ + 'ref': ref, + 'Paper': i, + 'DI': doi, + 'AU': au, + 'PY': py, + 'SO': so, + 'SR': sr + }) print(f"\nAnalyzing {len(CR)} reference items...\n") CR_df = pd.DataFrame(CR) - # Add LABEL field to M and CR - M['LABEL'] = M['SR_FULL'].fillna('').str.upper() + " DOI " + M['DI'].fillna('').str.upper() + # PATCH: if no valid references were parsed (e.g. OpenAlex URL-based CR), + # return early with LCS=0 for all documents to avoid hanging. + if CR_df.empty: + M['LCS'] = 0 + M['LABEL'] = M['SR_FULL'].fillna('').astype(str).str.upper() + histData = M[M['TC'] >= min_citations][['LABEL', 'TI', 'DE', 'ID', 'DI', 'PY', 'LCS', 'TC']].copy() + histData.columns = ['Paper', 'Title', 'Author_Keywords', 'KeywordsPlus', 'DOI', 'Year', 'LCS', 'GCS'] + return {'NetMatrix': None, 'histData': histData, 'M': M, 'LCS': M['LCS'].tolist()} + + # SAFE SR_FULL + if 'SR_FULL' not in M.columns: + M['SR_FULL'] = "" + + M['LABEL'] = ( + M['SR_FULL'].fillna('').astype(str).str.upper() + + " DOI " + + M['DI'].fillna('').astype(str).str.upper() + ) + M['LABEL'] = M['LABEL'].str.strip() - CR_df['LABEL'] = CR_df['SR'].fillna('').str.upper() + " DOI " + CR_df['DI'].fillna('').str.upper() + + CR_df['LABEL'] = ( + CR_df['SR'].fillna('').astype(str).str.upper() + + " DOI " + + CR_df['DI'].fillna('').astype(str).str.upper() + ) + CR_df['LABEL'] = CR_df['LABEL'].str.strip() + print("Sample M LABEL:", M['LABEL'].iloc[0]) + print("Sample CR LABEL:", CR_df['LABEL'].iloc[0] if not CR_df.empty else "empty") + print("Overlap:", len(set(M['LABEL']) & set(CR_df['LABEL']))) + + # MATCH REFERENCES + L = pd.merge( + M, + CR_df, + on='LABEL', + how='left', + suffixes=('_M', '_CR') + ) - # Match references with papers (left join as in R) - L = pd.merge(M, CR_df, on='LABEL', how='left', suffixes=('_M', '_CR')) L = L[L['Paper_CR'].notnull()] - L['CITING'] = M.loc[L['Paper_CR'], 'LABEL'].values - L['nCITING'] = M.loc[L['Paper_CR'], 'nLABEL'].values - L['CIT_PY'] = 
M.loc[L['Paper_CR'], 'PY'].values - # Compute Local Citation Scores (LCS) + if len(L) > 0: + + L['CITING'] = M.loc[L['Paper_CR'], 'LABEL'].values + L['nCITING'] = M.loc[L['Paper_CR'], 'nLABEL'].values + L['CIT_PY'] = M.loc[L['Paper_CR'], 'PY'].values + + # COMPUTE LCS LCS = L.groupby('nLABEL').size().reset_index(name='LCS') - M['LCS'] = M['nLABEL'].map(LCS.set_index('nLABEL')['LCS']).fillna(0).astype(int) - # Prepare histData - histData = M[M['TC'] >= min_citations][['LABEL', 'TI', 'DE', 'ID', 'DI', 'PY', 'LCS', 'TC']] - histData.columns = ['Paper', 'Title', 'Author_Keywords', 'KeywordsPlus', 'DOI', 'Year', 'LCS', 'GCS'] + M['LCS'] = ( + M['nLABEL'] + .map(LCS.set_index('nLABEL')['LCS']) + .fillna(0) + .astype(int) + ) + + # SAFE OPTIONAL COLUMNS + optional_cols = ['TI', 'DE', 'ID'] + + for col in optional_cols: + if col not in M.columns: + M[col] = "" + + histData = M[ + M['TC'] >= min_citations + ][['LABEL', 'TI', 'DE', 'ID', 'DI', 'PY', 'LCS', 'TC']] + + histData.columns = [ + 'Paper', + 'Title', + 'Author_Keywords', + 'KeywordsPlus', + 'DOI', + 'Year', + 'LCS', + 'GCS' + ] WLCR = None - if network: - # Build citation network + + # NETWORK BUILDING + if network and len(L) > 0: + CITING = L.groupby('CITING').agg( LCR=('LABEL', lambda x: ';'.join(x.dropna())), PY=('CIT_PY', 'first'), Paper=('Paper_CR', 'first') ).reset_index().sort_values(by='PY') - # Assign LCR to the correct Paper index (Paper is 0-based) M['LCR'] = "" + for idx, row in CITING.iterrows(): + paper_idx = int(row['Paper']) + if 0 <= paper_idx < len(M): M.at[paper_idx, 'LCR'] = row['LCR'] - # Assign unique names to duplicated LABELs + # DUPLICATE LABEL HANDLING st = False i = 0 + while not st: + ind = M['LABEL'].duplicated(keep=False) + if ind.any(): + i += 1 - M.loc[ind, 'LABEL'] = M.loc[ind, 'LABEL'] + f"-{chr(96 + i)}" + + M.loc[ind, 'LABEL'] = ( + M.loc[ind, 'LABEL'] + + f"-{chr(96 + i)}" + ) + else: st = True + M.index = M['LABEL'].str.strip() M['LCR'] = M['LCR'].fillna('') - # Ensure all 
papers are included as both rows and columns - WLCR = cocMatrix(reactive.Value(M), Field="LCR", sep=sep) - - # Trova le LABEL mancanti - missing_LABEL = set(M.index) - set(WLCR.columns) - - # Aggiungi colonne per le LABEL mancanti con valori 0 (in un'unica operazione per evitare frammentazione) - if missing_LABEL: - missing_df = pd.DataFrame(0, index=WLCR.index, columns=list(missing_LABEL)) - WLCR = pd.concat([WLCR, missing_df], axis=1) + # PATCH: original code wrapped M in reactive.Value(M) before passing to cocMatrix. + # reactive.Value is a Shiny-specific object that only works inside a running + # Shiny application. When called from a notebook or test script, reactive is + # not available and this crashes with a NameError. + # Fixed by passing M directly since cocMatrix already handles plain DataFrames. + WLCR = cocMatrix( + M, + Field="LCR", + sep=sep + ) + + if WLCR is not None: - num_ones = (WLCR.values == 1).sum() - print(f"\nFound {len(M[M['LCS'] > 0])} documents with non-empty Local Citations (LCS)\n") + missing_LABEL = set(M.index) - set(WLCR.columns) + + if missing_LABEL: + + missing_df = pd.DataFrame( + 0, + index=WLCR.index, + columns=list(missing_LABEL) + ) + + WLCR = pd.concat([WLCR, missing_df], axis=1) + + print( + f"\nFound {len(M[M['LCS'] > 0])} documents with non-empty Local Citations (LCS)\n" + ) results = { 'NetMatrix': WLCR, @@ -156,66 +324,158 @@ def scopus(M, min_citations=0, sep=";", network=True): print("\nScopus DB:\nProcessing citations...\n") - # Process the citations + required_cols = ['CR', 'SR'] + + for col in required_cols: + if col not in M.columns: + print(f"Missing required column: {col}") + return None + + # ENSURE CR LISTS + def _parse_cr(x, sep): + if isinstance(x, list): + return x + if pd.isna(x): + return [] + s = str(x).strip() + if s.startswith('['): + try: + return ast.literal_eval(s) + except (ValueError, SyntaxError): + pass + return [i.strip() for i in s.split(sep)] + + M['CR'] = M['CR'].apply(lambda x: _parse_cr(x, 
sep)) + CR = M['CR'] + CR = pd.DataFrame({ 'SR_citing': np.repeat(M['SR'], CR.str.len()), 'ref': [item for sublist in CR for item in sublist] }) - - # Extract publication year (PY) and author (AU) from the citation - CR['PY'] = CR['ref'].str.extract(r'.*\((\d{4})\).*').astype(float) - CR['AU'] = CR['ref'].str.extract(r'^(.*?),').apply(lambda x: x.str.replace('.', '').str.strip()) - CR['PP'] = CR['ref'].str.extract(r'PP\. (\d+-\d+)') - - # Filter valid citations + + CR['PY'] = pd.to_numeric( + CR['ref'].str.extract(r'.*\((\d{4})\).*')[0], + errors='coerce' + ) + + CR['AU'] = ( + CR['ref'] + .str.extract(r'^(.*?),')[0] + .str.replace('.', '', regex=False) + .str.strip() + ) + + CR['PP'] = CR['ref'].str.extract(r'PP\. (\d+-\d+)')[0] + CR = CR.dropna(subset=['PY']) + print(f"\nFiltered {len(CR)} valid citations...\n") - # Prepare the M dataframe for the join + # SAFE OPTIONAL COLUMNS + optional_cols = ['AU', 'BP', 'EP'] + + for col in optional_cols: + if col not in M.columns: + M[col] = "" + M_merge = M[['AU', 'PY', 'BP', 'EP', 'SR']].copy() - M_merge['AU'] = M_merge['SR'].str.extract(r'^(.*?),').apply(lambda x: x.str.replace('.', '').str.strip()) + + M_merge['AU'] = ( + M_merge['SR'] + .str.extract(r'^(.*?),')[0] + .str.replace('.', '', regex=False) + .str.strip() + ) + M_merge['BP'] = pd.to_numeric(M_merge['BP'], errors='coerce') M_merge['EP'] = pd.to_numeric(M_merge['EP'], errors='coerce') - M_merge['PP'] = M_merge.apply(lambda row: f"{row['BP']}-{row['EP']}" if pd.notna(row['BP']) else np.nan, axis=1) + + M_merge['PP'] = M_merge.apply( + lambda row: f"{row['BP']}-{row['EP']}" + if pd.notna(row['BP']) + else np.nan, + axis=1 + ) + M_merge['Included'] = True + M_merge.rename(columns={'SR': 'SR_cited'}, inplace=True) - - # Join CR with M_merge to find matches + CR = CR.merge(M_merge, on=['PY', 'AU'], how='left') + CR = CR[CR['Included'].notna()] + print(f"\nFound {len(CR)} matching citations...\n") - - # Calculate the Local Citation Score (LCS) + LCS = 
CR.groupby('SR_cited').size().reset_index(name='LCS') - - # Merge LCS scores with M - M = M.merge(LCS, left_on='SR', right_on='SR_cited', how='left').fillna({'LCS': 0}) + + M = M.merge( + LCS, + left_on='SR', + right_on='SR_cited', + how='left' + ).fillna({'LCS': 0}) + print(f"\nCalculated Local Citation Scores (LCS) for {len(M)} papers...\n") - - # Select and rename columns for historical data - histData = M[['SR_FULL', 'TI', 'DE', 'ID', 'DI', 'PY', 'LCS', 'TC']].copy() - histData.columns = ['Paper', 'Title', 'Author_Keywords', 'KeywordsPlus', 'DOI', 'Year', 'LCS', 'GCS'] + + # SAFE OPTIONAL FIELDS + output_cols = ['SR_FULL', 'TI', 'DE', 'ID', 'DI'] + + for col in output_cols: + if col not in M.columns: + M[col] = "" + + histData = M[ + ['SR_FULL', 'TI', 'DE', 'ID', 'DI', 'PY', 'LCS', 'TC'] + ].copy() + + histData.columns = [ + 'Paper', + 'Title', + 'Author_Keywords', + 'KeywordsPlus', + 'DOI', + 'Year', + 'LCS', + 'GCS' + ] + histData = histData.sort_values(by='Year').reset_index(drop=True) - - # Build the co-citation matrix if network is True + WLCR = None + if network: + print("\nBuilding co-citation matrix...\n") - - # Add self-citations to ensure each document cites itself - CRadd = pd.DataFrame({'SR_citing': M['SR'].unique(), 'SR_cited': M['SR'].unique(), 'value': 1}) - + + CRadd = pd.DataFrame({ + 'SR_citing': M['SR'].unique(), + 'SR_cited': M['SR'].unique(), + 'value': 1 + }) + WLCR = CR[['SR_citing', 'SR_cited']].copy() + WLCR['value'] = 1 + WLCR = pd.concat([WLCR, CRadd]).drop_duplicates() - - WLCR = WLCR.pivot_table(index='SR_citing', columns='SR_cited', values='value', fill_value=0) - - # Filter only the rows corresponding to cited documents - WLCR = WLCR.loc[WLCR.index.isin(CRadd['SR_cited'])] - print(f"\nCo-citation matrix built with {WLCR.shape[0]} rows and {WLCR.shape[1]} columns...\n") - + + WLCR = WLCR.pivot_table( + index='SR_citing', + columns='SR_cited', + values='value', + fill_value=0 + ) + + WLCR = WLCR.loc[ + 
WLCR.index.isin(CRadd['SR_cited']) + ] + + print( + f"\nCo-citation matrix built with {WLCR.shape[0]} rows and {WLCR.shape[1]} columns...\n" + ) + results = { 'NetMatrix': WLCR, 'histData': histData, @@ -223,4 +483,4 @@ def scopus(M, min_citations=0, sep=";", network=True): 'LCS': M['LCS'].tolist() } - return results + return results \ No newline at end of file diff --git a/www/services/io_utils.py b/www/services/io_utils.py new file mode 100644 index 000000000..92c0f8e30 --- /dev/null +++ b/www/services/io_utils.py @@ -0,0 +1,100 @@ +""" +io_utils.py +----------- +Shared CSV read/write helpers for the standardized WoS-schema DataFrame. + +These functions are the single source of truth for how multi-value +columns (AU, AF, C1, AU_CO, DE, ID, CR) are serialized to and +deserialized from CSV. Both the ETL demo notebook and the dashboard's +"Load Bibliometrix file(s)" (CSV) import path must use these functions +instead of re-implementing the logic separately, to avoid the two +copies drifting out of sync. + +Per spec Section 4.2 ("Delimiter Standard"), multi-value fields are +joined/split using ";" as the internal delimiter — NOT the raw Python +list repr (e.g. "['a', 'b']"), which is fragile, harder to read, and +was the source of a bug where the dashboard's CSV import silently +produced empty lists for every multi-value column. + +IMPORTANT — delimiter collision with PubMed-style "CR" content: +Individual reference strings (CR) commonly contain a literal ";" as +natural punctuation, e.g. "J Acoust Soc Am. 2025 Dec 1;158(6):4243-4267. +doi: 10.1121/10.0041768.". If that ";" were left unescaped, splitting +on ";" would incorrectly cut a single reference into two, inflating +counts like "References" in the dashboard. To prevent this, any ";" +that occurs *inside* an individual list item is escaped to a private-use +sentinel character before joining, and restored after splitting. 
The +on-disk delimiter remains ";" (spec-compliant and human-readable); only +the round-trip through save/load needs to be aware of the escaping. +""" +import pandas as pd + +LIST_COLUMNS = ["AU", "AF", "C1", "CR", "DE", "ID", "AU_CO"] +STR_COLUMNS = [ + "DB", "UT", "DI", "PMID", "TI", "SO", "JI", "PY", "DT", "LA", + "RP", "AB", "VL", "IS", "BP", "EP", "SR", "SR_FULL", +] + +# Private-use sentinel standing in for a literal ";" that belongs to the +# *content* of a list item, as opposed to the ";" used as the separator +# between items. U+E000 is in the Unicode Private Use Area, so it will +# never collide with real bibliographic text. +_ESCAPED_SEMICOLON = "\uE000" + + +def _escape_item(item: str) -> str: + """Protect any literal ';' inside a single list item before joining.""" + return item.replace(";", _ESCAPED_SEMICOLON) + + +def _unescape_item(item: str) -> str: + """Restore literal ';' inside a single list item after splitting.""" + return item.replace(_ESCAPED_SEMICOLON, ";") + + +def save_standardized_csv(df: pd.DataFrame, path: str) -> None: + """ + Save a standardized DataFrame to CSV, joining list columns with the + ";" delimiter required by the spec, instead of letting pandas write + the raw Python representation of the lists. + + Any ";" that is part of an individual item's own text (e.g. PubMed + citation punctuation in CR) is escaped first, so it survives the + round trip without being mistaken for the separator. 
+ """ + out = df.copy() + for col in LIST_COLUMNS: + if col in out.columns: + out[col] = out[col].apply( + lambda l: ";".join(_escape_item(str(v)) for v in l) if isinstance(l, list) else "" + ) + out.to_csv(path, index=False) + + +def load_standardized_csv(path) -> pd.DataFrame: + """ + Reload a CSV written by save_standardized_csv() (or any CSV using the + ";"-delimited multi-value convention) and restore the original Python + types: list[str] for multi-value fields (split on ";", with escaped + internal ";" restored), non-null str for scalar fields, and int for TC. + + `path` can be a file path or a file-like/datapath object accepted by + pandas.read_csv. + """ + df = pd.read_csv(path, keep_default_na=False, na_values=[]) + + for col in LIST_COLUMNS: + if col in df.columns: + df[col] = df[col].apply( + lambda x: [_unescape_item(v.strip()) for v in x.split(";") if v.strip()] + if isinstance(x, str) else (x if isinstance(x, list) else []) + ) + + for col in STR_COLUMNS: + if col in df.columns: + df[col] = df[col].apply(lambda x: "" if pd.isna(x) else str(x)) + + if "TC" in df.columns: + df["TC"] = pd.to_numeric(df["TC"], errors="coerce").fillna(0).astype(int) + + return df \ No newline at end of file diff --git a/www/services/mappings.py b/www/services/mappings.py new file mode 100644 index 000000000..590401b70 --- /dev/null +++ b/www/services/mappings.py @@ -0,0 +1,75 @@ +""" +mappings.py +----------- +Mapping dictionaries for PubMed and OpenAlex API responses. + +Each dictionary maps raw JSON field names (exactly as returned by the API) +to the standard WoS field tags used internally by Bibliometrix. 
+""" + +# --------------------------------------------------------------------------- +# PubMed mapping +# Raw JSON field name → WoS tag +# --------------------------------------------------------------------------- + +PUBMED_MAPPING = { + "uid": "UT", + "title": "TI", + "fulljournalname": "SO", # full name → journal source + "source": "JI", # abbreviated name → ISO abbreviation + "pubdate": "PY", + "volume": "VL", + "issue": "IS", + "lang": "LA", + "doctype": "DT", + "lastauthor": "RP", +} + +# Fields that need special parsing logic (handled in standardizer.py): +# "authors" → AU, AF (list of dicts, need to extract 'name' key) +# "articleids" → DI, PMID (list of dicts, need to match 'idtype') +# "pages" → BP, EP (single string like "123-145", need to split) +# "references" → CR (list, needs formatting) + +# Fields with no PubMed equivalent (standardizer.py fills these with [] or ""): +# AB - abstract not returned by eSummary API +# C1 - affiliations not in eSummary +# DE - author keywords not in eSummary +# ID - Keywords Plus, WoS-exclusive, always [] +# TC - times cited not in eSummary, always 0 +# SR - calculated field, computed last + +# --------------------------------------------------------------------------- +# OpenAlex mapping +# Raw JSON field name → WoS tag +# --------------------------------------------------------------------------- + +OPENALEX_MAPPING = { + "id": "UT", + "doi": "DI", + "title": "TI", + "publication_year": "PY", + "language": "LA", + #"cited_by_count": "TC", + "type": "DT", +} + +# Fields that need special parsing logic (handled in standardizer.py): +# "primary_location" → SO, JI (nested dict) +# "authorships" → AU, AF, C1, RP (list of dicts) +# "referenced_works" → CR (list of OpenAlex IDs, needs formatting) +# "keywords" → DE (list of dicts, extract 'display_name') +# "abstract_inverted_index"→ AB (inverted index, needs reconstruction) +# "biblio" → VL, IS, BP, EP (nested dict) + +# Fields with no OpenAlex equivalent (standardizer.py 
fills these): +# ID - Keywords Plus, WoS-exclusive, always [] +# PMID - PubMed-exclusive, always "" +# DB - not from API, we set this to "OPENALEX" ourselves +# SR - calculated field, computed last + + + + + + diff --git a/www/services/metatagextraction.py b/www/services/metatagextraction.py index 5e1f8b9c8..493d68547 100644 --- a/www/services/metatagextraction.py +++ b/www/services/metatagextraction.py @@ -1,262 +1,426 @@ -from .utils import * - - -def metaTagExtraction(df, Field="AU_CO", sep=";", aff_disamb=False): - """ - Extract metadata tags from a DataFrame based on the specified field. - - Args: - df: A DataFrame object containing the data. - Field: The field to extract metadata tags from. - sep: The separator used to split the metadata tags. - aff_disamb: A boolean value indicating whether to disambiguate the affiliations. - - Returns: - A DataFrame with the extracted metadata tags. - """ - M = df.get() - - if Field == "SR": - M = SR(M) - - if Field == "CR_AU": - M = CR_AU(M) - - if Field == "CR_SO": - M = CR_SO(M) - - if Field == "AU_CO": - M = AU_CO(M) - - if Field == "AU1_CO": - M = AU1_CO(M) - - if Field == "AU_UN": - if aff_disamb: - M = AU_UN(M, sep) - else: - M["AU_UN"] = M["C1"].str.replace(r"\[.*?\] ", "", regex=True) - M["AU1_UN"] = M["RP"].str.split(sep).apply(lambda l: l[0] if isinstance(l, list) else l) - ind = M["AU1_UN"].str.find("),") - a = ind[ind > -1].index - M.loc[a, "AU1_UN"] = M.loc[a, "AU1_UN"].str[ind[a] + 2:] - - df.set(M) - - return df - - -def SR(M): - listAU = M["AU"].apply(lambda l: [x.strip() for x in l]) - if M["DB"].iloc[0].lower() == "scopus": - listAU = listAU.apply(lambda l: [x.replace(" ", ",").replace(",,", ",").replace(" ", "") for x in l]) - FirstAuthors = listAU.apply(lambda l: l[0] if len(l) > 0 else "NA").str.replace(",", " ") - - no_art = M["JI"] == "" - M.loc[no_art, "JI"] = M.loc[no_art, "SO"] - J9 = M["JI"].str.replace(".", " ", regex=False).str.strip() - SR = FirstAuthors + ", " + M["PY"].astype(str) + ", " + 
J9 - - M["SR_FULL"] = SR.str.replace(r"\s+", " ", regex=True) - - st = i = 0 - while st == 0: - ind = SR.duplicated() - if ind.any(): - i += 1 - SR[ind] = SR[ind] + "-" + chr(96 + i) - else: - st = 1 - M["SR"] = SR.str.replace(r"\s+", " ", regex=True) - - return M - - -# TO BE DONE -def CR_AU(M): - listCAU = M["CR"].apply(lambda x: x if isinstance(x, list) else []).apply(lambda l: [x for x in l if len(x) > 10]) - FCAU = listCAU.apply(lambda l: [x.split(",")[0].strip() for x in l]) - M["CR_AU"] = FCAU.apply(lambda l: ";".join(l)) - - return M - - -def CR_SO(M): - listCAU = M["CR"].apply(lambda x: x if isinstance(x, list) else []) - if M["DB"].iloc[0].upper() != "SCOPUS": - FCAU = listCAU.apply(lambda l: [x.split(",")[2].strip() for x in l if len(x.split(",")) > 2]) - else: - FCAU = listCAU.apply(lambda l: [x.split(",")[0].strip() for x in l if len(x.split(",")) > 2]) - - M["CR_SO"] = FCAU.apply(lambda l: ";".join(l) if l else None) # da checkare - - return M - - -def AU_CO(M, log=False): - # Read the list of countries - with open("www/static/countries.txt", "r") as file: - countries = file.read().splitlines() - - # Extract the countries from the affiliations - M["AU_CO"] = None - C1 = M["C1"] - - # Convert empty lists in C1 using the values from RP - C1 = M["C1"].fillna(M["RP"]) - - for i in range(len(C1)): - # Check if the element is an empty list - if isinstance(C1.iloc[i], list) and not C1.iloc[i]: - if pd.notna(M["RP"].iloc[i]): # Check if "RP" is valid - C1.at[i] = [M["RP"].iloc[i]] # Use at to assign directly - else: # If "RP" is also empty, assign an empty list - C1.at[i] = [] - - # Extract the countries from the affiliations - results = [] - for i in range(len(M)): - countries_found = [] - for c1 in C1.iloc[i]: - if pd.notna(c1): - ind = [c.upper() for c in countries if re.search(r'\b' + re.escape(c.upper()) + r'\b', c1.split(",")[-1].strip().upper())] - countries_found.extend(ind) - results.append(countries_found) - - # Assign results to the AU_CO column - 
M["AU_CO"] = results - - # Replace country names with standardized names - M["AU_CO"] = M["AU_CO"].apply(lambda countries: [country.replace("UNITED STATES", "USA") - .replace("RUSSIAN FEDERATION", "RUSSIA") - .replace("TAIWAN", "CHINA") - .replace("ENGLAND", "UNITED KINGDOM") - .replace("SCOTLAND", "UNITED KINGDOM") - .replace("WALES", "UNITED KINGDOM") - .replace("NORTH IRELAND", "UNITED KINGDOM") - for country in countries]) - - if log: - with open("affiliations.txt", "w", encoding="utf-8") as file: - for affiliation in M["AU_CO"]: - file.write(f"{affiliation}\n") - - return M - - -def AU1_CO(M, log=False): - # Read the list of countries - with open("www/static/countries.txt", "r") as file: - countries = file.read().splitlines() - - # Initialize the AU1_CO column - M["AU1_CO"] = None - C1 = M["C1"] - - # Convert empty lists in C1 using the values from RP - C1 = M["C1"].fillna(M["RP"]) - - for i in range(len(C1)): - # Check if the element is an empty list - if isinstance(C1.iloc[i], list) and not C1.iloc[i]: - if pd.notna(M["RP"].iloc[i]): # Check if "RP" is valid - C1.at[i] = [M["RP"].iloc[i]] # Use at to assign directly - else: # If "RP" is also empty, assign an empty list - C1.at[i] = [] - - # Extract the first country found in the affiliations - results = [] - for i in range(len(M)): - first_country = None - for c1 in C1.iloc[i]: - if pd.notna(c1): - # Extract the last part of the affiliation string (typically the country) - last_part = c1.split(",")[-1].strip().upper() - # Search for the first matching country - for country in countries: - if re.search(r'\b' + re.escape(country.upper()) + r'\b', last_part): - first_country = country.upper() - break - if first_country: - break # Stop after finding the first country - results.append(first_country) - - # Assign results to the AU1_CO column - M["AU1_CO"] = results - - # Replace country names with standardized names - M["AU1_CO"] = M["AU1_CO"].apply(lambda country: country.replace("UNITED STATES", "USA") - 
.replace("RUSSIAN FEDERATION", "RUSSIA") - .replace("TAIWAN", "CHINA") - .replace("ENGLAND", "UNITED KINGDOM") - .replace("SCOTLAND", "UNITED KINGDOM") - .replace("WALES", "UNITED KINGDOM") - .replace("NORTH IRELAND", "UNITED KINGDOM") - if pd.notna(country) else None) - - if log: - with open("first_author_countries.txt", "w", encoding="utf-8") as file: - for affiliation in M["AU1_CO"]: - file.write(f"{affiliation}\n") - - return M - - -# TO BE DONE -def AU_UN(M, sep): - C1 = M["C1"].fillna(M["RP"]) - AFF = C1.str.replace(r"\[.*?\] ", "", regex=True) - indna = AFF.isna() - AFF[indna] = M["RP"][indna] - AFF = AFF.str.strip() - listAFF = AFF.str.split(sep) - - uTags = ["UNIV", "COLL", "SCH", "INST", "ACAD", "ECOLE", "CTR", "SCI", "CENTRE", "CENTER", "CENTRO", "HOSP", "ASSOC", "COUNCIL", - "FONDAZ", "FOUNDAT", "ISTIT", "LAB", "TECH", "RES", "CNR", "ARCH", "SCUOLA", "PATENT OFF", "CENT LIB", "HEALTH", "NATL", - "LIBRAR", "CLIN", "FDN", "OECD", "FAC", "WORLD BANK", "POLITECN", "INT MONETARY FUND", "CLIMA", "METEOR", "OFFICE", "ENVIR", - "CONSORTIUM", "OBSERVAT", "AGRI", "MIT ", "INFN", "SUNY "] - - def extract_affiliations(l): - index = [] - for item in l: - item = item.replace("(REPRINT AUTHOR)", "") - affL = item.split(",") - indd = [i for i, aff in enumerate(affL) if any(tag in aff for tag in uTags)] - if not indd: - index.append("NOTREPORTED") - elif any(char.isdigit() for char in affL[indd[0]]): - index.append("NOTDECLARED") - else: - index.append(affL[indd[0]]) - return ";".join(index) - - M["AU_UN"] = listAFF.apply(extract_affiliations) - if M["DB"].iloc[0] in ["ISI", "OPENALEX"] and "C3" in M.columns: - M["AU_UN"].loc[M["C3"].notna() & (M["C3"] != "")] = M["C3"] - M["AU_UN"] = M["AU_UN"].str.split(sep).apply(lambda l: sep.join([x.strip() for x in l])) - - M["AU_UN"] = M["AU_UN"].str.replace(r"\\&", "AND", regex=True).str.replace("&", "AND", regex=False) - - RP = M["RP"].fillna(M["C1"]) - AFF = RP.str.replace(r"\[.*?\] ", "", regex=True) - indna = AFF.isna() - 
AFF[indna] = M["RP"][indna] - AFF = AFF.str.strip() - listAFF = AFF.str.split(sep) - - M["AU1_UN"] = listAFF.apply(extract_affiliations) - M["AU1_UN"] = M["AU1_UN"].str.replace(r"\\&", "AND", regex=True).str.replace("&", "AND", regex=False) - - M["AU_UN_NR"] = None - listAFF2 = M["AU_UN"].str.split(sep) - cont = listAFF2.apply(lambda l: [i for i, x in enumerate(l) if x == "NR"]) - - for i, indices in enumerate(cont): - if indices: - M.at[i, "AU_UN_NR"] = ";".join([listAFF.iloc[i][j] for j in indices]) - - M["AU_UN"] = M["AU_UN"].replace({"NOTDECLARED": None, "NOTREPORTED": None}) - M["AU_UN"] = M["AU_UN"].str.replace("NOTREPORTED;", "", regex=False).str.replace(";NOTREPORTED", "", regex=False) - M["AU_UN"] = M["AU_UN"].str.replace("NOTDECLARED;", "", regex=False).str.replace("NOTDECLARED", "", regex=False) - - return M +from .utils import * + + +def metaTagExtraction(df, Field="AU_CO", sep=";", aff_disamb=False): + """ + Extract metadata tags from a DataFrame based on the specified field. + Supports both pandas DataFrame and Shiny reactive.Value. + """ + + # PATCH: original code used hasattr(df, "get") to check if df is a Shiny + # reactive object. However pandas DataFrames also have a .get() method, + # so this check always returned True for plain DataFrames, causing + # df.get() to be called without arguments — which crashes because pandas + # .get() requires a column name as argument. 
+ # Fixed by using isinstance(df, pd.DataFrame) instead: + # - if it's a DataFrame → just copy it directly + # - if it's a Shiny reactive object → use .get() to unwrap it first + if isinstance(df, pd.DataFrame): + M = df.copy() + else: + M = df.get().copy() + + if Field == "SR": + M = SR(M) + + if Field == "CR_AU": + M = CR_AU(M) + + if Field == "CR_SO": + M = CR_SO(M) + + if Field == "AU_CO": + M = AU_CO(M) + + if Field == "AU1_CO": + M = AU1_CO(M) + + if Field == "AU_UN": + if aff_disamb: + M = AU_UN(M, sep) + else: + M["AU_UN"] = M["C1"].apply( + lambda x: ";".join(x) if isinstance(x, list) else str(x) + ).str.replace(r"\[.*?\] ", "", regex=True) + + M["AU1_UN"] = M["RP"].astype(str).str.split(sep).apply( + lambda l: l[0] if isinstance(l, list) else l + ) + + ind = M["AU1_UN"].str.find("),") + a = ind[ind > -1].index + M.loc[a, "AU1_UN"] = M.loc[a, "AU1_UN"].str[ind[a] + 2:] + + return M + +def SR(M): + listAU = M["AU"].apply(lambda l: [x.strip() for x in l]) + if M["DB"].iloc[0].lower() == "scopus": + listAU = listAU.apply( + lambda l: [x.replace(" ", ",").replace(",,", ",").replace(" ", "") for x in l] + ) + FirstAuthors = listAU.apply( + lambda l: l[0] if len(l) > 0 else "NA" + ).str.replace(",", " ") + + no_art = M["JI"] == "" + M.loc[no_art, "JI"] = M.loc[no_art, "SO"] + J9 = M["JI"].str.replace(".", " ", regex=False).str.strip() + SR = FirstAuthors + ", " + M["PY"].astype(str) + ", " + J9 + + M["SR_FULL"] = SR.str.replace(r"\s+", " ", regex=True) + + st = i = 0 + + # PATCH: original while loop caused infinite loop in pandas >= 2.0. + # Fixed by using a simple dictionary to track duplicates. + # Also handles NaN values by converting to string first. 
+ SR = SR.fillna("").astype(str).reset_index(drop=True) + seen = {} + for idx in SR.index: + val = SR.loc[idx] + if val in seen: + seen[val] += 1 + SR.loc[idx] = val + "-" + chr(96 + seen[val]) + else: + seen[val] = 0 + M["SR"] = SR.str.replace(r"\s+", " ", regex=True) + + return M + + +def _pubmed_cr_is_journal_only(ref): + """ + Detects PubMed's NLM-style abbreviated citation, which carries no + author list, e.g. 'Nat Hum Behav. 2019 Oct;3(10):1045-1046. doi: ...'. + PubMed's CR field also contains full reference-list citations (with + authors, e.g. 'Waudby, C. A., Dobson, C. M. ... (2019).'), so this + check lets CR_AU/CR_SO route each entry to the right extractor instead + of assuming one WoS-style format for everything. + + A real NLM journal abbreviation never contains a comma, but some + APA-style author citations ('Carlsson, G. 2009. "Topology and + Data."...') accidentally match the same period-then-year shape. The + comma check filters those out so they aren't mistaken for a journal + name. + """ + m = re.match(r'^([^.]+)\.\s+\d{4}\b', ref) + return bool(m) and ',' not in m.group(1) + + +def _pubmed_cr_source(ref): + """ + Extracts the journal/source name from a PubMed NLM-style abbreviated + citation (the text before the first period). Only meaningful when + _pubmed_cr_is_journal_only(ref) is True — full reference-list + citations are deliberately not handled here, since their journal name + sits after the title text and can't be reliably isolated from it with + a simple pattern (the title itself often contains periods). + """ + m = re.match(r'^([^.]+)\.', ref) + return m.group(1).strip() if m else None + + +def _pubmed_cr_first_author(ref): + """ + Extracts the first author's surname from a PubMed full reference-list + citation, e.g. 'Waudby, C. A., Dobson, C. M. & Christodoulou, J. ... + (2019).' — the surname always precedes the first comma in this shape. 
+ Guarded on two conditions so it doesn't misfire on the journal-only + shape (which has no author at all) or on unrelated comma-containing + fragments: the string must not be journal-only, and must end in a + parenthesised year, which is the reliable marker of this citation + style. + """ + if _pubmed_cr_is_journal_only(ref): + return None + if not re.search(r'\(\d{4}\)\.?\s*$', ref): + return None + m = re.match(r'^([^,]+),', ref) + return m.group(1).strip() if m else None + + +def CR_AU(M): + listCAU = M["CR"].apply( + lambda x: x if isinstance(x, list) else [] + ).apply(lambda l: [x for x in l if len(x) > 10]) + + # PATCH 8: PubMed's CR field mixes formats that the original WoS-style + # split(",")[0] logic handles badly — it returns the whole string + # (mistaken for an author) on the journal-only NLM shape, which has no + # comma at all, and the wrong fragment on this dataset's Nature-style + # reference-list shape. PUBMED now routes through a dedicated + # extractor; all other databases keep the original WoS logic. + if M["DB"].iloc[0].upper() == "PUBMED": + FCAU = listCAU.apply( + lambda l: [a for a in (_pubmed_cr_first_author(x) for x in l) if a] + ) + else: + FCAU = listCAU.apply(lambda l: [x.split(",")[0].strip() for x in l]) + + M["CR_AU"] = FCAU.apply(lambda l: ";".join(l)) + + return M + + +def CR_SO(M): + listCAU = M["CR"].apply(lambda x: x if isinstance(x, list) else []) + + if M["DB"].iloc[0].upper() == "SCOPUS": + FCAU = listCAU.apply( + lambda l: [x.split(",")[0].strip() for x in l if len(x.split(",")) > 2] + ) + elif M["DB"].iloc[0].upper() == "PUBMED": + # PATCH 9: PubMed's CR field isn't comma-delimited like WoS, so + # x.split(",")[2] either grabs the wrong fragment or skips the + # entry outright (fewer than 3 commas). 
The source name is only + # unambiguous on the journal-only NLM shape (text before the + # first period); reference-list citations are skipped here rather + # than guessed at, since their journal name can't be reliably + # separated from the title text. + FCAU = listCAU.apply( + lambda l: [ + s for s in ( + _pubmed_cr_source(x) for x in l + if _pubmed_cr_is_journal_only(x) + ) + if s + ] + ) + else: + FCAU = listCAU.apply( + lambda l: [x.split(",")[2].strip() for x in l if len(x.split(",")) > 2] + ) + + # PATCH 2: originale usava None per righe vuote (lambda l: ";".join(l) if l else None). + # None in una colonna stringa causa crash su operazioni .str.* downstream. + # → sostituito con "" (stringa vuota) per sicurezza e coerenza col modulo. + M["CR_SO"] = FCAU.apply(lambda l: ";".join(l) if l else "") + + return M + + +def AU_CO(M, log=False): + # NOTA: path hardcoded — da parametrizzare in futuro se il working directory + # può variare tra ambienti (dev / prod / test). + with open("www/static/countries.txt", "r") as file: + countries = file.read().splitlines() + + M["AU_CO"] = None + C1 = M["C1"] + + # PATCH 3: fillna può produrre numpy.float64 (NaN numerico) quando sia C1 + # che RP sono NaN — il loop sottostante tenta di iterare su quel float e crasha. + # .infer_objects(copy=False) silenziona anche il FutureWarning pandas 3.x sul + # downcast implicito di fillna. + # Dopo fillna forziamo ogni cella non-lista a [] per garantire iterabilità. + C1 = M["C1"].fillna(M["RP"]).infer_objects(copy=False) + C1 = C1.apply(lambda x: x if isinstance(x, list) else ([] if pd.isna(x) else [x])) + + # NOTA: loop O(n) esplicito — accettabile per dataset tipici, + # ma vectorizzabile con .apply per grandi volumi. 
+ for i in range(len(C1)): + if isinstance(C1.iloc[i], list) and not C1.iloc[i]: + if pd.notna(M["RP"].iloc[i]): + C1.at[i] = [M["RP"].iloc[i]] + else: + C1.at[i] = [] + + results = [] + for i in range(len(M)): + countries_found = [] + for c1 in C1.iloc[i]: + if pd.notna(c1): + # PATCH 4: normalizza la stringa di input PRIMA della ricerca regex + # in modo che "Russian Federation" venga mappato a "Russia" nel + # dizionario countries.txt, dove è listato come "Russia". + # Senza questa normalizzazione il replace post-match non scatta mai + # perché il paese non viene trovato in primo luogo. + last_part = ( + c1.split(",")[-1].strip().upper() + .replace("RUSSIAN FEDERATION", "RUSSIA") + .replace("UNITED STATES", "USA") + .replace("ENGLAND", "UNITED KINGDOM") + .replace("SCOTLAND", "UNITED KINGDOM") + .replace("WALES", "UNITED KINGDOM") + .replace("NORTH IRELAND", "UNITED KINGDOM") + ) + ind = [ + c.upper() for c in countries + if re.search( + r'\b' + re.escape(c.upper()) + r'\b', + last_part + ) + ] + countries_found.extend(ind) + results.append(countries_found) + + M["AU_CO"] = results + + M["AU_CO"] = M["AU_CO"].apply( + lambda countries: [ + country + .replace("UNITED STATES", "USA") + .replace("RUSSIAN FEDERATION", "RUSSIA") + .replace("TAIWAN", "CHINA") + .replace("ENGLAND", "UNITED KINGDOM") + .replace("SCOTLAND", "UNITED KINGDOM") + .replace("WALES", "UNITED KINGDOM") + .replace("NORTH IRELAND", "UNITED KINGDOM") + for country in countries + ] + ) + + if log: + with open("affiliations.txt", "w", encoding="utf-8") as file: + for affiliation in M["AU_CO"]: + file.write(f"{affiliation}\n") + + return M + + +def AU1_CO(M, log=False): + # NOTA: stesso path hardcoded di AU_CO — stessa raccomandazione. + with open("www/static/countries.txt", "r") as file: + countries = file.read().splitlines() + + M["AU1_CO"] = None + C1 = M["C1"] + + # PATCH 3 (AU1_CO): stesso fix di AU_CO — fillna può produrre float NaN + # non iterabile quando sia C1 che RP sono NaN. 
+ # .infer_objects(copy=False) silenziona il FutureWarning pandas 3.x. + C1 = M["C1"].fillna(M["RP"]).infer_objects(copy=False) + C1 = C1.apply(lambda x: x if isinstance(x, list) else ([] if pd.isna(x) else [x])) + + # NOTA: loop O(n) esplicito — vedere commento in AU_CO. + for i in range(len(C1)): + if isinstance(C1.iloc[i], list) and not C1.iloc[i]: + if pd.notna(M["RP"].iloc[i]): + C1.at[i] = [M["RP"].iloc[i]] + else: + C1.at[i] = [] + + results = [] + for i in range(len(M)): + first_country = None + for c1 in C1.iloc[i]: + if pd.notna(c1): + # PATCH 4 (AU1_CO): normalizza prima della ricerca — stesso + # motivo di AU_CO (Russian Federation non presente in countries.txt). + last_part = ( + c1.split(",")[-1].strip().upper() + .replace("RUSSIAN FEDERATION", "RUSSIA") + .replace("UNITED STATES", "USA") + .replace("ENGLAND", "UNITED KINGDOM") + .replace("SCOTLAND", "UNITED KINGDOM") + .replace("WALES", "UNITED KINGDOM") + .replace("NORTH IRELAND", "UNITED KINGDOM") + ) + for country in countries: + if re.search(r'\b' + re.escape(country.upper()) + r'\b', last_part): + first_country = country.upper() + break + if first_country: + break + results.append(first_country) + + M["AU1_CO"] = results + + # PATCH 5: originale ritornava None per paese non trovato (else None). + # Sostituito con "" per coerenza con il resto del modulo. + # ATTENZIONE: i consumer di AU1_CO che usano `if country is None` + # devono essere aggiornati a `if not country` per catturare anche "". 
+ M["AU1_CO"] = M["AU1_CO"].apply( + lambda country: country + .replace("UNITED STATES", "USA") + .replace("RUSSIAN FEDERATION", "RUSSIA") + .replace("TAIWAN", "CHINA") + .replace("ENGLAND", "UNITED KINGDOM") + .replace("SCOTLAND", "UNITED KINGDOM") + .replace("WALES", "UNITED KINGDOM") + .replace("NORTH IRELAND", "UNITED KINGDOM") + if pd.notna(country) else "" + ) + + if log: + with open("first_author_countries.txt", "w", encoding="utf-8") as file: + for affiliation in M["AU1_CO"]: + file.write(f"{affiliation}\n") + + return M + + +def AU_UN(M, sep): + C1 = M["C1"].fillna(M["RP"]) + AFF = C1.str.replace(r"\[.*?\] ", "", regex=True) + indna = AFF.isna() + AFF[indna] = M["RP"][indna] + AFF = AFF.str.strip() + listAFF = AFF.str.split(sep) + + uTags = [ + "UNIV", "COLL", "SCH", "INST", "ACAD", "ECOLE", "CTR", "SCI", + "CENTRE", "CENTER", "CENTRO", "HOSP", "ASSOC", "COUNCIL", + "FONDAZ", "FOUNDAT", "ISTIT", "LAB", "TECH", "RES", "CNR", + "ARCH", "SCUOLA", "PATENT OFF", "CENT LIB", "HEALTH", "NATL", + "LIBRAR", "CLIN", "FDN", "OECD", "FAC", "WORLD BANK", "POLITECN", + "INT MONETARY FUND", "CLIMA", "METEOR", "OFFICE", "ENVIR", + "CONSORTIUM", "OBSERVAT", "AGRI", "MIT ", "INFN", "SUNY " + ] + + def extract_affiliations(l): + index = [] + for item in l: + item = item.replace("(REPRINT AUTHOR)", "") + affL = item.split(",") + indd = [i for i, aff in enumerate(affL) if any(tag in aff for tag in uTags)] + if not indd: + index.append("NOTREPORTED") + elif any(char.isdigit() for char in affL[indd[0]]): + index.append("NOTDECLARED") + else: + index.append(affL[indd[0]]) + return ";".join(index) + + M["AU_UN"] = listAFF.apply(extract_affiliations) + + if M["DB"].iloc[0] in ["ISI", "OPENALEX"] and "C3" in M.columns: + # PATCH 6: originale usava M["AU_UN"].loc[...] = ... su una Series. + # Sintassi deprecata che causa SettingWithCopyWarning e può non + # modificare il DataFrame sottostante in alcune versioni di pandas. + # → corretto con M.loc[condition, "AU_UN"] = ... 
(forma raccomandata). + M.loc[M["C3"].notna() & (M["C3"] != ""), "AU_UN"] = M["C3"] + M["AU_UN"] = M["AU_UN"].str.split(sep).apply( + lambda l: sep.join([x.strip() for x in l]) + ) + + M["AU_UN"] = M["AU_UN"].str.replace(r"\\&", "AND", regex=True).str.replace("&", "AND", regex=False) + + RP = M["RP"].fillna(M["C1"]) + AFF = RP.str.replace(r"\[.*?\] ", "", regex=True) + indna = AFF.isna() + AFF[indna] = M["RP"][indna] + AFF = AFF.str.strip() + listAFF = AFF.str.split(sep) + + M["AU1_UN"] = listAFF.apply(extract_affiliations) + M["AU1_UN"] = M["AU1_UN"].str.replace(r"\\&", "AND", regex=True).str.replace("&", "AND", regex=False) + + M["AU_UN_NR"] = None + listAFF2 = M["AU_UN"].str.split(sep) + cont = listAFF2.apply(lambda l: [i for i, x in enumerate(l) if x == "NR"]) + + for i, indices in enumerate(cont): + if indices: + M.at[i, "AU_UN_NR"] = ";".join([listAFF.iloc[i][j] for j in indices]) + + # PATCH 7: originale usava None come valore di replace + # (replace({"NOTDECLARED": None, "NOTREPORTED": None})). + # None in una colonna stringa causa crash su operazioni .str.* successive. + # → sostituito con "" (stringa vuota) per sicurezza e coerenza col modulo. 
+ M["AU_UN"] = M["AU_UN"].replace({"NOTDECLARED": "", "NOTREPORTED": ""}) + M["AU_UN"] = M["AU_UN"].str.replace("NOTREPORTED;", "", regex=False).str.replace(";NOTREPORTED", "", regex=False) + M["AU_UN"] = M["AU_UN"].str.replace("NOTDECLARED;", "", regex=False).str.replace("NOTDECLARED", "", regex=False) + + return M \ No newline at end of file diff --git a/www/services/networkplot.py b/www/services/networkplot.py index 156cfbfd0..a777c6f88 100644 --- a/www/services/networkplot.py +++ b/www/services/networkplot.py @@ -25,41 +25,62 @@ def network_plot(NetMatrix, normalize=None, n=None, degree=None, Title="Plot", t # Compute node degrees deg = np.array(bsk_network.degree()) - bsk_network.vs["deg"] = deg + + # PATCH: if graph has no nodes, return None immediately + if len(bsk_network.vs) == 0 or len(deg) == 0: + return None + + # Assign deg attribute to vertices + bsk_network.vs["deg"] = deg.tolist() # Node sizes if size_cex: - bsk_network.vs["size"] = (deg / max(deg)) * size + max_deg = max(deg) if max(deg) > 0 else 1 + bsk_network.vs["size"] = (deg / max_deg * size).tolist() else: bsk_network.vs["size"] = [size] * len(bsk_network.vs) # Label sizes if label_cex: - lsize = np.log(1 + (deg / max(deg))) * labelsize - lsize[lsize < 0.5] = 0.5 # Minimum label size is fixed to 0.5 - bsk_network.vs["label_size"] = lsize + max_deg = max(deg) if max(deg) > 0 else 1 + lsize = np.log(1 + (deg / max_deg)) * labelsize + lsize[lsize < 0.5] = 0.5 + bsk_network.vs["label_size"] = lsize.tolist() else: - bsk_network.vs["label_size"] = labelsize + bsk_network.vs["label_size"] = [labelsize] * len(bsk_network.vs) # Filter vertices based on degree or number if degree is not None: + deg = np.array(bsk_network.degree()) Deg = deg - np.diag(NetMatrix) Vind = Deg < degree if np.sum(~Vind) == 0: print("\ndegree argument is too high!\n\n") - return + return None indices_to_delete = np.where(Vind)[0] bsk_network.delete_vertices(indices_to_delete) if bsk_S is not None: 
bsk_S.delete_vertices(indices_to_delete) + # PATCH: recompute deg after filtering + deg = np.array(bsk_network.degree()) + bsk_network.vs["deg"] = deg.tolist() + elif n is not None: - if n > NetMatrix.shape[0]: - n = NetMatrix.shape[0] + deg = np.array(bsk_network.degree()) + if n > len(deg): + n = len(deg) nodes = np.argsort(deg)[-n:] indices_to_delete = np.setdiff1d(np.arange(len(deg)), nodes) bsk_network.delete_vertices(indices_to_delete) if bsk_S is not None: bsk_S.delete_vertices(indices_to_delete) + # PATCH: recompute deg after filtering + deg = np.array(bsk_network.degree()) + bsk_network.vs["deg"] = deg.tolist() + + # PATCH: check again after filtering + if len(bsk_network.vs) == 0: + return None # Simplify the graph if edges_min > 1: @@ -70,19 +91,24 @@ def network_plot(NetMatrix, normalize=None, n=None, degree=None, Title="Plot", t # Process edge weights if "weight" not in bsk_network.es.attributes(): - bsk_network.es["weight"] = bsk_network.es["width"] = 1 + bsk_network.es["weight"] = [1] * len(bsk_network.es) + bsk_network.es["width"] = [1] * len(bsk_network.es) if weighted: weights = np.array(bsk_network.es["weight"]) - normalized_weights = (weights - weights.min()) / (weights.max() - weights.min()) - bsk_network.es["width"] = normalized_weights * edgesize + if len(weights) > 0 and weights.max() != weights.min(): + normalized_weights = (weights - weights.min()) / (weights.max() - weights.min()) + else: + normalized_weights = np.ones(len(weights)) + bsk_network.es["width"] = (normalized_weights * edgesize).tolist() else: if remove_multiple: - bsk_network.es["width"] = edgesize + bsk_network.es["width"] = [edgesize] * len(bsk_network.es) else: edges = np.array(bsk_network.es["weight"]) - normalized_edges = edges / max(edges) - bsk_network.es["width"] = normalized_edges * edgesize + max_edge = max(edges) if len(edges) > 0 and max(edges) > 0 else 1 + normalized_edges = edges / max_edge + bsk_network.es["width"] = (normalized_edges * edgesize).tolist() # 
Remove edges below threshold if edges_min > 0: @@ -98,11 +124,18 @@ def network_plot(NetMatrix, normalize=None, n=None, degree=None, Title="Plot", t if bsk_S is not None: isolates_to_remove = [v.index for v in bsk_S.vs if v["name"] not in bsk_network.vs["name"]] bsk_S.delete_vertices(isolates_to_remove) + # PATCH: recompute deg after removing isolates + deg = np.array(bsk_network.degree()) + bsk_network.vs["deg"] = deg.tolist() + + # PATCH: check again after removing isolates + if len(bsk_network.vs) == 0: + return None # Apply clustering cl = clustering_network(bsk_network, cluster) - bsk_network = cl["bsk_network"] + if bsk_S is not None: bsk_S.vs["color"] = bsk_network.vs["color"] bsk_S.vs["community"] = bsk_network.vs["community"] @@ -122,20 +155,23 @@ def network_plot(NetMatrix, normalize=None, n=None, degree=None, Title="Plot", t if label: LABEL = list(bsk_network.vs["name"]) if label_n is not None: - q = 1 - (label_n / len(bsk_network.vs["deg"])) + # PATCH: safely get deg attribute + deg_vals = bsk_network.vs["deg"] if "deg" in bsk_network.vs.attributes() else bsk_network.degree() + q = 1 - (label_n / len(deg_vals)) if len(deg_vals) > 0 else 1 if q <= 0: - bsk_network.vs["label_size"] = 10 + bsk_network.vs["label_size"] = [10] * len(bsk_network.vs) else: if q > 1: q = 1 - q = np.quantile(bsk_network.vs["deg"], q) - for i, deg_val in enumerate(bsk_network.vs["deg"]): + q = np.quantile(deg_vals, q) + for i, deg_val in enumerate(deg_vals): if deg_val < q: LABEL[i] = "" - bsk_network.vs["label_size"] = 10 - for i, deg_val in enumerate(bsk_network.vs["deg"]): + label_sizes = [10] * len(bsk_network.vs) + for i, deg_val in enumerate(deg_vals): if deg_val < q: - bsk_network.vs["label_size"][i] = 0 + label_sizes[i] = 0 + bsk_network.vs["label_size"] = label_sizes if label_color: lab_color = bsk_network.vs["color"] @@ -150,12 +186,12 @@ def network_plot(NetMatrix, normalize=None, n=None, degree=None, Title="Plot", t bsk_network["asp"] = 0 bsk_network["layout"] = l 
bsk_network["main"] = Title - bsk_network.es["curved"] = curved - bsk_network.vs["label_dist"] = 0.7 + bsk_network.es["curved"] = [curved] * len(bsk_network.es) + bsk_network.vs["label_dist"] = [0.7] * len(bsk_network.vs) bsk_network.vs["frame_color"] = adjust_color('black', alpha) bsk_network.vs["color"] = [adjust_color(c, alpha) for c in bsk_network.vs["color"]] bsk_network.vs["label_color"] = adjust_color('black', min(1, alpha + 0.1)) - bsk_network.vs["label_font"] = 2 + bsk_network.vs["label_font"] = [2] * len(bsk_network.vs) bsk_network.vs["label"] = LABEL # Plot the network @@ -195,49 +231,55 @@ def delete_isolates(graph, mode='all'): def clustering_network(bsk_network, cluster): - # Determina i colori disponibili colorlist = color_list() - # Determina il clustering in base al metodo specificato - if cluster == "none": - net_groups = {"membership": [1] * len(bsk_network.vs)} - elif cluster == "optimal": - net_groups = bsk_network.community_optimal_modularity() - elif cluster == "leiden": - net_groups = bsk_network.community_leiden(objective_function="modularity", n_iterations=3, resolution_parameter=0.75) - elif cluster == "louvain": - net_groups = bsk_network.community_multilevel() - elif cluster == "fast_greedy": - net_groups = bsk_network.community_fastgreedy().as_clustering() - elif cluster == "leading_eigen": - net_groups = bsk_network.community_leading_eigenvector() - elif cluster == "spinglass": - net_groups = bsk_network.community_spinglass() - elif cluster == "infomap": - net_groups = bsk_network.community_infomap() - elif cluster == "edge_betweenness": - net_groups = bsk_network.community_edge_betweenness().as_clustering() - elif cluster == "walktrap": - net_groups = bsk_network.community_walktrap().as_clustering() - else: - print("\nUnknown cluster argument. Using default algorithm\n") - net_groups = bsk_network.community_walktrap().as_clustering() + # PATCH: wrap clustering in try/except — some algorithms fail on small or + # disconnected graphs. 
Fall back to single-cluster assignment on failure. + try: + if cluster == "none": + net_groups = type('FallbackClustering', (), {'membership': [0] * len(bsk_network.vs)})() + elif cluster == "optimal": + net_groups = bsk_network.community_optimal_modularity() + elif cluster == "leiden": + net_groups = bsk_network.community_leiden(objective_function="modularity", n_iterations=3, resolution_parameter=0.75) + elif cluster == "louvain": + net_groups = bsk_network.community_multilevel() + elif cluster == "fast_greedy": + net_groups = bsk_network.community_fastgreedy().as_clustering() + elif cluster == "leading_eigen": + net_groups = bsk_network.community_leading_eigenvector() + elif cluster == "spinglass": + net_groups = bsk_network.community_spinglass() + elif cluster == "infomap": + net_groups = bsk_network.community_infomap() + elif cluster == "edge_betweenness": + net_groups = bsk_network.community_edge_betweenness().as_clustering() + elif cluster == "walktrap": + net_groups = bsk_network.community_walktrap().as_clustering() + else: + print("\nUnknown cluster argument. 
Using default algorithm\n") + net_groups = bsk_network.community_walktrap().as_clustering() + except Exception as e: + print(f"Clustering failed ({e}), falling back to single cluster.") + net_groups = type('FallbackClustering', (), {'membership': [0] * len(bsk_network.vs)})() - # Assegna il cluster a ogni nodo bsk_network.vs["community"] = net_groups.membership - # Converte la lista di colori RGBA in esadecimale colorlist_hex = [rgba_to_hex(c) for c in colorlist] - # Assegna colori ai nodi e agli archi (ora in formato esadecimale) bsk_network.vs["color"] = [colorlist_hex[m % len(colorlist)] for m in net_groups.membership] el = np.array(bsk_network.get_edgelist()) - bsk_network.es["color"] = [ - "#B3B3B3" if bsk_network.vs[el[i, 0]]["community"] != bsk_network.vs[el[i, 1]]["community"] - else colorlist_hex[bsk_network.vs[el[i, 0]]["community"] % len(colorlist)] - for i in range(len(el)) - ] - bsk_network.es["lty"] = [5 if c == "#B3B3B3" else 1 for c in bsk_network.es["color"]] + + if len(el) > 0: + bsk_network.es["color"] = [ + "#B3B3B3" if bsk_network.vs[el[i, 0]]["community"] != bsk_network.vs[el[i, 1]]["community"] + else colorlist_hex[bsk_network.vs[el[i, 0]]["community"] % len(colorlist)] + for i in range(len(el)) + ] + bsk_network.es["lty"] = [5 if c == "#B3B3B3" else 1 for c in bsk_network.es["color"]] + else: + bsk_network.es["color"] = [] + bsk_network.es["lty"] = [] return {"bsk_network": bsk_network, "net_groups": net_groups} @@ -248,18 +290,18 @@ def switch_layout(bsk_network, type, community_repulsion): row = np.array(bsk_network.get_edgelist()) membership = bsk_network.vs["community"] - if bsk_network.es["weight"] is None: - bsk_network.es["weight"] = [ - weight_community(row[i], membership, community_repulsion, 1) - for i in range(len(row)) - ] - else: - bsk_network.es["weight"] = [ - bsk_network.es["weight"][i] + weight_community(row[i], membership, community_repulsion, 1) - for i in range(len(row)) - ] + if len(row) > 0: + if 
bsk_network.es["weight"] is None: + bsk_network.es["weight"] = [ + weight_community(row[i], membership, community_repulsion, 1) + for i in range(len(row)) + ] + else: + bsk_network.es["weight"] = [ + bsk_network.es["weight"][i] + weight_community(row[i], membership, community_repulsion, 1) + for i in range(len(row)) + ] - # Determina il layout if type == "auto": l = bsk_network.layout_auto() elif type == "circle": @@ -277,11 +319,13 @@ def switch_layout(bsk_network, type, community_repulsion): else: l = bsk_network.layout_auto() - # Normalizza manualmente il layout + # PATCH: avoid division by zero when all coordinates are identical l_coords = np.array(l.coords) min_coords = l_coords.min(axis=0) max_coords = l_coords.max(axis=0) - normalized_coords = (l_coords - min_coords) / (max_coords - min_coords) + range_coords = max_coords - min_coords + range_coords[range_coords == 0] = 1 + normalized_coords = (l_coords - min_coords) / range_coords l = ig.Layout(normalized_coords.tolist()) return {"l": l, "bsk_network": bsk_network} @@ -323,4 +367,4 @@ def normalize_similarity(NetMatrix, type="association"): def rgba_to_hex(rgba): r, g, b, a = rgba - return '#{:02X}{:02X}{:02X}'.format(int(r * 255), int(g * 255), int(b * 255)) + return '#{:02X}{:02X}{:02X}'.format(int(r * 255), int(g * 255), int(b * 255)) \ No newline at end of file diff --git a/www/services/standardizer.py b/www/services/standardizer.py new file mode 100644 index 000000000..20ec0bd49 --- /dev/null +++ b/www/services/standardizer.py @@ -0,0 +1,480 @@ +""" +standardizer.py +--------------- +Transform phase of the ETL pipeline. + +Takes a list of raw dictionaries from api_retriever.py and converts +them into a pandas DataFrame with the standard WoS column schema. 
+ +Main entry point: + standardize(records, source) → pd.DataFrame +""" +import re +import pandas as pd +from www.services.mappings import PUBMED_MAPPING, OPENALEX_MAPPING + + +# NOTE: SR() is copied directly from www/services/metatagextraction.py +# rather than imported. This is intentional. +# metatagextraction.py uses a relative import (from .utils import *) +# which only works when the file is loaded as part of a package. +# When imported directly in a notebook or standalone script, Python +# doesn't know it's part of a package and crashes with: +# "attempted relative import with no known parent package" +# Copying the function here avoids that problem entirely. + +def SR(M): + def format_author(name): + """Convert 'John Smith' or 'Smith, John' to 'Smith J' format.""" + name = name.strip() + if not name or name == "NA": + return "NA" + # Handle "Surname, Firstname" format + if "," in name: + parts = name.split(",") + surname = parts[0].strip() + first = parts[1].strip() + initial = first[0] if first else "" + return f"{surname} {initial}".strip() + # Handle "Firstname Surname" format (OpenAlex style) vs. + # "Surname Initials" format (PubMed style, e.g. 
"Scharf C", "de Mattos BP") + parts = name.split() + if len(parts) >= 2: + # PubMed style: last token is short, all-uppercase initials + if parts[-1].isupper() and len(parts[-1]) <= 3 and not parts[0].isupper(): + surname = " ".join(parts[:-1]) + initial = parts[-1][0] + return f"{surname} {initial}" + # OpenAlex/WoS style: "Firstname [Middle] Surname" + surname = parts[-1] + initial = parts[0][0] + return f"{surname} {initial}" + return parts[0] if parts else "NA" + + listAU = M["AU"].apply(lambda l: [x.strip() for x in l] if isinstance(l, list) else []) + FirstAuthors = listAU.apply(lambda l: format_author(l[0]) if len(l) > 0 else "NA") + no_art = M["JI"] == "" + M.loc[no_art, "JI"] = M.loc[no_art, "SO"] + J9 = M["JI"].str.replace(".", " ", regex=False).str.strip().str.upper() + SR_col = FirstAuthors + ", " + M["PY"].astype(str) + ", " + J9 + M["SR_FULL"] = SR_col.str.replace(r"\s+", " ", regex=True) + st = i = 0 + while st == 0: + ind = SR_col.duplicated() + if ind.any(): + i += 1 + SR_col[ind] = SR_col[ind] + "-" + chr(96 + i) + else: + st = 1 + M["SR"] = SR_col.str.replace(r"\s+", " ", regex=True) + return M + + +def apply_mapping(record: dict, mapping: dict) -> dict: + """ + Renames raw API field names to WoS tags using the mapping dictionary. + Only processes fields that appear in the mapping. + Returns a new dictionary with WoS tag keys. + """ + result = {} + for raw_field, wos_tag in mapping.items(): + result[wos_tag] = record.get(raw_field, "") + return result + + +# --------------------------------------------------------------------------- +# PubMed parsers +# --------------------------------------------------------------------------- + +def parse_pubmed_authors(record: dict) -> list: + """ + Extracts author names from PubMed's raw authors field. + Returns a list of strings e.g. 
["Hudelo J", "Garot J"] + """ + authors = record.get("authors", []) + result = [] + for author in authors: + name = author.get("name", "") + if name: + result.append(name) + return result + + +def parse_pubmed_articleids(record: dict) -> dict: + """ + Extracts DOI and PMID from PubMed's raw articleids field. + Returns a dict with keys DI and PMID. + """ + articleids = record.get("articleids", []) + result = {"DI": "", "PMID": ""} + for item in articleids: + idtype = item.get("idtype", "") + value = item.get("value", "") + if idtype == "doi": + result["DI"] = value + elif idtype == "pubmed": + result["PMID"] = value + return result + + +def parse_pubmed_pages(record: dict) -> dict: + """ + Extracts beginning and end page from PubMed's raw pages field. + Returns a dict with keys BP and EP. + """ + pages = record.get("pages", "") + result = {"BP": "", "EP": ""} + if "-" in pages: + parts = pages.split("-") + result["BP"] = parts[0].strip() + result["EP"] = parts[1].strip() + else: + result["BP"] = pages.strip() + return result + + +def parse_pubmed_references(record: dict) -> list: + """ + Extracts cited references (CR) from PubMed's raw references field. + Returns a list of strings, one per reference. + """ + references = record.get("references", []) + result = [] + for ref in references: + value = ref.get("refsource", "") + if value: + result.append(value) + return result + + +def standardize_pubmed(record: dict) -> dict: + """ + Converts a single raw PubMed record into a WoS-schema dictionary. + Calls all parsing functions and fills missing fields with safe defaults. + """ + # Step 1: rename simple fields + result = apply_mapping(record, PUBMED_MAPPING) + + # PY — extract 4-digit year from raw pubdate string e.g. "2026 Jun 6" → "2026" + py_raw = result.get("PY", "") + match = re.match(r"(\d{4})", str(py_raw)) + result["PY"] = match.group(1) if match else "" + + # LA comes as a list from PubMed e.g. 
['eng'], extract first element + la = record.get("lang", "") + result["LA"] = la[0] if isinstance(la, list) and len(la) > 0 else "" + + # Step 2: handle complex fields + result["AU"] = parse_pubmed_authors(record) + result["AF"] = parse_pubmed_authors(record) + result.update(parse_pubmed_articleids(record)) + result.update(parse_pubmed_pages(record)) + result["CR"] = parse_pubmed_references(record) + + # Step 3: fill missing fields with safe defaults + result["AB"] = "" + result["C1"] = [] + result["AU_CO"] = [] + result["DE"] = [] + result["ID"] = [] + result["TC"] = 0 + result["DB"] = "PUBMED" + result["SR"] = "" + + # Spec requirement: no NaN or None allowed in final output + # Replace None with "" for string fields and [] for list fields + str_cols = ["UT", "DI", "PMID", "TI", "SO", "JI", "PY", "DT", "LA", "RP", "AB", "VL", "IS", "BP", "EP", "SR"] + list_cols = ["AU", "AF", "C1", "AU_CO", "CR", "DE", "ID"] + + for col in str_cols: + if result.get(col) is None or (isinstance(result.get(col), float)): + result[col] = "" + + for col in list_cols: + if result.get(col) is None: + result[col] = [] + + return result + + +# --------------------------------------------------------------------------- +# OpenAlex parsers +# --------------------------------------------------------------------------- + +def parse_openalex_location(record: dict) -> dict: + """ + Extracts journal name and ISO abbreviation from OpenAlex's + primary_location field. + Returns a dict with keys SO and JI. + """ + result = {"SO": "", "JI": ""} + location = record.get("primary_location", {}) + if not location: + return result + source = location.get("source", {}) + if not source: + return result + name = source.get("display_name") or "" + result["SO"] = name.upper() + result["JI"] = name + return result + + +def parse_openalex_authorships(record: dict) -> dict: + """ + Extracts authors, full names, affiliations and reprint author + from OpenAlex's raw authorships field. 
+ Returns a dict with keys AU, AF, C1, RP. + """ + authorships = record.get("authorships", []) + result = {"AU": [], "AF": [], "C1": [], "RP": ""} + + for authorship in authorships: + # AU — short display name + display_name = authorship.get("author", {}).get("display_name", "") + if display_name: + result["AU"].append(display_name) + + # AF — raw author name as recorded in the paper + raw_name = authorship.get("raw_author_name", "") + if raw_name: + result["AF"].append(raw_name) + + # C1 — all affiliation strings for this author + affiliations = authorship.get("raw_affiliation_strings", []) + result["C1"].extend(affiliations) + + # RP — corresponding author name + affiliation + if authorship.get("is_corresponding", False): + affiliations_str = "; ".join(affiliations) + result["RP"] = f"{raw_name} (corresponding author), {affiliations_str}" + + return result + + +def parse_openalex_abstract(record: dict) -> str: + """ + Reconstructs the abstract (AB) from OpenAlex's inverted index format. + The inverted index maps each word to a list of positions. + Returns the abstract as a plain string, or "" if missing. + """ + inverted_index = record.get("abstract_inverted_index", None) + if not inverted_index: + return "" + + # find the total number of words + max_position = 0 + for positions in inverted_index.values(): + for pos in positions: + if pos > max_position: + max_position = pos + + # place each word at its correct position + words = [""] * (max_position + 1) + for word, positions in inverted_index.items(): + for pos in positions: + words[pos] = word + + return " ".join(words) + + +def parse_openalex_biblio(record: dict) -> dict: + """ + Extracts volume, issue, and page numbers from OpenAlex's biblio field. + Returns a dict with keys VL, IS, BP, EP. 
+ """ + biblio = record.get("biblio", {}) + result = { + "VL": biblio.get("volume", "") or "", + "IS": biblio.get("issue", "") or "", + "BP": biblio.get("first_page", "") or "", + "EP": biblio.get("last_page", "") or "", + } + return result + + +def parse_openalex_keywords(record: dict) -> list: + """ + Extracts author keywords (DE) from OpenAlex's keywords field. + Returns a list of strings e.g. ["Python", "Machine learning"] + """ + keywords = record.get("keywords", []) + result = [] + for kw in keywords: + name = kw.get("display_name", "") + if name: + result.append(name) + return result + + +def parse_openalex_countries(record: dict) -> list: + """ + Extracts author countries (AU_CO) from OpenAlex's authorships field. + Each authorship entry contains institution data with a country_code field. + Returns a list of unique country code strings e.g. ["US", "DE"]. + Pre-populating AU_CO here bypasses metaTagExtraction()'s WoS-style + affiliation parser, which cannot handle OpenAlex affiliation strings + and would silently return empty results for non-WoS sources. + get_collaborationnetwork.py checks 'if AU_CO not in m.columns' before + calling metaTagExtraction(), so a pre-populated column is used directly. + """ + authorships = record.get("authorships", []) + countries = [] + for authorship in authorships: + for institution in authorship.get("institutions", []): + country = institution.get("country_code", "") or "" + if country and country not in countries: + countries.append(country.upper()) + return countries + + +def resolve_openalex_references(work_ids: list) -> list: + """ + Resolves a list of OpenAlex work IDs (e.g. 'https://openalex.org/W123') + to WoS-style citation strings ('Smith J, 2019, NAT COMMUN'). + Fetches metadata from the OpenAlex API in batches of 50. + Returns a list of formatted strings. Unresolvable IDs are skipped. 
+ """ + import time + import requests + + if not work_ids: + return [] + + # Strip to bare IDs: 'https://openalex.org/W123' → 'W123' + bare_ids = [w.split("/")[-1] for w in work_ids if w] + + results = [] + batch_size = 50 + + for i in range(0, len(bare_ids), batch_size): + batch = bare_ids[i:i + batch_size] + filter_str = "|".join(batch) + url = "https://api.openalex.org/works" + params = { + "filter": f"openalex_id:{filter_str}", + "per-page": batch_size, + "select": "id,authorships,publication_year,primary_location" + } + try: + response = requests.get(url, params=params, timeout=10) + if response.status_code != 200: + continue + data = response.json() + for work in data.get("results", []): + # First author last name + initial e.g. "Jane Smith" → "Smith J" + authorships = work.get("authorships", []) + au = "" + if authorships: + name = authorships[0].get("author", {}).get("display_name", "") + parts = name.split() + if len(parts) >= 2: + au = parts[-1] + " " + parts[0][0] + elif parts: + au = parts[0] + # Year + py = str(work.get("publication_year", "") or "") + # Journal name + loc = work.get("primary_location") or {} + src = loc.get("source") or {} + so = (src.get("display_name") or "").upper() + if au or py or so: + results.append(f"{au}, {py}, {so}") + except Exception: + pass + time.sleep(0.3) # be polite to the API + + return results + + +def parse_openalex_references(record: dict) -> list: + """ + Extracts cited references (CR) from OpenAlex's referenced_works field. + Resolves raw OpenAlex IDs to WoS-style citation strings via the API + so that citation network features (Historiograph, Cluster by Coupling, + Most Local Cited Sources/Authors/Documents) can match references correctly. + Returns a list of strings formatted as 'FirstAuthor, Year, Journal'. 
+ """ + references = record.get("referenced_works", []) + if not references: + return [] + return resolve_openalex_references(references) + + +def standardize_openalex(record: dict) -> dict: + """ + Converts a single raw OpenAlex record into a WoS-schema dictionary. + Calls all parsing functions and fills missing fields with safe defaults. + """ + # Step 1: rename simple fields + result = apply_mapping(record, OPENALEX_MAPPING) + + # TC — manually managed to avoid None → "" + tc = record.get("cited_by_count", 0) + result["TC"] = int(tc) if tc is not None else 0 + + # Step 2: handle complex fields + result.update(parse_openalex_location(record)) + result.update(parse_openalex_authorships(record)) + result["AB"] = parse_openalex_abstract(record) + result.update(parse_openalex_biblio(record)) + result["DE"] = parse_openalex_keywords(record) + result["AU_CO"] = parse_openalex_countries(record) + result["CR"] = parse_openalex_references(record) + + # DI — strip URL prefix and handle None + doi = record.get("doi", "") or "" + result["DI"] = doi.replace("https://doi.org/", "").replace("http://doi.org/", "") + + # PY — OpenAlex returns an integer, cast to string + py = record.get("publication_year", "") + result["PY"] = str(py) if py is not None else "" + + # Step 3: fill missing fields with safe defaults + result["ID"] = [] + result["PMID"] = "" + result["DB"] = "OPENALEX" + result["SR"] = "" + + # Spec requirement: no NaN or None allowed in final output + str_cols = ["UT", "DI", "PMID", "TI", "SO", "JI", "PY", "DT", "LA", "RP", "AB", "VL", "IS", "BP", "EP", "SR"] + list_cols = ["AU", "AF", "C1", "AU_CO", "CR", "DE", "ID"] + + for col in str_cols: + if result.get(col) is None or (isinstance(result.get(col), float)): + result[col] = "" + + for col in list_cols: + if result.get(col) is None: + result[col] = [] + + return result + + +# --------------------------------------------------------------------------- +# Main entry point +# 
--------------------------------------------------------------------------- + +def standardize(records: list, source: str) -> pd.DataFrame: + """ + Main entry point for the standardizer. + Takes a list of raw records from api_retriever.py and returns + a pandas DataFrame with the standard WoS column schema. + + Args: + records: list of raw dictionaries from api_retriever.py + source: "pubmed" or "openalex" + """ + standardized = [] + + for record in records: + if source == "pubmed": + standardized.append(standardize_pubmed(record)) + elif source == "openalex": + standardized.append(standardize_openalex(record)) + else: + raise ValueError(f"Unsupported source: {source}. Choose 'pubmed' or 'openalex'.") + + df = pd.DataFrame(standardized) + df = SR(df) + return df diff --git a/www/services/termextraction.py b/www/services/termextraction.py index f7d9a52c1..f89ee73e0 100644 --- a/www/services/termextraction.py +++ b/www/services/termextraction.py @@ -1,4 +1,5 @@ from .utils import * +__all__ = ['term_extraction'] def term_extraction(df, field="TI", ngrams=1, stemming=False, language="english", remove_numbers=True, remove_terms=None, keep_terms=None, synonyms=None, verbose=False): @@ -20,12 +21,14 @@ def term_extraction(df, field="TI", ngrams=1, stemming=False, language="english" Returns: A DataFrame with the extracted terms. """ - M = df.get() + # PATCH: df may be a Shiny reactive Value or a plain DataFrame. + # Track whether df is reactive so we can call df.set() at the end. 
+ is_reactive = hasattr(df, 'get') and callable(df.get) and not isinstance(df, pd.DataFrame) + M = df.get() if is_reactive else df.copy() # Load and update stopwords overall_start_time = time.time() - # Load and update stopwords stop_words = set(nltk_stopwords.words(language)) custom_stopwords = {"elsevier", "springer", "mdpi", "using", "however", "-", "present", "proposes", "used", "proposed", "reserved", "recent", "years", "research", "study", "aims", @@ -33,7 +36,7 @@ def term_extraction(df, field="TI", ngrams=1, stemming=False, language="english" "published", "aims", "limitations"} stop_words.update(custom_stopwords) - stop_words = list(stop_words) # Convert to list for compatibility with CountVectorizer + stop_words = list(stop_words) # Convert text to lowercase and remove special characters M[f"{field}_TM"] = M[field].astype(str).str.lower() @@ -80,24 +83,23 @@ def term_extraction(df, field="TI", ngrams=1, stemming=False, language="english" # Combine extracted terms into a list for each document start_time = time.time() - # Get a boolean matrix for terms present (saves operations) (OPTIMIZATION BY GPT from 30 seconds to 0.1 seconds) - non_zero_mask = terms_df.values > 0 # Mask for values > 0 - # Create a list of lists with the actual terms for each document + non_zero_mask = terms_df.values > 0 extracted_terms = [ [terms_df.columns[i].replace("__", "-").replace("_", " ").replace("-", " ") for i in np.where(non_zero_mask[row_idx])[0]] for row_idx in range(non_zero_mask.shape[0]) ] - # Assign the result to the destination column M[f"{field}_TM"] = extracted_terms print(f"Term combination into lists per document done in {time.time() - start_time:.4f} seconds") - # Show results (if verbose is True) if verbose: print(terms_df.sum().sort_values(ascending=False).head(25)) - # Finalize the output - df.set(M) - - return df + # PATCH: only call df.set() if df is a reactive object. + # For plain DataFrames, just return M directly. 
+ if is_reactive: + df.set(M) + return df + else: + return M \ No newline at end of file diff --git a/www/services/thematicmap.py b/www/services/thematicmap.py index 3c313b7f6..149d4983b 100644 --- a/www/services/thematicmap.py +++ b/www/services/thematicmap.py @@ -5,268 +5,249 @@ def thematic_map(df, field="ID", n=250, minfreq=5, ngrams=1, stemming=False, size=0.5, n_labels=1, community_repulsion=0.1, repel=True, remove_terms=None, synonyms=None, cluster="walktrap", subgraphs=False): - # df = metaTagExtraction(df, field=field) - M = df - m = df.get() - - # Set ngrams based on field - ngrams = int(ngrams) if field in ['TI', 'AB'] else 1 - # Set stemming as boolean - stemming = True if stemming == "Yes" else False - minfreq = max(0, int(minfreq * len(m) // 1000)) - - # Preprocess field and create network matrix - if field == "ID": - NetMatrix = biblionetwork(M, analysis="co-occurrences", network="keywords", n=n, sep=";", remove_terms=remove_terms, synonyms=synonyms) - elif field == "DE": - NetMatrix = biblionetwork(M, analysis="co-occurrences", network="author_keywords", n=n, sep=";", remove_terms=remove_terms, synonyms=synonyms) - elif field == "TI": - M = term_extraction(M, field="TI", ngrams=ngrams, verbose=False, stemming=stemming, remove_terms=remove_terms, synonyms=synonyms) - NetMatrix = biblionetwork(M, analysis="co-occurrences", network="titles", n=n, sep=";") - elif field == "AB": - M = term_extraction(M, field="AB", ngrams=ngrams, verbose=False, stemming=stemming, remove_terms=remove_terms, synonyms=synonyms) - NetMatrix = biblionetwork(M, analysis="co-occurrences", network="abstracts", n=n, sep=";") - else: - raise ValueError("Invalid field specified.") - if not NetMatrix.empty: - Net = network_plot(NetMatrix, normalize="association", Title="Keyword co-occurrences", type="auto", - labelsize=n_labels, halo=False, cluster=cluster, remove_isolates=True, - community_repulsion=community_repulsion, remove_multiple=False, noloops=True, - weighted=True, 
label_cex=True, edgesize=5, size=1, edges_min=1, verbose=False) - else: - print("\n\nNetwork matrix is empty!\nThe analysis cannot be performed\n\n") - return None - - S = Net['S'] - - # Set row and column names to lowercase - NetMatrix.index = NetMatrix.columns = NetMatrix.index.str.lower() - - # Get graph and clusters - net = Net['graph'] - net_groups = Net['cluster_obj'] - group = net_groups.membership - # Extract words and their groups from net_groups - word = net.vs['name'] - color = net.vs['color'] - color = ["#D3D3D3" if c is None else c for c in color] - - # Find common words between NetMatrix and word list - W = list(NetMatrix.index.intersection(word)) - # Get indices from NetMatrix - index = NetMatrix.index.isin(W) - # Get indices from word list - ii = [i for i, w in enumerate(word) if w in W] - # Update word, group and color lists to keep only matched elements - word = [word[i] for i in ii] - group = [group[i] for i in ii] - color = [color[i] for i in ii] - # Calculate diagonal matrix C and subset matrices - C = np.diag(NetMatrix.values) - S = NetMatrix.values # Get the similarity matrix - sEij = pd.DataFrame(S[np.ix_(index, index)], index=NetMatrix.index[index], columns=NetMatrix.columns[index]) - sC = C[index] - - # Create dataframe with word data - df_lab = pd.DataFrame({ - 'sC': sC, - 'words': word, - 'groups': group, - 'color': color, - 'cluster_label': 'NA' - }) - - # Filter and process cluster data - df_lab = (df_lab[df_lab['sC'] >= minfreq] - .groupby('groups') - .apply(lambda x: pd.Series({ - 'freq': x['sC'].sum(), - 'cluster_label': x.loc[x['sC'].idxmax(), 'words'], - 'sC': list(x['sC']), # Se necessario mantenere i valori di sC - 'words': ', '.join(x['words'].astype(str)), # <-- Converte in stringa pulita - 'color': x['color'].iloc[0] # Prende il primo valore della colonna - })) - .reset_index()) - - # Explode both words and sC columns to create rows for each word and its occurrence count - df_lab = df_lab.assign( - 
words=df_lab['words'].str.split(', '), - sC=df_lab['sC'] # Keep sC as is since it's already a list - ).explode(['words', 'sC']).reset_index(drop=True) - - # Convert to upper triangle matrix and create edge dataframe - index_names = sEij.index - column_names = sEij.columns - sEij = triu(sEij.values) - - df_lab_top = df_lab[['words', 'groups']].reset_index(drop=True) - df_lab_top = df_lab_top.assign(words=df_lab_top['words'].str.split(', ')).explode('words').reset_index(drop=True) - - # Create edge list dataframe - sEij_df = pd.DataFrame(sEij, index=index_names, columns=column_names) - # sEij_df['words1'] = sEij_df.index - - sEij_df = pd.DataFrame(sEij_df.values, index=sEij_df.index, columns=sEij_df.columns) - sEij_df = sEij_df.reset_index(names=['words1']) - sEij_df = pd.melt(sEij_df, id_vars=['words1'], var_name='words2', value_name='eij') - sEij_df = sEij_df[sEij_df['eij'] > 0] - - sEij_df['words1'] = sEij_df['words1'].astype(str) - df_lab_top['words'] = df_lab_top['words'].astype(str) - df_lab['words'] = df_lab['words'].astype(str) - - # Perform left joins equivalent to R's left_join operations - sEij_df = sEij_df.merge(df_lab_top[['words', 'groups']], - left_on='words1', - right_on='words', - how='left') - sEij_df = sEij_df.merge(df_lab_top[['words', 'groups']], - left_on='words2', - right_on='words', - how='left', - suffixes=('', '2')) - - # Drop the extra 'words' columns created by the merge - sEij_df = sEij_df.drop(['words', 'words_y'], axis=1, errors='ignore') - - # Get top row for each group - df_lab_top = (df_lab[['groups', 'cluster_label', 'color', 'freq']] - .groupby('groups') - .first() - .reset_index()) - - # Remove duplicate columns - sEij_df = sEij_df.loc[:, ~sEij_df.columns.duplicated()] - - # Clean the words column by splitting on newlines and taking first value - df_lab['words'] = df_lab['words'].str.split('\n').str[0] - # Clean up words by removing leading numbers and whitespace - df_lab['words'] = df_lab['words'].str.replace(r'^\s*\d+\s*', '', 
regex=True).str.strip() - - df = sEij_df[ - sEij_df['words1'].isin(df_lab['words'].unique()) & - sEij_df['words2'].isin(df_lab['words'].unique()) - ] - - # Controlliamo se la colonna 'eij' esiste prima di continuare - if 'eij' not in sEij_df.columns: - raise KeyError("La colonna 'eij' non esiste in sEij_df!") - - # Controlliamo il filtraggio per evitare DataFrame vuoti - filtered_df = sEij_df[ - sEij_df['words1'].isin(df_lab['words'].unique()) & + # PATCH: df may be a Shiny reactive Value or a plain DataFrame + M = df + m = df.get() if hasattr(df, 'get') and callable(df.get) and not isinstance(df, pd.DataFrame) else df + m = m.copy() + + # Set ngrams based on field + ngrams = int(ngrams) if field in ['TI', 'AB'] else 1 + stemming = True if stemming == "Yes" else False + minfreq = max(0, int(minfreq * len(m) // 1000)) + + # PATCH: extract plain DataFrame for term_extraction calls + M_plain = df.get() if hasattr(df, 'get') and callable(df.get) and not isinstance(df, pd.DataFrame) else df + + # Preprocess field and create network matrix + if field == "ID": + NetMatrix = biblionetwork(M, analysis="co-occurrences", network="keywords", n=n, sep=";", remove_terms=remove_terms, synonyms=synonyms) + elif field == "DE": + NetMatrix = biblionetwork(M, analysis="co-occurrences", network="author_keywords", n=n, sep=";", remove_terms=remove_terms, synonyms=synonyms) + elif field == "TI": + # PATCH: run term_extraction on plain DataFrame to get TI_TM column + M_extracted = term_extraction(M_plain, field="TI", ngrams=ngrams, verbose=False, stemming=stemming, remove_terms=remove_terms, synonyms=synonyms) + # PATCH: wrap in reactive so biblionetwork/cocMatrix can call .get() + NetMatrix = biblionetwork(reactive.Value(M_extracted), analysis="co-occurrences", network="titles", n=n, sep=";") + # PATCH: update m with TI_TM so cluster_assignment can use it + m["TI_TM"] = M_extracted["TI_TM"].values + M = reactive.Value(M_extracted) + elif field == "AB": + # PATCH: same as TI + 
M_extracted = term_extraction(M_plain, field="AB", ngrams=ngrams, verbose=False, stemming=stemming, remove_terms=remove_terms, synonyms=synonyms) + NetMatrix = biblionetwork(reactive.Value(M_extracted), analysis="co-occurrences", network="abstracts", n=n, sep=";") + # PATCH: update m with AB_TM so cluster_assignment can use it + m["AB_TM"] = M_extracted["AB_TM"].values + M = reactive.Value(M_extracted) + else: + raise ValueError("Invalid field specified.") + + # PATCH: biblionetwork may return None when the keyword matrix is empty + # (e.g. PubMed DE is always empty from eSummary API). + if NetMatrix is not None and not NetMatrix.empty: + Net = network_plot(NetMatrix, normalize="association", Title="Keyword co-occurrences", type="auto", + labelsize=n_labels, halo=False, cluster=cluster, remove_isolates=True, + community_repulsion=community_repulsion, remove_multiple=False, noloops=True, + weighted=True, label_cex=True, edgesize=5, size=1, edges_min=1, verbose=False) + else: + print("\n\nNetwork matrix is empty!\nThe analysis cannot be performed\n\n") + return None, None, pd.DataFrame(), pd.DataFrame(), pd.DataFrame() + + # PATCH: network_plot may return None on small/empty graphs + if Net is None: + return None, None, pd.DataFrame(), pd.DataFrame(), pd.DataFrame() + + S = Net['S'] + + NetMatrix.index = NetMatrix.columns = NetMatrix.index.str.lower() + + net = Net['graph'] + net_groups = Net['cluster_obj'] + group = net_groups.membership + + word = net.vs['name'] + node_colors = net.vs['color'] + node_colors = ["#D3D3D3" if c is None else c for c in node_colors] + + W = list(NetMatrix.index.intersection(word)) + index = NetMatrix.index.isin(W) + ii = [i for i, w in enumerate(word) if w in W] + word = [word[i] for i in ii] + group = [group[i] for i in ii] + node_colors = [node_colors[i] for i in ii] + + C = np.diag(NetMatrix.values) + S = NetMatrix.values + sEij = pd.DataFrame(S[np.ix_(index, index)], index=NetMatrix.index[index], columns=NetMatrix.columns[index]) + 
sC = C[index] + + df_lab = pd.DataFrame({ + 'sC': sC, + 'words': word, + 'groups': group, + 'color': node_colors, + 'cluster_label': 'NA' + }) + + df_lab = (df_lab[df_lab['sC'] >= minfreq] + .groupby('groups') + .apply(lambda x: pd.Series({ + 'freq': x['sC'].sum(), + 'cluster_label': x.loc[x['sC'].idxmax(), 'words'], + 'sC': list(x['sC']), + 'words': ', '.join(x['words'].astype(str)), + 'color': x['color'].iloc[0] + })) + .reset_index()) + + df_lab = df_lab.assign( + words=df_lab['words'].str.split(', '), + sC=df_lab['sC'] + ).explode(['words', 'sC']).reset_index(drop=True) + + index_names = sEij.index + column_names = sEij.columns + sEij = triu(sEij.values) + + df_lab_top = df_lab[['words', 'groups']].reset_index(drop=True) + df_lab_top = df_lab_top.assign(words=df_lab_top['words'].str.split(', ')).explode('words').reset_index(drop=True) + + sEij_df = pd.DataFrame(sEij, index=index_names, columns=column_names) + sEij_df = pd.DataFrame(sEij_df.values, index=sEij_df.index, columns=sEij_df.columns) + sEij_df = sEij_df.reset_index(names=['words1']) + sEij_df = pd.melt(sEij_df, id_vars=['words1'], var_name='words2', value_name='eij') + sEij_df = sEij_df[sEij_df['eij'] > 0] + + sEij_df['words1'] = sEij_df['words1'].astype(str) + df_lab_top['words'] = df_lab_top['words'].astype(str) + df_lab['words'] = df_lab['words'].astype(str) + + sEij_df = sEij_df.merge(df_lab_top[['words', 'groups']], + left_on='words1', right_on='words', how='left') + sEij_df = sEij_df.merge(df_lab_top[['words', 'groups']], + left_on='words2', right_on='words', how='left', + suffixes=('', '2')) + sEij_df = sEij_df.drop(['words', 'words_y'], axis=1, errors='ignore') + + df_lab_top = (df_lab[['groups', 'cluster_label', 'color', 'freq']] + .groupby('groups') + .first() + .reset_index()) + + sEij_df = sEij_df.loc[:, ~sEij_df.columns.duplicated()] + + df_lab['words'] = df_lab['words'].str.split('\n').str[0] + df_lab['words'] = df_lab['words'].str.replace(r'^\s*\d+\s*', '', regex=True).str.strip() + + df 
= sEij_df[ + sEij_df['words1'].isin(df_lab['words'].unique()) & sEij_df['words2'].isin(df_lab['words'].unique()) - ] + ] - if filtered_df.empty: - raise ValueError("Il filtro ha eliminato tutte le righe! Controlla i dati in df_lab['words'] e sEij_df['words1', 'words2'].") + if 'eij' not in sEij_df.columns: + raise KeyError("Column 'eij' does not exist in sEij_df!") - # 3. Filtra correttamente i dati - df = ( - filtered_df - .assign(ext=lambda x: (x['groups'] != x['groups2']).astype(int)) - .groupby('groups') - .agg({ + filtered_df = sEij_df[ + sEij_df['words1'].isin(df_lab['words'].unique()) & + sEij_df['words2'].isin(df_lab['words'].unique()) + ] + + if filtered_df.empty: + raise ValueError( + "The filter removed all rows. " + "Check the data in df_lab['words'] and sEij_df['words1', 'words2']." + ) + + df = ( + filtered_df + .assign(ext=lambda x: (x['groups'] != x['groups2']).astype(int)) + .groupby('groups') + .agg({ 'words1': lambda x: len(set(x)), - 'eij': lambda x: sum(x * x.index), # calculate centrality as sum(eij*ext) - 'ext': lambda x: sum(x.index * (1-x)) # calculate density as sum(eij*(1-ext)) - }) - .rename(columns={ + 'eij': lambda x: sum(x * x.index), + 'ext': lambda x: sum(x.index * (1 - x)) + }) + .rename(columns={ 'words1': 'n', 'eij': 'CallonCentrality', 'ext': 'CallonDensity' - }) - .assign( + }) + .assign( CallonDensity=lambda x: x['CallonDensity'] / x['n'] * 100, RankCentrality=lambda x: x['CallonCentrality'].rank(), RankDensity=lambda x: x['CallonDensity'].rank() - ) - .merge(df_lab_top, on='groups', how='left') - .rename(columns={'cluster_label': 'Cluster', 'freq': 'ClusterFrequency'}) - .reset_index() ) + .merge(df_lab_top, on='groups', how='left') + .rename(columns={'cluster_label': 'Cluster', 'freq': 'ClusterFrequency'}) + .reset_index() + ) - # Calculate plot parameters - meandens = df['RankDensity'].mean() - meancentr = df['RankCentrality'].mean() - rangex = max(meancentr - df['RankCentrality'].min(), df['RankCentrality'].max() - 
meancentr) - rangey = max(meandens - df['RankDensity'].min(), df['RankDensity'].max() - meandens) - - # Create annotations dataframe for quadrant labels - xlimits = [meancentr - (rangex * 1.2), meancentr + (rangex * 1.2)] - ylimits = [meandens - (rangey * 1.2), meandens + (rangey * 1.2)] - - # Create annotations dataframe - annotations = pd.DataFrame({ - 'xpos': sorted(xlimits + xlimits), - 'ypos': ylimits + ylimits, - 'words': ['Emerging or\nDeclining Themes', 'Niche Themes', 'Basic Themes', 'Motor Themes'], - 'hjustvar': [0, 0, 1, 1], - 'vjustvar': [0, 1, 0, 1] - }) + meandens = df['RankDensity'].mean() + meancentr = df['RankCentrality'].mean() + rangex = max(meancentr - df['RankCentrality'].min(), df['RankCentrality'].max() - meancentr) + rangey = max(meandens - df['RankDensity'].min(), df['RankDensity'].max() - meandens) - # Calculate size parameters - min_size = 5 * (1 + size) # Changed from 10 to 5 to match R version - max_size = 30 * (1 + size) - - # Create base plot using plotly express - fig = px.scatter( - df, - x='RankCentrality', - y='RankDensity', - color=df.index.map(lambda x: f"rgba{tuple(int(c * 255) for c in color[x][:3]) + (0.5,)}" if isinstance(color[x], tuple) else color[x]), # Handle tuple colors - labels={'RankCentrality': 'Relevance degree\n(Centrality)', 'RankDensity': 'Development degree\n(Density)'}, # Updated labels - opacity=0, - ) + xlimits = [meancentr - (rangex * 1.2), meancentr + (rangex * 1.2)] + ylimits = [meandens - (rangey * 1.2), meandens + (rangey * 1.2)] - fig.update_traces(hoverinfo='skip', hovertemplate=None) + annotations = pd.DataFrame({ + 'xpos': sorted(xlimits + xlimits), + 'ypos': ylimits + ylimits, + 'words': ['Emerging or\nDeclining Themes', 'Niche Themes', 'Basic Themes', 'Motor Themes'], + 'hjustvar': [0, 0, 1, 1], + 'vjustvar': [0, 1, 0, 1] + }) + + min_size = 5 * (1 + size) + max_size = 30 * (1 + size) + + fig = px.scatter( + df, + x='RankCentrality', + y='RankDensity', + color=df.index.map(lambda x: 
f"rgba{tuple(int(c * 255) for c in node_colors[x][:3]) + (0.5,)}" if isinstance(node_colors[x], tuple) else node_colors[x]), + labels={'RankCentrality': 'Relevance degree\n(Centrality)', 'RankDensity': 'Development degree\n(Density)'}, + opacity=0, + ) - # Add quadrant lines with transparency - fig.add_hline(y=meandens, line_dash="dash", line_color="rgba(0,0,0,0.7)") - fig.add_vline(x=meancentr, line_dash="dash", line_color="rgba(0,0,0,0.7)") + fig.update_traces(hoverinfo='skip', hovertemplate=None) + fig.add_hline(y=meandens, line_dash="dash", line_color="rgba(0,0,0,0.7)") + fig.add_vline(x=meancentr, line_dash="dash", line_color="rgba(0,0,0,0.7)") - # Add annotations for quadrants - for _, row in annotations.iterrows(): - fig.add_annotation( - x=row['xpos'], - y=row['ypos'], - text=row['words'], - showarrow=False, + for _, row in annotations.iterrows(): + fig.add_annotation( + x=row['xpos'], y=row['ypos'], + text=row['words'], showarrow=False, xanchor='left' if row['hjustvar'] == 0 else 'right', yanchor='bottom' if row['vjustvar'] == 0 else 'top', - font=dict(size=12*(1+size), color='rgba(32,32,32,0.5)') - ) + font=dict(size=12 * (1 + size), color='rgba(32,32,32,0.5)') + ) + + if size > 0: + text_size = 10 * (1 + size) + + if repel: + for cluster_id, cluster_data in df.groupby('groups'): + cluster_center_x = cluster_data['RankCentrality'].mean() + cluster_center_y = cluster_data['RankDensity'].mean() + cluster_size = cluster_data['ClusterFrequency'].sum() + + top_words = (df_lab[df_lab['groups'] == cluster_id] + .sort_values('sC', ascending=False) + .head(3)['words'] + .str.lower() + .tolist()) + top_words_text = '\n'.join(top_words) + + hover_words = [] + df_sorted = df_lab[df_lab['groups'] == cluster_id].sort_values('sC', ascending=False) + for idx, row in enumerate(df_sorted.head(10).itertuples()): + hover_words.append(f"{row.words}: {row.sC}") + hover_text = '
'.join(hover_words) - # Add labels if size > 0 - if size > 0: - text_size = 10 * (1 + size) - - if repel: - for cluster_id, cluster_data in df.groupby('groups'): - cluster_center_x = cluster_data['RankCentrality'].mean() - cluster_center_y = cluster_data['RankDensity'].mean() - cluster_size = cluster_data['ClusterFrequency'].sum() - - # Get the top three most frequent words for the cluster - top_words = (df_lab[df_lab['groups'] == cluster_id] - .sort_values('sC', ascending=False) - .head(3)['words'] - .str.lower() - .tolist()) - top_words_text = '\n'.join(top_words) - - # Get all words with occurrences for hover text - hover_words = [] - df_sorted = df_lab[df_lab['groups'] == cluster_id].sort_values('sC', ascending=False) - for idx, row in enumerate(df_sorted.head(10).itertuples()): - hover_words.append(f"{row.words}: {row.sC}") - hover_text = '
'.join(hover_words) - - size_bubble = min_size + (max_size - min_size) * np.log1p(cluster_size) / np.log1p(df['n'].max()) * 3 - - # Add labels for the cluster with top three words - fig.add_trace(go.Scatter( + size_bubble = min_size + (max_size - min_size) * np.log1p(cluster_size) / np.log1p(df['n'].max()) * 3 + + fig.add_trace(go.Scatter( x=[cluster_center_x], y=[cluster_center_y], text=[top_words_text.replace('\n', '
')], @@ -285,312 +266,248 @@ def thematic_map(df, field="ID", n=250, minfreq=5, ngrams=1, stemming=False, siz opacity=0.5, ), showlegend=False - )) - - # Update layout - fig.update_layout( - height=800, - showlegend=False, - plot_bgcolor='white', - xaxis=dict( + )) + + fig.update_layout( + height=800, + showlegend=False, + plot_bgcolor='white', + xaxis=dict( title="Relevance degree\n(Centrality)", - showgrid=False, - showticklabels=False, - showline=True, - linewidth=0.5, - linecolor='black', - zeroline=False, - range=xlimits - ), - yaxis=dict( + showgrid=False, showticklabels=False, + showline=True, linewidth=0.5, linecolor='black', + zeroline=False, range=xlimits + ), + yaxis=dict( title="Development degree\n(Density)", - showgrid=False, - showticklabels=False, - showline=True, - linewidth=0.5, - linecolor='black', - zeroline=False, - range=ylimits - ) + showgrid=False, showticklabels=False, + showline=True, linewidth=0.5, linecolor='black', + zeroline=False, range=ylimits ) - fig = go.FigureWidget(fig) - fig._config = fig._config | {'modeBarButtonsToRemove': ['pan', 'select', 'lasso2d', 'toImage'], - 'displaylogo': False} - - ############################################################################################################################################## - - # Rename and rearrange columns in df_lab - df_lab.columns = ['Cluster', 'Cluster_Frequency', 'Cluster_Label', 'Occurrences', 'Words', 'Color'] - df_lab = (df_lab - .sort_values('Cluster') - .dropna(subset=['Color']) - .assign(Cluster=lambda x: pd.factorize(x['Cluster'])[0] + 1)) - - # Add centrality measure to words - cluster_res = Net['cluster_res'] - df_lab = df_lab.merge(cluster_res, left_on='Words', right_on='vertex', how='left') - - # Keep only the specified columns - df_lab = df_lab[['Occurrences', 'Words', 'Cluster', 'Cluster_Label', 'btw_centrality', 'clos_centrality', 'pagerank_centrality']] - df = df[['Cluster', 'CallonCentrality', 'CallonDensity', 'RankCentrality', 'RankDensity', 
'ClusterFrequency']] - - # Handle document clustering - document_to_clusters = cluster_assignment(M=m, words=df_lab, field=field, remove_terms=remove_terms, synonyms=synonyms, threshold=0.5) - - # Create parameters dictionary and unpack into dataframe - params = { - 'field': field, - 'n': n, - 'minfreq': minfreq, - 'ngrams': ngrams, - 'stemming': stemming, - 'size': size, - 'n_labels': n_labels, - 'community_repulsion': community_repulsion, - 'repel': repel, - 'remove_terms': remove_terms, - 'synonyms': synonyms, - 'cluster': cluster - } - - # Unpack nested params into flat key-value pairs - flat_params = [] - for k,v in params.items(): - if isinstance(v, (list, dict)): - for i,val in enumerate(v): - flat_params.append((f"{k}{i+1}", val)) - else: - flat_params.append((k,v)) - - params_df = pd.DataFrame(flat_params, columns=['params', 'values']) - - # Handle subgraphs - if subgraphs: - gcl = {} - colors = df['color'].unique() - for color in colors: - node_indices = [i for i,v in enumerate(Net['graph'].vs) - if v['color'] == color] - gcl[color] = Net['graph'].subgraph(node_indices) + ) + fig = go.FigureWidget(fig) + fig._config = fig._config | {'modeBarButtonsToRemove': ['pan', 'select', 'lasso2d', 'toImage'], + 'displaylogo': False} + + df_lab.columns = ['Cluster', 'Cluster_Frequency', 'Cluster_Label', 'Occurrences', 'Words', 'Color'] + df_lab = (df_lab + .sort_values('Cluster') + .dropna(subset=['Color']) + .assign(Cluster=lambda x: pd.factorize(x['Cluster'])[0] + 1)) + + cluster_res = Net['cluster_res'] + df_lab = df_lab.merge(cluster_res, left_on='Words', right_on='vertex', how='left') + df_lab = df_lab[['Occurrences', 'Words', 'Cluster', 'Cluster_Label', 'btw_centrality', 'clos_centrality', 'pagerank_centrality']] + df = df[['Cluster', 'CallonCentrality', 'CallonDensity', 'RankCentrality', 'RankDensity', 'ClusterFrequency']] + + document_to_clusters = cluster_assignment(M=m, words=df_lab, field=field, remove_terms=remove_terms, synonyms=synonyms, threshold=0.5) 
+ + params = { + 'field': field, 'n': n, 'minfreq': minfreq, 'ngrams': ngrams, + 'stemming': stemming, 'size': size, 'n_labels': n_labels, + 'community_repulsion': community_repulsion, 'repel': repel, + 'remove_terms': remove_terms, 'synonyms': synonyms, 'cluster': cluster + } + + flat_params = [] + for k, v in params.items(): + if isinstance(v, (list, dict)): + for i, val in enumerate(v): + flat_params.append((f"{k}{i+1}", val)) else: - gcl = None - - ################################## NETWORK VISUALIZATION ################################## - node_opacity = 0.5 - net = Network(height="98vh", width="100%", notebook=True, cdn_resources="in_line") - net.toggle_physics(False) - - # Use colors from df['adjusted_color'] - unique_clusters = set(Net['cluster_obj'].membership) - cluster_colors = {} - cm_clusters = cluster_res - - # Get unique cluster IDs and their colors - for cluster_id in unique_clusters: - # Generate random RGB values - r = np.random.randint(0, 255) - g = np.random.randint(0, 255) - b = np.random.randint(0, 255) - # Create rgba color with 0.3 opacity - cluster_colors[cluster_id] = f"rgba({r},{g},{b},{node_opacity})" #da aggiustare l'opacity - - # Generate layout - # Using default igraph layout - layout = Net['graph']['layout'] - # Get coordinates from layout - coords = np.array([[pos[0], pos[1]] for pos in layout]) - - # Scale coordinates to fit 800px height - # First normalize to [-1,1] range - coords = coords / np.abs(coords).max() - - # Then scale to target dimensions - # Width will be proportional to maintain aspect ratio - coords[:, 0] *= 1000 # Scale x coordinates - coords[:, 1] *= 400 # Scale y coordinates to fit 800px (centered) - - # Prepare for avoid_net_overlaps - node_labels = [v["name"] if "name" in v.attributes() else f"Node {v.index}" for v in Net['graph'].vs] - node_sizes = [] - nodes = [] - - # Add nodes with matching R visNetwork settings - for idx, vertex in enumerate(Net['graph'].vs): - cluster_id = 
Net['cluster_obj'].membership[vertex.index] - node_color = cluster_colors[cluster_id] - - # Normalize node sizes - min_deg, max_deg = min(Net['graph'].degree()), max(Net['graph'].degree()) - node_size = 10 if max_deg == min_deg else (15 * (vertex.degree() - min_deg) / (max_deg - min_deg) + 10) - node_size = max(10, min(130, node_size)) - font_size = node_size * 2 - node_sizes.append(node_size) - - # Calculate font opacity using R-like formula - min_font_size = 10 # Minimum node size - max_font_size = 130 # Maximum node size - font_opacity = np.sqrt((font_size - min_font_size) / (max_font_size - min_font_size))*node_opacity + 0.3 - font_opacity = max(0.1, min(1, font_opacity)) # Clamp between 0.1 and 1 - - nodes.append({ - 'id': vertex.index, - 'label': vertex["name"] if "name" in vertex.attributes() else f"Node {vertex.index}", - 'title': vertex["name"] if "name" in vertex.attributes() else f"Node {vertex.index}", - 'color': node_color, - 'size': node_size, - 'font': { - 'size': font_size, - 'color': f'rgba(0,0,0,{font_opacity})', - }, - 'x': layout[idx][0] * 1000, - 'y': layout[idx][1] * 1000 - }) - - # Remove overlapping labels - noOverlap = True - if noOverlap: - threshold = 0.05 - ymax = np.ptp(coords[:, 1]) # equivalent to diff(range()) - xmax = np.ptp(coords[:, 0]) - threshold2 = threshold * np.mean([xmax, ymax]) - - # Create data structure for overlap checking - labels_to_remove = avoid_net_overlaps(coords, node_labels, node_sizes, threshold=threshold2) + flat_params.append((k, v)) + + params_df = pd.DataFrame(flat_params, columns=['params', 'values']) + + if subgraphs: + gcl = {} + unique_colors = df['color'].unique() + for cluster_color in unique_colors: + node_indices = [i for i, v in enumerate(Net['graph'].vs) + if v['color'] == cluster_color] + gcl[cluster_color] = Net['graph'].subgraph(node_indices) + else: + gcl = None + + node_opacity = 0.5 + net = Network(height="98vh", width="100%", notebook=True, cdn_resources="in_line") + 
net.toggle_physics(False) + + unique_clusters = set(Net['cluster_obj'].membership) + cluster_colors = {} + cm_clusters = cluster_res + + for cluster_id in unique_clusters: + r = np.random.randint(0, 255) + g = np.random.randint(0, 255) + b = np.random.randint(0, 255) + cluster_colors[cluster_id] = f"rgba({r},{g},{b},{node_opacity})" + + layout = Net['graph']['layout'] + coords = np.array([[pos[0], pos[1]] for pos in layout]) + + abs_max = np.abs(coords).max() + if abs_max > 0: + coords = coords / abs_max + coords[:, 0] *= 1000 + coords[:, 1] *= 400 + + node_labels = [v["name"] if "name" in v.attributes() else f"Node {v.index}" for v in Net['graph'].vs] + node_sizes = [] + nodes = [] + + degrees = Net['graph'].degree() + min_deg = min(degrees) if degrees else 0 + max_deg = max(degrees) if degrees else 1 + + for idx, vertex in enumerate(Net['graph'].vs): + cluster_id = Net['cluster_obj'].membership[vertex.index] + node_color = cluster_colors[cluster_id] + + node_size = 10 if max_deg == min_deg else (15 * (vertex.degree() - min_deg) / (max_deg - min_deg) + 10) + node_size = max(10, min(130, node_size)) + font_size = node_size * 2 + node_sizes.append(node_size) + + min_font_size = 10 + max_font_size = 130 + font_opacity = np.sqrt((font_size - min_font_size) / (max_font_size - min_font_size)) * node_opacity + 0.3 + font_opacity = max(0.1, min(1, font_opacity)) + + nodes.append({ + 'id': vertex.index, + 'label': vertex["name"] if "name" in vertex.attributes() else f"Node {vertex.index}", + 'title': vertex["name"] if "name" in vertex.attributes() else f"Node {vertex.index}", + 'color': node_color, + 'size': node_size, + 'font': {'size': font_size, 'color': f'rgba(0,0,0,{font_opacity})'}, + 'x': layout[idx][0] * 1000, + 'y': layout[idx][1] * 1000 + }) + + noOverlap = True + if noOverlap: + threshold = 0.05 + ymax = np.ptp(coords[:, 1]) + xmax = np.ptp(coords[:, 0]) + threshold2 = threshold * np.mean([xmax, ymax]) + labels_to_remove = avoid_net_overlaps(coords, node_labels, 
node_sizes, threshold=threshold2) + else: + labels_to_remove = [] + + unique_nodes = {node['id']: node for node in nodes}.values() + for node in unique_nodes: + if node['label'] in labels_to_remove: + node['label'] = '' + net.add_node(node['id'], **node) + + added_edges = set() + edge_weights = [e.attributes().get('weight', 1) for e in Net['graph'].es] + max_weight = max(edge_weights) if edge_weights else 1 + + for edge in Net['graph'].es: + source, target = edge.tuple + cluster_source = Net['cluster_obj'].membership[source] + cluster_target = Net['cluster_obj'].membership[target] + + if cluster_source == cluster_target: + base_color = cluster_colors[cluster_source] + rgba_values = [int(x) for x in base_color[5:-1].split(',')[:-1]] + edge_color = f"rgba({rgba_values[0]},{rgba_values[1]},{rgba_values[2]},0.56)" else: - labels_to_remove = [] - #labels_to_remove = avoid_net_overlaps(coords, node_labels, node_sizes, threshold=0.05) - - # Add nodes to network - unique_nodes = {node['id']: node for node in nodes}.values() - for node in unique_nodes: - if node['label'] in labels_to_remove: - node['label'] = '' - net.add_node(node['id'], **node) - - # Add edges with improved styling matching R implementation - added_edges = set() - edge_weights = [e.attributes().get('weight', 1) for e in Net['graph'].es] - max_weight = max(edge_weights) if edge_weights else 1 - - for edge in Net['graph'].es: - source, target = edge.tuple - cluster_source = Net['cluster_obj'].membership[source] - cluster_target = Net['cluster_obj'].membership[target] - - # Set edge color with proper opacity - if cluster_source == cluster_target: - base_color = cluster_colors[cluster_source] - # Convert rgba to hex with opacity - rgba_values = [int(x) for x in base_color[5:-1].split(',')[:-1]] - edge_color = f"rgba({rgba_values[0]},{rgba_values[1]},{rgba_values[2]},0.56)" - else: - # Use darker gray for inter-cluster edges (equivalent to #69696960 in R) - edge_color = "rgba(105,105,105,0.38)" - - # Calculate 
edge width similar to R implementation - edge_weight = edge.attributes().get('weight', 1) - normalized_weight = (edge_weight ** 2 / (max_weight ** 2)) * (10 + 2.5) # 2.5 is base edge size - - edge_tuple = (source, target) if source < target else (target, source) - - # Add edge if not already added - if edge_tuple not in added_edges: - net.add_edge( - source, - target, - color=edge_color, - width=normalized_weight, - smooth={'type': 'horizontal'}, - dashes=False # Set to True if you have line type information - ) - added_edges.add(edge_tuple) - - # Configure network options to match R visNetwork - node_shadow = False - edit_nodes = False - net.set_options(f""" - var options = {{ - "nodes": {{ - "shadow": {"true" if node_shadow else "false"} - }}, - "edges": {{ - "smooth": {{"type": "horizontal"}} - }}, - "interaction": {{ - "dragNodes": true, - "hideEdgesOnDrag": true, - "navigationButtons": false, - "zoomSpeed": 0.4 - }}, - "physics": {{ - "enabled": false - }}, - "manipulation": {{ - "enabled": {"true" if edit_nodes else "false"} - }} + edge_color = "rgba(105,105,105,0.38)" + + edge_weight = edge.attributes().get('weight', 1) + normalized_weight = (edge_weight ** 2 / (max_weight ** 2)) * (10 + 2.5) + + edge_tuple = (source, target) if source < target else (target, source) + + if edge_tuple not in added_edges: + net.add_edge( + source, target, + color=edge_color, + width=normalized_weight, + smooth={'type': 'horizontal'}, + dashes=False + ) + added_edges.add(edge_tuple) + + node_shadow = False + edit_nodes = False + net.set_options(f""" + var options = {{ + "nodes": {{ + "shadow": {"true" if node_shadow else "false"} + }}, + "edges": {{ + "smooth": {{"type": "horizontal"}} + }}, + "interaction": {{ + "dragNodes": true, + "hideEdgesOnDrag": true, + "navigationButtons": false, + "zoomSpeed": 0.4 + }}, + "physics": {{ + "enabled": false + }}, + "manipulation": {{ + "enabled": {"true" if edit_nodes else "false"} }} - """) - - # Save network to HTML - tmp = 
tempfile.NamedTemporaryFile(delete=False, suffix=".html") - html_path = tmp.name - with open(html_path, 'w', encoding="utf-8") as f: - html = net.generate_html() - new_css = " .card {\n border: none;\n }" - updated_html = html.replace("", new_css + "\n ") - updated_html = updated_html.replace("1px solid lightgray", "none") - - f.write(updated_html) - - ################################################################################################ - - # Return results dictionary - results = { - 'map': fig, - 'clusters': df, - 'words': df_lab, - 'nclust': len(df), - 'net': Net, - 'subgraphs': gcl, - 'documentToClusters': document_to_clusters, - 'params': params_df - } - - return results['map'], html_path.split(os.sep)[-1], results['words'], results['clusters'], results['documentToClusters'] + }} + """) + + tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".html") + html_path = tmp.name + with open(html_path, 'w', encoding="utf-8") as f: + html = net.generate_html() + new_css = " .card {\n border: none;\n }" + updated_html = html.replace("", new_css + "\n ") + updated_html = updated_html.replace("1px solid lightgray", "none") + f.write(updated_html) + + results = { + 'map': fig, + 'clusters': df, + 'words': df_lab, + 'nclust': len(df), + 'net': Net, + 'subgraphs': gcl, + 'documentToClusters': document_to_clusters, + 'params': params_df + } + + return results['map'], html_path.split(os.sep)[-1], results['words'], results['clusters'], results['documentToClusters'] def cluster_assignment(M, words, field, remove_terms=None, synonyms=None, threshold=0.5): - # Integrate stopwords and synonyms in M original field + if field in ["AB", "TI"]: field = f"{field}_TM" - # Check if field exists in M + # PATCH: safety check if field doesn't exist in M + if field not in M.columns: + return pd.DataFrame() + Fi = M[field] - - # Create lists to store terms and SR values + all_terms = [] all_sr = [] - - # Iterate through each row + for i, terms_list in enumerate(Fi): if 
isinstance(terms_list, list): for term in terms_list: - if term: # Skip empty terms + if term: all_terms.append(term.strip()) all_sr.append(M['SR'].iloc[i]) - - all_field = pd.DataFrame({ - 'terms': all_terms, - 'SR': all_sr - }) - # Remove terms if specified + all_field = pd.DataFrame({'terms': all_terms, 'SR': all_sr}) + if remove_terms is not None: remove_terms = pd.DataFrame({'terms': [t.strip().upper() for t in remove_terms]}) all_field = all_field.merge(remove_terms, on='terms', how='left', indicator=True) all_field = all_field[all_field['_merge'] == 'left_only'].drop('_merge', axis=1) - # Handle synonyms if synonyms is not None: s = [syn.upper().split(";") for syn in synonyms] snew = [l[0] for l in s] @@ -603,26 +520,23 @@ def cluster_assignment(M, words, field, remove_terms=None, synonyms=None, thresh all_field.loc[all_field['new'].notna(), 'terms'] = all_field.loc[all_field['new'].notna(), 'new'] all_field = all_field[['SR', 'terms']] - # Process words dataframe words = words.assign( - p_w=1/words['Occurrences'], + p_w=1 / words['Occurrences'], p_c=words['pagerank_centrality'] ) - - # Save a copy of the ungrouped dataframe for merging + words_for_merge = words.copy() - - # Continue with groupby operation for later use if needed words = words.groupby('Cluster') - # Merge terms with words - # Convert 'terms' to string before applying string operations all_field['terms'] = all_field['terms'].astype(str) + + words_for_merge = words_for_merge.copy() + words_for_merge['Words'] = words_for_merge['Words'].str.lower() + terms = all_field.assign(terms=all_field['terms'].str.lower()).merge( words_for_merge, left_on='terms', right_on='Words', how='left' ) - # Calculate probabilities terms = (terms.groupby('SR') .apply(lambda x: x.assign(pagerank=x['p_c'].sum())) .reset_index(drop=True) @@ -634,35 +548,40 @@ def cluster_assignment(M, words, field, remove_terms=None, synonyms=None, thresh terms['p'] = terms['p_w'] / terms.groupby('SR')['p_w'].transform('sum') terms = 
terms.dropna(subset=['Cluster_Label']).drop('p_w', axis=1) - # Assign clusters based on threshold terms_max = (terms[terms['p'] >= threshold] .sort_values('p', ascending=False) .groupby('SR') .agg({'Cluster_Label': lambda x: ';'.join(x)}) .rename(columns={'Cluster_Label': 'Assigned_cluster'})) - # Calculate pagerank for assigned clusters terms_pagerank = (terms.merge(terms_max, on='SR') .query('Cluster_Label == Assigned_cluster')[['SR', 'pagerank']]) - # Pivot and merge results terms = (terms.drop('pagerank', axis=1) .pivot(index='SR', columns='Cluster_Label', values='p') - .reset_index() # Ensure SR is only a column - .rename_axis(None, axis=1) # Remove any index name - ) - # Now merge with terms_max and terms_pagerank + .reset_index() + .rename_axis(None, axis=1)) + terms = terms.merge(terms_max, on='SR').merge(terms_pagerank, on='SR') - # Process final results if 'DI' not in M.columns: M['DI'] = np.nan + year = pd.Timestamp.now().year + 1 - M = M.reset_index(drop=True) + + tc_numeric = pd.to_numeric(M['TC'], errors='coerce').fillna(0) + M = M.copy() + M['TC'] = tc_numeric + # PATCH: PY is stored as string in ETL output — convert to numeric + # before arithmetic in TCpY calculation to avoid TypeError. 
+ M['PY'] = pd.to_numeric(M['PY'], errors='coerce') + terms = (M.assign( - TCpY=lambda x: x['TC']/(year-x['PY']), - NTC=lambda x: x.groupby('PY')['TC'].transform(lambda y: y/y.mean()) + TCpY=lambda x: x['TC'] / (year - x['PY']), + NTC=lambda x: x.groupby('PY')['TC'].transform( + lambda y: y / y.mean() if y.mean() != 0 else 0 + ) )[['DI', 'AU', 'TI', 'SO', 'PY', 'TC', 'TCpY', 'NTC', 'SR']] .merge(terms, on='SR') .fillna(0) @@ -670,4 +589,4 @@ def cluster_assignment(M, words, field, remove_terms=None, synonyms=None, thresh .apply(lambda x: x.sort_values('TC', ascending=False)) .reset_index(drop=True)) - return terms + return terms \ No newline at end of file diff --git a/www/services/validator.py b/www/services/validator.py new file mode 100644 index 000000000..00fd8e313 --- /dev/null +++ b/www/services/validator.py @@ -0,0 +1,124 @@ +""" +validator.py +------------ +Validation phase of the ETL pipeline. + +Checks that the DataFrame produced by standardizer.py respects +the WoS column schema before it gets passed to the dashboard. + +Main entry point: + validate(df) → pd.DataFrame +""" + +import pandas as pd + +# All mandatory columns and their expected types +MANDATORY_COLUMNS = { + "DB": str, + "UT": str, + "DI": str, + "PMID": str, + "TI": str, + "SO": str, + "JI": str, + "PY": str, + "DT": str, + "LA": str, + "TC": int, + "AU": list, + "AF": list, + "C1": list, + "RP": str, + "CR": list, + "DE": list, + "ID": list, + "AB": str, + "VL": str, + "IS": str, + "BP": str, + "EP": str, + "SR": str, +} + + +def check_columns(df: pd.DataFrame) -> list: + """ + Checks that all mandatory columns are present in the DataFrame. + Returns a list of missing column names. + """ + missing = [] + for col in MANDATORY_COLUMNS: + if col not in df.columns: + missing.append(col) + return missing + + +def check_nulls(df: pd.DataFrame) -> list: + """ + Checks that no cell contains None or NaN. + Returns a list of column names that contain null values. 
+ """ + offending = [] + for col in MANDATORY_COLUMNS: + if col not in df.columns: + continue + has_null = df[col].apply(lambda x: x is None or (isinstance(x, float) and pd.isna(x))).any() + if has_null: + offending.append(col) + return offending + + +def check_types(df: pd.DataFrame) -> list: + """ + Checks that each column contains the correct Python type. + Returns a list of column names where the type is wrong. + """ + offending = [] + for col, expected_type in MANDATORY_COLUMNS.items(): + if col not in df.columns: + continue + wrong = df[col].apply(lambda x: not isinstance(x, expected_type)).any() + if wrong: + offending.append(col) + return offending + + + +def validate(df: pd.DataFrame) -> pd.DataFrame: + """ + Main entry point for the validator. + Runs all checks on the DataFrame and prints a report. + Raises a ValueError if any check fails. + Returns the DataFrame unchanged if all checks pass. + """ + print("Running validation...") + passed = True + + missing_cols = check_columns(df) + if missing_cols: + print(f" FAIL — missing columns: {missing_cols}") + passed = False + else: + print(" PASS — all mandatory columns present") + + null_cols = check_nulls(df) + if null_cols: + print(f" FAIL — null values found in: {null_cols}") + passed = False + else: + print(" PASS — no null values found") + + type_cols = check_types(df) + if type_cols: + print(f" FAIL — wrong types in: {type_cols}") + passed = False + else: + print(" PASS — all column types correct") + + if not passed: + raise ValueError("Validation failed. Fix the issues above before proceeding.") + + print("Validation passed.") + return df + +