diff --git a/README.zh-CN.md b/README.zh-CN.md index 957ee7c9..389cc668 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -160,6 +160,7 @@ npx skills add jackwener/opencli --skill opencli-oneshot # 快速命令参 | **devto** | `top` `tag` `user` | 公开 | | **dictionary** | `search` `synonyms` `examples` | 公开 | | **arxiv** | `search` `paper` | 公开 | +| **pubmed** | `search` `article` `author` `citations` `related` | 公开 | | **paperreview** | `submit` `review` `feedback` | 公开 | | **wikipedia** | `search` `summary` `random` `trending` | 公开 | | **hackernews** | `top` `new` `best` `ask` `show` `jobs` `search` `user` | 公共 API | diff --git a/clis/pubmed/article.ts b/clis/pubmed/article.ts new file mode 100644 index 00000000..32420f4a --- /dev/null +++ b/clis/pubmed/article.ts @@ -0,0 +1,335 @@ +/** + * PubMed Article Details Adapter + * + * Get detailed information about a specific PubMed article by PMID. + * Uses EFetch API (XML) for full article details including abstract, + * MeSH terms, keywords, and author affiliations. + * + * API Documentation: + * - EFetch: https://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.EFetch + */ + +import { cli, Strategy } from '@jackwener/opencli/registry'; +import { CliError } from '@jackwener/opencli/errors'; +import { + eutilsFetch, + buildPubMedUrl, + truncateText, + prioritizeArticleType, +} from './utils.js'; + +/** + * Parse EFetch XML response to extract full article details + */ +function parseEFetchXml(xml: string, pmid: string) { + // Helper: extract text content between tags + const getTag = (src: string, tag: string): string => { + const m = src.match(new RegExp(`<${tag}[^>]*>([\\s\\S]*?)<\/${tag}>`, 'i')); + return m ? 
m[1].replace(/<[^>]+>/g, '').trim() : ''; + }; + + const getAllTags = (src: string, tag: string): string[] => { + const re = new RegExp(`<${tag}[^>]*>([\\s\\S]*?)<\/${tag}>`, 'gi'); + const results: string[] = []; + let m; + while ((m = re.exec(src)) !== null) { + results.push(m[1].replace(/<[^>]+>/g, '').trim()); + } + return results; + }; + + // Abstract - may have multiple AbstractText sections (structured abstract) + const abstractParts = getAllTags(xml, 'AbstractText'); + const abstract = abstractParts.join(' ').replace(/\s+/g, ' ').trim(); + + // Title + const title = getTag(xml, 'ArticleTitle'); + + // Journal + const journalTitle = getTag(xml, 'Title'); + const isoAbbreviation = getTag(xml, 'ISOAbbreviation'); + const volume = getTag(xml, 'Volume'); + const issue = getTag(xml, 'Issue'); + const pagination = getTag(xml, 'MedlinePgn'); + + // Publication date - must extract from JournalIssue/PubDate, not DateCompleted/DateRevised + const journalIssueMatch = xml.match(/]*>([\s\S]*?)<\/JournalIssue>/i); + const journalIssue = journalIssueMatch ? journalIssueMatch[1] : ''; + const pubDateMatch = journalIssue.match(/([\s\S]*?)<\/PubDate>/i); + const pubDate = pubDateMatch ? 
pubDateMatch[1] : ''; + + const year = getTag(pubDate, 'Year') || getTag(xml, 'MedlineDate').slice(0, 4); + const month = getTag(pubDate, 'Month'); + const day = getTag(pubDate, 'Day'); + const fullDate = [year, month, day].filter(Boolean).join(' '); + + // Authors and affiliations - collect all affiliations for each author + // Also track EqualContrib for co-first authors + const authorBlocks = xml.match(/]*>([\s\S]*?)<\/Author>/gi) || []; + const authors: Array<{ name: string; affiliations: string[]; equalContrib: boolean }> = authorBlocks.map(block => { + const lastName = getTag(block, 'LastName'); + const foreName = getTag(block, 'ForeName') || getTag(block, 'Initials'); + const collectiveName = getTag(block, 'CollectiveName'); + const name = collectiveName || `${lastName} ${foreName}`.trim(); + // Check for EqualContrib attribute + const equalContrib = /EqualContrib="Y"/i.test(block); + // Get all affiliations for this author (an author can have multiple AffiliationInfo blocks) + const affiliationBlocks = block.match(/([\s\S]*?)<\/AffiliationInfo>/gi) || []; + const affiliations = affiliationBlocks + .map(info => getTag(info, 'Affiliation')) + .filter(Boolean); + return { name, affiliations, equalContrib }; + }); + + // Identify co-first authors: only consecutive authors at the BEGINNING with EqualContrib="Y" + // Co-first authors must be first authors, not authors in the middle or end + const coFirstAuthors: typeof authors = []; + for (const author of authors) { + if (author.equalContrib) { + coFirstAuthors.push(author); + } else { + // Stop at first author without EqualContrib - co-first authors must be consecutive from start + break; + } + } + const firstAuthors = coFirstAuthors.length > 0 ? 
coFirstAuthors : [authors[0]].filter(Boolean); + + // Identify corresponding author: look for author with email in affiliations + // Corresponding authors usually have their email in the affiliation text + const extractEmail = (affil: string): string => { + const emailMatch = affil.match(/[\w.-]+@[\w.-]+\.\w+/); + return emailMatch ? emailMatch[0] : ''; + }; + + let correspondingAuthor = authors[authors.length - 1] || { name: '', affiliations: [], equalContrib: false }; + + // Try to find author with email in their affiliations (more accurate than just last author) + const authorsWithEmail = authors.filter(a => a.affiliations.some(aff => extractEmail(aff))); + if (authorsWithEmail.length > 0) { + // Use the last author that has an email (corresponding authors are typically at the end) + correspondingAuthor = authorsWithEmail[authorsWithEmail.length - 1]; + } + + // Unique affiliations - flatten all author affiliations and deduplicate + const allAffiliations = authors.flatMap(a => a.affiliations); + const uniqueAffiliations = [...new Set(allAffiliations)]; + + // MeSH terms + const meshBlocks = xml.match(/([\s\S]*?)<\/MeshHeading>/gi) || []; + const meshTerms = meshBlocks + .map(block => getTag(block, 'DescriptorName')) + .filter(Boolean) + .slice(0, 10); + + // Keywords + const keywords = getAllTags(xml, 'Keyword').filter(Boolean).slice(0, 10); + + // Article type - PubMed returns multiple types, prioritize more specific ones + const pubTypes = getAllTags(xml, 'PublicationType').filter(Boolean); + const articleType = prioritizeArticleType(pubTypes); + + // Language + const language = getTag(xml, 'Language'); + + // IDs: DOI + const doiMatch = xml.match(/([^<]+)<\/ArticleId>/i); + const doi = doiMatch ? doiMatch[1].trim() : ''; + + const pmcMatch = xml.match(/([^<]+)<\/ArticleId>/i); + const pmcId = pmcMatch ? 
pmcMatch[1].trim() : ''; + + // Build author list with their affiliations for detailed view + const authorListWithAffiliations = authors.map(a => ({ + name: a.name, + affiliations: a.affiliations, + equalContrib: a.equalContrib, + })); + + // Simple author name list for compact display + const authorNameList = authors.map(a => a.name); + + return { + pmid, + title, + abstract, + authors: { + list: authorListWithAffiliations, + names: authorNameList, + all: authorNameList.slice(0, 10).join(', ') + (authorNameList.length > 10 ? ', et al.' : ''), + first: firstAuthors[0]?.name || '', + firstWithAffiliations: { + name: firstAuthors[0]?.name || '', + affiliations: firstAuthors[0]?.affiliations || [], + }, + firstAuthors: firstAuthors.map(a => ({ + name: a.name, + affiliations: a.affiliations, + })), + corresponding: correspondingAuthor.name, + correspondingWithAffiliations: { + name: correspondingAuthor.name, + affiliations: correspondingAuthor.affiliations, + }, + count: authors.length, + affiliations: uniqueAffiliations, + }, + journal: { + title: journalTitle, + isoAbbreviation, + volume, + issue, + pagination, + }, + publication: { + year, + fullDate, + }, + ids: { + pmid, + doi, + pmc: pmcId, + }, + classification: { + articleType, + pubTypes, + language, + meshTerms, + keywords, + }, + url: buildPubMedUrl(pmid), + }; +} + +cli({ + site: 'pubmed', + name: 'article', + description: 'Get detailed information about a PubMed article by PMID (full abstract, MeSH terms, affiliations)', + strategy: Strategy.PUBLIC, + browser: false, + args: [ + { + name: 'pmid', + type: 'string', + required: true, + positional: true, + help: 'PubMed ID (e.g., "37780221")', + }, + { + name: 'full-abstract', + type: 'boolean', + default: false, + help: 'Show full abstract without truncation (table output only)', + }, + ], + columns: ['field', 'value'], + func: async (_page, args) => { + const pmid = args.pmid.trim(); + + if (!/^\d+$/.test(pmid)) { + throw new CliError( + 'INVALID_ARGUMENT', 
+ `Invalid PMID format: ${pmid}`, + 'PMID should be a numeric string (e.g., "37780221")' + ); + } + + // Use EFetch to get full article details (XML includes abstract, MeSH, affiliations) + const xml = await eutilsFetch('efetch', { + id: pmid, + rettype: 'abstract', + }, 'xml'); + + if (!xml || xml.includes('') || !xml.includes('')) { + throw new CliError( + 'NOT_FOUND', + `Article with PMID ${pmid} not found`, + 'Check the PMID and try again' + ); + } + + const article = parseEFetchXml(xml, pmid); + + // Table format - reorganized sections + // Helper: extract email from affiliation text + const extractEmail = (affil: string): string => { + const emailMatch = affil.match(/[\w.-]+@[\w.-]+\.\w+/); + return emailMatch ? emailMatch[0] : ''; + }; + + const firstAuthors = article.authors.firstAuthors || [article.authors.firstWithAffiliations]; + const corrAuthor = article.authors.correspondingWithAffiliations; + const corrEmail = corrAuthor.affiliations.map(extractEmail).filter(Boolean)[0] || 'N/A'; + const firstAuthorNames = firstAuthors.map(a => a.name); + + const rows: Array<{ field: string; value: string }> = [ + { field: 'PMID', value: article.pmid }, + { field: 'Title', value: article.title }, + { field: '---', value: '---' }, + { field: 'Section', value: 'First Author & Corresponding Author Information' }, + ]; + + // Add first author(s) - support co-first authors + if (firstAuthors.length > 1) { + rows.push({ field: 'Co-first Authors', value: `${firstAuthors.length} authors` }); + firstAuthors.forEach((author, index) => { + rows.push({ + field: ` ${index + 1}. 
First Author`, + value: author.name, + }); + rows.push({ + field: ` Affiliations`, + value: author.affiliations.join('; ') || 'N/A', + }); + }); + } else { + rows.push({ field: 'First Author', value: firstAuthors[0]?.name || 'N/A' }); + rows.push({ + field: 'First Author Affiliations', + value: firstAuthors[0]?.affiliations.join('; ') || 'N/A', + }); + } + + rows.push( + { field: 'Likely Corresponding Author', value: corrAuthor.name }, + { field: ' Corresponding Author Affiliations', value: corrAuthor.affiliations.join('; ') || 'N/A' }, + { field: ' Corresponding Author Email', value: corrEmail }, + { field: ' (Inferred from email in affiliations, may not be accurate)', value: '' }, + { field: '---', value: '---' }, + { field: 'Section', value: 'All Authors Information' }, + ); + + // Add each author with their affiliations, mark co-first authors only + article.authors.list.forEach((author, index) => { + // Only mark as co-first if this author is in the firstAuthors list + const isCoFirst = firstAuthorNames.includes(author.name) ? ' ★' : ''; + rows.push({ + field: `${index + 1}. ${author.name}${isCoFirst}`, + value: author.affiliations.join('; ') || 'N/A', + }); + }); + + rows.push( + { field: '---', value: '---' }, + { field: 'Section', value: 'Journal Information' }, + { field: 'Journal', value: article.journal.title || article.journal.isoAbbreviation }, + { field: 'Year', value: article.publication.year }, + { field: 'Volume/Issue', value: `${article.journal.volume}${article.journal.issue ? 
`(${article.journal.issue})` : ''}` }, + { field: 'Pages', value: article.journal.pagination }, + { field: 'DOI', value: article.ids.doi || 'N/A' }, + { field: 'PMC ID', value: article.ids.pmc || 'N/A' }, + { field: '---', value: '---' }, + { field: 'Section', value: 'Article Classification' }, + { field: 'Article Type', value: article.classification.articleType }, + { field: 'Language', value: article.classification.language }, + { field: 'MeSH Terms', value: article.classification.meshTerms.join(', ') || 'N/A' }, + { field: 'Keywords', value: article.classification.keywords.join(', ') || 'N/A' }, + { field: '---', value: '---' }, + { field: 'Section', value: 'Abstract' }, + { field: 'Abstract', value: args['full-abstract'] ? article.abstract || 'N/A' : truncateText(article.abstract, 400) || 'N/A' }, + { field: '---', value: '---' }, + { field: 'URL', value: article.url } + ); + + return rows; + }, +}); diff --git a/clis/pubmed/author.ts b/clis/pubmed/author.ts new file mode 100644 index 00000000..00ccdf1c --- /dev/null +++ b/clis/pubmed/author.ts @@ -0,0 +1,181 @@ +/** + * PubMed Author Search Adapter + * + * Search for articles by author name and affiliation. + * Supports searching for first author, last author, or any author position. 
+ * + * API Documentation: + * - ESearch: https://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.ESearch + * - ESummary: https://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.ESummary + */ + +import { cli, Strategy } from '@jackwener/opencli/registry'; +import { CliError } from '@jackwener/opencli/errors'; +import { + eutilsFetch, + extractAuthors, + extractDoi, + buildPubMedUrl, + truncateText, +} from './utils.js'; + +cli({ + site: 'pubmed', + name: 'author', + description: 'Search PubMed articles by author name and affiliation', + strategy: Strategy.PUBLIC, + browser: false, + args: [ + { + name: 'name', + type: 'string', + required: true, + positional: true, + help: 'Author name (e.g., "Smith J", "Zhang Y", "John Smith")', + }, + { + name: 'limit', + type: 'int', + default: 20, + help: 'Maximum number of results (max 100)', + }, + { + name: 'affiliation', + type: 'string', + required: false, + help: 'Filter by author affiliation (e.g., "Harvard", "Stanford", "Beijing")', + }, + { + name: 'position', + type: 'string', + default: 'any', + help: 'Author position: any, first, last', + }, + { + name: 'year-from', + type: 'int', + required: false, + help: 'Filter: publication year from', + }, + { + name: 'year-to', + type: 'int', + required: false, + help: 'Filter: publication year to', + }, + { + name: 'sort', + type: 'string', + default: 'date', + help: 'Sort by: date, relevance', + }, + ], + columns: [ + 'rank', + 'pmid', + 'title', + 'authors', + 'journal', + 'year', + 'doi', + 'url', + ], + func: async (_page, args) => { + const limit = Math.min(Math.max(1, Number(args.limit)), 100); + + // Build author search query + let authorQuery = args.name; + + // Add position filter + if (args.position === 'first') { + authorQuery = `${authorQuery}[1au]`; + } else if (args.position === 'last') { + authorQuery = `${authorQuery}[lastau]`; + } else { + authorQuery = `${authorQuery}[au]`; + } + + // Build full query with filters + const searchTerms: string[] = [authorQuery]; + 
+ if (args.affiliation) { + searchTerms.push(`${args.affiliation}[ad]`); + } + + if (args['year-from'] || args['year-to']) { + const from = args['year-from'] || '1900'; + const to = args['year-to'] || new Date().getFullYear(); + searchTerms.push(`${from}:${to}[PDAT]`); + } + + const searchQuery = searchTerms.join(' AND '); + + // Map sort options + const sortMap: Record = { + relevance: '', + date: 'pub_date', + }; + const sort = sortMap[args.sort] || ''; + + // Step 1: ESearch - Get PMIDs + const esearchParams: Record = { + term: searchQuery, + retmax: limit, + usehistory: 'y', + sort, + }; + + const esearchResult = await eutilsFetch('esearch', esearchParams); + + const pmidList = esearchResult.esearchresult?.idlist; + if (!pmidList || !Array.isArray(pmidList) || pmidList.length === 0) { + throw new CliError( + 'NOT_FOUND', + `No articles found for author "${args.name}"`, + 'Try a different name format or remove some filters' + ); + } + + // Step 2: ESummary - Get article details + const pmids = pmidList.join(','); + const esummaryResult = await eutilsFetch('esummary', { + id: pmids, + }); + + const articles = esummaryResult.result; + if (!articles || typeof articles !== 'object') { + throw new CliError( + 'PARSE_ERROR', + 'Failed to parse article data from PubMed', + 'The API response format may have changed' + ); + } + + // Process results + const results = pmidList.map((pmid: string, index: number) => { + const article = articles[pmid]; + if (!article) { + return null; + } + + const title = article.title || ''; + const authors = extractAuthors(article.authors, 3); + const journal = article.fulljournalname || article.source || ''; + const year = article.pubdate?.split(' ')?.[0] || ''; + const doi = extractDoi(article.articleids); + + return { + rank: index + 1, + pmid, + title: truncateText(title.replace(/\.$/, ''), 100), + authors, + journal: truncateText(journal, 50), + year, + doi, + url: buildPubMedUrl(pmid), + }; + }); + + return results.filter((r): r is 
NonNullable => r !== null); + }, +}); diff --git a/clis/pubmed/citations.ts b/clis/pubmed/citations.ts new file mode 100644 index 00000000..9afdf006 --- /dev/null +++ b/clis/pubmed/citations.ts @@ -0,0 +1,178 @@ +/** + * PubMed Citations Adapter + * + * Get citation relationships for a PubMed article: + * - "cited by": Articles that cite this article + * - "references": Articles cited by this article + * + * Uses ELink API to retrieve citation relationships. + * + * API Documentation: + * - ELink: https://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.ELink + */ + +import { cli, Strategy } from '@jackwener/opencli/registry'; +import { CliError } from '@jackwener/opencli/errors'; +import { + eutilsFetch, + extractAuthors, + extractDoi, + buildPubMedUrl, + truncateText, +} from './utils.js'; + +cli({ + site: 'pubmed', + name: 'citations', + description: 'Get citation relationships (cited by / references) for a PubMed article', + strategy: Strategy.PUBLIC, + browser: false, + args: [ + { + name: 'pmid', + type: 'string', + required: true, + positional: true, + help: 'PubMed ID (e.g., "37780221")', + }, + { + name: 'direction', + type: 'string', + default: 'citedby', + help: 'Citation direction: citedby (articles citing this) or references (articles cited by this)', + }, + { + name: 'limit', + type: 'int', + default: 20, + help: 'Maximum number of results (max 100)', + }, + ], + columns: [ + 'rank', + 'pmid', + 'title', + 'authors', + 'journal', + 'year', + 'doi', + 'url', + ], + func: async (_page, args) => { + const pmid = args.pmid.trim(); + const limit = Math.min(Math.max(1, Number(args.limit)), 100); + + // Validate PMID format + if (!/^\d+$/.test(pmid)) { + throw new CliError( + 'INVALID_ARGUMENT', + `Invalid PMID format: ${pmid}`, + 'PMID should be a numeric string (e.g., "37780221")' + ); + } + + // Validate direction + if (!['citedby', 'references'].includes(args.direction)) { + throw new CliError( + 'INVALID_ARGUMENT', + `Invalid direction: ${args.direction}`, 
+ 'Direction must be "citedby" or "references"' + ); + } + + // Use ELink to get citation relationships + const elinkParams: Record = { + id: pmid, + cmd: 'neighbor', + }; + + // Set database from/to based on direction + if (args.direction === 'citedby') { + // Articles that cite this article + elinkParams.dbfrom = 'pubmed'; + elinkParams.linkname = 'pubmed_pubmed_citedin'; + } else { + // Articles cited by this article + elinkParams.dbfrom = 'pubmed'; + elinkParams.linkname = 'pubmed_pubmed_refs'; + } + + const elinkResult = await eutilsFetch('elink', elinkParams); + + // Extract linked PMIDs + const linkSet = elinkResult.linksets?.[0]; + if (!linkSet) { + throw new CliError( + 'NOT_FOUND', + `No citation data found for PMID ${pmid}`, + 'The article may not have citation relationships or the PMID is incorrect' + ); + } + + const linkSetDbs = linkSet.linksetdbs; + if (!linkSetDbs || !Array.isArray(linkSetDbs) || linkSetDbs.length === 0) { + const directionText = args.direction === 'citedby' ? 'cited by any articles' : 'any references'; + throw new CliError( + 'NOT_FOUND', + `This article is not ${directionText} in PubMed`, + 'Try the other direction or check the PMID' + ); + } + + // Get the links from the first (and usually only) linksetdb + const links = linkSetDbs[0].links; + if (!links || !Array.isArray(links) || links.length === 0) { + const directionText = args.direction === 'citedby' ? 
'cited by any articles' : 'any references'; + throw new CliError( + 'NOT_FOUND', + `This article is not ${directionText} in PubMed`, + 'Try the other direction or check the PMID' + ); + } + + // Limit results + const pmidList = links.slice(0, limit); + + // Get article details using ESummary + const pmids = pmidList.join(','); + const esummaryResult = await eutilsFetch('esummary', { + id: pmids, + }); + + const articles = esummaryResult.result; + if (!articles || typeof articles !== 'object') { + throw new CliError( + 'PARSE_ERROR', + 'Failed to parse article data from PubMed', + 'The API response format may have changed' + ); + } + + // Process results + const results = pmidList.map((linkedPmid: string, index: number) => { + const article = articles[linkedPmid]; + if (!article) { + return null; + } + + const title = article.title || ''; + const authors = extractAuthors(article.authors, 3); + const journal = article.fulljournalname || article.source || ''; + const year = article.pubdate?.split(' ')?.[0] || ''; + const doi = extractDoi(article.articleids); + + return { + rank: index + 1, + pmid: linkedPmid, + title: truncateText(title.replace(/\.$/, ''), 100), + authors, + journal: truncateText(journal, 50), + year, + doi, + url: buildPubMedUrl(linkedPmid), + }; + }); + + return results.filter((r): r is NonNullable => r !== null); + }, +}); diff --git a/clis/pubmed/related.ts b/clis/pubmed/related.ts new file mode 100644 index 00000000..9edf310e --- /dev/null +++ b/clis/pubmed/related.ts @@ -0,0 +1,165 @@ +/** + * PubMed Related Articles Adapter + * + * Find articles related to a specific PubMed article. + * Uses ELink API with "neighbor" command to find similar articles. 
+ * + * API Documentation: + * - ELink: https://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.ELink + */ + +import { cli, Strategy } from '@jackwener/opencli/registry'; +import { CliError } from '@jackwener/opencli/errors'; +import { + eutilsFetch, + extractAuthors, + extractDoi, + buildPubMedUrl, + truncateText, +} from './utils.js'; + +cli({ + site: 'pubmed', + name: 'related', + description: 'Find articles related to a PubMed article', + strategy: Strategy.PUBLIC, + browser: false, + args: [ + { + name: 'pmid', + type: 'string', + required: true, + positional: true, + help: 'PubMed ID (e.g., "37780221")', + }, + { + name: 'limit', + type: 'int', + default: 20, + help: 'Maximum number of results (max 100)', + }, + { + name: 'score', + type: 'boolean', + default: false, + help: 'Show similarity scores', + }, + ], + columns: [ + 'rank', + 'pmid', + 'title', + 'authors', + 'journal', + 'year', + 'score', + 'doi', + 'url', + ], + func: async (_page, args) => { + const pmid = args.pmid.trim(); + const limit = Math.min(Math.max(1, Number(args.limit)), 100); + + // Validate PMID format + if (!/^\d+$/.test(pmid)) { + throw new CliError( + 'INVALID_ARGUMENT', + `Invalid PMID format: ${pmid}`, + 'PMID should be a numeric string (e.g., "37780221")' + ); + } + + // Use ELink neighbor_score to get related articles with similarity scores + const elinkResult = await eutilsFetch('elink', { + id: pmid, + dbfrom: 'pubmed', + cmd: 'neighbor_score', + linkname: 'pubmed_pubmed', + }); + + // Extract linked PMIDs with scores + const linkSet = elinkResult.linksets?.[0]; + if (!linkSet) { + throw new CliError( + 'NOT_FOUND', + `No related articles found for PMID ${pmid}`, + 'The PMID may be incorrect or the article has no related articles' + ); + } + + const linkSetDbs = linkSet.linksetdbs; + if (!linkSetDbs || !Array.isArray(linkSetDbs) || linkSetDbs.length === 0) { + throw new CliError( + 'NOT_FOUND', + `No related articles found for PMID ${pmid}`, + 'This article has no related 
articles in PubMed' + ); + } + + // Get the links from the linksetdb + // ELink neighbor_score returns: links = [{ id: "12345", score: 41208390 }, ...] + const rawLinks: Array<{ id: string; score?: number } | string> = linkSetDbs[0].links; + if (!rawLinks || !Array.isArray(rawLinks) || rawLinks.length === 0) { + throw new CliError( + 'NOT_FOUND', + `No related articles found for PMID ${pmid}`, + 'This article has no related articles in PubMed' + ); + } + + // Normalize to { id, score } objects (ELink may return strings or objects) + const normalizedLinks: Array<{ id: string; score: number }> = rawLinks.map(link => { + if (typeof link === 'string') return { id: link, score: 0 }; + return { id: String(link.id), score: Number(link.score) || 0 }; + }); + + // Filter out the original PMID (ELink sometimes includes it as rank 1) + const filteredLinks = normalizedLinks.filter(link => link.id !== pmid); + + // Apply limit after filtering + const limitedLinks = filteredLinks.slice(0, limit); + + // Get article details using ESummary + const pmids = limitedLinks.map(l => l.id).join(','); + const esummaryResult = await eutilsFetch('esummary', { + id: pmids, + }); + + const articles = esummaryResult.result; + if (!articles || typeof articles !== 'object') { + throw new CliError( + 'PARSE_ERROR', + 'Failed to parse article data from PubMed', + 'The API response format may have changed' + ); + } + + // Process results + const results = limitedLinks.map(({ id: linkedPmid, score }, index: number) => { + const article = articles[linkedPmid]; + if (!article) { + return null; + } + + const title = article.title || ''; + const authors = extractAuthors(article.authors, 3); + const journal = article.fulljournalname || article.source || ''; + const year = article.pubdate?.split(' ')?.[0] || ''; + const doi = extractDoi(article.articleids); + + return { + rank: index + 1, + pmid: linkedPmid, + title: truncateText(title.replace(/\.$/, ''), 100), + authors, + journal: truncateText(journal, 
50), + year, + score: args.score ? String(score) : '', + doi, + url: buildPubMedUrl(linkedPmid), + }; + }); + + return results.filter((r): r is NonNullable => r !== null); + }, +}); diff --git a/clis/pubmed/search.ts b/clis/pubmed/search.ts new file mode 100644 index 00000000..86f7546b --- /dev/null +++ b/clis/pubmed/search.ts @@ -0,0 +1,207 @@ +/** + * PubMed Search Adapter + * + * Search for articles in PubMed using E-utilities ESearch + ESummary APIs. + * Supports advanced filtering by date, author, journal, article type, etc. + * + * API Documentation: + * - ESearch: https://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.ESearch + * - ESummary: https://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.ESummary + */ + +import { cli, Strategy } from '@jackwener/opencli/registry'; +import { CliError } from '@jackwener/opencli/errors'; +import { + eutilsFetch, + buildSearchQuery, + extractAuthors, + extractDoi, + buildPubMedUrl, + truncateText, + formatArticleType, +} from './utils.js'; + +cli({ + site: 'pubmed', + name: 'search', + description: 'Search PubMed articles with advanced filters', + strategy: Strategy.PUBLIC, + browser: false, + args: [ + { + name: 'query', + type: 'string', + required: true, + positional: true, + help: 'Search query (e.g., "machine learning cancer", "COVID-19 treatment")', + }, + { + name: 'limit', + type: 'int', + default: 20, + help: 'Maximum number of results (max 100)', + }, + { + name: 'author', + type: 'string', + required: false, + help: 'Filter by author name', + }, + { + name: 'journal', + type: 'string', + required: false, + help: 'Filter by journal name', + }, + { + name: 'year-from', + type: 'int', + required: false, + help: 'Filter: publication year from', + }, + { + name: 'year-to', + type: 'int', + required: false, + help: 'Filter: publication year to', + }, + { + name: 'article-type', + type: 'string', + required: false, + help: 'Filter by article type (e.g., "Review", "Clinical Trial", "Meta-Analysis")', + }, + { + name: 
'has-abstract', + type: 'boolean', + default: false, + help: 'Only include articles with abstracts', + }, + { + name: 'free-full-text', + type: 'boolean', + default: false, + help: 'Only include articles with free full text', + }, + { + name: 'humans-only', + type: 'boolean', + default: false, + help: 'Only include human studies', + }, + { + name: 'english-only', + type: 'boolean', + default: false, + help: 'Only include English articles', + }, + { + name: 'sort', + type: 'string', + default: 'relevance', + help: 'Sort by: relevance, date, author, journal', + }, + ], + columns: [ + 'rank', + 'pmid', + 'title', + 'authors', + 'journal', + 'year', + 'article-type', + 'doi', + 'url', + ], + func: async (_page, args) => { + const limit = Math.min(Math.max(1, Number(args.limit)), 100); + + // Build search query with filters + const searchQuery = buildSearchQuery(args.query, { + author: args.author, + journal: args.journal, + yearFrom: args['year-from'], + yearTo: args['year-to'], + articleType: args['article-type'], + hasAbstract: args['has-abstract'], + hasFullText: args['free-full-text'], + humanOnly: args['humans-only'], + englishOnly: args['english-only'], + }); + + // Map sort options to E-utilities sort values + const sortMap: Record = { + relevance: '', + date: 'pub_date', + author: 'Author', + journal: 'JournalName', + }; + const sort = sortMap[args.sort] || ''; + + // Step 1: ESearch - Get PMIDs + const esearchParams: Record = { + term: searchQuery, + retmax: limit, + usehistory: 'y', + }; + + if (sort) { + esearchParams.sort = sort; + } + + const esearchResult = await eutilsFetch('esearch', esearchParams); + + const pmidList = esearchResult.esearchresult?.idlist; + if (!pmidList || !Array.isArray(pmidList) || pmidList.length === 0) { + throw new CliError( + 'NOT_FOUND', + 'No articles found matching your criteria', + 'Try broadening your search terms or removing some filters' + ); + } + + // Step 2: ESummary - Get article details + const pmids = 
pmidList.join(','); + const esummaryResult = await eutilsFetch('esummary', { + id: pmids, + }); + + const articles = esummaryResult.result; + if (!articles || typeof articles !== 'object') { + throw new CliError( + 'PARSE_ERROR', + 'Failed to parse article data from PubMed', + 'The API response format may have changed' + ); + } + + // Process results + const results = pmidList.map((pmid: string, index: number) => { + const article = articles[pmid]; + if (!article) { + return null; + } + + const title = article.title || ''; + const authors = extractAuthors(article.authors, 3); + const journal = article.fulljournalname || article.source || ''; + const year = article.pubdate?.split(' ')?.[0] || ''; + const articleType = formatArticleType(article.pubtype); + const doi = extractDoi(article.articleids); + + return { + rank: index + 1, + pmid, + title: truncateText(title.replace(/\.$/, ''), 100), + authors, + journal: truncateText(journal, 50), + year, + 'article-type': articleType, + doi, + url: buildPubMedUrl(pmid), + }; + }); + + return results.filter((r): r is NonNullable => r !== null); + }, +}); diff --git a/clis/pubmed/utils.ts b/clis/pubmed/utils.ts new file mode 100644 index 00000000..e69b627d --- /dev/null +++ b/clis/pubmed/utils.ts @@ -0,0 +1,326 @@ +/** + * PubMed adapter utilities. + * + * PubMed provides the E-utilities API for programmatic access. 
+ * https://www.ncbi.nlm.nih.gov/books/NBK25501/ + * + * Rate limits: 3 requests/second without API key, 10 requests/second with API key + * + * Configuration via environment variables: + * - NCBI_API_KEY: Your NCBI API key for higher rate limits + * - NCBI_EMAIL: Your email (recommended by NCBI for identification) + */ + +import { CliError } from '@jackwener/opencli/errors'; + +const EUTILS_BASE = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils'; + +/** + * Get API key from environment variable + */ +function getApiKey(): string | undefined { + return process.env.NCBI_API_KEY; +} + +/** + * Get email from environment variable (recommended by NCBI for identification) + */ +function getEmail(): string | undefined { + return process.env.NCBI_EMAIL; +} + +/** + * Get rate limit delay in milliseconds + * With API key: 100ms (10 req/s) + * Without API key: 350ms (3 req/s) + */ +function getRateLimitMs(): number { + return getApiKey() ? 100 : 350; +} + +/** + * Build E-utilities API URL + * Automatically includes API key if configured + */ +export function buildEutilsUrl( + tool: string, + params: Record +): string { + const searchParams = new URLSearchParams(); + searchParams.append('db', 'pubmed'); + + // Allow callers to override retmode (e.g., EFetch needs retmode=xml) + if (!params.retmode) { + searchParams.append('retmode', 'json'); + } + + // Add API key if available + const apiKey = getApiKey(); + if (apiKey) { + searchParams.append('api_key', apiKey); + } + + // Add email if available (recommended by NCBI) + const email = getEmail(); + if (email) { + searchParams.append('email', email); + } + + Object.entries(params).forEach(([key, value]) => { + if (value !== undefined && value !== null && value !== '') { + searchParams.append(key, String(value)); + } + }); + + return `${EUTILS_BASE}/${tool}.fcgi?${searchParams.toString()}`; +} + +/** + * Fetch data from E-utilities API with dynamic rate limiting + * Rate limit adjusts based on API key presence: + * - With API 
key: 100ms delay (10 req/s)
+ * - Without API key: 350ms delay (3 req/s)
+ *
+ * The delay is a fixed wait performed before every request, including the first one.
+ * The `retmode` argument always takes precedence over a `retmode` key inside `params`.
+ *
+ * @param tool - E-utilities tool name (esearch, esummary, efetch, etc.)
+ * @param params - Query parameters
+ * @param retmode - Response format: 'json' (default) or 'xml' (for EFetch)
+ * @returns The parsed JSON body when retmode is 'json', the raw response text when 'xml'
+ * @throws CliError RATE_LIMIT_EXCEEDED on HTTP 429, API_KEY_INVALID on HTTP 403,
+ *   FETCH_ERROR on any other non-OK status
+ */
+export async function eutilsFetch(
+  tool: string,
+  params: Record<string, string | number | boolean | null | undefined>,
+  retmode: 'json' | 'xml' = 'json'
+): Promise<any> {
+  const mergedParams = { ...params, retmode };
+  const url = buildEutilsUrl(tool, mergedParams);
+
+  // Dynamic rate limiting based on API key
+  const rateLimitMs = getRateLimitMs();
+  await new Promise(resolve => setTimeout(resolve, rateLimitMs));
+
+  const resp = await fetch(url);
+
+  if (!resp.ok) {
+    // Handle specific error codes
+    if (resp.status === 429) {
+      throw new CliError(
+        'RATE_LIMIT_EXCEEDED',
+        'PubMed API rate limit exceeded',
+        'You are making requests too quickly. Wait a moment and try again, or configure an API key (NCBI_API_KEY environment variable)'
+      );
+    }
+    if (resp.status === 403) {
+      throw new CliError(
+        'API_KEY_INVALID',
+        'PubMed API key invalid or expired',
+        'Check your NCBI_API_KEY environment variable'
+      );
+    }
+    throw new CliError(
+      'FETCH_ERROR',
+      `PubMed E-utilities API HTTP ${resp.status}`,
+      'Check your query parameters or try again later'
+    );
+  }
+
+  return retmode === 'xml' ?
resp.text() : resp.json();
+}
+
+/**
+ * Build a short, comma-separated author string from a PubMed author list.
+ *
+ * Accepts both the ESummary shape ({ name, ... }) and the EFetch shape
+ * ({ lastname, initials, collectivename, ... }). Entries that yield no name
+ * (null entries or objects with none of those fields) are skipped so the
+ * output never contains empty items such as "Smith J, , Doe A".
+ *
+ * @param authorList - Author entries; undefined, non-array or empty input gives ''
+ * @param maxAuthors - Number of leading entries to consider (default 3)
+ * @returns Names joined by ', ', followed by 'et al.' when the list has more
+ *   than maxAuthors entries
+ */
+export function extractAuthors(authorList: any[] | undefined, maxAuthors: number = 3): string {
+  if (!authorList || !Array.isArray(authorList) || authorList.length === 0) {
+    return '';
+  }
+
+  const authors = authorList
+    .slice(0, maxAuthors)
+    .map(author => {
+      if (!author) {
+        return '';
+      }
+      // ESummary format: { name, authtype, clusterid }
+      if (author.name) {
+        return author.name;
+      }
+      // EFetch format: { lastname, forename, initials, collectivename }
+      if (author.collectivename) {
+        return author.collectivename;
+      }
+      const lastName = author.lastname || '';
+      const initials = author.initials || '';
+      return `${lastName} ${initials}`.trim();
+    })
+    .filter(Boolean);
+
+  if (authorList.length > maxAuthors) {
+    authors.push('et al.');
+  }
+
+  return authors.join(', ');
+}
+
+/**
+ * Extract the DOI from a list of article IDs.
+ *
+ * @param articleIdList - Entries of the form { idtype, value }; null entries are ignored
+ * @returns The value of the first entry whose idtype is 'doi', or '' when there is none
+ */
+export function extractDoi(articleIdList: any[] | undefined): string {
+  if (!articleIdList || !Array.isArray(articleIdList)) {
+    return '';
+  }
+
+  const doiEntry = articleIdList.find((id: any) => id?.idtype === 'doi');
+  return doiEntry?.value || '';
+}
+
+/**
+ * Build the PubMed web URL for a PMID.
+ *
+ * @param pmid - PubMed identifier, inserted into the path as-is
+ */
+export function buildPubMedUrl(pmid: string): string {
+  return `https://pubmed.ncbi.nlm.nih.gov/${pmid}/`;
+}
+
+/**
+ * Truncate text with ellipsis.
+ *
+ * The '...' is appended after the first maxLength characters, so a truncated
+ * result is maxLength + 3 characters long. Falsy text yields ''.
+ */
+export function truncateText(text: string, maxLength: number): string {
+  if (!text || text.length <= maxLength) {
+    return text || '';
+  }
+  return text.slice(0, maxLength) + '...';
+}
+
+/**
+ * Format article type
+ */
+export function formatArticleType(pubTypeList: any[] | undefined): string {
+  if (!pubTypeList || !Array.isArray(pubTypeList) || pubTypeList.length === 0) {
+    return 'Journal Article';
+  }
+
+  // ESummary format: pubtype is a string array e.g. ["Journal Article"]
+  if (typeof pubTypeList[0] === 'string') {
+    return prioritizeArticleType(pubTypeList as string[]);
+  }
+
+  // EFetch format: pubtype is an object array e.g.
[{ ui: "D016428", value: "Journal Article" }]
+  const values = pubTypeList.map((pt: any) => pt.value).filter(Boolean);
+  return prioritizeArticleType(values);
+}
+
+/**
+ * Prioritize article types - return the most specific/meaningful type
+ * "Journal Article" is generic, prefer more specific types like "Review", "Meta-Analysis", etc.
+ *
+ * Matching against the priority list is case-insensitive; the returned string
+ * keeps the spelling it has in pubTypes.
+ *
+ * @param pubTypes - Publication type names of one article
+ * @returns The entry of pubTypes that ranks highest in the priority list; if
+ *   none is listed, the first entry that is not "Journal Article", else the
+ *   first entry. 'Journal Article' when pubTypes is empty or missing.
+ */
+export function prioritizeArticleType(pubTypes: string[]): string {
+  if (!pubTypes || pubTypes.length === 0) {
+    return 'Journal Article';
+  }
+
+  // Define priority: more specific types are preferred over generic "Journal Article"
+  const priorityOrder = [
+    'Systematic Review',
+    'Meta-Analysis',
+    'Review',
+    'Randomized Controlled Trial',
+    'Clinical Trial',
+    'Case Reports',
+    'Comparative Study',
+    'Multicenter Study',
+    'Observational Study',
+    'Editorial',
+    'Comment',
+    'Letter',
+    'News',
+    'Published Erratum',
+    // Narrower type first, like Systematic Review before Review above
+    'Practice Guideline',
+    'Guideline',
+    'Consensus Development Conference',
+    'Congress',
+    'Lecture',
+    'Interview',
+    'Biography',
+    'Portrait',
+    'Historical Article',
+    'Classical Article',
+    'Legal Case',
+    'Legislation',
+    'Government Publication',
+    'Technical Report',
+    'Dataset',
+    'Evaluation Study',
+    'Validation Study',
+    'Research Support, Non-U.S. Gov\'t',
+    'Research Support, U.S. Gov\'t, Non-P.H.S.',
+    'Research Support, U.S.
Gov\'t, P.H.S.',
+    'Research Support, N.I.H., Extramural',
+    'Research Support, N.I.H., Intramural',
+    'Research Support, American Recovery and Reinvestment Act',
+    'Journal Article', // Generic, low priority
+  ];
+
+  // Find the highest priority type present in the list
+  for (const priorityType of priorityOrder) {
+    const match = pubTypes.find(pt =>
+      pt.toLowerCase() === priorityType.toLowerCase()
+    );
+    if (match) {
+      return match;
+    }
+  }
+
+  // If no priority match, return the first non-generic type or the first one
+  const nonGeneric = pubTypes.find(pt =>
+    pt.toLowerCase() !== 'journal article'
+  );
+  return nonGeneric || pubTypes[0];
+}
+
+/**
+ * Build a PubMed search term from a free-text query plus optional filters.
+ *
+ * Each active filter becomes one field-tagged term and all terms are joined
+ * with ' AND '. A blank query is left out, so a filter-only search does not
+ * start with a dangling ' AND '.
+ *
+ * When only one end of the year range is given, the lower bound defaults to
+ * 1900 and the upper bound to the current year.
+ *
+ * @param query - Free-text search terms; may be empty when filters are used
+ * @param filters - Optional field filters (author, journal, year range,
+ *   article type, abstract / free full text / humans / English flags)
+ * @returns The combined search term; '' when the query is blank and no filter is set
+ */
+export function buildSearchQuery(
+  query: string,
+  filters: {
+    author?: string;
+    journal?: string;
+    yearFrom?: number;
+    yearTo?: number;
+    articleType?: string;
+    hasAbstract?: boolean;
+    hasFullText?: boolean;
+    humanOnly?: boolean;
+    englishOnly?: boolean;
+  }
+): string {
+  // A non-blank query is passed through unchanged; a blank one contributes no term
+  const searchTerms: string[] = query && query.trim() ? [query] : [];
+
+  if (filters.author) {
+    searchTerms.push(`${filters.author}[Author]`);
+  }
+
+  if (filters.journal) {
+    searchTerms.push(`${filters.journal}[Journal]`);
+  }
+
+  if (filters.yearFrom || filters.yearTo) {
+    const from = filters.yearFrom || '1900';
+    const to = filters.yearTo || new Date().getFullYear();
+    searchTerms.push(`${from}:${to}[PDAT]`);
+  }
+
+  if (filters.articleType) {
+    searchTerms.push(`${filters.articleType}[PT]`);
+  }
+
+  if (filters.hasAbstract) {
+    searchTerms.push('hasabstract[text]');
+  }
+
+  if (filters.hasFullText) {
+    searchTerms.push('free full text[sb]');
+  }
+
+  if (filters.humanOnly) {
+    searchTerms.push('humans[mesh]');
+  }
+
+  if (filters.englishOnly) {
+    searchTerms.push('english[lang]');
+  }
+
+  return searchTerms.join(' AND ');
+}
diff --git a/docs/.vitepress/config.mts b/docs/.vitepress/config.mts
index c2614752..b3f12f0d 100644
--- a/docs/.vitepress/config.mts
+++ b/docs/.vitepress/config.mts
@@ -109,6 +109,7 @@ export default
defineConfig({ { text: 'Xiaoyuzhou', link: '/adapters/browser/xiaoyuzhou' }, { text: 'Yahoo Finance', link: '/adapters/browser/yahoo-finance' }, { text: 'arXiv', link: '/adapters/browser/arxiv' }, + { text: 'PubMed', link: '/adapters/browser/pubmed' }, { text: 'paperreview.ai', link: '/adapters/browser/paperreview' }, { text: 'Barchart', link: '/adapters/browser/barchart' }, { text: 'Hugging Face', link: '/adapters/browser/hf' }, diff --git a/docs/adapters/browser/pubmed.md b/docs/adapters/browser/pubmed.md new file mode 100644 index 00000000..7688763a --- /dev/null +++ b/docs/adapters/browser/pubmed.md @@ -0,0 +1,59 @@ +# PubMed + +**Mode**: 🌐 Public · **Domain**: `pubmed.ncbi.nlm.nih.gov` + +## Commands + +| Command | Description | +|---------|-------------| +| `opencli pubmed search` | Search PubMed articles with advanced filters | +| `opencli pubmed article` | Get detailed article metadata by PMID | +| `opencli pubmed author` | Search by author name and affiliation | +| `opencli pubmed citations` | Get citation relationships | +| `opencli pubmed related` | Find semantically similar articles | + +## Usage Examples + +```bash +# Search for articles +opencli pubmed search "machine learning cancer" --year-from 2023 --has-abstract + +# Get article details by PMID +opencli pubmed article 37780221 + +# Get full abstract without truncation +opencli pubmed article 37780221 --full-abstract + +# JSON output +opencli pubmed search "COVID-19 treatment" -f json + +# Find related articles with similarity scores +opencli pubmed related 37780221 --score + +# Citation analysis +opencli pubmed citations 37780221 --direction citedby --limit 50 +``` + +## Prerequisites + +- No browser required — uses NCBI E-utilities public API +- Optional: Set `NCBI_API_KEY` environment variable for higher rate limits (10 req/s vs 3 req/s) + +## Rate Limits + +| Without API Key | With API Key | +|-----------------|--------------| +| 3 requests/second | 10 requests/second | + +### Getting an NCBI 
API Key + +1. Create an NCBI account at https://www.ncbi.nlm.nih.gov/account/ +2. Go to https://www.ncbi.nlm.nih.gov/account/settings/ +3. Generate an API key + +### Configuring Your API Key + +```bash +export NCBI_API_KEY=YOUR_API_KEY +export NCBI_EMAIL=your@email.com +``` diff --git a/docs/adapters/index.md b/docs/adapters/index.md index 95473819..f439a779 100644 --- a/docs/adapters/index.md +++ b/docs/adapters/index.md @@ -72,6 +72,7 @@ Run `opencli list` for the live registry. | **[xiaoyuzhou](./browser/xiaoyuzhou)** | `podcast` `podcast-episodes` `episode` | 🌐 Public | | **[yahoo-finance](./browser/yahoo-finance)** | `quote` | 🌐 Public | | **[arxiv](./browser/arxiv)** | `search` `paper` | 🌐 Public | +| **[pubmed](./browser/pubmed)** | `search` `article` `author` `citations` `related` | 🌐 Public | | **[paperreview](./browser/paperreview)** | `submit` `review` `feedback` | 🌐 Public | | **[barchart](./browser/barchart)** | `quote` `options` `greeks` `flow` | 🌐 Public | | **[hf](./browser/hf)** | `top` | 🌐 Public |