Spaces:
Running
Running
#' Build an author -> publication edge list from an OpenAlex title search
#'
#' Searches OpenAlex for works whose title matches `keywords`, published
#' between `pub_start_date` and `pub_end_date` and cited more than 50 times,
#' then emits one edge per (author, work) pair.
#'
#' Edge endpoints are integer node ids. Ids are assigned to the unique author
#' names first and then to the unique publication titles, each in order of
#' first appearance, i.e. the row order of `c(unique(authors),
#' unique(publications))`.
#'
#' @param keywords Title search string, passed to `oa_fetch(title.search = )`.
#' @param pub_start_date Lower publication-date bound, passed to
#'   `oa_fetch(from_publication_date = )`.
#' @param pub_end_date Upper publication-date bound, passed to
#'   `oa_fetch(to_publication_date = )`.
#' @return A data frame with columns `Source` (author node id), `Target`
#'   (publication node id), `Type` (always `"directed"`) and `Weight`
#'   (always `1`). It has zero rows when the search yields no usable
#'   author/work pairs.
authorPubEdges <- function(keywords, pub_start_date, pub_end_date) {
  # Query OpenAlex once. The package attaches and the mailto option are kept
  # because code outside this function may rely on those side effects.
  search_engine <- function(keywords, pub_start_date, pub_end_date) {
    suppressPackageStartupMessages(library(openalexR))
    suppressPackageStartupMessages(library(tidyverse))
    options(openalexR.mailto = "[email protected]")
    oa_fetch(
      entity = "works",
      title.search = keywords,
      cited_by_count = ">50",
      from_publication_date = pub_start_date,
      to_publication_date = pub_end_date,
      options = list(sort = "cited_by_count:desc"),
      verbose = FALSE
    )
  }

  # Flatten the search result into one row per (author, publication) pair.
  pair_authors_with_publications <- function(search_data) {
    if (!is.data.frame(search_data) || nrow(search_data) == 0L) {
      return(data.frame(Authors = character(0), Publications = character(0)))
    }

    missing_cols <- setdiff(c("author", "display_name"), names(search_data))
    if (length(missing_cols) > 0L) {
      stop(
        "Search result is missing column(s): ",
        paste(missing_cols, collapse = ", "),
        call. = FALSE
      )
    }

    # The second column of each per-work author table is used as the author
    # name (NOTE(review): presumably the display name -- confirm against the
    # installed openalexR version). Works whose author entry is not a table
    # (e.g. NA) contribute zero authors instead of shifting later works.
    author_names <- lapply(search_data$author, function(authors) {
      if (is.data.frame(authors) && ncol(authors) >= 2L) {
        authors[[2L]]
      } else {
        NULL
      }
    })

    data.frame(
      Authors = as.character(unlist(author_names, use.names = FALSE)),
      # Repeat each title once per author so both columns stay aligned.
      Publications = rep(
        search_data$display_name,
        times = lengths(author_names)
      )
    )
  }

  search_data <- search_engine(keywords, pub_start_date, pub_end_date)
  authors_to_publications <- pair_authors_with_publications(search_data)

  unique_authors <- unique(authors_to_publications$Authors)
  unique_publications <- unique(authors_to_publications$Publications)

  # Authors and publications are resolved in separate namespaces so that a
  # title identical to an author name still maps to the publication node.
  source_ids <- match(authors_to_publications$Authors, unique_authors)
  target_ids <- length(unique_authors) +
    match(authors_to_publications$Publications, unique_publications)

  n_edges <- nrow(authors_to_publications)
  data.frame(
    Source = source_ids,
    Target = target_ids,
    # rep() rather than scalar recycling so the zero-edge case is valid too.
    Type = rep("directed", n_edges),
    Weight = rep(1.0, n_edges)
  )
}