Introduction

This R Markdown is meant to facilitate the reproducibility of the findings in https://docs.google.com/document/d/166X4o_6HegeS0uUHRa4ahKedkaVEMeiRagCm7T9nOVU/edit?usp=sharing. Here we specifically focus on the Allen Brain Atlas neuronal projection and Slide-seq data, as detailed in Figures 5.

First, we load relevant packages.

library(nat)
library(plyr)
library(cocoframer)
library(purrr)
library(rgl)
library(RNifti)
library(tidyverse)
library(ggplot2)
library(ggrepel)
library(RColorBrewer)
library(tidyr)
library(plotly)
library(data.table)
library(dplyr)
library(gplots)
library(stringr)
library(RColorBrewer)
library(paletteer)
'%notin%' = Negate('%in%')

Figure 5

Calculating Whole-brain Projection Scores for each Molecular Cell Type

samples = list.files("Data/Projection/DataFiles/")
samples = grep("Intensity", samples, value = TRUE)
slideseq = readRDS("Data/Vascular/SupportFiles/100um_Celltypes_in_each_Voxel_and_totalsums.rds")
slideseq = slideseq[slideseq[,"total_sums"] != 0,] 
#For every sample in this directory
for(x in samples){
  projection = readRDS(paste0("Data/Projection/DataFiles/",x))
  intensity_type = left_join(slideseq, projection)
  cells = colnames(intensity_type)[1:1937]
  running_list = c()
  for (type in cells){
    ratio = intensity_type[,type]/intensity_type[, "total_sums"]
    int_score = intensity_type[,"energy"]
    products = ratio*int_score
    tot_score = sum(products)/ sum(intensity_type[,type] != 0)
    names(tot_score) = type
    running_list = c(running_list, tot_score)
  }
  pro = sub("Slideseq_registered_projection_", "", x)
  pro = sub("_100um_Intensity.rds", "", pro)
  
  saveRDS(running_list, paste0("Data/Projection/Results/ProjectionActivationScores_", pro, "_WholeBrain_100um_Slideseq.rds"))
}
#Take all of the projection scores calculated across cell types for the whole brain and summarize them into a single dataframe
#List files from the directory
wholeBrain = list.files("Data/Projection/Results/")
wholeBrain = grep("WholeBrain", wholeBrain, value = TRUE)
for(file in 1:length(wholeBrain)){
  scores = readRDS(paste0("Data/Projection/Results/", wholeBrain[file])) %>% data.frame()
  colnames(scores)  =  str_split(wholeBrain[file], "_", n = Inf, simplify = FALSE)[[1]][2]
  scores$CellTypes = rownames(scores)
  if(file == 1){
    runningDF = scores
  } else {
    runningDF = left_join(runningDF, scores)
  }
}
saveRDS(runningDF, "Data/Projection/Results/Whole_Brain_Projection_Matrix.rds")

To create a heatmap of the projection scores, we create a matrix of projections by cell types

#Read in projection matrix
wbMatrix = readRDS("Data/Projection/Results/Whole_Brain_Projection_Matrix.rds") #1937 by 499
rownames(wbMatrix) = wbMatrix$CellTypes
wbMatrix = t(wbMatrix)
wbMatrix = wbMatrix[-2,]
projections = rownames(wbMatrix)
wbMatrix <-  data.frame(apply(wbMatrix, 2, function(x) as.numeric(as.character(x))))  #Here, we convert character values into numeric values
wbMatrix = as.matrix(wbMatrix)
rownames(wbMatrix) = projections
#Eliminate cell types that are completely zero
wbMatrix <- wbMatrix[,colSums(is.na(wbMatrix))<nrow(wbMatrix)] #498 by 1891
#Read in our cell type region assignment dataframe. This has been filtered to remove cell types that occupy less than 10 voxels, and filtered to contain only neuronal cell types
collective = readRDS("Data/Projection/SupportFiles/CellType_RegionAssignment.rds")
# Keep only neuronal cell types that occupy a relevant number of voxels
wbMatrix = wbMatrix[,collective$CellType]
#Get Colors
cols = paletteer_d("ggsci::default_ucscgb")
regions = readRDS("Data/Projection/SupportFiles/ProjectionIds_Regions_Colors.rds")
colConvert = data.frame(unique(regions$BroadClass), c(cols[1:7],  cols[9], cols[14:15], cols[17], cols[20]))
colnames(colConvert) = c("BroadClass", "Cols")
regions = left_join(regions, colConvert)
# Sort the matrix by injection region
regions = regions[order(regions$BroadClass),]
wbMatrix = wbMatrix[match(regions$id, rownames(wbMatrix)),]
regionalColors = regions$Cols[match(regions$id, rownames(wbMatrix))]
# Assign colors to the columns (i.e. cell types) that reflect the spatial origin of a specific cell type
colnames(collective) = c("CellType", "BroadClass")
celltypeConvert = left_join(collective, colConvert)
celltypeConvert = celltypeConvert[order(celltypeConvert$BroadClass),]
wbMatrix = wbMatrix[, match(celltypeConvert$CellType, colnames(wbMatrix))]
centroidCols = celltypeConvert$Cols[match(celltypeConvert$CellType, colnames(wbMatrix))]
wbMatrix[wbMatrix >= 0.02] <- 0.02
#To capture the heatmap with the same dimensions observed in the publication, uncomment the following line and a dev.off() line as well
#png(width = 1000, height = 498, "/nfs/turbo/umms-welchjd/BRAIN_initiative/Final_integration_workflow/Markdowns/Data/Projection/Results/Heatmap_0.02_Thresh_ColumnsClusteredbyHeatmap_addDendrogram.png")
heatmap.2(wbMatrix,scale="none",  dendrogram="column", trace="none", col = viridis::plasma(n = 20, direction = -1),margin=c(5,10), lwid=c(1.5,2.0), Rowv=FALSE,labRow = FALSE, labCol = FALSE, RowSideColors =  regionalColors, ColSideColors = centroidCols)

Next, we narrow our focus to a single region: the Thalamus.

#Calculate projection scores for only the Thalamus
meta = readRDS("Data/cFos/SupportFiles/Slideseq_beads_CCF_Registered_TopStruct.rds")
meta = filter(meta, meta$TopStruct == "TH")
slideseq = readRDS("Data/Vascular/SupportFiles/100um_Celltypes_in_each_Voxel_and_totalsums.rds")
slideseq = slideseq[slideseq[,"total_sums"] != 0,] 
beadVoxel = readRDS("Data/Projection/SupportFiles/SlideseqBeads_Voxel_Assignments_and_Coords.rds")
beadVoxel = filter(beadVoxel, beadVoxel$BeadBarcode %in% meta$BeadBarcode)
slideseq = filter(slideseq, slideseq$Voxel_ID %in% beadVoxel$Voxel_ID)
samples = list.files("Data/Projection/DataFiles/")
samples = grep("Intensity", samples, value = TRUE)
#For every sample in this directory
for(x in samples){
  projection = readRDS(paste0("Data/Projection/DataFiles/",x))
  intensity_type = left_join(slideseq, projection)
  cells = colnames(intensity_type)[1:1937]
  running_list = c()
  for (type in cells){
    ratio = intensity_type[,type]/intensity_type[, "total_sums"]
    int_score = intensity_type[,"energy"]
    products = ratio*int_score
    tot_score = sum(products)/ sum(intensity_type[,type] != 0)
    names(tot_score) = type
    running_list = c(running_list, tot_score)
  }
  pro = sub("Slideseq_registered_projection_", "", x)
  pro = sub("_100um_Intensity.rds", "", pro)
  
  saveRDS(running_list, paste0("Data/Projection/Results/ProjectionActivationScores_", pro, "_Thalamus_100um_Slideseq.rds"))
}
#Take all of the projection scores calculated across cell types for the thalamus and summarize them into a single data frame
#List files from the directory
thFiles = list.files("Data/Projection/Results/")
thFiles = grep("Thalamus", thFiles, value = TRUE)
for(file in 1:length(thFiles)){
  scores = readRDS(paste0("Data/Projection/Results/", thFiles[file])) %>% data.frame()
  colnames(scores)  =  str_split(thFiles[file], "_", n = Inf, simplify = FALSE)[[1]][2]
  scores$CellTypes = rownames(scores)
  if(file == 1){
    runningDF = scores
  } else {
    runningDF = left_join(runningDF, scores)  
  }
  print(str_split(thFiles[file], "_", n = Inf, simplify = FALSE)[[1]][2])
}
saveRDS(runningDF, "Data/Projection/Results/Thalamus_Projection_Matrix.rds")

We also look at just the hypothalamus

#Calculate projection scores for only the Thalamus
meta = readRDS("Data/cFos/SupportFiles/Slideseq_beads_CCF_Registered_TopStruct.rds")
meta = filter(meta, meta$TopStruct == "HY")
slideseq = readRDS("Data/Vascular/SupportFiles/100um_Celltypes_in_each_Voxel_and_totalsums.rds")
slideseq = slideseq[slideseq[,"total_sums"] != 0,] 
beadVoxel = readRDS("Data/Projection/SupportFiles/SlideseqBeads_Voxel_Assignments_and_Coords.rds")
beadVoxel = filter(beadVoxel, beadVoxel$BeadBarcode %in% meta$BeadBarcode)
slideseq = filter(slideseq, slideseq$Voxel_ID %in% beadVoxel$Voxel_ID)
samples = list.files("Data/Projection/DataFiles/")
samples = grep("Intensity", samples, value = TRUE)
#For every sample in this directory
for(x in samples){
  projection = readRDS(paste0("Data/Projection/DataFiles/",x))
  intensity_type = left_join(slideseq, projection)
  cells = colnames(intensity_type)[1:1937]
  running_list = c()
  for (type in cells){
    ratio = intensity_type[,type]/intensity_type[, "total_sums"]
    int_score = intensity_type[,"energy"]
    products = ratio*int_score
    tot_score = sum(products)/ sum(intensity_type[,type] != 0)
    names(tot_score) = type
    running_list = c(running_list, tot_score)
  }
  pro = sub("Slideseq_registered_projection_", "", x)
  pro = sub("_100um_Intensity.rds", "", pro)
  
  saveRDS(running_list, paste0("Data/Projection/Results/ProjectionActivationScores_", pro, "_Hypothalamus_100um_Slideseq.rds"))
}
#Take all of the projection scores calculated across cell types for the hypothalamus and summarize them into a single data frame
#List files from the directory
hyFiles = list.files("/nfs/turbo/umms-welchjd/BRAIN_initiative/Final_integration_workflow/Markdowns/Data/Projection/Results/")
hyFiles = grep("Hypothalamus", hyFiles, value = TRUE)
for(file in 1:length(hyFiles)){
  scores = readRDS(paste0("/nfs/turbo/umms-welchjd/BRAIN_initiative/Final_integration_workflow/Markdowns/Data/Projection/Results/", hyFiles[file])) %>% data.frame()
  colnames(scores)  =  str_split(hyFiles[file], "_", n = Inf, simplify = FALSE)[[1]][2]
  scores$CellTypes = rownames(scores)
  if(file == 1){
    runningDF = scores
  } else {
    runningDF = left_join(runningDF, scores)  
  }
  print(str_split(hyFiles[file], "_", n = Inf, simplify = FALSE)[[1]][2])
}
saveRDS(runningDF, "/nfs/turbo/umms-welchjd/BRAIN_initiative/Final_integration_workflow/Markdowns/Data/Projection/Results/Hypothalamus_Projection_Matrix.rds")

We hand drew Figure 5B using Adobe Illustrator. However, the information to recreate this plot includes: the average projection scores for the four highest scoring molecular cell types for each isocortical projection region, the number of thalamic beads assigned to each of these cell types, and the anatomical region of the thalamus where the majority of the beads for each cell type are found.

#Read in the injection sites of each projection
decodeRegions = readRDS("Data/Projection/SupportFiles/Region_Assignments_ABA_Projection_IDS.rds")
#Now we need the scores of each projection
allThalamus = readRDS("Data/Projection/Results/Thalamus_Projection_Matrix.rds")
rownames(allThalamus) = allThalamus$CellTypes
#subset for projections that have an injection site in the isocortex
decodeRegions = filter(decodeRegions, decodeRegions$CorticalStatus == "Isocortex")
allThalamus = allThalamus[,subset(colnames(allThalamus), colnames(allThalamus) %in% decodeRegions$ProjectionID)]
#Remove cell types that didn't have a score for any of these projections
allThalamus = allThalamus %>% filter(!if_all(colnames(allThalamus)[1]:colnames(allThalamus)[126], is.na))
cells = rownames(allThalamus) %>% data.frame() %>% `colnames<-`("Cell Types") %>% separate("Cell Types", into = c("Broad", "Narrow"), sep = "_", remove = FALSE) 
cells = filter(cells, cells$Broad %in%  c("Ex","Inh","CholInh","SerEx", "Chol", "DopEx","ExInh", "CholEx" , "Ng", "NG"))
allThalamus = t(allThalamus[cells$`Cell Types`,])
#Now we need to calculate the average projection score for each molecular cell type by region, so we should end up with a matrix 19 by 576. 
decodeRegions = decodeRegions[match(rownames(allThalamus),decodeRegions$ProjectionID),]
options(scipen=999)
#We start by summing all scores for the neuronal projections that share an injection region for each molecular cell type 
rowsumThalamus = rowsum(allThalamus, decodeRegions$BroadRegion) %>% data.frame() %>% mutate(Region = rownames(.)) %>% pivot_longer(cols = "Ex_Defb1_Arhgap36": "Ex_Gabra6_Optc", names_to = "CellType", values_to = "SumScore" )
#Get counts for each region
numRegion = decodeRegions %>% group_by(BroadRegion) %>% tally() %>% data.frame() %>% `colnames<-`(c("Region", "n" ))
#We then divide the sum score for each molecular cell type for each injection region by the number of injections used to calculate the sum.
rowsumThalamus = left_join(rowsumThalamus, numRegion) %>% mutate(Average = SumScore/n)
# Now that we have the average projection score for each molecular cell type in each region, we need to count the number of beads each cell type
#has been confidentally assigned to within the thalamus. This uses the Slide-seq matrix, which can be quite memory intensive to load. Therefore,
# we provide the code we ran to generate the saved results, and recommend using this saved objects to continue your analysis. 
# slideseq = readRDS("/nfs/turbo/umms-welchjd/BRAIN_initiative/Final_integration_workflow/Markdowns/Data/Projection/SupportFiles/Slideseq_metadata.rds")
# slideseq = select(slideseq, -c("nFeature_Spatial", "nCount_Spatial")) %>% filter(TopStruct == "TH")
# rctd = readRDS("/nfs/turbo/umms-welchjd/BRAIN_initiative/Final_integration_workflow/Markdowns/Data/Projection/SupportFiles/Slideseq_beads_celltypes.rds")
# rctd = rctd[subset(rownames(rctd), rownames(rctd) %in% slideseq$BeadBarcode),] #176913 by 1937
# voxelSums = colSums(rctd) %>% data.frame() %>% `colnames<-`("NumBeads") %>% mutate(Celltypes = rownames(.)) %>% filter(NumBeads >= 30)
# saveRDS(voxelSums, "/nfs/turbo/umms-welchjd/BRAIN_initiative/Final_integration_workflow/Markdowns/Data/Projection/SupportFiles/Thalamus_celltypes_greaterthan30beads.rds")
beadSums = readRDS("Data/Projection/SupportFiles/Thalamus_celltypes_greaterthan30beads.rds")
#Limit to cell types in 30 or more voxels
rowsumThalamus = filter(rowsumThalamus, rowsumThalamus$CellType %in% beadSums$Celltypes)
colnames(beadSums)[2] = "CellType"
rowsumThalamus = left_join(rowsumThalamus, beadSums)
#Limit to cell types where the signal is greater than 0.03
rowsumThalamus = filter(rowsumThalamus, rowsumThalamus$Average > 0.03)
highTypes = rowsumThalamus %>% arrange(desc(Average)) %>% group_by((Region)) %>% slice(1:4)
# Add a region assignment for each molecular cell type
regAssigned = readRDS("Data/Projection/SupportFiles/CellType_Thalamus_Region_Assignments.rds")
highTypes = left_join(highTypes, regAssigned)
#Scale the lines to make them visible
highTypes = mutate(highTypes, AverageRescaled = Average*20)

Next, we want to examine the difference in projection scores for a single molecular cell type for projections with injection sites in the same region, but different subregions.

# First, we read in a list of projections that have injection site locations within the isocortex, and have a significant presence (>10 voxel) presence in the thalamus
decodeRegions = readRDS("Data/Projection/SupportFiles/ProjectionInjectionsIDs_TH_withABAcoords.rds")
decodeRegions = filter(decodeRegions, decodeRegions$BroadRegion == "SSp")
#Read in the thalamus projection scores
#The projection score of each projection for the cell type "Ex_Lypd6b_Dipk1c_2"
allThalamus = readRDS("Data/Projection/Results/Thalamus_Projection_Matrix.rds")
allThalamus = allThalamus[allThalamus[,"CellTypes"] == "Ex_Lypd6b_Dipk1c_2",subset(colnames(allThalamus), colnames(allThalamus) %in% decodeRegions$ProjectionID)]
allThalamus = t(allThalamus)
allThalamus = data.frame(allThalamus)
allThalamus$ProjectionID = as.numeric(rownames(allThalamus))
#Add coordinates
allThalamus = left_join(decodeRegions, allThalamus)
allThalamus = allThalamus[order(allThalamus$Abbreviation),]
colnames(allThalamus)[8] ="Ex_Lypd6b_Dipk1c_2"

#Get the right colors
cols = paletteer_d("ggsci::default_ucscgb")
colorPalette = c(cols[2], cols[6], cols[1], cols[7], cols[4], cols[3], cols[17])
#Load the ABA mesh
structures <- c("root", "SSp")
mesh_list <- map(structures, ccf_2017_mesh)
names(mesh_list) <- structures

fig = plot_ly()
fig = fig %>% add_markers( data= allThalamus, x = ~X, y = ~Y, z = ~Z, color = ~Abbreviation, size = ~Ex_Lypd6b_Dipk1c_2, colors = colorPalette,
                           marker = list(symbol = 'circle', sizemode = 'diameter'), sizes = c(5, 85)) 
fig = fig %>% add_trace(data = mesh_list, type = 'mesh3d', x=mesh_list$root$vb[1,], 
                        y=mesh_list$root$vb[2,],
                        z=mesh_list$root$vb[3,], 
                        i=mesh_list$root$it[1,]-1, 
                        j=mesh_list$root$it[2,]-1, 
                        k =mesh_list$root$it[3,]-1, 
                        facecolor = rep("gray",length(mesh_list$root$vb[2,])*2 ),
                        opacity = 0.1) 
fig = fig %>% add_trace(data = mesh_list, type = 'mesh3d', x=mesh_list$SSp$vb[1,], 
                        y=mesh_list$SSp$vb[2,],
                        z=mesh_list$SSp$vb[3,], 
                        i=mesh_list$SSp$it[1,]-1, 
                        j=mesh_list$SSp$it[2,]-1, 
                        k =mesh_list$SSp$it[3,]-1, 
                        facecolor = rep("forestgreen",length(mesh_list$SSp$vb[2,])*2 ),
                        opacity = 0.05) 

fig <- fig %>%  layout(title = "Projection Scores for Each SubRegion of the SSp",
                           scene = list(xaxis = list(title = '', showgrid = F , zerolinecolor = '#ffff', showticklabels=FALSE),
                                        yaxis = list(title = '', showgrid = F,zerolinecolor = '#ffff', showticklabels=FALSE),
                                        zaxis = list(title = '', showgrid = F, zerolinecolor = '#ffff', showticklabels=FALSE)))
fig