The toxicodb brick provides data from toxicodb.ca. In this post, we’ll use BioBricks.ai to look up gene and compound interaction data. To start, install BioBricks.ai and then the toxicodb brick:
import subprocess

import biobricks as bb
import pandas as pd
import pyspark
import pyspark.sql
import pyspark.sql.functions as F
import pyspark.sql.types as T

# Install the toxicodb brick through the biobricks command-line tool.
# An argument list is used so the command never passes through a shell.
subprocess.run(["biobricks", "install", "toxicodb"])

# Local Spark session with 4g of driver memory for reading the brick's tables.
spark = (
    pyspark.sql.SparkSession.builder
    .config("spark.driver.memory", "4g")
    .getOrCreate()
)

# Handle to the installed brick's assets; its attributes are read below as
# table name -> path pairs.
toxicodb = bb.assets('toxicodb')
# Count the rows of every table exposed by the toxicodb brick.
# NOTE(review): assumes every attribute of `toxicodb` is a parquet path
# readable by Spark -- confirm against the biobricks assets object.
tbls = [
    {"table": table, "count": spark.read.parquet(path).count()}
    for table, path in vars(toxicodb).items()
]

# Largest tables first. As the last expression of a notebook cell this
# renders the resulting frame.
pd.DataFrame(tbls).sort_values("count", ascending=False)
import plotly.express as px
from scipy.cluster.hierarchy import linkage, leaves_list
from scipy.spatial.distance import pdist

# Assuming 'hmn' is pre-defined with 'compound_name', 'gene_symbol', 'expr_change'

# Calculate high variance genes and filter the dataframe:
# keep the 200 genes whose expr_change varies the most.
highvar_genes = hmn.groupby('gene_symbol')['expr_change'].var().nlargest(200)
hvdf = hmn[hmn['gene_symbol'].isin(highvar_genes.index)]

# Pivot the DataFrame into a compound x gene matrix; missing pairs become 0.
# NOTE(review): DataFrame.pivot raises if a (compound_name, gene_symbol) pair
# occurs more than once -- confirm 'hmn' holds one row per pair.
pivot_df = hvdf.pivot(
    index="compound_name",
    columns="gene_symbol",
    values="expr_change",
).fillna(0)

# Compute clusters (average linkage on Euclidean distances) for rows and columns
row_clusters = linkage(pdist(pivot_df, 'euclidean'), method='average')
col_clusters = linkage(pdist(pivot_df.T, 'euclidean'), method='average')

# Determine the order of rows and columns based on the clusters
row_order = leaves_list(row_clusters)
col_order = leaves_list(col_clusters)

# Reorder the DataFrame according to the clustering
clustered_df = pivot_df.iloc[row_order, col_order]

# Create the heatmap using Plotly Express
lbls = dict(x="", y="", color="Expression Change")
title = "TGGATES Gene Expression Change Over Dose in Human Cells"
fig = px.imshow(
    clustered_df,
    labels=lbls,
    x=clustered_df.columns,
    y=clustered_df.index,
    aspect="auto",
    title=title,
)

# Update the layout for transparent background and white text
fig.update_layout(
    xaxis={'side': 'bottom', 'title_standoff': 10, 'color': 'white'},
    yaxis={'title_standoff': 10, 'color': 'white'},
    title={'text': title, 'x': 0.5, 'xanchor': 'center', 'font': {'color': 'white'}},
    plot_bgcolor='rgba(0,0,0,0)',  # Transparent background
    paper_bgcolor='rgba(0,0,0,0)',  # Transparent surrounding
    margin=dict(l=20, r=20, t=50, b=20),
    font=dict(color='white', size=12),  # Set font color to white
)

# Show the figure
fig.show()
[Interactive Plotly heatmap not shown: unable to display output for mime type(s): application/vnd.plotly.v1+json]