{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "ed56c80a-2066-42ec-a199-3576aae50968",
   "metadata": {},
   "source": [
    "# SDK_Document_Lens_SC"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "95aea447-4b8c-43d0-a00b-84cc3cbe7a8e",
   "metadata": {},
   "source": [
    "## Installation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f1db9dd1-0d5e-48d7-b4ab-0934a2bad2cb",
   "metadata": {},
   "outputs": [],
   "source": [
    "!python3 -m pip install bioturing_connector"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "deb8a6d8-dbbe-49bd-854d-0172eb84928a",
   "metadata": {},
   "source": [
    "## 1. Connect to host server"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "df88f7b3-de4a-4e6a-8730-c6275c260ce4",
   "metadata": {},
   "source": [
    "<div class=\"alert alert-block alert-info\"> <b>Must run this step before any further analyses</b> </div>"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "09b17bd8-c0d1-4f9c-b7d2-620774f2c6e4",
   "metadata": {},
   "source": [
    "User's token is generated from host website"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "25c56829-e579-4354-8acf-3ce658c36212",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from bioturing_connector.typing import Species\n",
    "from bioturing_connector.typing import ChunkSize\n",
    "from bioturing_connector.typing import StudyType\n",
    "from bioturing_connector.typing import StudyUnit\n",
    "from bioturing_connector.typing import InputMatrixType\n",
    "from bioturing_connector.lens_sc_connector import LensSCConnector\n",
    "\n",
    "connector = LensSCConnector(\n",
    "  host=\"https://talk2data.bioturing.com/lens_sc/\",\n",
    "  token=\"cb8a76d79a264a55af79a2991f982ef7\",\n",
    "  ssl=True\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "cb203d7a-3fee-4bce-9bb1-1df74c3b29c8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Connecting to host at https://talk2data.bioturing.com/lens_sc/api/v1/test_connection\n",
      "Connection successful\n"
     ]
    }
   ],
   "source": [
    "connector.test_connection()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8789beab-c2bc-4c1c-a474-63f75f0ebf11",
   "metadata": {},
   "source": [
    "## 2. List groups, studies and s3"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "13ee4cb3-ee8d-4691-9078-81c5b6e5af27",
   "metadata": {},
   "source": [
    "### 2.1. Get info of available groups"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "dcdc00de-3e39-43a9-89d2-3d82a336129e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'id': 'all_members',\n",
       "  'name': 'All members',\n",
       "  'visible': 1,\n",
       "  'creator': 'admin'},\n",
       " {'id': 'bioturing_public_studies',\n",
       "  'name': 'BioTuring Public Studies',\n",
       "  'visible': 1,\n",
       "  'creator': 'admin'},\n",
       " {'id': 'personal',\n",
       "  'name': 'Personal workspace',\n",
       "  'visible': 1,\n",
       "  'creator': 'admin'}]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "user_groups = connector.get_user_groups()\n",
    "user_groups"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cdd6ea7e-918f-4900-a575-e7de250d1fa3",
   "metadata": {},
   "source": [
    "### 2.2. List all available studies in a group"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "fe08fe63-b90b-4e8f-a06b-5b27b3e55fef",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'uuid': '5c470f3b799d474e91d0ca65aec3cf56',\n",
       "  'study_title': 'TBD',\n",
       "  'study_hash_id': 'SMALL_COSMX',\n",
       "  'created_by': 'dev@bioturing.com'},\n",
       " {'uuid': '9b1d980887944d0199719ef8d3ddb17a',\n",
       "  'study_title': 'TBD',\n",
       "  'study_hash_id': 'XENIUM_BREAST_SMALL',\n",
       "  'created_by': 'dev@bioturing.com'}]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Using group_id from step 2.1\n",
    "\n",
    "study_list = connector.get_all_studies_info_in_group(\n",
    "  group_id='personal',\n",
    "  species=Species.HUMAN.value,\n",
    ")\n",
    "study_list"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6f51af7e",
   "metadata": {},
   "source": [
    "### 2.3. List all s3 bucket of current user"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "47a1cdc1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'id': '505e49d2abee405f8a7b4ce2628d5270',\n",
       "  'bucket': 'bioturingdebug',\n",
       "  'prefix': ''},\n",
       " {'id': 'd938706094354d7eb4726d6c9b07de9c',\n",
       "  'bucket': 'talk2data',\n",
       "  'prefix': ''}]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "connector.get_user_s3()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9f8eece6",
   "metadata": {},
   "source": [
    "### 2.4. List all shared s3 of a group"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fc64d1d2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "connector.get_shared_s3_of_group('all_members')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b34a16e5-feff-457e-8a67-d9619a2c668a",
   "metadata": {
    "tags": []
   },
   "source": [
    "## 3. Submit study"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f9a4d728-266c-493f-b598-84437b126f21",
   "metadata": {},
   "source": [
    "<div class=\"alert alert-block alert-success\">NOTE: Get <b>group_id</b> from step <b>\"2.1. Get info of available groups\"</b></div>"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8ec8dc96-658d-4f3f-99b0-ce15ee8dbf5e",
   "metadata": {},
   "source": [
    "### 3.1. Submit single cell - spatial dataset (COSMX, VISIUM, VIZGEN, ...)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "adbb4166-03c2-4423-b1ff-77cae7e7bf04",
   "metadata": {
    "tags": []
   },
   "source": [
    "#### 3.1.1. Option 1: Submit study from s3"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2d14334c-787f-410a-8eaa-d9960b7fb05c",
   "metadata": {},
   "source": [
    "```\n",
    "Parameters:\n",
    "----\n",
    "group_id: str\n",
    "    ID of the group to submit the data to.\n",
    "s3_id : str\n",
    "    ID of s3 bucket. Default: None\n",
    "    If s3_id is not provided, we will use the first s3 bucket configured on the platform.\n",
    "batch_info: List[dict]\n",
    "    File path and batch name information, the path DOES NOT include the bucket path configured on platform!\n",
    "    Example:\n",
    "      [{\n",
    "        'name': 'study_1',\n",
    "        'folder': 's3_path/study_folder',\n",
    "      }, {...}]\n",
    "study_id: str\n",
    "    If no value is provided, default id will be a random uuidv4 string\n",
    "name: str\n",
    "    Name of the study.\n",
    "authors: List[str]\n",
    "    Authors of the study.\n",
    "abstract: str\n",
    "    Abstract of the study.\n",
    "species: str\n",
    "    Species of the study.\n",
    "    Support:  Species.HUMAN.value\n",
    "              Species.MOUSE.value\n",
    "              Species.PRIMATE.value\n",
    "              Species.OTHERS.value\n",
    "study_type: int\n",
    "    Format of the study\n",
    "    Support:  StudyType.VIZGEN.value\n",
    "              StudyType.COSMX.value\n",
    "              StudyType.XENIUM.value\n",
    "min_counts: int. Default: 0\n",
    "    Minimum number of counts required for a cell to pass filtering.\n",
    "min_genes: int. Default: 0\n",
    "    Minimum number of genes expressed required for a cell to pass filtering.\n",
    "max_counts: int. Default: inf\n",
    "    Maximum number of counts required for a cell to pass filtering.\n",
    "max_genes: int. Default: inf\n",
    "    Maximum number of genes expressed required for a cell to pass filtering.\n",
    "neg_controls_percentage: int. Default: 100\n",
    "    Maximum number of control/negative genes percentage required for a cell to pass filtering.\n",
    "    Ranging from 0 to 100\n",
    "  \n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "7d47fc3e-295a-4757-a6e4-79e8fa213381",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2023-09-26 08:29] Waiting in queue\n",
      "[2023-09-26 08:29] Downloading from s3: demo_data/small_cosmx/tx_file.csv\n",
      "[2023-09-26 08:29] Downloading from s3: demo_data/small_cosmx/R5779_TMA2-S6_fov_positions_file.csv\n",
      "[2023-09-26 08:29] Downloading from s3: demo_data/small_cosmx/CellLabels\n",
      "[2023-09-26 08:29] [List folder demo_data/small_cosmx/CellLabels] Files: demo_data/small_cosmx/CellLabels/CellLabels_F001.tif | demo_data/small_cosmx/CellLabels/CellLabels_F002.tif | demo_data/small_cosmx/CellLabels/CellLabels_F003.tif | demo_data/small_cosmx/CellLabels/CellLabels_F004.tif | demo_data/small_cosmx/CellLabels/CellLabels_F005.tif ; Folders: \n",
      "[2023-09-26 08:30] Downloading from s3: demo_data/small_cosmx/RawMorphologyImages\n",
      "[2023-09-26 08:30] [List folder demo_data/small_cosmx/RawMorphologyImages] Files: demo_data/small_cosmx/RawMorphologyImages/20230505_010716_S2_C902_P99_N99_F001.TIF | demo_data/small_cosmx/RawMorphologyImages/20230505_010716_S2_C902_P99_N99_F002.TIF | demo_data/small_cosmx/RawMorphologyImages/20230505_010716_S2_C902_P99_N99_F003.TIF | demo_data/small_cosmx/RawMorphologyImages/20230505_010716_S2_C902_P99_N99_F004.TIF | demo_data/small_cosmx/RawMorphologyImages/20230505_010716_S2_C902_P99_N99_F005.TIF ; Folders: \n",
      "[2023-09-26 08:30] All files downloaded\n",
      "[2023-09-26 08:30] Reading batch: small_cosmx\n",
      "[2023-09-26 08:30] [small_cosmx] Preprocess data\n",
      "[2023-09-26 08:30] [small_cosmx] Indexing cell boundaries\n",
      "[2023-09-26 08:31] Finish: create_cell_boundaries_and_centers 54.29751372337341\n",
      "[2023-09-26 08:31] [small_cosmx] Indexing sample images\n",
      "[2023-09-26 08:35] Finish: indexing sample images 237.9231081008911\n",
      "[2023-09-26 08:35] [small_cosmx] Indexing transcripts\n",
      "[2023-09-26 08:40] Finish: create_cell_boundaries_and_centers 325.7730453014374\n",
      "[2023-09-26 08:40] [small_cosmx] Indexing matrix\n",
      "[2023-09-26 08:40] Finish batch: small_cosmx\n",
      "[2023-09-26 08:40] Preprocessing expression matrix: 12658 cells x 63702 genes\n",
      "[2023-09-26 08:40] Filtered: 11814 cells remain\n",
      "[2023-09-26 08:40] Waiting in queue (matrix processing) \n",
      "[2023-09-26 08:40] Normalizing expression matrix (matrix processing) \n",
      "[2023-09-26 08:40] Running PCA (matrix processing) \n",
      "[2023-09-26 08:40] Running venice binarizer (matrix processing) \n",
      "[2023-09-26 08:41] Study was successfully submitted\n",
      "[2023-09-26 08:41] DONE!!!\n",
      "Study submitted successfully!\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "batch_info = [{\n",
    "    'name': 'dataset1',\n",
    "    'folder': 's3_path/data_fol_1',\n",
    "  }, {\n",
    "    'name': 'dataset2',\n",
    "    'folder': 's3_path/data_fol_2',\n",
    "}]\n",
    "\n",
    "# --------\n",
    "\n",
    "## Demo submisison\n",
    "## The path DOES NOT include the bucket path configured on platform\n",
    "batch_info = [{\n",
    "    'name': 'small_cosmx',\n",
    "    'folder': 'demo_data/small_cosmx',\n",
    "  }]\n",
    "\n",
    "connector.submit_study_from_s3_lens_sc(\n",
    "  group_id='personal',\n",
    "  batch_info=batch_info,\n",
    "  study_id='COSMX_SMALL_DATASET',\n",
    "  name='This is my first study',\n",
    "  authors=['Huy Nguyen', 'Thao Truong'],\n",
    "  species=Species.HUMAN.value,\n",
    "  study_type=StudyType.COSMX.value,\n",
    "  min_genes=15,\n",
    "  neg_controls_percentage=5,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "77a5cca2-708d-4e12-84a1-fb8f5e2da94e",
   "metadata": {
    "tags": []
   },
   "source": [
    "#### 3.1.2. Option 2: Submit study from local machine / server"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3dc20cec-48a3-45bd-910e-1b87c90f20b1",
   "metadata": {},
   "source": [
    "```\n",
    "Parameters:\n",
    "------\n",
    "group_id: str\n",
    "    ID of the group to submit the data to.\n",
    "batch_info: List[dict]\n",
    "    File path and batch name information\n",
    "    Example:\n",
    "      [{\n",
    "        'name': 'dataset_1',\n",
    "        'folder': 'server_path/dataset_folder_1',\n",
    "      }, {...}]\n",
    "study_id: str\n",
    "    If no value is provided, default id will be a random uuidv4 string\n",
    "name: str\n",
    "    Name of the study.\n",
    "authors: List[str]\n",
    "    Authors of the study.\n",
    "abstract: str\n",
    "    Abstract of the study.\n",
    "species: str\n",
    "    Species of the study.\n",
    "    Support:  Species.HUMAN.value\n",
    "              Species.MOUSE.value\n",
    "              Species.PRIMATE.value\n",
    "              Species.OTHERS.value\n",
    "study_type: int\n",
    "    Format of the study\n",
    "    Support:  StudyType.VIZGEN.value\n",
    "              StudyType.COSMX.value\n",
    "              StudyType.XENIUM.value\n",
    "min_counts: int. Default: 0\n",
    "    Minimum number of counts required for a cell to pass filtering.\n",
    "min_genes: int. Default: 0\n",
    "    Minimum number of genes expressed required for a cell to pass filtering.\n",
    "max_counts: int. Default: inf\n",
    "    Maximum number of counts required for a cell to pass filtering.\n",
    "max_genes: int. Default: inf\n",
    "    Maximum number of genes expressed required for a cell to pass filtering.\n",
    "neg_controls_percentage: int. Default: 100\n",
    "    Maximum number of control/negative genes percentage required for a cell to pass filtering.\n",
    "   Ranging from 0 to 100\n",
    "chunk_size: int\n",
    "    size of each separated chunk for uploading. Default: ChunkSize.CHUNK_100_MB.value\n",
    "    Support:\n",
    "          ChunkSize.CHUNK_5_MB.value\n",
    "          ChunkSize.CHUNK_100_MB.value\n",
    "          ChunkSize.CHUNK_500_MB.value\n",
    "          ChunkSize.CHUNK_1_GB.value\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "c6168f9e-d417-40c5-99c3-fce09036c607",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Zipping neccesary files of batch [batch1]. \n",
      "Location: /mnt/gvol8080/data/SonVo/sonvo_ssd/sc_spatial/cosmx/small_cosmx/batch1.zip\n",
      "  adding: tx_file.csv (deflated 74%)\n",
      "  adding: R5779_TMA2-S6_fov_positions_file.csv (deflated 46%)\n",
      "  adding: CellLabels/ (stored 0%)\n",
      "  adding: CellLabels/CellLabels_F001.tif (deflated 31%)\n",
      "  adding: CellLabels/CellLabels_F002.tif (deflated 23%)\n",
      "  adding: CellLabels/CellLabels_F003.tif (deflated 23%)\n",
      "  adding: CellLabels/CellLabels_F004.tif (deflated 23%)\n",
      "  adding: CellLabels/CellLabels_F005.tif (deflated 22%)\n",
      "  adding: RawMorphologyImages/ (stored 0%)\n",
      "  adding: RawMorphologyImages/20230505_010716_S2_C902_P99_N99_F001.TIF (deflated 7%)\n",
      "  adding: RawMorphologyImages/20230505_010716_S2_C902_P99_N99_F002.TIF (deflated 6%)\n",
      "  adding: RawMorphologyImages/20230505_010716_S2_C902_P99_N99_F003.TIF (deflated 9%)\n",
      "  adding: RawMorphologyImages/20230505_010716_S2_C902_P99_N99_F004.TIF (deflated 6%)\n",
      "  adding: RawMorphologyImages/20230505_010716_S2_C902_P99_N99_F005.TIF (deflated 5%)\n",
      "/data/dev/SonVo/btr_connector_notebook\n",
      "Uploading all files to server...\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "batch1.zip - chunk_0: 100MMB [00:00, 122MMB/s]                                                                                                                                                                                                \n",
      "batch1.zip - chunk_1: 100MMB [00:00, 117MMB/s]                                                                                                                                                                                                \n",
      "batch1.zip - chunk_2: 100MMB [00:00, 113MMB/s]                                                                                                                                                                                                \n",
      "batch1.zip - chunk_3: 100MMB [00:00, 117MMB/s]                                                                                                                                                                                                \n",
      "batch1.zip - chunk_4: 100MMB [00:00, 113MMB/s]                                                                                                                                                                                                \n",
      "batch1.zip - chunk_5: 100MMB [00:00, 119MMB/s]                                                                                                                                                                                                \n",
      "batch1.zip - chunk_6: 100MMB [00:00, 122MMB/s]                                                                                                                                                                                                \n",
      "batch1.zip - chunk_7:  72%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                                 | 71.8M/100M [00:01<00:00, 54.0MMB/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Delete zip files: [/mnt/gvol8080/data/SonVo/sonvo_ssd/sc_spatial/cosmx/small_cosmx/batch1.zip]\n",
      "[2023-09-26 08:49] Waiting in queue\n",
      "[2023-09-26 08:49] Reading batch: batch1\n",
      "[2023-09-26 08:49] [batch1] Preprocess data\n",
      "[2023-09-26 08:49] [batch1] Indexing cell boundaries\n",
      "[2023-09-26 08:50] Finish: create_cell_boundaries_and_centers 54.93383240699768\n",
      "[2023-09-26 08:50] [batch1] Indexing sample images\n",
      "[2023-09-26 08:54] Finish: indexing sample images 246.05648136138916\n",
      "[2023-09-26 08:54] [batch1] Indexing transcripts\n",
      "[2023-09-26 08:59] Finish: create_cell_boundaries_and_centers 327.1916997432709\n",
      "[2023-09-26 08:59] [batch1] Indexing matrix\n",
      "[2023-09-26 08:59] Finish batch: batch1\n",
      "[2023-09-26 08:59] Preprocessing expression matrix: 12658 cells x 63702 genes\n",
      "[2023-09-26 08:59] Filtered: 11814 cells remain\n",
      "[2023-09-26 08:59] Waiting in queue (matrix processing) \n",
      "[2023-09-26 08:59] Normalizing expression matrix (matrix processing) \n",
      "[2023-09-26 08:59] Running PCA (matrix processing) \n",
      "[2023-09-26 08:59] Running venice binarizer (matrix processing) \n",
      "[2023-09-26 08:59] Running t-SNE (matrix processing) \n",
      "[2023-09-26 08:59] Study was successfully submitted\n",
      "[2023-09-26 08:59] DONE!!!\n",
      "Study submitted successfully!\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "batch_info = [{\n",
    "    'name': 'batch1',\n",
    "    'folder': 'local_path/dataset_folder_1',\n",
    "  }, {\n",
    "    'name': 'batch2',\n",
    "    'folder': 'local_path/dataset_folder_2',\n",
    "  }, {...}]\n",
    "\n",
    "#----\n",
    "\n",
    "## Demo submission\n",
    "batch_info = [{\n",
    "    'name': 'batch1',\n",
    "    'folder': '/mnt/gvol8080/demo_data/cosmx/small_cosmx',\n",
    "}]\n",
    "connector.submit_study_from_local_lens_sc(\n",
    "  group_id='personal',\n",
    "  batch_info=batch_info,\n",
    "  study_id='COSMX_SMALL_DATASET',\n",
    "  name='This is my first study',\n",
    "  authors=['Huy Nguyen', 'Thao Truong'],\n",
    "  species=Species.HUMAN.value,\n",
    "  study_type=StudyType.COSMX.value,\n",
    "  min_genes=15,\n",
    "  neg_controls_percentage=5,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f6350747",
   "metadata": {},
   "source": [
    "#### 3.1.3. Option 3: Submit study with shared s3 of a group"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2f4c71fa",
   "metadata": {},
   "source": [
    "```\n",
    "Parameters:\n",
    "----\n",
    "group_id: str\n",
    "    ID of the group to submit the data to.\n",
    "shared_s3_id : str\n",
    "    ID of s3 bucket.\n",
    "batch_info: List[dict]\n",
    "    File path and batch name information, the path DOES NOT include the bucket path configured on platform!\n",
    "    Example:\n",
    "      [{\n",
    "        'name': 'study_1',\n",
    "        'folder': 's3_path/study_folder',\n",
    "      }, {...}]\n",
    "study_id: str\n",
    "    If no value is provided, default id will be a random uuidv4 string\n",
    "name: str\n",
    "    Name of the study.\n",
    "authors: List[str]\n",
    "    Authors of the study.\n",
    "abstract: str\n",
    "    Abstract of the study.\n",
    "species: str\n",
    "    Species of the study.\n",
    "    Support:  Species.HUMAN.value\n",
    "              Species.MOUSE.value\n",
    "              Species.PRIMATE.value\n",
    "              Species.OTHERS.value\n",
    "study_type: int\n",
    "    Format of the study\n",
    "    Support:  StudyType.VIZGEN.value\n",
    "              StudyType.COSMX.value\n",
    "              StudyType.XENIUM.value\n",
    "min_counts: int. Default: 0\n",
    "    Minimum number of counts required for a cell to pass filtering.\n",
    "min_genes: int. Default: 0\n",
    "    Minimum number of genes expressed required for a cell to pass filtering.\n",
    "max_counts: int. Default: inf\n",
    "    Maximum number of counts required for a cell to pass filtering.\n",
    "max_genes: int. Default: inf\n",
    "    Maximum number of genes expressed required for a cell to pass filtering.\n",
    "neg_controls_percentage: int. Default: 100\n",
    "    Maximum number of control/negative genes percentage required for a cell to pass filtering.\n",
    "    Ranging from 0 to 100\n",
    "  \n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4f9dac9c",
   "metadata": {},
   "outputs": [],
   "source": [
    "batch_info = [{\n",
    "    'name': 'dataset1',\n",
    "    'folder': 's3_path/data_fol_1',\n",
    "  }, {\n",
    "    'name': 'dataset2',\n",
    "    'folder': 's3_path/data_fol_2',\n",
    "}]\n",
    "\n",
    "# --------\n",
    "\n",
    "## Demo submisison\n",
    "## The path DOES NOT include the bucket path configured on platform\n",
    "batch_info = [{\n",
    "    'name': 'small_cosmx',\n",
    "    'folder': 'demo_data/small_cosmx',\n",
    "  }]\n",
    "\n",
    "connector.submit_study_from_shared_s3_lens_sc(\n",
    "  group_id='6b3cfc27fa694779a1b2a5015e438b94',\n",
    "  batch_info=batch_info,\n",
    "  study_id='COSMX_SMALL_DATASET',\n",
    "  name='This is my first study',\n",
    "  authors=['Huy Nguyen', 'Thao Truong'],\n",
    "  species=Species.HUMAN.value,\n",
    "  study_type=StudyType.COSMX.value,\n",
    "  min_genes=15,\n",
    "  neg_controls_percentage=5,\n",
    "  shared_s3_id='15de18d355b4ce0a1u512a5b45c8e3c'\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "10a96e4b-e621-422e-8257-9f5533483339",
   "metadata": {},
   "source": [
    "### 3.2. Submit proteomics dataset (CODEX, AKOYA, ...)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b15d51bb-f63b-4223-9b93-9fcd354d42cd",
   "metadata": {},
   "source": [
    "#### 3.2.1. Option 1: Submit study from s3"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b52b8700-a829-4dc0-bfe5-d5f9ef8b2e2e",
   "metadata": {},
   "source": [
    "```\n",
    "Parameters:\n",
    "-----\n",
    "group_id: str\n",
    "    ID of the group to submit the data to.\n",
    "s3_id : str\n",
    "    ID of s3 bucket. Default: None\n",
    "    If s3_id is not provided, we will use the first s3 bucket configured on the platform.\n",
    "batch_info: Dict[]\n",
    "    File path and batch name information, the path DOES NOT included the bucket path!\n",
    "    Example:\n",
    "      {\n",
    "        'image': 's3_path/image.ome.tiff'\n",
    "      }\n",
    "study_id: str\n",
    "    If no value is provided, default id will be a random uuidv4 string\n",
    "name: str\n",
    "    Name of the study.\n",
    "authors: List[str]\n",
    "    Authors of the study.\n",
    "abstract: str\n",
    "    Abstract of the study.\n",
    "species: str\n",
    "    Species of the study.\n",
    "    Support:  Species.HUMAN.value\n",
    "              Species.MOUSE.value\n",
    "              Species.PRIMATE.value\n",
    "              Species.OTHERS.value\n",
    "min_counts: int. Default: 0\n",
    "    Minimum number of counts required for a cell to pass filtering.\n",
    "min_genes: int. Default: 0\n",
    "    Minimum number of genes expressed required for a cell to pass filtering.\n",
    "max_counts: int. Default: inf\n",
    "    Maximum number of counts required for a cell to pass filtering.\n",
    "max_genes: int. Default: inf\n",
    "    Maximum number of genes expressed required for a cell to pass filtering.\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fe4ba148-0d75-4da8-a72c-b32c25e1775b",
   "metadata": {},
   "outputs": [],
   "source": [
    "## ONLY accept 1 image per submission\n",
    "## The path DOES NOT include the bucket path configured on platform\n",
    "batch_info = {\n",
    "    'image': 's3_path/image.qptiff',\n",
    "  }\n",
    "batch_info = {\n",
    "    'image': 's3_path/image.ome.tiff',\n",
    "  }\n",
    "\n",
    "connector.submit_study_from_s3_proteomics(\n",
    "  group_id='personal',\n",
    "  batch_info=batch_info,\n",
    "  study_id='PROTEOMICS_BRAIN',\n",
    "  name='This is my first study',\n",
    "  authors=['Huy Nguyen', 'Thao Truong'],\n",
    "  species=Species.HUMAN.value,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9fbce147-680e-4a7d-8a96-a738426e13a6",
   "metadata": {
    "tags": []
   },
   "source": [
    "#### 3.1.2. Option 2: Submit study from local machine / server"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ca56f429-69c0-4d66-96de-f8191086e8a1",
   "metadata": {},
   "source": [
    "```\n",
    "Parameters:\n",
    "------\n",
    "group_id: str\n",
    "    ID of the group to submit the data to.\n",
    "batch_info: List[]\n",
    "    File path and batch name information\n",
    "    Example:\n",
    "      {\n",
    "        'image': 'server_path/image.ome.tiff'\n",
    "      }\n",
    "study_id: str\n",
    "    If no value is provided, default id will be a random uuidv4 string\n",
    "name: str\n",
    "   Name of the study.\n",
    "authors: List[str]\n",
    "   Authors of the study.\n",
    "abstract: str\n",
    "    Abstract of the study.\n",
    "species: str\n",
    "    Species of the study.\n",
    "    Support:  Species.HUMAN.value\n",
    "              Species.MOUSE.value\n",
    "              Species.PRIMATE.value\n",
    "              Species.OTHERS.value\n",
    "min_counts: int. Default: 0\n",
    "   Minimum number of counts required for a cell to pass filtering.\n",
    "min_genes: int. Default: 0\n",
    "   Minimum number of genes expressed required for a cell to pass filtering.\n",
    "max_counts: int. Default: inf\n",
    "   Maximum number of counts required for a cell to pass filtering.\n",
    "max_genes: int. Default: inf\n",
    "   Maximum number of genes expressed required for a cell to pass filtering.\n",
    "chunk_size: int\n",
    "    size of each separated chunk for uploading. Default: ChunkSize.CHUNK_100_MB.value\\n\n",
    "    Support:\n",
    "          ChunkSize.CHUNK_5_MB.value\n",
    "          ChunkSize.CHUNK_100_MB.value\n",
    "          ChunkSize.CHUNK_500_MB.value\n",
    "          ChunkSize.CHUNK_1_GB.value\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8bed329b-70c5-49e6-b435-68507d727ec1",
   "metadata": {},
   "outputs": [],
   "source": [
    "## ONLY accept 1 image per submission\n",
    "batch_info = {\n",
    "    'image': 'local_path/image.qptiff',\n",
    "  }\n",
    "batch_info = {\n",
    "    'image': 'local_path/image.ome.tiff',\n",
    "  }\n",
    "\n",
    "connector.submit_study_from_local_proteomics(\n",
    "  group_id='personal',\n",
    "  batch_info=batch_info,\n",
    "  study_id='PROTEOMICS_BRAIN',\n",
    "  name='This is my first study',\n",
    "  authors=['Huy Nguyen', 'Thao Truong'],\n",
    "  species=Species.HUMAN.value,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2b498957",
   "metadata": {},
   "source": [
    "#### 3.2.3. Option 3: Submit study with shared s3 of a group"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a7faa167",
   "metadata": {},
   "source": [
    "```\n",
    "Parameters:\n",
    "-----\n",
    "group_id: str\n",
    "    ID of the group to submit the data to.\n",
    "shared_s3_id : str\n",
    "    ID of s3 bucket.\n",
    "batch_info: Dict[]\n",
    "    File path and batch name information, the path DOES NOT included the bucket path!\n",
    "    Example:\n",
    "      {\n",
    "        'image': 's3_path/image.ome.tiff'\n",
    "      }\n",
    "study_id: str\n",
    "    If no value is provided, default id will be a random uuidv4 string\n",
    "name: str\n",
    "    Name of the study.\n",
    "authors: List[str]\n",
    "    Authors of the study.\n",
    "abstract: str\n",
    "    Abstract of the study.\n",
    "species: str\n",
    "    Species of the study.\n",
    "    Support:  Species.HUMAN.value\n",
    "              Species.MOUSE.value\n",
    "              Species.PRIMATE.value\n",
    "              Species.OTHERS.value\n",
    "min_counts: int. Default: 0\n",
    "    Minimum number of counts required for a cell to pass filtering.\n",
    "min_genes: int. Default: 0\n",
    "    Minimum number of genes expressed required for a cell to pass filtering.\n",
    "max_counts: int. Default: inf\n",
    "    Maximum number of counts required for a cell to pass filtering.\n",
    "max_genes: int. Default: inf\n",
    "    Maximum number of genes expressed required for a cell to pass filtering.\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4a3de459",
   "metadata": {},
   "outputs": [],
   "source": [
    "## ONLY accept 1 image per submission\n",
    "## The path DOES NOT include the bucket path configured on platform\n",
    "## Example image paths: 's3_path/image.ome.tiff' (used below) or 's3_path/image.qptiff'\n",
    "batch_info = {\n",
    "    'image': 's3_path/image.ome.tiff',\n",
    "}\n",
    "\n",
    "connector.submit_study_from_s3_proteomics(\n",
    "  group_id='6b3cfc27fa694779a1b2a5015e438b94',\n",
    "  batch_info=batch_info,\n",
    "  study_id='PROTEOMICS_BRAIN',\n",
    "  name='This is my first study',\n",
    "  authors=['Huy Nguyen', 'Thao Truong'],\n",
    "  species=Species.HUMAN.value,\n",
    "  shared_s3_id='15de18d355b4ce0a1u512a5b45c8e3c'\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b53fe51a-7a2b-4a22-ad70-0473fd7f8538",
   "metadata": {},
   "source": [
    "## 4. Submit metadata"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0252c017-9662-4c8b-8d17-19578c2e8de1",
   "metadata": {},
   "source": [
    "<div class=\"alert alert-block alert-success\">NOTE: Get <b>group_id</b> and <b>study_id (uuid)</b> from step <b>\"2. List groups and studies\"</b></div>"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "026597a6-87e0-4926-8d1e-83baa57aed9e",
   "metadata": {},
   "source": [
    "### 4.1. Submit a dataframe directly "
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e8ed9de0-e633-4da4-9570-021f38514732",
   "metadata": {},
   "source": [
    "This is an example metadata table. The barcodes must be the index of the DataFrame (`DataFrame.index`)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "18383247-bb97-4950-bc3d-5c5e06b7a927",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>fov</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Barcodes</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1_1</th>\n",
       "      <td>fov_1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1_2</th>\n",
       "      <td>fov_1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1_3</th>\n",
       "      <td>fov_1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1_4</th>\n",
       "      <td>fov_1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1_5</th>\n",
       "      <td>fov_1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5_3301</th>\n",
       "      <td>fov_5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5_3313</th>\n",
       "      <td>fov_5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5_3314</th>\n",
       "      <td>fov_5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5_3321</th>\n",
       "      <td>fov_5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5_3324</th>\n",
       "      <td>fov_5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>11814 rows × 1 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "            fov\n",
       "Barcodes       \n",
       "1_1       fov_1\n",
       "1_2       fov_1\n",
       "1_3       fov_1\n",
       "1_4       fov_1\n",
       "1_5       fov_1\n",
       "...         ...\n",
       "5_3301    fov_5\n",
       "5_3313    fov_5\n",
       "5_3314    fov_5\n",
       "5_3321    fov_5\n",
       "5_3324    fov_5\n",
       "\n",
       "[11814 rows x 1 columns]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Read the tab-separated metadata file; the first column becomes the DataFrame index\n",
    "meta_df = pd.read_csv(\n",
    "    'SMALL_COSMX_metadata.tsv',\n",
    "    sep='\\t',\n",
    "    index_col=0,\n",
    ")\n",
    "meta_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "df777667-9090-4a8c-a7da-381d9d08a91b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Successful'"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Submit the DataFrame loaded above (meta_df) as metadata for the study\n",
    "connector.submit_metadata_from_dataframe(\n",
    "    df=meta_df,\n",
    "    group_id='personal',\n",
    "    study_id='5c470f3b799d474e91d0ca65aec3cf56',\n",
    "    species=Species.HUMAN.value,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f28f3b69-5847-4c94-abdc-c6aa12811ed6",
   "metadata": {},
   "source": [
    "### 4.2. Submit file from local / server"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "638fa955-7096-44c4-8851-ffe1fe2b1e07",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Successful'"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Submit the metadata by file path instead of an in-memory DataFrame\n",
    "connector.submit_metadata_from_local(\n",
    "    file_path='./SMALL_COSMX_metadata.tsv',\n",
    "    group_id='personal',\n",
    "    study_id='5c470f3b799d474e91d0ca65aec3cf56',\n",
    "    species=Species.HUMAN.value,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fbafd539-da4d-43b0-b26b-dfe0c672142f",
   "metadata": {},
   "source": [
    "### 4.3. Submit file from s3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "881ea5a8-f696-42a4-b492-a400503e3d48",
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE: file_path DOES NOT include the bucket path configured on platform\n",
    "# (e.g. s3://bioturing_bucket)\n",
    "connector.submit_metadata_from_s3(\n",
    "    file_path='demo_data/SMALL_COSMX_metadata.tsv',\n",
    "    group_id='personal',\n",
    "    study_id='5c470f3b799d474e91d0ca65aec3cf56',\n",
    "    species=Species.HUMAN.value,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2b70bba5",
   "metadata": {},
   "source": [
    "### 4.4. Submit file from shared s3 of a group"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8e8216b3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE: this function does NOT apply to group_id='personal'.\n",
    "# NOTE: file_path does NOT include the bucket path configured on platform\n",
    "# (e.g. s3://bioturing_bucket).\n",
    "connector.submit_metadata_from_shared_s3(\n",
    "    species=Species.HUMAN.value,\n",
    "    study_id='a1558f8ed6064095be86a091a4118c4a',\n",
    "    group_id='bioturing_public_studies',\n",
    "    file_path='test_bucket/GSE128223_meta.tsv',\n",
    "    shared_s3_id='ce26142487ed4a3697bb8902bf9d9670'\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7cab4732-521a-4605-a103-c36363621f46",
   "metadata": {},
   "source": [
    "## 5. Access study data"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "118386d7-a5de-4e17-871b-203345912745",
   "metadata": {},
   "source": [
    "<div class=\"alert alert-block alert-success\">NOTE: Get <b>study_id (uuid)</b> from step <b>\"2.2. List all available studies in a group\"</b></div>"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d9b81b3d-27f3-4aa4-a181-818e94843fe4",
   "metadata": {},
   "source": [
    "### 5.1. Get barcodes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "34ce2be9-6bd0-4ddf-b7bc-75ee2d307c3e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['1_1', '1_2', '1_3', ..., '5_3314', '5_3321', '5_3324'],\n",
       "      dtype='<U6')"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fetch the study's barcodes and wrap the result in a NumPy array\n",
    "barcodes = np.array(\n",
    "    connector.get_barcodes(\n",
    "        species=Species.HUMAN.value,\n",
    "        study_id='5c470f3b799d474e91d0ca65aec3cf56',\n",
    "    )\n",
    ")\n",
    "barcodes"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3785035a-dd98-4fd5-b312-06698108900c",
   "metadata": {},
   "source": [
    "### 5.2. Get features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "8a377a4d-adbc-4c3a-bd32-b578217bc4d0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['5S_RRNA', '5_8S_RRNA', '7SK', ..., 'NEGPRB18', 'NEGPRB10',\n",
       "       'NEGPRB15'], dtype='<U26')"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fetch the study's feature names and wrap the result in a NumPy array\n",
    "features = np.array(\n",
    "    connector.get_features(\n",
    "        species=Species.HUMAN.value,\n",
    "        study_id='5c470f3b799d474e91d0ca65aec3cf56',\n",
    "    )\n",
    ")\n",
    "features"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4bd5793d-6f41-4cf0-835f-da9b7388c548",
   "metadata": {},
   "source": [
    "### 5.3. Get metadata dataframe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "3225fb73-1ff3-42f2-ae0b-28db50ba3392",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Barcodes</th>\n",
       "      <th>Alexa-488_Histone_Nuclei</th>\n",
       "      <th>Alexa-546_G_None</th>\n",
       "      <th>Alexa-594_rRNA_CD298_B2M_Membrane</th>\n",
       "      <th>Alexa-647_GFAP_Astrocytes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1_1</td>\n",
       "      <td>49</td>\n",
       "      <td>4</td>\n",
       "      <td>19</td>\n",
       "      <td>68</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1_2</td>\n",
       "      <td>46</td>\n",
       "      <td>4</td>\n",
       "      <td>25</td>\n",
       "      <td>96</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1_3</td>\n",
       "      <td>77</td>\n",
       "      <td>5</td>\n",
       "      <td>22</td>\n",
       "      <td>49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1_4</td>\n",
       "      <td>57</td>\n",
       "      <td>5</td>\n",
       "      <td>27</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1_5</td>\n",
       "      <td>38</td>\n",
       "      <td>5</td>\n",
       "      <td>30</td>\n",
       "      <td>17</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Barcodes  Alexa-488_Histone_Nuclei  Alexa-546_G_None  \\\n",
       "0      1_1                        49                 4   \n",
       "1      1_2                        46                 4   \n",
       "2      1_3                        77                 5   \n",
       "3      1_4                        57                 5   \n",
       "4      1_5                        38                 5   \n",
       "\n",
       "   Alexa-594_rRNA_CD298_B2M_Membrane  Alexa-647_GFAP_Astrocytes  \n",
       "0                                 19                         68  \n",
       "1                                 25                         96  \n",
       "2                                 22                         49  \n",
       "3                                 27                          8  \n",
       "4                                 30                         17  "
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fetch the study metadata, then preview only the first 5 rows and first 5 columns\n",
    "metadata = connector.get_metadata(\n",
    "    species=Species.HUMAN.value,\n",
    "    study_id='5c470f3b799d474e91d0ca65aec3cf56',\n",
    ")\n",
    "metadata.iloc[:5, :5]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ae37260a-47a5-419b-b60b-7945509bb24d",
   "metadata": {},
   "source": [
    "### 5.4. Get embeddings"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4a133f3b-1edb-4cc3-8156-5faec669bd42",
   "metadata": {},
   "source": [
    "#### 5.4.1. List all embeddings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "b9d5a84c-b9aa-492d-ab5f-ce452aadb53e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'embedding_id': 'c4529a43ceaf40e98935f857aa1caa5c',\n",
       "  'embedding_name': 'PCA (no batch corrected)'},\n",
       " {'embedding_id': '63270cea38374086ae38c9bc142a1b30',\n",
       "  'embedding_name': 'tSNE (perplexity=30)'}]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# List the embeddings of the study; each entry carries an embedding_id and an embedding_name\n",
    "embeddings = connector.list_all_custom_embeddings(\n",
    "    species=Species.HUMAN.value,\n",
    "    study_id='5c470f3b799d474e91d0ca65aec3cf56',\n",
    ")\n",
    "embeddings"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "460e1bd8-3731-4804-abcb-09e80c8fc2b8",
   "metadata": {},
   "source": [
    "#### 5.4.2. Access an embedding"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "7adbcbc2-8a98-49e2-9b03-aaf63e815d99",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-3.2380335e-03, -2.1599566e-03,  8.6972013e-04, ...,\n",
       "        -6.6192023e-04,  2.0368092e-04,  7.6390570e-05],\n",
       "       [-3.0471983e-03, -2.6254782e-03,  1.5224112e-03, ...,\n",
       "         8.3375332e-04,  1.8367210e-03,  9.4208797e-04],\n",
       "       [-3.9695334e-03, -3.1506929e-03,  8.5247034e-04, ...,\n",
       "         8.9647510e-04,  1.2072887e-04, -6.8749214e-04],\n",
       "       ...,\n",
       "       [-8.9350082e-03, -1.0427534e-02,  5.4663382e-03, ...,\n",
       "        -3.6334249e-03, -1.3702468e-03,  1.4806709e-03],\n",
       "       [-7.5779855e-03, -7.6015377e-03, -2.1946256e-04, ...,\n",
       "        -3.1667415e-03, -4.9561551e-03, -3.7561799e-03],\n",
       "       [-3.5920746e-03, -7.0776208e-03, -3.0932256e-06, ...,\n",
       "        -8.8387710e-04,  3.7067404e-03, -2.5200413e-03]], dtype=float32)"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# embedding_id comes from the listing in 5.4.1 (here: 'PCA (no batch corrected)')\n",
    "chosen_embedding = connector.retrieve_custom_embedding(\n",
    "    embedding_id='c4529a43ceaf40e98935f857aa1caa5c',\n",
    "    species=Species.HUMAN.value,\n",
    "    study_id='5c470f3b799d474e91d0ca65aec3cf56',\n",
    ")\n",
    "chosen_embedding"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4f0f891f-cae6-4f43-98fe-ba8a6bedc049",
   "metadata": {},
   "source": [
    "### 5.5. Query genes"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ada2bf19-a504-41cc-a1fa-d49a63214b6b",
   "metadata": {},
   "source": [
    "```\n",
    "Parameters:\n",
    "----\n",
    "group_id: str\n",
    "    ID of the group to submit the data to.\n",
    "species: str\n",
    "    Species of the study (e.g. Species.HUMAN.value)\n",
    "study_id: str\n",
    "    ID of the study (uuid)\n",
    "gene_names: List[str], default=[]\n",
    "    If the value array is empty, the return value will be the whole matrix\n",
    "unit: str\n",
    "    Support:\n",
    "          StudyUnit.UNIT_RAW.value\n",
    "          StudyUnit.UNIT_LOGNORM.value\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "e9df1399-872a-42b7-b30c-8c37513bbbfe",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<11814x2 sparse matrix of type '<class 'numpy.float32'>'\n",
       "\twith 1649 stored elements in Compressed Sparse Column format>"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Query the raw-unit expression of two genes for the study\n",
    "gene_exp = connector.query_genes(\n",
    "    species=Species.HUMAN.value,\n",
    "    study_id='5c470f3b799d474e91d0ca65aec3cf56',\n",
    "    unit=StudyUnit.UNIT_RAW.value,\n",
    "    gene_names=['CD3D', 'CD8A'],\n",
    ")\n",
    "gene_exp"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "eff9d6b8-b360-4537-b2e4-261166f70351",
   "metadata": {},
   "source": [
    "## 6. Standardize your metadata"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6b7ca6de-a87b-4041-a54c-7d8cde5a39f7",
   "metadata": {},
   "source": [
    "<div class=\"alert alert-block alert-success\">NOTE: Get <b>group_id</b> and <b>study_id (uuid)</b> from step <b>\"2. List groups and studies\"</b></div>"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7df99cdf-40e7-4c45-beba-1e2dcd93313a",
   "metadata": {},
   "source": [
    "### 6.1. Retrieve ontology tree"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1334ffaf",
   "metadata": {},
   "source": [
    "```\n",
    "Returns\n",
    "----------\n",
    "Ontologies tree : Dict[Dict]\n",
    "  In which:\n",
    "    'name': name of the node, which will be used in further steps\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "30e700f3-a1a1-4937-8bf8-4b627a653dd5",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Retrieve the ontologies tree; node names from it are used in step 6.2\n",
    "connector.get_ontologies_tree(\n",
    "    group_id='bioturing_public_studies',\n",
    "    species=Species.HUMAN.value,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e0dae50c-09f5-42a8-98d9-9bad8f0d93e2",
   "metadata": {},
   "source": [
    "### 6.2. Assign standardized terms"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "87b44e65-f253-4e48-9656-daf9d8e3de9d",
   "metadata": {},
   "source": [
    "```\n",
    "Parameters\n",
    "-----\n",
    "species: str\n",
    "      Species of the study.\n",
    "      Support:  Species.HUMAN.value\n",
    "                Species.MOUSE.value\n",
    "                Species.PRIMATE.value\n",
    "                Species.OTHERS.value\n",
    "group_id: str\n",
    "      ID of the group to submit the data to.\n",
    "study_id: str\n",
    "      ID of the study (uuid)\n",
    "metadata_field: str\n",
    "      column name of meta dataframe in platform (eg: author's tissue)\n",
    "metadata_value: str\n",
    "      metadata value within the metadata field (eg: normal lung)\n",
    "root_name: str\n",
    "      name of root in btr ontologies tree (eg: tissue)\n",
    "leaf_name: str\n",
    "      name of leaf in btr ontologies tree (eg: lung)\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ba50c4b1-db3b-4152-8c1b-efea853128da",
   "metadata": {},
   "outputs": [],
   "source": [
    "# This function is only usable in a group (not 'personal').\n",
    "# It maps one metadata value of the study onto a leaf of the ontologies tree.\n",
    "\n",
    "connector.assign_standardized_meta(\n",
    "    group_id='bioturing_public_studies',\n",
    "    study_id='5c470f3b799d474e91d0ca65aec3cf56',\n",
    "    species=Species.HUMAN.value,\n",
    "    metadata_field='Cell type',\n",
    "    metadata_value='TCRV delta 1 gamma-delta T cell',\n",
    "    root_name='cell type',\n",
    "    leaf_name='gamma-delta T cell',\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}