Download benchmarking data from S3 with Neuroglancer

This notebook explains how to:

  1. Read benchmarking data from S3 via Neuroglancer

  2. download raw benchmarking data to your local computer

Quick notes on the benchmarking data:

In octree format, the data is organized into folders named test_1 through test_25 and validation_1 through validation_25.

If you get a reshape error while downloading, try uploading the segments first and then re-uploading the volumes.

Known issues with a few of the files:

  • test_9, test_10 - didn't seem to have good swc alignment

  • test_24 - issues with the image

  • validation_11 - seems to be a shift between swcs and the image

[1]:
import napari
from napari.utils import nbscreenshot
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
<ipython-input-1-6430b4c693d9> in <module>
----> 1 import napari
      2 from napari.utils import nbscreenshot

ModuleNotFoundError: No module named 'napari'

Define locations

[2]:
from brainlit.utils import session
from brainlit.utils.Neuron_trace import NeuronTrace

# Benchmarking volume to open. Replace "validation_7" with any of
# test_1 ... test_25 or validation_1 ... validation_25.
dest = "s3://open-neurodata/brainlit/benchmarking_data/validation_7"
# The segments are read from the same location as the image volume.
dest_segments = dest
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
<ipython-input-2-6d6292ac09c6> in <module>
----> 1 from brainlit.utils import session
      2 from brainlit.utils.Neuron_trace import NeuronTrace
      3
      4 # #Can change to test_"1-25", validation_"1-25"
      5 dest = "s3://open-neurodata/brainlit/benchmarking_data/validation_7"

ModuleNotFoundError: No module named 'brainlit'

Create Neuroglancer session & download benchmarking volume

[3]:
%%capture
# The %%capture cell magic must stay on the first line of the cell; it captures
# (and therefore hides) everything this cell would otherwise print or display.
# Open a session on the location chosen in `dest`, with `dest_segments` as the
# segment source. mip=0 is presumably the highest-resolution level -- TODO confirm
# against the brainlit documentation.
sess = session.NeuroglancerSession(url=dest, url_segments=dest_segments, mip=0)  # create session object
# `bounds` returned here is passed to sess.get_segments in the next code cell.
img, bounds, vertices = sess.pull_vertex_list(1, [1], 0, expand=True)  # get full benchmarking image
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-3-d18bbfed2415> in <module>
----> 1 sess = session.NeuroglancerSession(url=dest, url_segments=dest_segments, mip=0)  # create session object
      2 img, bounds, vertices = sess.pull_vertex_list(1, [1], 0, expand=True)  # get full benchmarking image

NameError: name 'session' is not defined

Download a specific .swc

[4]:
# ID of the segment to fetch; can be changed.
seg_id = 1

# The result is indexable: element 0 is kept as G, element 1 as paths.
G_paths = sess.get_segments(seg_id, bounds, rounding=False)
G, paths = G_paths[0], G_paths[1]
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-4-27bd1f88755c> in <module>
      1 seg_id = 1 # Can change
      2
----> 3 G_paths = sess.get_segments(seg_id, bounds, rounding = False)
      4 G = G_paths[0]
      5 paths = G_paths[1]

NameError: name 'sess' is not defined

Visualize with napari

[5]:
#viewer = napari.Viewer(ndisplay=3)
#viewer.add_image(img)
#viewer.add_shapes(data=paths, shape_type='path', edge_width=1.0, edge_color='blue', opacity=0.8)
[6]:
#nbscreenshot(viewer, canvas_only = True)

Download raw benchmarking data

This will download the benchmarking data in .tif and .swc format to a local destination

[7]:
import boto3
from botocore import UNSIGNED
from botocore.client import Config
import os
from pathlib import Path
import numpy as np
from skimage import io
from tqdm import tqdm
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
<ipython-input-7-8e97867fee22> in <module>
----> 1 import boto3
      2 from botocore import UNSIGNED
      3 from botocore.client import Config
      4 import os
      5 from pathlib import Path

ModuleNotFoundError: No module named 'boto3'
[8]:
# Local download layout, rooted at the notebook's current working directory:
#   data/sample-tif-location   <- downloaded .tif images
#   data/sample-swc-location   <- downloaded .swc traces
cwd = Path(os.path.abspath(''))
data_dir = os.path.join(cwd, "data")
print(f"Downloading segments to {data_dir}")

im_dir = os.path.join(data_dir, "sample-tif-location")
swc_dir = os.path.join(data_dir, "sample-swc-location")

# exist_ok=True keeps the cell safe to re-run and avoids the race between
# checking for a directory and creating it.
for _download_dir in (data_dir, im_dir, swc_dir):
    os.makedirs(_download_dir, exist_ok=True)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-8-0f3c6f009956> in <module>
----> 1 cwd = Path(os.path.abspath(''))
      2 data_dir = os.path.join(cwd, "data")
      3 print(f"Downloading segments to {data_dir}")
      4 if not os.path.exists(data_dir):
      5     os.makedirs(data_dir)

NameError: name 'Path' is not defined

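S3 object keys always use forward slashes, so prefix is set to the plain string “brainlit/benchmarking_data/tif-files”. This works on every operating system. On mac/linux, os.path.join builds the same string, but on Windows it inserts backslashes, so there you must use the plain string.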

[9]:
# Unsigned requests: the bucket is read anonymously, without AWS credentials.
s3 = boto3.resource("s3", config=Config(signature_version=UNSIGNED))
bucket = s3.Bucket("open-neurodata")
# S3 keys always use "/" separators, so this literal prefix is correct on every OS.
# Do not build it with os.path.join, which inserts backslashes on Windows.
prefix = "brainlit/benchmarking_data/tif-files"

# List the prefix once. Materialising the listing gives tqdm a real total for the
# progress bar and avoids a second round of S3 list requests just to count objects.
im_objs = list(bucket.objects.filter(Prefix=prefix))
im_count = len(im_objs)  # all objects under the prefix, not only the .tif files
for im_obj in tqdm(im_objs, desc="Downloading .tif images"):
    if im_obj.key.endswith(".tif"):
        # Store every image flat inside im_dir under its base file name.
        im_name = os.path.basename(im_obj.key)
        im_path = os.path.join(im_dir, im_name)
        bucket.download_file(im_obj.key, im_path)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-9-b7c7aed6f718> in <module>
----> 1 s3 = boto3.resource("s3", config=Config(signature_version=UNSIGNED))
      2 bucket = s3.Bucket("open-neurodata")
      3 prefix = "brainlit/benchmarking_data/tif-files" #use this for windows
      4 # prefix = os.path.join("brainlit", "benchmarking_data", "tif-files") #use this for mac/linux
      5 im_count = 0

NameError: name 'boto3' is not defined

The below code can visualize a specified .tif file.

[10]:
# Any downloaded image can be named here (test 1-25, validation 1-25).
file_name = "test_10-gfp.tif"

# Read the chosen image from the local .tif download folder.
im_file = Path(im_dir).joinpath(file_name)
im = io.imread(im_file, plugin="tifffile")

#viewer = napari.Viewer(ndisplay=3)
#viewer.add_image(im)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-10-718d33f942be> in <module>
      1 file_name = "test_10-gfp.tif" # Can change to any image (test 1-25, validation 1-25)
      2
----> 3 im_file = Path(im_dir) / file_name
      4 im = io.imread(im_file, plugin="tifffile")
      5

NameError: name 'Path' is not defined
[11]:
#nbscreenshot(viewer, canvas_only = True)

Again, on Windows the variable prefix must be a plain string with forward slashes rather than the result of os.path.join.

[12]:
# Unsigned requests: the bucket is read anonymously, without AWS credentials.
s3 = boto3.resource("s3", config=Config(signature_version=UNSIGNED))
bucket = s3.Bucket("open-neurodata")
# S3 keys always use "/" separators, so this literal prefix is correct on every OS.
# Do not build it with os.path.join, which inserts backslashes on Windows.
prefix = "brainlit/benchmarking_data/Manual-GT"

# List the prefix once. Materialising the listing gives tqdm a real total for the
# progress bar and avoids a second round of S3 list requests just to count objects.
swc_objs = list(bucket.objects.filter(Prefix=prefix))
swc_count = len(swc_objs)  # all objects under the prefix, not only the .swc files
for swc_obj in tqdm(swc_objs, desc="Downloading .swc traces"):
    if swc_obj.key.endswith(".swc"):
        # Keep the key's folder structure from "Manual-GT" downwards.
        idx = swc_obj.key.find("Manual-GT")
        swc_name = swc_obj.key[idx:]
        swc_path = os.path.join(swc_dir, swc_name)
        # exist_ok=True replaces the check-then-create pattern and no longer
        # shadows the builtin `dir` with a local variable.
        os.makedirs(os.path.dirname(swc_path), exist_ok=True)
        bucket.download_file(swc_obj.key, swc_path)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-12-99d6f55855dc> in <module>
----> 1 s3 = boto3.resource("s3", config=Config(signature_version=UNSIGNED))
      2 bucket = s3.Bucket("open-neurodata")
      3 prefix = "brainlit/benchmarking_data/Manual-GT" #use this for windows
      4 # prefix = os.path.join("brainlit", "benchmarking_data", "Manual-GT") #use this for mac/linux
      5 swc_count = 0

NameError: name 'boto3' is not defined
[13]:
from brainlit.utils.benchmarking_params import brain_offsets, vol_offsets, scales, type_to_date
from brainlit.utils.Neuron_trace import NeuronTrace
from pathlib import Path
import numpy as np
from skimage import io
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
<ipython-input-13-92b4a7e75fe3> in <module>
----> 1 from brainlit.utils.benchmarking_params import brain_offsets, vol_offsets, scales, type_to_date
      2 from brainlit.utils.Neuron_trace import NeuronTrace
      3 from pathlib import Path
      4 import numpy as np
      5 from skimage import io

ModuleNotFoundError: No module named 'brainlit'
[14]:
# Switch to pathlib objects for the globbing below.
im_dir = Path(im_dir)
swc_base_path = Path(swc_dir) / "Manual-GT"

# glob yields files in a filesystem-dependent order. Sort the matches so that the
# image picked by the following cell (which stops after the first one) is the
# same on every machine and every run.
gfp_files = sorted(im_dir.glob("**/*-gfp.tif"))
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-14-7848c5508752> in <module>
----> 1 im_dir = Path(im_dir)
      2 swc_base_path = Path(swc_dir) / "Manual-GT"
      3
      4 gfp_files = list(im_dir.glob("**/*-gfp.tif"))

NameError: name 'Path' is not defined
[15]:
# Load a benchmarking image and shift/scale its .swc traces relative to it.
# Only the first entry of gfp_files is processed: the loop ends with a break.
for im_num, im_path in enumerate(gfp_files):
    print(f"Image {im_num+1}/{len(gfp_files)}")
    print(im_path)

    # Drop the 8-character "-gfp.tif" suffix, leaving "<image>_<num>".
    name_parts = im_path.name[:-8].split("_")
    image = name_parts[0]
    date = type_to_date[image]
    num = int(name_parts[1])

    # Parameters looked up from the brainlit.utils.benchmarking_params tables.
    scale = scales[date]
    brain_offset = brain_offsets[date]
    vol_offset = vol_offsets[date][num]
    im_offset = np.add(brain_offset, vol_offset)

    # Trace folders are grouped in blocks of five, e.g. num=7 -> "<...>_6-10/<...>_7".
    group_start = (num - 1) // 5 * 5
    lower = group_start + 1
    upper = group_start + 5
    dir1 = "_".join((date, image, str(lower) + "-" + str(upper)))
    dir2 = "_".join((date, image, str(num)))
    swc_path = swc_base_path / dir1 / dir2
    swc_files = list(swc_path.glob("**/*.swc"))
    im = io.imread(im_path, plugin="tifffile")
    print(f"Image shape: {im.shape}")

    paths_total = []
    for swc in swc_files:
        # skip the bounding box swc
        # NOTE(review): this matches a "0" anywhere in the file name, not only a
        # trailing index -- confirm no real trace file name contains a zero.
        if "0" in swc.name:
            continue

        swc_trace = NeuronTrace(path=str(swc))
        paths = swc_trace.get_paths()
        swc_offset, _, _, _ = swc_trace.get_df_arguments()
        offset_diff = np.subtract(swc_offset, im_offset)

        # Shift each path by the swc/image offset difference, then rescale.
        paths_total.extend((p + offset_diff) / (scale) * 1000 for p in paths)

    break
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-15-7b8c9cb0be14> in <module>
----> 1 for im_num, im_path in enumerate(gfp_files):
      2     print(f"Image {im_num+1}/{len(gfp_files)}")
      3     print(im_path)
      4
      5     f = im_path.parts[-1][:-8].split("_")

NameError: name 'gfp_files' is not defined
[16]:
#viewer = napari.Viewer(ndisplay=3)
#viewer.add_image(np.swapaxes(im,0,2))
#viewer.add_shapes(data=paths_total, shape_type='path', edge_width=1.0, edge_color='blue', opacity=0.8)
[17]:
#nbscreenshot(viewer, canvas_only = True)