Skip to content

Commit a826122

Browse files
authored
Merge pull request #129 from jjjermiah/development
feat: adding manipulation of tags
2 parents 5944ed0 + ebbfc85 commit a826122

File tree

5 files changed

+225
-4
lines changed

5 files changed

+225
-4
lines changed

.github/workflows/main.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ jobs:
4646
name: coverage-report
4747
path: coverage-report
4848

49-
Codecov:
49+
Code-Coverage:
5050
needs: Unit-Tests
5151
runs-on: ubuntu-latest
5252
steps:
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
from .tags import (
2+
convert_element_to_int,
3+
convert_int_to_element,
4+
LOOKUP_TAG,
5+
element_VR_lookup,
6+
getSeriesModality,
7+
)
8+
9+
from .tags import (
10+
subsetSeriesTags,
11+
getReferencedFrameOfReferenceSequence,
12+
getReferencedSeriesUIDS,
13+
)
14+
15+
__all__ = [
16+
"convert_element_to_int",
17+
"convert_int_to_element",
18+
"LOOKUP_TAG",
19+
"element_VR_lookup",
20+
"getSeriesModality",
21+
"subsetSeriesTags",
22+
"getReferencedFrameOfReferenceSequence",
23+
"getReferencedSeriesUIDS",
24+
]

src/nbiatoolkit/dicomtags/tags.py

Lines changed: 141 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1+
from math import log
12
from pydicom.datadict import dictionary_VR
23
from pydicom.datadict import tag_for_keyword
3-
from pydicom._dicom_dict import DicomDictionary
44
import pandas as pd
5-
from typing import Any, Union, List
5+
from typing import List
66

77

88
def convert_element_to_int(element_str: str) -> int:
@@ -154,6 +154,17 @@ def getSeriesModality(series_tags_df: pd.DataFrame) -> str:
154154
def subsetSeriesTags(series_tags_df: pd.DataFrame, element: str) -> pd.DataFrame:
155155
"""
156156
Subsets a DataFrame containing DICOM series tags based on the start and end elements.
157+
158+
Args:
159+
series_tags_df (pd.DataFrame): A DataFrame containing DICOM series tags.
160+
element (str): The element to subset the DataFrame.
161+
162+
Returns:
163+
pd.DataFrame: A DataFrame containing the subset of the series tags.
164+
165+
Raises:
166+
ValueError: If the element is not found in the series tags.
167+
ValueError: If more than two elements are found in the series tags.
157168
"""
158169

159170
locs: pd.DataFrame
@@ -162,13 +173,31 @@ def subsetSeriesTags(series_tags_df: pd.DataFrame, element: str) -> pd.DataFrame
162173
if len(locs) == 0:
163174
raise ValueError("Element not found in the series tags.")
164175

176+
if len(locs) == 1:
177+
raise ValueError(
178+
"Only one element found in the series tags. Ensure element is a sequence"
179+
)
180+
165181
if len(locs) > 2:
166182
raise ValueError("More than two elements found in the series tags.")
167183

168-
return series_tags_df.iloc[locs.index[0] : locs.index[1]]
184+
return series_tags_df.iloc[locs.index[0] : locs.index[1] + 1]
169185

170186

171187
def getReferencedFrameOfReferenceSequence(series_tags_df: pd.DataFrame) -> pd.DataFrame:
188+
"""
189+
Given a DataFrame containing DICOM series tags, retrieves the ReferencedFrameOfReferenceSequence.
190+
191+
Args:
192+
series_tags_df (pd.DataFrame): A DataFrame containing DICOM series tags.
193+
194+
Returns:
195+
pd.DataFrame: A DataFrame containing the ReferencedFrameOfReferenceSequence.
196+
197+
Raises:
198+
ValueError: If the series is not an RTSTRUCT.
199+
200+
"""
172201
modality = getSeriesModality(series_tags_df=series_tags_df)
173202
if modality != "RTSTRUCT":
174203
raise ValueError("Series is not an RTSTRUCT.")
@@ -220,3 +249,112 @@ def getReferencedSeriesUIDS(series_tags_df: pd.DataFrame) -> List[str]:
220249
UIDS: list[str] = value["data"].to_list()
221250

222251
return UIDS
252+
253+
254+
def getSequenceElement(
    sequence_tags_df: pd.DataFrame, element_keyword: str
) -> pd.DataFrame:
    """
    Select the rows of a DICOM tags DataFrame that belong to one sequence.

    The keyword is resolved to a tag with ``LOOKUP_TAG``, rendered as an
    element string with ``convert_int_to_element``, and the matching rows are
    then selected with ``subsetSeriesTags``.

    Args:
        sequence_tags_df (pd.DataFrame): A DataFrame containing DICOM tags.
        element_keyword (str): The keyword of the sequence element to search
            for, e.g. "StructureSetROISequence".

    Returns:
        pd.DataFrame: The subset produced by ``subsetSeriesTags`` for the element.

    Raises:
        ValueError: Propagated from ``subsetSeriesTags`` when the element is
            not found, is found only once, or is found more than twice.
    """
    element_str: str = convert_int_to_element(
        combined_int=LOOKUP_TAG(keyword=element_keyword)
    )
    return subsetSeriesTags(series_tags_df=sequence_tags_df, element=element_str)
280+
281+
282+
def camel_case_tag(string: str) -> str:
    """
    Remove all whitespace from a string, leaving the letter case untouched.

    Despite its name, this function does not change capitalisation: the tag
    names coming from the DICOM dictionary are kept as they are, and the
    whitespace-separated words are simply joined together.

    Args:
        string (str): The input string to be converted.

    Returns:
        str: The input with every run of whitespace removed.

    Example:
        >>> camel_case_tag("ROI Number")
        'ROINumber'
        >>> camel_case_tag("hello world")
        'helloworld'
    """
    # str.split() with no arguments splits on any whitespace run and drops
    # leading/trailing whitespace, so joining the pieces strips all of it.
    return "".join(string.split())
301+
302+
303+
def extract_ROI_info(StructureSetROISequence) -> dict[str, dict[str, str]]:
    """
    Extracts ROI information from the StructureSetROISequence.

    Every row whose "name" is "ROI Number" starts a new ROI; the rows that
    follow it, up to the next "ROI Number" row (or the end of the DataFrame
    for the last ROI), are collected as that ROI's information.

    Args:
        StructureSetROISequence (pandas.DataFrame): A pandas DataFrame
            representing the StructureSetROISequence. It must provide the
            "name" and "data" columns.

    Returns:
        dict[str, dict[str, str]]: A dictionary containing ROI information,
            where the key is the ROI number (the "data" value of the
            "ROI Number" row) and the value maps each following row's
            whitespace-stripped "name" to its "data".

    Raises:
        ValueError: If ROI Number is not found in the StructureSetROISequence.
    """
    names = StructureSetROISequence["name"]
    values = StructureSetROISequence["data"]

    # Positional offsets of the rows where name == "ROI Number"
    roi_positions = (names == "ROI Number").to_numpy().nonzero()[0]

    if len(roi_positions) == 0:
        raise ValueError("ROI Number not found in the StructureSetROISequence.")

    # Each ROI block ends where the next one starts; the last block runs to
    # the end of the DataFrame so that the final ROI is not dropped.
    # NOTE(review): if the frame ends with rows that do not belong to the last
    # ROI (e.g. a row closing the sequence), they end up in the last ROI's
    # information - confirm against the layout of the tags DataFrame.
    block_ends = [*roi_positions[1:], len(StructureSetROISequence)]

    ROISet: dict[str, dict[str, str]] = {}
    for start, end in zip(roi_positions, block_ends):
        ROI_number: str = values.iloc[start]

        ROISet[ROI_number] = {
            camel_case_tag(string=name): value
            for name, value in zip(
                names.iloc[start + 1 : end], values.iloc[start + 1 : end]
            )
        }

    return ROISet
343+
344+
345+
# def getRTSTRUCT_ROI_info(seriesUID: str) -> dict[str, dict[str, str]]:
346+
# """
347+
# Given a SeriesInstanceUID of an RTSTRUCT, retrieves the ROI information.
348+
349+
# Args:
350+
# seriesUID (str): The SeriesInstanceUID of the RTSTRUCT.
351+
352+
# Returns:
353+
# dict[str, dict[str, str]]: A dictionary containing the ROI information.
354+
# """
355+
356+
# RTSTRUCT_Tags = client.getDICOMTags(seriesUID)
357+
358+
# StructureSetROISequence = getSequenceElement(sequence_tags_df=RTSTRUCT_Tags, element_keyword="StructureSetROISequence")
359+
360+
# return extract_ROI_info(StructureSetROISequence)

src/nbiatoolkit/nbia.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,13 @@
2121
ReturnType,
2222
conv_response_list,
2323
)
24+
25+
from .dicomtags.tags import (
26+
getReferencedSeriesUIDS,
27+
extract_ROI_info,
28+
getSequenceElement,
29+
)
30+
2431
import pandas as pd
2532
import requests
2633
from requests.exceptions import JSONDecodeError as JSONDecodeError
@@ -615,6 +622,21 @@ def getDICOMTags(
615622

616623
return conv_response_list(response, returnType)
617624

625+
def getRefSeriesUIDs(
    self,
    SeriesInstanceUID: str,
) -> List[str]:
    """
    Return the series UIDs referenced by the given series.

    The DICOM tags of the series are requested from ``getDICOMTags`` as a
    DataFrame and handed to ``getReferencedSeriesUIDS``.

    Args:
        SeriesInstanceUID (str): The SeriesInstanceUID whose tags are queried.

    Returns:
        List[str]: The UIDs returned by ``getReferencedSeriesUIDS``.

    Raises:
        ValueError: If ``getDICOMTags`` does not return a DataFrame.
    """
    tags_df = self.getDICOMTags(
        SeriesInstanceUID=SeriesInstanceUID,
        return_type=ReturnType.DATAFRAME,
    )

    # isinstance instead of an exact type comparison, so DataFrame subclasses
    # are accepted as well.
    if not isinstance(tags_df, pd.DataFrame):
        raise ValueError("DICOM Tags not df or not found in the response.")

    return getReferencedSeriesUIDS(series_tags_df=tags_df)
639+
618640
def downloadSeries(
619641
self,
620642
SeriesInstanceUID: Union[str, list],

tests/test_tags.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from pandas import DataFrame
12
import pytest
23
from src.nbiatoolkit import NBIAClient
34
from src.nbiatoolkit.dicomtags.tags import convert_int_to_element
@@ -154,3 +155,39 @@ def test_getSeriesModality(RTSTRUCT_Tags):
154155
def test_failsubsetSeriesTags(RTSTRUCT_Series):
    """subsetSeriesTags must raise KeyError when given the RTSTRUCT_Series fixture."""
    # The exception object itself is not inspected, so it is not bound to a name.
    with pytest.raises(KeyError):
        subsetSeriesTags(RTSTRUCT_Series, "(0008,0060)")
158+
159+
160+
def test_extract_ROI_info(RTSTRUCT_Tags):
    """Check getSequenceElement and extract_ROI_info together on the RTSTRUCT tags."""
    # tests both getSequenceElement and extract_ROI_info

    StructureSetROISequence: DataFrame = getSequenceElement(
        sequence_tags_df=RTSTRUCT_Tags, element_keyword="StructureSetROISequence"
    )

    # make sure that the StructureSetROISequence is not empty
    assert (
        not StructureSetROISequence.empty
    ), "Expected StructureSetROISequence to not be empty, but got empty"

    ROI_info: dict[str, dict[str, str]] = extract_ROI_info(StructureSetROISequence)

    assert ROI_info is not None, "Expected ROI_info to not be None, but got None"

    # ROI_info should have at least 26 keys, all of which are strings of ints
    assert len(ROI_info) >= 26, f"Expected atleast 26 keys, but got {len(ROI_info)}"
    keys = [int(key) for key in ROI_info]

    # assert all keys are between 1 and 29 (message kept in sync with the bound)
    assert all(
        1 <= key <= 29 for key in keys
    ), "Expected all keys to be between 1 and 29"

    print("All test cases passed!")
186+
187+
188+
def test_getReferencedSeriesUIDS(client, RTSTRUCT_Series):
    """client.getRefSeriesUIDs must return the expected referenced series UID."""
    # The first SeriesInstanceUID of the fixture is the series under test.
    series_uid = RTSTRUCT_Series["SeriesInstanceUID"].values[0]
    expected = ["1.3.6.1.4.1.14519.5.2.1.133742245714270925254982946723351496764"]

    result = client.getRefSeriesUIDs(series_uid)

    assert result == expected, f"Expected {expected}, but got {result}"

0 commit comments

Comments
 (0)