Skip to content

Commit

Permalink
Demo rfc80 poc na count for generic assay (#11039)
Browse files Browse the repository at this point in the history
* Add NA for generic-assay-bin-counts

* Filtering with NA for generic-assay-data-bin-counts

* Parens precautions

* unskip generic assay api tests

---------

Co-authored-by: alisman <[email protected]>
  • Loading branch information
fuzhaoyuan and alisman authored Oct 2, 2024
1 parent 2d913e9 commit fb8eaac
Show file tree
Hide file tree
Showing 9 changed files with 4,041 additions and 201 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import org.cbioportal.model.SampleTreatment;
import org.cbioportal.model.StudyViewFilterContext;
import org.cbioportal.web.parameter.ClinicalDataType;
import org.cbioportal.web.parameter.GenericAssayDataBinFilter;
import org.cbioportal.web.parameter.GenomicDataBinFilter;
import org.cbioportal.web.parameter.GenomicDataFilter;

Expand Down Expand Up @@ -74,7 +75,7 @@ public interface StudyViewRepository {

List<ClinicalDataCount> getGenomicDataBinCounts(StudyViewFilterContext studyViewFilterContext, List<GenomicDataBinFilter> genomicDataBinFilters);

List<ClinicalDataCount> getGenericAssayDataBinCounts(StudyViewFilterContext studyViewFilterContext, List<String> filteredAttributes);
List<ClinicalDataCount> getGenericAssayDataBinCounts(StudyViewFilterContext studyViewFilterContext, List<GenericAssayDataBinFilter> genericAssayDataBinFilters);

List<MolecularProfile> getGenericAssayProfiles();
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import org.cbioportal.model.SampleTreatment;
import org.cbioportal.persistence.helper.AlterationFilterHelper;
import org.cbioportal.persistence.helper.StudyViewFilterHelper;
import org.cbioportal.web.parameter.GenericAssayDataBinFilter;
import org.cbioportal.web.parameter.GenomicDataBinFilter;
import org.cbioportal.web.parameter.GenomicDataFilter;

Expand Down Expand Up @@ -72,7 +73,7 @@ public interface StudyViewMapper {

List<ClinicalDataCount> getGenomicDataBinCounts(StudyViewFilterHelper studyViewFilterHelper, List<GenomicDataBinFilter> genomicDataBinFilters);

List<ClinicalDataCount> getGenericAssayDataBinCounts(StudyViewFilterHelper studyViewFilterHelper, List<String> attributeIds);
List<ClinicalDataCount> getGenericAssayDataBinCounts(StudyViewFilterHelper studyViewFilterHelper, List<GenericAssayDataBinFilter> genericAssayDataBinFilters);

List<MolecularProfile> getGenericAssayProfiles();
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import org.cbioportal.persistence.helper.AlterationFilterHelper;
import org.cbioportal.persistence.helper.StudyViewFilterHelper;
import org.cbioportal.web.parameter.ClinicalDataType;
import org.cbioportal.web.parameter.GenericAssayDataBinFilter;
import org.cbioportal.web.parameter.GenomicDataBinFilter;
import org.cbioportal.web.parameter.GenomicDataFilter;
import org.springframework.beans.factory.annotation.Autowired;
Expand Down Expand Up @@ -219,8 +220,8 @@ public List<ClinicalDataCount> getGenomicDataBinCounts(StudyViewFilterContext st
}

@Override
public List<ClinicalDataCount> getGenericAssayDataBinCounts(StudyViewFilterContext studyViewFilterContext, List<String> attributeIds) {
return mapper.getGenericAssayDataBinCounts(createStudyViewFilterHelper(studyViewFilterContext), attributeIds);
public List<ClinicalDataCount> getGenericAssayDataBinCounts(StudyViewFilterContext studyViewFilterContext, List<GenericAssayDataBinFilter> genericAssayDataBinFilters) {
return mapper.getGenericAssayDataBinCounts(createStudyViewFilterHelper(studyViewFilterContext), genericAssayDataBinFilters);
}

private void buildClinicalAttributeNameMap() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import org.cbioportal.model.SampleTreatmentReport;
import org.cbioportal.service.exception.StudyNotFoundException;
import org.cbioportal.web.parameter.ClinicalDataType;
import org.cbioportal.web.parameter.GenericAssayDataBinFilter;
import org.cbioportal.web.parameter.GenomicDataBinFilter;
import org.cbioportal.web.parameter.GenomicDataFilter;
import org.cbioportal.web.parameter.StudyViewFilter;
Expand Down Expand Up @@ -50,7 +51,7 @@ public interface StudyViewColumnarService {

List<ClinicalDataCountItem> getGenomicDataBinCounts(StudyViewFilter studyViewFilter, List<GenomicDataBinFilter> genomicDataBinFilters);

List<ClinicalDataCountItem> getGenericAssayDataBinCounts(StudyViewFilter studyViewFilter, List<String> filteredAttributes);
List<ClinicalDataCountItem> getGenericAssayDataBinCounts(StudyViewFilter studyViewFilter, List<GenericAssayDataBinFilter> genericAssayDataBinFilters);

List<GenomicDataCountItem> getMutationTypeCountsByGeneSpecific(StudyViewFilter studyViewFilter, List<GenomicDataFilter> genomicDataFilters);
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import org.cbioportal.service.treatment.TreatmentCountReportService;
import org.cbioportal.web.parameter.ClinicalDataType;
import org.cbioportal.web.parameter.CustomSampleIdentifier;
import org.cbioportal.web.parameter.GenericAssayDataBinFilter;
import org.cbioportal.web.parameter.GenomicDataBinFilter;
import org.cbioportal.web.parameter.GenomicDataFilter;
import org.cbioportal.web.parameter.StudyViewFilter;
Expand Down Expand Up @@ -90,8 +91,8 @@ public List<ClinicalDataCountItem> getGenomicDataBinCounts(StudyViewFilter study
}

@Override
public List<ClinicalDataCountItem> getGenericAssayDataBinCounts(StudyViewFilter studyViewFilter, List<String> filteredAttributes) {
return generateDataCountItemsFromDataCounts(studyViewRepository.getGenericAssayDataBinCounts(createContext(studyViewFilter), filteredAttributes));
public List<ClinicalDataCountItem> getGenericAssayDataBinCounts(StudyViewFilter studyViewFilter, List<GenericAssayDataBinFilter> genericAssayDataBinFilters) {
return generateDataCountItemsFromDataCounts(studyViewRepository.getGenericAssayDataBinCounts(createContext(studyViewFilter), genericAssayDataBinFilters));
}

public List<CopyNumberCountByGene> getCnaGenes(StudyViewFilter studyViewFilter) throws StudyNotFoundException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,8 @@ public <T extends DataBinCountFilter, S extends DataBinFilter, U extends DataBin
attributeDatatypeMap = Collections.emptyMap();
}
case GenericAssayDataBinCountFilter genericAssayDataBinCountFilter -> {
unfilteredClinicalDataCounts = studyViewColumnarService.getGenericAssayDataBinCounts(partialFilter, uniqueKeys);
filteredClinicalDataCounts = studyViewColumnarService.getGenericAssayDataBinCounts(studyViewFilter, uniqueKeys);
unfilteredClinicalDataCounts = studyViewColumnarService.getGenericAssayDataBinCounts(partialFilter, genericAssayDataBinCountFilter.getGenericAssayDataBinFilters());
filteredClinicalDataCounts = studyViewColumnarService.getGenericAssayDataBinCounts(studyViewFilter, genericAssayDataBinCountFilter.getGenericAssayDataBinFilters());
attributeDatatypeMap = Collections.emptyMap();
}
default -> {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -152,10 +152,12 @@
<!-- Apply Generic Assay Data Filter -->
<if test="studyViewFilterHelper.categorizedGenericAssayDataCountFilter.getSampleNumericalGenericAssayDataFilters() != null and !studyViewFilterHelper.categorizedGenericAssayDataCountFilter.getSampleNumericalGenericAssayDataFilters().isEmpty()">
<foreach item="genericAssayDataFilter" collection="studyViewFilterHelper.categorizedGenericAssayDataCountFilter.getSampleNumericalGenericAssayDataFilters()" open="INTERSECT" separator="INTERSECT">
<include refid="numericalGenericAssayDataCountFilter">
(
<include refid="numericalGenericAssayDataFilter">
<property name="unique_id" value="sample_unique_id"/>
<property name="table_name" value="generic_assay_data_derived"/>
</include>
)
</foreach>
</if>
<if test="studyViewFilterHelper.categorizedGenericAssayDataCountFilter.getSampleCategoricalGenericAssayDataFilters() != null and !studyViewFilterHelper.categorizedGenericAssayDataCountFilter.getSampleCategoricalGenericAssayDataFilters().isEmpty()">
Expand Down Expand Up @@ -469,59 +471,100 @@
</foreach>
</sql>

<sql id="selectAllNumericalGenericAssays">
<!-- Reusable fragment: selects the (sample_unique_id, value) pairs from generic_assay_data_derived
whose profile type and entity stable id match the genericAssayDataFilter referenced below.
Both filter values are passed as bound parameters. -->
SELECT sample_unique_id, value
FROM generic_assay_data_derived
WHERE profile_type = #{genericAssayDataFilter.profileType}
AND entity_stable_id = #{genericAssayDataFilter.stableId}
<!-- This should cover every numerical data type; currently the only one is LIMIT-VALUE. -->
AND datatype = 'LIMIT-VALUE'
</sql>

<!-- TODO: update the database schema to include the data_type column -->
<sql id="numericalGenericAssayDataCountFilter">
SELECT ${unique_id}
FROM ${table_name}
WHERE entity_stable_id = '${genericAssayDataFilter.stableId}' AND
profile_type='${genericAssayDataFilter.profileType}'
<foreach item="dataFilterValue" collection="genericAssayDataFilter.values" open=" AND ((" separator=") OR (" close="))">
<trim prefix="" prefixOverrides="AND">
<if test="dataFilterValue.value eq 'NA'">
AND
<include refid="isAttributeValueNA">
<property name="attribute_value" value="value"/>
</include>
</if>
<if test="dataFilterValue.start != null and dataFilterValue.end == null">
AND match(value, '^>?=?[-+]?[0-9]*[.,]?[0-9]+$')
</if>
<if test="dataFilterValue.start == null and dataFilterValue.end != null">
AND match(value, '^&lt;?=?[-+]?[0-9]*[.,]?[0-9]+$')
</if>
<if test="dataFilterValue.start != null and dataFilterValue.end != null">
AND match(value, '^[-+]?[0-9]*[.,]?[0-9]+$')
</if>
<if test="dataFilterValue.start != null or dataFilterValue.end != null">
<choose>
<when test="dataFilterValue.start == dataFilterValue.end">
AND abs(
minus(
<include refid="castStringValueToFloat">
<property name="attribute_value" value="value"/>
</include>,
${dataFilterValue.start}
)
) &lt; exp(-11)
</when>
<otherwise>
<if test="dataFilterValue.start != null">
AND
<include refid="castStringValueToFloat">
<property name="attribute_value" value="value"/>
</include> &gt; ${dataFilterValue.start}
</if>
<if test="dataFilterValue.end != null">
AND
<sql id="numericalGenericAssayDataFilter">
<!-- check if 'NA' is selected -->
<bind name="userSelectsNA" value="false" />
<bind name="userSelectsNumericalValue" value="false" />
<foreach item="dataFilterValue" collection="genericAssayDataFilter.values">
<choose>
<when test="dataFilterValue.value == 'NA'">
<bind name="userSelectsNA" value="true" />
</when>
<otherwise>
<bind name="userSelectsNumericalValue" value="true" />
</otherwise>
</choose>
</foreach>
<!-- if 'NA' is selected, prepare NA samples -->
<if test="userSelectsNA">
SELECT DISTINCT sd.sample_unique_id
FROM sample_derived sd
LEFT JOIN (<include refid="selectAllNumericalGenericAssays"/>) AS generic_numerical_query ON sd.sample_unique_id = generic_numerical_query.sample_unique_id
WHERE value IS null OR
<include refid="normalizeAttributeValue">
<property name="attribute_value" value="value"/>
</include> = 'NA'
</if>
<!-- if both 'NA' and non-NA are selected, union them together -->
<if test="userSelectsNA and userSelectsNumericalValue">
UNION ALL
</if>
<!-- if non-NA is selected, prepare non-NA samples -->
<if test="userSelectsNumericalValue">
SELECT DISTINCT sample_unique_id
FROM (<include refid="selectAllNumericalGenericAssays"/>) AS generic_numerical_query
WHERE
<include refid="normalizeAttributeValue">
<property name="attribute_value" value="value"/>
</include> != 'NA'
<foreach item="dataFilterValue" collection="genericAssayDataFilter.values" open=" AND ((" separator=") OR (" close="))">
<trim prefix="" prefixOverrides="AND">
<if test="dataFilterValue.value eq 'NA'">
AND
<include refid="isAttributeValueNA">
<property name="attribute_value" value="value"/>
</include>
</if>
<if test="dataFilterValue.start != null and dataFilterValue.end == null">
AND match(value, '^>?=?[-+]?[0-9]*[.,]?[0-9]+$')
</if>
<if test="dataFilterValue.start == null and dataFilterValue.end != null">
AND match(value, '^&lt;?=?[-+]?[0-9]*[.,]?[0-9]+$')
</if>
<if test="dataFilterValue.start != null and dataFilterValue.end != null">
AND match(value, '^[-+]?[0-9]*[.,]?[0-9]+$')
</if>
<if test="dataFilterValue.start != null or dataFilterValue.end != null">
<choose>
<when test="dataFilterValue.start == dataFilterValue.end">
AND abs(
minus(
<include refid="castStringValueToFloat">
<property name="attribute_value" value="value"/>
</include> &lt;= ${dataFilterValue.end}
</if>
</otherwise>
</choose>
</if>
</trim>
</foreach>
</include>,
${dataFilterValue.start}
)
) &lt; exp(-11)
</when>
<otherwise>
<if test="dataFilterValue.start != null">
AND
<include refid="castStringValueToFloat">
<property name="attribute_value" value="value"/>
</include> &gt; ${dataFilterValue.start}
</if>
<if test="dataFilterValue.end != null">
AND
<include refid="castStringValueToFloat">
<property name="attribute_value" value="value"/>
</include> &lt;= ${dataFilterValue.end}
</if>
</otherwise>
</choose>
</if>
</trim>
</foreach>
</if>
</sql>

<sql id="categoricalGenericAssayDataCountFilter">
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -622,24 +622,46 @@
</select>

<select id="getGenericAssayDataBinCounts" resultType="org.cbioportal.model.ClinicalDataCount">
<bind name="profileType" value="genericAssayDataBinFilters[0].profileType" />
<!-- Get all samples with non-NA values. Caveat: if the user selects only 'NA', this query returns an empty (null) result, so the two coalesce() calls below are needed to handle that case. -->
WITH generic_assay_query AS (
SELECT
concat(entity_stable_id, profile_type) AS attributeId,
<include refid="normalizeAttributeValue">
<property name="attribute_value" value="value"/>
</include> AS value,
cast(count(value) as INTEGER) AS count
FROM generic_assay_data_derived
<where>
<!-- Need to ensure no NA values -->
<include refid="normalizeAttributeValue">
<property name="attribute_value" value="value"/>
</include> != 'NA' AND
profile_type = #{profileType} AND
<include refid="applyStudyViewFilter">
<property name="filter_type" value="'SAMPLE_ID_ONLY'"/>
</include>
<foreach item="genericAssayDataBinFilter" collection="genericAssayDataBinFilters" open=" AND (" separator=" OR " close=")">
entity_stable_id = #{genericAssayDataBinFilter.stableId}
</foreach>
</where>
GROUP BY entity_stable_id, profile_type, value
),
generic_assay_sum AS (
SELECT
attributeId,
sum(count) as generic_assay_count
FROM generic_assay_query
GROUP BY attributeId
)
SELECT * FROM generic_assay_query
UNION ALL
<!-- The NA count is calculated specially, as the total sample count minus the non-NA count. The two
coalesce() calls are here in case the non-NA subquery returns no rows, so that we can still provide the properties needed to construct the target object. -->
SELECT
concat(entity_stable_id, profile_type) AS attributeId,
<include refid="normalizeAttributeValue">
<property name="attribute_value" value="value"/>
</include> AS value,
count(value) AS count
FROM generic_assay_data_derived
<where>
<include refid="applyStudyViewFilter">
<property name="filter_type" value="'SAMPLE_ID_ONLY'"/>
</include>
AND concat(entity_stable_id, profile_type) IN
<foreach item="attributeId" collection="attributeIds" open="(" separator="," close=")">
#{attributeId}
</foreach>
</where>
GROUP BY entity_stable_id, profile_type,
value
coalesce((SELECT attributeId FROM generic_assay_sum LIMIT 1), concat(#{genericAssayDataBinFilters[0].stableId}, #{profileType})) as attributeId,
'NA' as value,
cast(((SELECT * FROM (<include refid="getTotalSampleCount"/>)) - coalesce((SELECT generic_assay_count FROM generic_assay_sum LIMIT 1), 0)) as INTEGER) as count
</select>

<select id="getGenericAssayProfiles" resultType="org.cbioportal.model.MolecularProfile">
Expand Down
Loading

0 comments on commit fb8eaac

Please sign in to comment.