Skip to content

Commit

Permalink
More mokapot/percolator output
Browse files Browse the repository at this point in the history
  • Loading branch information
RalfG committed Aug 16, 2023
1 parent 2ea5000 commit 1f017b2
Show file tree
Hide file tree
Showing 4 changed files with 93 additions and 9 deletions.
9 changes: 8 additions & 1 deletion ms2rescore/package_data/config_default.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,14 @@
},
"maxquant": {}
},
"rescoring_engine": { "mokapot": {} },
"rescoring_engine": {
"mokapot": {
"fasta_file": null,
"write_weights": true,
"write_txt": true,
"write_flashlfq": true
}
},
"config_file": null,
"psm_file": null,
"psm_file_type": "infer",
Expand Down
26 changes: 24 additions & 2 deletions ms2rescore/package_data/config_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
"ms2rescore": {
"description": "General MS²Rescore settings.",
"type": "object",
"required": ["psm_file"],
"additionalProperties": false,
"properties": {
"feature_generators": {
Expand Down Expand Up @@ -173,9 +174,30 @@
"$defs": {
"mokapot": {
"$schema": "#/definitions/rescoring_engine",
"description": "Mokapot rescoring engine configuration",
"description": "Mokapot rescoring engine configuration. Additional properties are passed to the Mokapot brew function.",
"type": "object",
"additionalProperties": true
"additionalProperties": true,
"properties": {
"fasta_file": {
"description": "Path to FASTA file with protein sequences to use for protein inference",
"oneOf": [{ "type": "string" }, { "type": "null" }]
},
"write_weights": {
"description": "Write Mokapot weights to a text file",
"type": "boolean",
"default": false
},
"write_txt": {
"description": "Write Mokapot results to a text file",
"type": "boolean",
"default": false
},
"write_flashlfq": {
"description": "Write Mokapot results to a FlashLFQ-compatible file",
"type": "boolean",
"default": false
}
}
},
"percolator": {
"$schema": "#/definitions/rescoring_engine",
Expand Down
65 changes: 59 additions & 6 deletions ms2rescore/rescoring_engines/mokapot.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
"""Mokapot integration for MS²Rescore."""

import logging
from typing import Any, Dict, List, Optional
from typing import Any, List, Optional, Tuple

import mokapot
import numpy as np
import pandas as pd
import psm_utils
Expand All @@ -19,7 +20,12 @@

def rescore(
psm_list: psm_utils.PSMList,
mokapot_kwargs: Optional[Dict[str, Any]] = None,
output_file_root: str = "ms2rescore",
fasta_file: Optional[str] = None,
write_weights: bool = False,
write_txt: bool = False,
write_flashlfq: bool = False,
**kwargs: Any,
):
"""
Rescore PSMs with Mokapot.
Expand All @@ -28,16 +34,33 @@ def rescore(
----------
psm_list
PSMs to be rescored.
mokapot_kwargs
Additional keyword arguments for Mokapot. Defaults to ``None``.
output_file_root
Root of output file names. Defaults to ``"ms2rescore"``.
fasta_file
Path to FASTA file with protein sequences to use for protein inference. Defaults to
``None``.
write_weights
Write model weights to a text file. Defaults to ``False``.
write_txt
Write Mokapot results to a text file. Defaults to ``False``.
write_flashlfq
Write Mokapot results to a FlashLFQ-compatible file. Defaults to ``False``.
**kwargs
Additional keyword arguments are passed to the Mokapot :py:func:`~mokapot.brew.brew`
function.
"""
# Convert PSMList to Mokapot dataset
feature_names = psm_list[0].rescoring_features.keys()
feature_names = list(psm_list[0].rescoring_features.keys())
lin_psm_data = convert_psm_list(psm_list, feature_names)

# Add proteins
if fasta_file:
proteins = mokapot.read_fasta(fasta_file)
lin_psm_data.add_proteins(proteins)

# Rescore
confidence_results, model = brew(lin_psm_data, **mokapot_kwargs)
confidence_results, models = brew(lin_psm_data, **kwargs)

# Reshape confidence estimates to match PSMList
mokapot_values_targets = (
Expand All @@ -59,6 +82,14 @@ def rescore(
psm_list["qvalue"] = q[:, 1]
psm_list["pep"] = q[:, 2]

# Write results
if write_weights:
save_model_weights(models, feature_names, output_file_root)
if write_txt:
confidence_results.to_txt(file_root=output_file_root, decoys=True)
if write_flashlfq:
confidence_results.to_flashlfq(output_file_root + ".mokapot.flashlfq.txt")


def convert_psm_list(
psm_list: psm_utils.PSMList,
Expand Down Expand Up @@ -129,6 +160,28 @@ def convert_psm_list(
return lin_psm_data


def save_model_weights(
    models: "Tuple[mokapot.model.Model, ...]",
    feature_names: List[str],
    output_file_root: str,
) -> None:
    """
    Save model weights to a tab-separated file.

    The first row of each model's ``estimator.coef_`` is stacked into a table with one
    row per model and one column per feature, which is written to
    ``<output_file_root>.mokapot.weights.tsv``.

    Parameters
    ----------
    models
        Tuple of Mokapot models (one for each fold) to save.
    feature_names
        List of feature names that were used to train the models.
    output_file_root
        Root of output file names.

    Raises
    ------
    AttributeError
        If a model does not expose ``estimator.coef_``, in which case there are no
        weights to write. No file is created in that case.
    """
    # Collect the coefficients first so that a model without weights fails with an
    # actionable message before anything is written to disk.
    try:
        coefficients = np.stack([m.estimator.coef_[0] for m in models])
    except AttributeError as e:
        raise AttributeError(
            "Cannot write Mokapot model weights: model has no accessible "
            "`estimator.coef_` attribute. Weights can only be written for models with "
            "coefficients, such as linear models."
        ) from e

    pd.DataFrame(coefficients, columns=list(feature_names)).to_csv(
        output_file_root + ".mokapot.weights.tsv", sep="\t", index=False
    )


def _mz_to_mass(mz: float, charge: int) -> float:
    """
    Convert an m/z value to a mass.

    The result is ``mz * charge`` minus ``charge`` times the value looked up at
    ``nist_mass["H"][1][0]``.
    """
    # NOTE(review): presumably the mass of hydrogen-1 from the NIST mass table; verify
    # against the source of `nist_mass`.
    hydrogen_mass = nist_mass["H"][1][0]
    charged_mass = mz * charge
    return charged_mass - charge * hydrogen_mass
2 changes: 2 additions & 0 deletions ms2rescore/rescoring_engines/percolator.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ def rescore(
"decoy-results-psms": output_file_root + "_decoy_psms.pout",
"results-peptides": output_file_root + "_target_peptides.pout",
"decoy-results-peptides": output_file_root + "_decoy_peptides.pout",
"results-proteins": output_file_root + "_target_proteins.pout",
"decoy-results-proteins": output_file_root + "_decoy_proteins.pout",
"weights": output_file_root + ".weights",
"verbose": LOG_LEVEL_MAP[log_level],
"num-threads": processes,
Expand Down

0 comments on commit 1f017b2

Please sign in to comment.