import os
import sys

# Resolve everything relative to this file so the test behaves the same no
# matter which directory the test runner is launched from.
_TEST_DIR = os.path.dirname(os.path.abspath(__file__))

sys.path.insert(
    0, os.path.join(_TEST_DIR, os.pardir, "tools", "search_single_spectrum")
)


def test():
    """Smoke-test ``xic_masst.process_masst_xic`` on the bundled MASST result table.

    The function under test downloads every referenced data file from
    ``ftp://massive.ucsd.edu/``, so this test needs network access.
    """
    # Imported lazily: the module only becomes importable after the
    # sys.path tweak above, and it pulls in pymzml.
    import xic_masst

    input_path = os.path.join(
        _TEST_DIR,
        "test_data",
        "SEARCH_SINGLE_SPECTRUM-7e961554-view_all_spectra_datasets_matched-main.tsv",
    )
    new_record_df = xic_masst.process_masst_xic(input_path)

    print(new_record_df)

    # Every processed record is annotated with an "xic_sum" entry.
    assert not new_record_df.empty
    assert "xic_sum" in new_record_df.columns
frozen|ethanol-water (19:1)|food|food_source_animal|G74022|not applicable|not applicable|not applicable|MSV000083010_G74022|2018 +131379 MSV000083010 f.MSV000083010/ccms_peak/Samples/G74022_5x_BG11_01_18395.mzML 570 GNPS - GFOP - Food - Beverage - Data set 1|not applicable|not applicable|reverse phase (C18)|not applicable|United States of America|not applicable|not applicable|not specified|not applicable|not applicable|sulfamethazine;sulfadimethoxine|electrospray ionization (positive)|not specified|not applicable|impact HD|MS:1002667|not specified|not specified|solid material, frozen|ethanol-water (19:1)|food|food_source_animal|G74022|not applicable|not applicable|not applicable|MSV000083010_G74022|2018 +132950 MSV000082074 f.MSV000082074/ccms_peak/Samples/G74022_5x_BG11_01_18395.mzML 574 +132950 MSV000082074 f.MSV000082074/ccms_peak/Samples/G74022_5x_BG11_01_18395.mzML 570 +18313 MSV000081936 f.MSV000081936/ccms_peak/animal/G74022_5x_BG11_01_18395.mzML 574 +18313 MSV000081936 f.MSV000081936/ccms_peak/animal/G74022_5x_BG11_01_18395.mzML 570 +5636 MSV000081657 f.MSV000081657/ccms_peak/data_mzxml/Samples/G74022_5x_BG11_01_18395.mzML 574 +5636 MSV000081657 f.MSV000081657/ccms_peak/data_mzxml/Samples/G74022_5x_BG11_01_18395.mzML 570 +3558643 MSV000080673 f.MSV000080673/ccms_peak/2017.AmericanGut3K.mzXMLfiles/Samples/000036206_RC2_01_5354.mzML 763 GNPS_AmericanGut3K_dataset|14|female|reverse phase (C18)|no disease reported|United States of America|disease NOS|disease NOS|5.8|unhealthy (NOS)|Urban|sulfamethizole;sulfachloropyridazine|electrospray ionization (positive)|40.7|-74|Adolescence (8 yrs < x <= 18 yrs)|impact HD|MS:1002667|9606|Homo sapiens|10/10/2015 13:15|swabs, dry|ethanol-water (9:1)|animal|biofluid|36206|not applicable|feces|UBERON:0001988|MSV000080673_36206|2017 +3558643 MSV000080673 f.MSV000080673/ccms_peak/2017.AmericanGut3K.mzXMLfiles/Samples/000036206_RC2_01_5354.mzML 760 GNPS_AmericanGut3K_dataset|14|female|reverse phase (C18)|no disease 
reported|United States of America|disease NOS|disease NOS|5.8|unhealthy (NOS)|Urban|sulfamethizole;sulfachloropyridazine|electrospray ionization (positive)|40.7|-74|Adolescence (8 yrs < x <= 18 yrs)|impact HD|MS:1002667|9606|Homo sapiens|10/10/2015 13:15|swabs, dry|ethanol-water (9:1)|animal|biofluid|36206|not applicable|feces|UBERON:0001988|MSV000080673_36206|2017 +3558643 MSV000080673 f.MSV000080673/ccms_peak/2017.AmericanGut3K.mzXMLfiles/Samples/000011075_RH2_01_6313.mzML 845 GNPS_AmericanGut3K_dataset|44|female|reverse phase (C18)|no disease reported|United States of America|disease NOS|disease NOS|17.3|unhealthy (NOS)|Urban|sulfamethizole;sulfachloropyridazine|electrospray ionization (positive)|37|-122|Early Adulthood (18 yrs < x <= 45 yrs)|impact HD|MS:1002667|9606|Homo sapiens|1/4/2016 9:00|swabs, dry|ethanol-water (9:1)|animal|biofluid|11075|not applicable|feces|UBERON:0001988|MSV000080673_11075|2017 +3558643 MSV000080673 f.MSV000080673/ccms_peak/2017.AmericanGut3K.mzXMLfiles/Samples/000011075_RH2_01_6313.mzML 840 GNPS_AmericanGut3K_dataset|44|female|reverse phase (C18)|no disease reported|United States of America|disease NOS|disease NOS|17.3|unhealthy (NOS)|Urban|sulfamethizole;sulfachloropyridazine|electrospray ionization (positive)|37|-122|Early Adulthood (18 yrs < x <= 45 yrs)|impact HD|MS:1002667|9606|Homo sapiens|1/4/2016 9:00|swabs, dry|ethanol-water (9:1)|animal|biofluid|11075|not applicable|feces|UBERON:0001988|MSV000080673_11075|2017 +22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn15_CL21_F11.mzXML 721 +22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn15_CL21_F07.mzXML 749 +22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn15_CL21_F07.mzXML 709 +22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn12_CL21_F03.mzXML 753 +22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn12_CL21_F03.mzXML 711 +22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn12_CL21_F03.mzXML 673 +22273 MSV000079888 
f.MSV000079888/ccms_peak/mzXML/Corn12_CL21_E11.mzXML 730 +22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn12_CL21_E11.mzXML 691 +22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn11_CL21_E07.mzXML 705 +22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn11_CL21_E03.mzXML 737 +22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn11_CL21_E03.mzXML 699 +22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn10_Bsubt_D11.mzXML 753 +22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn10_Bsubt_D11.mzXML 713 +22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn10_Bsubt_D11.mzXML 675 +22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn10_Bsubt_D08.mzXML 726 +22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn10_Bsubt_D07.mzXML 744 +22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn10_Bsubt_D07.mzXML 700 +22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn08_Bsubt_D03.mzXML 728 +22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn08_Bsubt_D03.mzXML 688 +22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn08_Bsubt_C11.mzXML 737 +22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn08_Bsubt_C11.mzXML 698 +22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn05_control_B11.mzXML 697 +22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn05_control_B07.mzXML 728 +22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn05_control_B07.mzXML 683 +22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn04_control_B03.mzXML 733 +22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn04_control_B03.mzXML 693 +22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn04_control_B02.mzXML 699 +22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn02_control_A07.mzXML 706 +22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn02_control_A07.mzXML 666 +22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn02_control_A03.mzXML 733 +22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn02_control_A03.mzXML 692 +22273 MSV000079888 
def _download_data_file(remote_filename):
    """Fetch one data file from the MassIVE FTP server into the working directory.

    Args:
        remote_filename: value of the table's ``filename`` column; its first
            two characters (the ``f.`` prefix in the test data) are dropped
            to build the FTP path.

    Returns:
        The local path (the remote file's basename) the data was written to.
    """
    ftp_url = "ftp://massive.ucsd.edu/" + remote_filename[2:]
    local_filename = os.path.basename(remote_filename)
    with closing(request.urlopen(ftp_url)) as response:
        with open(local_filename, 'wb') as out_file:
            shutil.copyfileobj(response, out_file)
    return local_filename


def _find_precursor(local_filename, target_scan):
    """Look up the scan whose ID equals ``target_scan``.

    Returns:
        ``(precursor_mz, precursor_i, rt_minutes)`` taken from the first
        selected precursor of the matching spectrum, or ``None`` when the
        scan is absent or lists no selected precursor.
    """
    wanted_id = str(target_scan)
    for spectrum in pymzml.run.Reader(local_filename):
        if str(spectrum.ID) != wanted_id:
            continue
        precursors = spectrum.selected_precursors
        if not precursors:
            return None
        first = precursors[0]
        # NOTE(review): "i" is read with .get() on the assumption that some
        # files do not report a precursor intensity — confirm against pymzml.
        return first["mz"], first.get("i"), spectrum.scan_time_in_minutes()
    return None


def _sum_xic(local_filename, target_mz, center_rt, rt_window=0.1):
    """Sum the intensities of MS1 peaks matching ``target_mz`` around ``center_rt``.

    Only spectra whose scan time (minutes) lies within
    ``center_rt +/- rt_window`` are considered.
    """
    lower_rt = center_rt - rt_window
    upper_rt = center_rt + rt_window
    run = pymzml.run.Reader(local_filename, MS_precisions={1: 5e-6, 2: 20e-6})

    total = 0
    for spectrum in run:
        spectrum_rt = float(spectrum.scan_time_in_minutes())
        if spectrum_rt < lower_rt or spectrum_rt > upper_rt:
            continue
        if spectrum.ms_level != 1:
            continue
        # has_peak yields (mz, intensity) pairs; only the intensity belongs
        # in the chromatogram sum.
        total += sum(intensity for _mz, intensity in spectrum.has_peak(target_mz))
    return total


def process_masst_xic(input_filename):
    """Annotate each row of a MASST match table with precursor info and an XIC sum.

    For every row the referenced data file is downloaded, the scan named by
    ``filescan`` is located, and the intensities of matching MS1 peaks within
    0.1 minutes of that scan are summed.  The downloaded file is deleted
    again afterwards, even when processing fails.

    Args:
        input_filename: path to a tab-separated table with at least the
            columns ``filename`` and ``filescan``.

    Returns:
        A ``pandas.DataFrame`` with one row per input row, extended by the
        columns ``precursor_mz``, ``precursor_i``, ``rt`` and ``xic_sum``.
        These are ``None`` for rows whose scan could not be found.

    Raises:
        Whatever ``urllib.request.urlopen`` raises when a download fails.
    """
    df = pd.read_csv(input_filename, sep="\t")

    ms2_records = df.to_dict(orient="records")
    for record in ms2_records:
        local_filename = os.path.basename(record["filename"])
        try:
            # Making data file available
            _download_data_file(record["filename"])

            # Finding the scan
            precursor = _find_precursor(local_filename, record["filescan"])
            if precursor is None:
                # Keep the row so the output stays aligned with the input,
                # but mark it as unresolved instead of raising KeyError.
                record["precursor_mz"] = None
                record["precursor_i"] = None
                record["rt"] = None
                record["xic_sum"] = None
            else:
                record["precursor_mz"], record["precursor_i"], record["rt"] = precursor

                # Perform XIC
                record["xic_sum"] = _sum_xic(
                    local_filename, record["precursor_mz"], record["rt"]
                )

            print(record)
        finally:
            # Never leave downloaded data behind, even on failure.
            if os.path.exists(local_filename):
                os.remove(local_filename)

    return pd.DataFrame(ms2_records)


def main():
    """
    Demonstration of the extraction of a specific ion chromatogram, i.e. XIC or EIC

    All intensities and m/z values for a target m/z are extracted from the
    MS1 spectra of an example file and printed as a tab-separated table.

    usage:

        ./xic_masst.py

    """

    example_file = os.path.join(
        os.path.dirname(__file__), os.pardir, "tests", "data", "example.mzML"
    )
    run = pymzml.run.Reader(example_file)
    time_dependent_intensities = []

    MZ_2_FOLLOW = 70.06575775

    for spectrum in run:
        if spectrum.ms_level != 1:
            continue
        # An empty match list simply contributes no rows.
        for mz, intensity in spectrum.has_peak(MZ_2_FOLLOW):
            time_dependent_intensities.append(
                [spectrum.scan_time_in_minutes(), intensity, mz]
            )
    print("RT \ti \tmz")
    for rt, i, mz in time_dependent_intensities:
        print("{0:5.3f}\t{1:13.4f}\t{2:10}".format(rt, i, mz))


if __name__ == "__main__":
    main()