From 75d7334472819d1442d51085b922bd915baddd4b Mon Sep 17 00:00:00 2001 From: cjbas22 <35705452+cjbas22@users.noreply.github.com> Date: Thu, 7 Apr 2022 14:59:33 -0600 Subject: [PATCH 1/5] Fixing the get_pmcode error This commit addresses issue #17 --- dataretrieval/nwis.py | 50 +++++++++++++++++++++++++++++++------------ 1 file changed, 36 insertions(+), 14 deletions(-) diff --git a/dataretrieval/nwis.py b/dataretrieval/nwis.py index 3f75f22..6a8d7da 100644 --- a/dataretrieval/nwis.py +++ b/dataretrieval/nwis.py @@ -12,7 +12,7 @@ import re from dataretrieval.utils import to_str, format_datetime, update_merge, set_metadata as set_md -from .utils import query +from utils import query WATERDATA_BASE_URL = 'https://nwis.waterdata.usgs.gov/' WATERDATA_URL = WATERDATA_BASE_URL + 'nwis/' @@ -436,9 +436,9 @@ def _iv(**kwargs): return _read_json(response.json()), _set_metadata(response, **kwargs) -def get_pmcodes(parameterCd='All', **kwargs): +def get_pmcodes(parameterCd = None, parameterNm = None): """ - Return a DataFrame containing all NWIS parameter codes. + Return a DataFrame containing NWIS parameter codes. Note: NWIS may return incorrect column names. Rename them with @@ -447,21 +447,43 @@ def get_pmcodes(parameterCd='All', **kwargs): Parameters (Additional parameters, if supplied, will be used as query parameters). ---------- parameterCd: string or listlike + parameterNm: string Returns: DataFrame containing the USGS parameter codes and Metadata as tuple """ - payload = {'radio_pm_search' : 'pm_search', - 'pm_group' : 'All+--+include+all+parameter+groups', - 'pm_search' : parameterCd, - 'casrn_search' : None, - 'srsname_search' : None, - 'show' : ['parameter_group_nm', 'casrn', 'srsname','parameter_units', 'parameter_nm'], - 'format' : 'rdb'} + url = 'https://help.waterdata.usgs.gov/code/parameter_cd_nm_query?' - payload.update(kwargs) - url = WATERDATA_URL + 'pmcodes/pmcodes' + if parameterCd is None and parameterNm is None: + raise TypeError('Query must specify a parameter code (parameterCd = ) or name (parameterNm = )') + + if parameterCd is not None and parameterNm is not None: + raise TypeError('Query must specify a parameter name or number, not both)') + + if parameterNm is None and parameterCd is not None: # querying based on a parameter code or list of codes + if isinstance(parameterCd, str): # when a single code is given + if parameterCd.lower() == "all": # if querying ALL a different url is needed + payload = {'fmt':'rdb', 'group_cd':'%'} + url = "https://help.waterdata.usgs.gov/code/parameter_cd_query?" + else: # this is for querying a single parameter + payload = {'parm_nm_cd':parameterCd,'fmt':'rdb'} + if isinstance(parameterCd, list): # Querying with a list of parameters + l = [] + for param in parameterCd: + payload = {'parm_nm_cd':param,'fmt':'rdb'} + response = query(url, payload) + if len(response.text.splitlines()) < 10: # empty query + raise TypeError('One of the parameter codes used is not valid, please try a different value') + l.append(_read_rdb(response.text)) + return pd.concat(l), _set_metadata(response) + + if parameterNm is not None and parameterCd is None: # querying based on a parameter name + parameterNm ='%{0}%'.format(parameterNm) # update to include partial matches + payload = {'parm_nm_cd':parameterNm,'fmt':'rdb'} + response = query(url, payload) - return _read_rdb(response.text), _set_metadata(response, **kwargs) + if len(response.text.splitlines()) < 10: # empty query + return ('The parameter code used is not valid, please try a different value') + return _read_rdb(response.text), _set_metadata(response) def get_water_use(years="ALL", state=None, counties="ALL", categories="ALL"): @@ -713,7 +735,7 @@ def _read_rdb(rdb): break fields = re.split("[,\t]", rdb.splitlines()[count]) - dtypes = {'site_no': str, 'dec_long_va': float, 'dec_lat_va': float} + dtypes = {'site_no': str, 'dec_long_va': float, 'dec_lat_va': float, 'parm_cd': str, 'parameter_cd':str} df = pd.read_csv(StringIO(rdb), delimiter='\t', skiprows=count + 2, names=fields, na_values='NaN', dtype=dtypes) From 2472781ed53cf5416ba4bb820a6f418792499155 Mon Sep 17 00:00:00 2001 From: cjbas22 <35705452+cjbas22@users.noreply.github.com> Date: Mon, 11 Apr 2022 16:14:09 -0600 Subject: [PATCH 2/5] Update nwis.py --- dataretrieval/nwis.py | 72 ++++++++++++++++++++++--------------------- 1 file changed, 37 insertions(+), 35 deletions(-) diff --git a/dataretrieval/nwis.py b/dataretrieval/nwis.py index 6a8d7da..29b8d76 100644 --- a/dataretrieval/nwis.py +++ b/dataretrieval/nwis.py @@ -17,6 +17,8 @@ WATERDATA_BASE_URL = 'https://nwis.waterdata.usgs.gov/' WATERDATA_URL = WATERDATA_BASE_URL + 'nwis/' WATERSERVICE_URL = 'https://waterservices.usgs.gov/nwis/' +PARAMCODES_URL = 'https://help.waterdata.usgs.gov/code/parameter_cd_nm_query?' +ALLPARAMCODES_URL ="https://help.waterdata.usgs.gov/code/parameter_cd_query?" WATERSERVICES_SERVICES = ['dv', 'iv', 'site', 'stat', 'gwlevels'] WATERDATA_SERVICES = ['qwdata', 'measurements', 'peaks', 'pmcodes', 'water_use', 'ratings'] @@ -436,53 +438,53 @@ def _iv(**kwargs): return _read_json(response.json()), _set_metadata(response, **kwargs) -def get_pmcodes(parameterCd = None, parameterNm = None): +def get_pmcodes(parameterInfo = 'all', partial = False): """ - Return a DataFrame containing NWIS parameter codes. + Return a DataFrame containing NWIS parameter code information. - Note: NWIS may return incorrect column names. Rename them with - - >>> df.rename(columns={key:value}) - - Parameters (Additional parameters, if supplied, will be used as query parameters). + parameterInfo accepts parameter codes or names. ---------- - parameterCd: string or listlike - parameterNm: string + parameterInfo: string or listlike + partial: default is False, can take any value. If a value is provided the function will query including partial matches + Returns: DataFrame containing the USGS parameter codes and Metadata as tuple """ - url = 'https://help.waterdata.usgs.gov/code/parameter_cd_nm_query?' + url = PARAMCODES_URL + payload = {'fmt':'rdb'} - if parameterCd is None and parameterNm is None: - raise TypeError('Query must specify a parameter code (parameterCd = ) or name (parameterNm = )') + if parameterInfo is None: + raise TypeError('Query must include a parameter name or code') - if parameterCd is not None and parameterNm is not None: - raise TypeError('Query must specify a parameter name or number, not both)') - - if parameterNm is None and parameterCd is not None: # querying based on a parameter code or list of codes - if isinstance(parameterCd, str): # when a single code is given - if parameterCd.lower() == "all": # if querying ALL a different url is needed - payload = {'fmt':'rdb', 'group_cd':'%'} - url = "https://help.waterdata.usgs.gov/code/parameter_cd_query?" - else: # this is for querying a single parameter - payload = {'parm_nm_cd':parameterCd,'fmt':'rdb'} - if isinstance(parameterCd, list): # Querying with a list of parameters + else: + if isinstance(parameterInfo, str): # when a single code or name is given + if parameterInfo.lower() == "all": # Querying all parameters (this is also the default) + payload.update({'group_cd':'%'}) + url = ALLPARAMCODES_URL + else: # this is for querying with a single parameter code or name + if partial: + parameterInfo ='%{0}%'.format(parameterInfo) + payload.update({'parm_nm_cd':parameterInfo}) + elif isinstance(parameterInfo, list): # Querying with a list of parameters names, codes, or mixed l = [] - for param in parameterCd: - payload = {'parm_nm_cd':param,'fmt':'rdb'} - response = query(url, payload) - if len(response.text.splitlines()) < 10: # empty query - raise TypeError('One of the parameter codes used is not valid, please try a different value') - l.append(_read_rdb(response.text)) + for param in parameterInfo: + if isinstance(param, str): + if partial: + param ='%{0}%'.format(param) + payload.update({'parm_nm_cd':param}) + response = query(url, payload) + if len(response.text.splitlines()) < 10: # empty query + raise TypeError('One of the parameter codes or names entered does not return any information, please try a different value') + l.append(_read_rdb(response.text)) + else: + raise TypeError('Parameter information (code or name) must be type string or list') return pd.concat(l), _set_metadata(response) - - if parameterNm is not None and parameterCd is None: # querying based on a parameter name - parameterNm ='%{0}%'.format(parameterNm) # update to include partial matches - payload = {'parm_nm_cd':parameterNm,'fmt':'rdb'} - + else: + raise TypeError('Parameter information (code or name) must be type string or list') + response = query(url, payload) if len(response.text.splitlines()) < 10: # empty query - return ('The parameter code used is not valid, please try a different value') + raise TypeError('The parameter information entered does not return any information, please try a different value. Consider using partial = True to enlarge query results') return _read_rdb(response.text), _set_metadata(response) From f78a68a1799e5d4b330142fe0e06810fe19c06c6 Mon Sep 17 00:00:00 2001 From: cjbas22 <35705452+cjbas22@users.noreply.github.com> Date: Mon, 11 Apr 2022 16:20:42 -0600 Subject: [PATCH 3/5] Update nwis.py --- dataretrieval/nwis.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dataretrieval/nwis.py b/dataretrieval/nwis.py index 29b8d76..9dcb7bd 100644 --- a/dataretrieval/nwis.py +++ b/dataretrieval/nwis.py @@ -18,7 +18,7 @@ WATERDATA_URL = WATERDATA_BASE_URL + 'nwis/' WATERSERVICE_URL = 'https://waterservices.usgs.gov/nwis/' PARAMCODES_URL = 'https://help.waterdata.usgs.gov/code/parameter_cd_nm_query?' -ALLPARAMCODES_URL ="https://help.waterdata.usgs.gov/code/parameter_cd_query?" +ALLPARAMCODES_URL = 'https://help.waterdata.usgs.gov/code/parameter_cd_query?' WATERSERVICES_SERVICES = ['dv', 'iv', 'site', 'stat', 'gwlevels'] WATERDATA_SERVICES = ['qwdata', 'measurements', 'peaks', 'pmcodes', 'water_use', 'ratings'] @@ -445,7 +445,7 @@ def get_pmcodes(parameterInfo = 'all', partial = False): parameterInfo accepts parameter codes or names. ---------- parameterInfo: string or listlike - partial: default is False, can take any value. If a value is provided the function will query including partial matches + partial: default is False, can take any value. If a value is provided the function will query partial matches Returns: DataFrame containing the USGS parameter codes and Metadata as tuple @@ -454,7 +454,7 @@ def get_pmcodes(parameterInfo = 'all', partial = False): payload = {'fmt':'rdb'} if parameterInfo is None: - raise TypeError('Query must include a parameter name or code') + raise TypeError('The query must include a parameter name or code') else: if isinstance(parameterInfo, str): # when a single code or name is given @@ -474,7 +474,7 @@ def get_pmcodes(parameterInfo = 'all', partial = False): payload.update({'parm_nm_cd':param}) response = query(url, payload) if len(response.text.splitlines()) < 10: # empty query - raise TypeError('One of the parameter codes or names entered does not return any information, please try a different value') + raise TypeError('One of the parameter code or name entered does not return any information, please try a different value') l.append(_read_rdb(response.text)) else: raise TypeError('Parameter information (code or name) must be type string or list') @@ -484,7 +484,7 @@ def get_pmcodes(parameterInfo = 'all', partial = False): response = query(url, payload) if len(response.text.splitlines()) < 10: # empty query - raise TypeError('The parameter information entered does not return any information, please try a different value. Consider using partial = True to enlarge query results') + raise TypeError('The query does not return any information, please try a different code or name. Consider using partial = True to enlarge query results') return _read_rdb(response.text), _set_metadata(response) From 3b29a5f5919da412640a7a0aac754c9f21e0a6ec Mon Sep 17 00:00:00 2001 From: cjbas22 <35705452+cjbas22@users.noreply.github.com> Date: Mon, 2 May 2022 08:14:39 -0600 Subject: [PATCH 4/5] Update waterwatch.py --- dataretrieval/waterwatch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dataretrieval/waterwatch.py b/dataretrieval/waterwatch.py index a3410b4..d7c9485 100644 --- a/dataretrieval/waterwatch.py +++ b/dataretrieval/waterwatch.py @@ -41,7 +41,7 @@ def get_flood_stage(sites: List[str] = None, fmt: str= "DF") -> Union[pd.DataFra 07144101 None None None None 50057000 16 20 24 30 """ - res = requests.get(url + 'floodstage', params={"format": ResponseFormat}) + res = requests.get(waterwatch_url + 'floodstage', params={"format": ResponseFormat}) if res.ok: json_res = res.json() From 09959bab1805e3cb32968d799baf66596e774cc6 Mon Sep 17 00:00:00 2001 From: cjbas22 <35705452+cjbas22@users.noreply.github.com> Date: Mon, 2 May 2022 09:46:45 -0600 Subject: [PATCH 5/5] Update nwis.py --- dataretrieval/nwis.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dataretrieval/nwis.py b/dataretrieval/nwis.py index 9dcb7bd..266dfda 100644 --- a/dataretrieval/nwis.py +++ b/dataretrieval/nwis.py @@ -11,8 +11,7 @@ from io import StringIO import re -from dataretrieval.utils import to_str, format_datetime, update_merge, set_metadata as set_md -from utils import query +from dataretrieval.utils import to_str, format_datetime, update_merge, set_metadata as set_md, query WATERDATA_BASE_URL = 'https://nwis.waterdata.usgs.gov/' WATERDATA_URL = WATERDATA_BASE_URL + 'nwis/'