Merge pull request #1729 from CartoDB/release/1.2.0

Release/1.2.0
CartoDB · Mar 26, 2021 · bf35d2f · bf35d2f
2 parents 5f49363 + c437d61
commit bf35d2f
Show file tree

Hide file tree

Showing 104 changed files with 6,172 additions and 7,480 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,21 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [1.2.0] - 2021-03-26
+
+### Changed
+- Filter only product entities in subscriptions (#1723, #1728)
+- Dataset describe no longer shows values in scientific notation (#1722)
+- Review and improve SQLClient utilities (#1725)
+
+### Fixed
+- Fix reading timestamptz columns in read_carto (#1720)
+- Avoid renaming geometry if geometry name is already GEOM_COLUMN_NAME (#1726)
+- Fix user_id in metrics (#1727)
+
+### Removed
+- Remove DataObsClient (#1721)
+
 ## [1.1.1] - 2021-02-12
 
 ### Fixed

diff --git a/README.rst b/README.rst
@@ -4,8 +4,8 @@ CARTOframes
 
 .. image:: https://travis-ci.org/CartoDB/cartoframes.svg?branch=develop
     :target: https://travis-ci.org/CartoDB/cartoframes
-.. image:: https://img.shields.io/badge/pypi-v1.1.1-orange
-    :target: https://pypi.org/project/cartoframes/1.1.1
+.. image:: https://img.shields.io/badge/pypi-v1.2.0-orange
+    :target: https://pypi.org/project/cartoframes/1.2.0
 
 A Python package for integrating `CARTO <https://carto.com/>`__ maps, analysis, and data services into data science workflows.
 
@@ -14,11 +14,11 @@ Python data analysis workflows often rely on the de facto standards `pandas <htt
 Try it Out
 ==========
 
-* Stable (1.1.1): |stable|
+* Stable (1.2.0): |stable|
 * Latest (develop branch): |develop|
 
 .. |stable| image:: https://mybinder.org/badge_logo.svg
-    :target: https://mybinder.org/v2/gh/cartodb/cartoframes/v1.1.1?filepath=examples
+    :target: https://mybinder.org/v2/gh/cartodb/cartoframes/v1.2.0?filepath=examples
 
 .. |develop| image:: https://mybinder.org/badge_logo.svg
     :target: https://mybinder.org/v2/gh/cartodb/cartoframes/develop?filepath=examples

diff --git a/binder/requirements.txt b/binder/requirements.txt
@@ -1,4 +1,4 @@
-cartoframes==1.1.1
+cartoframes==1.2.0
 # Additional dependencies from examples
 matplotlib
 dask

diff --git a/cartoframes/_version.py b/cartoframes/_version.py
@@ -1 +1 @@
-__version__ = '1.1.1'
+__version__ = '1.2.0'
diff --git a/cartoframes/auth/credentials.py b/cartoframes/auth/credentials.py
@@ -128,8 +128,7 @@ def user_id(self):
             log.debug('Getting `user_id` for {}'.format(self._username))
 
             try:
-                user_me = self.me_data()
-                user_data = user_me.get('user_data')
+                user_data = self.me_data.get('user_data')
                 if user_data:
                     self._user_id = user_data.get('id')
 

diff --git a/cartoframes/data/clients/__init__.py b/cartoframes/data/clients/__init__.py
@@ -1,7 +1,5 @@
 from .sql_client import SQLClient
-from .data_obs_client import DataObsClient
 
 __all__ = [
-    'SQLClient',
-    'DataObsClient'
+    'SQLClient'
 ]
diff --git a/cartoframes/data/clients/data_obs_client.py b/cartoframes/data/clients/data_obs_client.py
diff --git a/cartoframes/data/clients/sql_client.py b/cartoframes/data/clients/sql_client.py
@@ -1,8 +1,11 @@
 from ...io.managers.context_manager import ContextManager
 
+COLLISION_STRATEGIES = ['fail', 'replace']
+
 
 class SQLClient:
     """SQLClient class is a client to run SQL queries in a CARTO account.
+    It also provides basic SQL utilities for analyzing and managing tables.
 
     Args:
         credentials (:py:class:`Credentials <cartoframes.auth.Credentials>`):
@@ -11,10 +14,6 @@ class SQLClient:
 
     Example:
         >>> sql = SQLClient(credentials)
-        >>> sql.query('SELECT * FROM table_name')
-        >>> sql.execute('DROP TABLE table_name')
-        >>> sql.distinct('table_name', 'column_name')
-        >>> sql.count('table_name')
 
     """
     def __init__(self, credentials=None):
@@ -31,6 +30,9 @@ def query(self, query, verbose=False):
             query (str): SQL query.
             verbose (bool, optional): flag to return all the response. Default False.
 
+        Example:
+            >>> sql.query('SELECT * FROM table_name')
+
         """
         response = self._context_manager.execute_query(query.strip())
         if not verbose:
@@ -47,6 +49,9 @@ def execute(self, query):
         Args:
             query (str): SQL query.
 
+        Example:
+            >>> sql.execute('DROP TABLE table_name')
+
         """
         return self._context_manager.execute_long_running_query(query.strip())
 
@@ -58,6 +63,10 @@ def distinct(self, table_name, column_name):
             table_name (str): name of the table.
             column_name (str): name of the column.
 
+        Example:
+            >>> sql.distinct('table_name', 'column_name')
+            [('value1', 10), ('value2', 5)]
+
         """
         query = '''
             SELECT {0}, COUNT(*) FROM {1}
@@ -72,16 +81,24 @@ def count(self, table_name):
         Args:
             table_name (str): name of the table.
 
+        Example:
+            >>> sql.count('table_name')
+            15
+
         """
         query = 'SELECT COUNT(*) FROM {};'.format(table_name)
         output = self.query(query)
         return output[0].get('count')
 
-    def bounds(self, query):
+    def bounds(self, table_name):
         """Get the bounds of the geometries in a table.
 
         Args:
-            query (str): SQL query containing a "the_geom" column.
+            table_name (str): name of the table containing a "the_geom" column.
+
+        Example:
+            >>> sql.bounds('table_name')
+            [[-1,-1], [1,1]]
 
         """
         query = '''
@@ -90,9 +107,9 @@ def bounds(self, query):
                 ARRAY[st_xmax(geom_env), st_ymax(geom_env)]
             ] bounds FROM (
                 SELECT ST_Extent(the_geom) geom_env
-                FROM ({}) q
+                FROM (SELECT the_geom FROM {}) q
             ) q;
-        '''.format(query)
+        '''.format(table_name)
         output = self.query(query)
         return output[0].get('bounds')
 
@@ -104,11 +121,21 @@ def schema(self, table_name, raw=False):
             raw (bool, optional): return raw dict data if set to True.
                 Default False.
 
+        Example:
+            >>> sql.schema('table_name')
+            Column name          Column type
+            -------------------------------------
+            cartodb_id           number
+            the_geom             geometry
+            the_geom_webmercator geometry
+            column1              string
+            column2              number
+
         """
         query = 'SELECT * FROM {0} LIMIT 0;'.format(table_name)
         output = self.query(query, verbose=True)
         fields = output.get('fields')
-        if raw:
+        if raw is True:
             return {key: fields[key]['type'] for key in fields}
         else:
             columns = ['Column name', 'Column type']
@@ -125,6 +152,14 @@ def describe(self, table_name, column_name):
             table_name (str): name of the table.
             column_name (str): name of the column.
 
+        Example:
+            >>> sql.describe('table_name', 'column_name')
+            count     1.00e+03
+            avg       2.00e+01
+            min       0.00e+00
+            max       5.00e+01
+            type: number
+
         """
         column_type = self._get_column_type(table_name, column_name)
         stats = ['COUNT(*)']
@@ -138,22 +173,33 @@ def describe(self, table_name, column_name):
         '''.format(','.join(stats), table_name)
         output = self.query(query, verbose=True)
         fields = output.get('rows')[0]
-        rows = [(key, '{:0.2e}'.format(fields[key])) for key in fields]
+        rows = [(key, '{:0.2e}'.format(fields[key])) for key in fields if fields[key] is not None]
         self._print_table(rows, padding=[5, 10])
         print('type: {}'.format(column_type))
 
-    def create_table(self, table_name, columns, cartodbfy=True):
+    def create_table(self, table_name, columns_types, if_exists='fail', cartodbfy=True):
         """Create a table with a specific table name and columns.
 
         Args:
             table_name (str): name of the table.
-            column_name (str): name of the column.
+            columns_types (dict): dictionary with the column names and types.
+            if_exists (str, optional): collision strategy if the table already exists in CARTO.
+                Options are 'fail' or 'replace'. Default 'fail'.
             cartodbfy (bool, optional): convert the table to CARTO format.
                 Default True. More info `here
                 <https://carto.com/developers/sql-api/guides/creating-tables/#create-tables>`.
 
+        Example:
+            >>> sql.create_table('table_name', {'column1': 'text', 'column2': 'integer'})
+
         """
-        columns = ','.join(' '.join(x) for x in columns)
+        if not isinstance(columns_types, dict):
+            raise ValueError('The columns_types parameter should be a dictionary of column names and types.')
+
+        if if_exists not in COLLISION_STRATEGIES:
+            raise ValueError('Please provide a valid if_exists value among {}'.format(', '.join(COLLISION_STRATEGIES)))
+
+        columns = ['{0} {1}'.format(cname, ctype) for cname, ctype in columns_types.items()]
         schema = self._context_manager.get_schema()
         query = '''
             BEGIN;
@@ -162,31 +208,29 @@ def create_table(self, table_name, columns, cartodbfy=True):
             {cartodbfy};
             COMMIT;
         '''.format(
-            drop='DROP TABLE IF EXISTS {}'.format(table_name),
-            create='CREATE TABLE {0} ({1})'.format(table_name, columns),
+            drop='DROP TABLE IF EXISTS {}'.format(table_name) if if_exists == 'replace' else '',
+            create='CREATE TABLE {0} ({1})'.format(table_name, ','.join(columns)),
             cartodbfy='SELECT CDB_CartoDBFyTable(\'{0}\', \'{1}\')'.format(
                 schema, table_name) if cartodbfy else ''
         )
-        return self.execute(query)
+        self.execute(query)
 
-    def insert_table(self, table_name, column_names, column_values):
+    def insert_table(self, table_name, columns_values):
         """Insert a row to the table.
 
         Args:
             table_name (str): name of the table.
-            column_names (str, list of str): names of the columns.
-            column_values (str, list of str): values of the columns.
+            columns_values (dict): dictionary with the column names and values.
+        Example:
+            >>> sql.insert_table('table_name', {'column1': ['value1', 'value2'], 'column2': [1, 2]})
 
         """
-        if isinstance(column_names, str):
-            column_names = [column_names]
-        if isinstance(column_values, str):
-            column_names = [column_values]
-        sql_values = [self._sql_format(x) for x in column_values]
+        cnames = columns_values.keys()
+        cvalues = [self._row_values_format(v) for v in zip(*columns_values.values())]
         query = '''
-            INSERT INTO {0} ({1}) VALUES({2});
-        '''.format(table_name, ','.join(column_names), ','.join(sql_values))
-        return self.execute(query)
+            INSERT INTO {0} ({1}) VALUES {2};
+        '''.format(table_name, ','.join(cnames), ','.join(cvalues))
+        self.execute(query)
 
     def update_table(self, table_name, column_name, column_value, condition):
         """Update the column's value for the rows that match the condition.
@@ -197,12 +241,15 @@ def update_table(self, table_name, column_name, column_value, condition):
             column_value (str): value of the column.
             condition (str): "where" condition of the request.
 
+        Example:
+            >>> sql.update_table('table_name', 'column1', 'VALUE1', 'column1=\'value1\'')
+
         """
         value = self._sql_format(column_value)
         query = '''
             UPDATE {0} SET {1}={2} WHERE {3};
         '''.format(table_name, column_name, value, condition)
-        return self.execute(query)
+        self.execute(query)
 
     def rename_table(self, table_name, new_table_name):
         """Rename a table from its table name.
@@ -211,19 +258,25 @@ def rename_table(self, table_name, new_table_name):
             table_name (str): name of the original table.
             new_table_name (str): name of the new table.
 
+        Example:
+            >>> sql.rename_table('table_name', 'table_name2')
+
         """
         query = 'ALTER TABLE {0} RENAME TO {1};'.format(table_name, new_table_name)
-        return self.execute(query)
+        self.execute(query)
 
     def drop_table(self, table_name):
         """Remove a table from its table name.
 
         Args:
             table_name (str): name of the table.
 
+        Example:
+            >>> sql.drop_table('table_name')
+
         """
         query = 'DROP TABLE IF EXISTS {0};'.format(table_name)
-        return self.execute(query)
+        self.execute(query)
 
     def _get_column_type(self, table_name, column_name):
         query = 'SELECT {0} FROM {1} LIMIT 0;'.format(column_name, table_name)
@@ -239,6 +292,9 @@ def _sql_format(self, value):
             return 'TRUE' if value else 'FALSE'
         return str(value)
 
+    def _row_values_format(self, row_values):
+        return '({})'.format(','.join([self._sql_format(value) for value in row_values]))
+
     def _print_table(self, rows, columns=None, padding=None):
         row_format = ''
         index = 0

diff --git a/cartoframes/data/observatory/catalog/catalog.py b/cartoframes/data/observatory/catalog/catalog.py
@@ -286,13 +286,9 @@ def subscriptions(self, credentials=None):
             CatalogError: if there's a problem when connecting to the catalog or no datasets are found.
 
         """
-        _no_filters = {}
         _credentials = get_credentials(credentials)
 
-        return Subscriptions(
-            Dataset.get_all(_no_filters, _credentials),
-            Geography.get_all(_no_filters, _credentials)
-        )
+        return Subscriptions(_credentials)
 
     def datasets_filter(self, filter_dataset):
         """Get all the datasets in the Catalog filtered

diff --git a/cartoframes/data/observatory/catalog/dataset.py b/cartoframes/data/observatory/catalog/dataset.py
@@ -293,11 +293,14 @@ def geom_coverage(self):
         """
         return geom_coverage(self.geography)
 
-    def describe(self):
+    def describe(self, autoformat=True):
         """Shows a summary of the actual stats of the variables (columns) of the dataset.
         Some of the stats provided per variable are: avg, max, min, sum, range,
         stdev, q1, q3, median and interquartile_range
 
+        Args:
+            autoformat (bool, optional): set automatic format for values. Default True.
+
         Returns:
             pandas.DataFrame
 
@@ -317,6 +320,11 @@ def describe(self):
                 # interquartile_range
 
         """
+        FLOAT_FORMAT = 'display.float_format'
+
+        if autoformat:
+            pd.set_option(FLOAT_FORMAT, lambda x: '%.3f' % x)
+
         return dataset_describe(self.variables)
 
     @classmethod