Skip to content

Commit

Permalink
Merge pull request #1729 from CartoDB/release/1.2.0
Browse files Browse the repository at this point in the history
Release/1.2.0
  • Loading branch information
Jesus89 authored Mar 26, 2021
2 parents 5f49363 + c437d61 commit bf35d2f
Show file tree
Hide file tree
Showing 104 changed files with 6,172 additions and 7,480 deletions.
15 changes: 15 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,21 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [1.2.0] - 2021-03-26

### Changed
- Filter only product entities in subscriptions (#1723, #1728)
- Dataset describe not in scientific notation (#1722)
- Review and improve SQLClient utilities (#1725)

### Fixed
- Fix timestamptz read carto (#1720)
- Avoid renaming geometry if geometry name is already GEOM_COLUMN_NAME (#1726)
- Fix user_id in metrics (#1727)

### Removed
- Remove DataObsClient (#1721)

## [1.1.1] - 2021-02-12

### Fixed
Expand Down
8 changes: 4 additions & 4 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ CARTOframes

.. image:: https://travis-ci.org/CartoDB/cartoframes.svg?branch=develop
:target: https://travis-ci.org/CartoDB/cartoframes
.. image:: https://img.shields.io/badge/pypi-v1.1.1-orange
:target: https://pypi.org/project/cartoframes/1.1.1
.. image:: https://img.shields.io/badge/pypi-v1.2.0-orange
:target: https://pypi.org/project/cartoframes/1.2.0

A Python package for integrating `CARTO <https://carto.com/>`__ maps, analysis, and data services into data science workflows.

Expand All @@ -14,11 +14,11 @@ Python data analysis workflows often rely on the de facto standards `pandas <htt
Try it Out
==========

* Stable (1.1.1): |stable|
* Stable (1.2.0): |stable|
* Latest (develop branch): |develop|

.. |stable| image:: https://mybinder.org/badge_logo.svg
:target: https://mybinder.org/v2/gh/cartodb/cartoframes/v1.1.1?filepath=examples
:target: https://mybinder.org/v2/gh/cartodb/cartoframes/v1.2.0?filepath=examples

.. |develop| image:: https://mybinder.org/badge_logo.svg
:target: https://mybinder.org/v2/gh/cartodb/cartoframes/develop?filepath=examples
Expand Down
2 changes: 1 addition & 1 deletion binder/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
cartoframes==1.1.1
cartoframes==1.2.0
# Additional dependencies from examples
matplotlib
dask
Expand Down
2 changes: 1 addition & 1 deletion cartoframes/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '1.1.1'
__version__ = '1.2.0'
3 changes: 1 addition & 2 deletions cartoframes/auth/credentials.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,7 @@ def user_id(self):
log.debug('Getting `user_id` for {}'.format(self._username))

try:
user_me = self.me_data()
user_data = user_me.get('user_data')
user_data = self.me_data.get('user_data')
if user_data:
self._user_id = user_data.get('id')

Expand Down
4 changes: 1 addition & 3 deletions cartoframes/data/clients/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
from .sql_client import SQLClient
from .data_obs_client import DataObsClient

__all__ = [
'SQLClient',
'DataObsClient'
'SQLClient'
]
716 changes: 0 additions & 716 deletions cartoframes/data/clients/data_obs_client.py

This file was deleted.

116 changes: 86 additions & 30 deletions cartoframes/data/clients/sql_client.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
from ...io.managers.context_manager import ContextManager

COLLISION_STRATEGIES = ['fail', 'replace']


class SQLClient:
"""SQLClient class is a client to run SQL queries in a CARTO account.
It also provides basic SQL utilities for analyzing and managing tables.
Args:
credentials (:py:class:`Credentials <cartoframes.auth.Credentials>`):
Expand All @@ -11,10 +14,6 @@ class SQLClient:
Example:
>>> sql = SQLClient(credentials)
>>> sql.query('SELECT * FROM table_name')
>>> sql.execute('DROP TABLE table_name')
>>> sql.distinct('table_name', 'column_name')
>>> sql.count('table_name')
"""
def __init__(self, credentials=None):
Expand All @@ -31,6 +30,9 @@ def query(self, query, verbose=False):
query (str): SQL query.
verbose (bool, optional): flag to return the full response. Default False.
Example:
>>> sql.query('SELECT * FROM table_name')
"""
response = self._context_manager.execute_query(query.strip())
if not verbose:
Expand All @@ -47,6 +49,9 @@ def execute(self, query):
Args:
query (str): SQL query.
Example:
>>> sql.execute('DROP TABLE table_name')
"""
return self._context_manager.execute_long_running_query(query.strip())

Expand All @@ -58,6 +63,10 @@ def distinct(self, table_name, column_name):
table_name (str): name of the table.
column_name (str): name of the column.
Example:
>>> sql.distinct('table_name', 'column_name')
[('value1', 10), ('value2', 5)]
"""
query = '''
SELECT {0}, COUNT(*) FROM {1}
Expand All @@ -72,16 +81,24 @@ def count(self, table_name):
Args:
table_name (str): name of the table.
Example:
>>> sql.count('table_name')
15
"""
query = 'SELECT COUNT(*) FROM {};'.format(table_name)
output = self.query(query)
return output[0].get('count')

def bounds(self, query):
def bounds(self, table_name):
"""Get the bounds of the geometries in a table.
Args:
query (str): SQL query containing a "the_geom" column.
table_name (str): name of the table containing a "the_geom" column.
Example:
>>> sql.bounds('table_name')
[[-1,-1], [1,1]]
"""
query = '''
Expand All @@ -90,9 +107,9 @@ def bounds(self, query):
ARRAY[st_xmax(geom_env), st_ymax(geom_env)]
] bounds FROM (
SELECT ST_Extent(the_geom) geom_env
FROM ({}) q
FROM (SELECT the_geom FROM {}) q
) q;
'''.format(query)
'''.format(table_name)
output = self.query(query)
return output[0].get('bounds')

Expand All @@ -104,11 +121,21 @@ def schema(self, table_name, raw=False):
raw (bool, optional): return raw dict data if set to True.
Default False.
Example:
>>> sql.schema('table_name')
Column name Column type
-------------------------------------
cartodb_id number
the_geom geometry
the_geom_webmercator geometry
column1 string
column2 number
"""
query = 'SELECT * FROM {0} LIMIT 0;'.format(table_name)
output = self.query(query, verbose=True)
fields = output.get('fields')
if raw:
if raw is True:
return {key: fields[key]['type'] for key in fields}
else:
columns = ['Column name', 'Column type']
Expand All @@ -125,6 +152,14 @@ def describe(self, table_name, column_name):
table_name (str): name of the table.
column_name (str): name of the column.
Example:
>>> sql.describe('table_name', 'column_name')
count 1.00e+03
avg 2.00e+01
min 0.00e+00
max 5.00e+01
type: number
"""
column_type = self._get_column_type(table_name, column_name)
stats = ['COUNT(*)']
Expand All @@ -138,22 +173,33 @@ def describe(self, table_name, column_name):
'''.format(','.join(stats), table_name)
output = self.query(query, verbose=True)
fields = output.get('rows')[0]
rows = [(key, '{:0.2e}'.format(fields[key])) for key in fields]
rows = [(key, '{:0.2e}'.format(fields[key])) for key in fields if fields[key] is not None]
self._print_table(rows, padding=[5, 10])
print('type: {}'.format(column_type))

def create_table(self, table_name, columns, cartodbfy=True):
def create_table(self, table_name, columns_types, if_exists='fail', cartodbfy=True):
"""Create a table with a specific table name and columns.
Args:
table_name (str): name of the table.
column_name (str): name of the column.
columns_types (dict): dictionary with the column names and types.
if_exists (str, optional): collision strategy if the table already exists in CARTO.
Options are 'fail' or 'replace'. Default 'fail'.
cartodbfy (bool, optional): convert the table to CARTO format.
Default True. More info `here
<https://carto.com/developers/sql-api/guides/creating-tables/#create-tables>`__.
Example:
>>> sql.create_table('table_name', {'column1': 'text', 'column2': 'integer'})
"""
columns = ','.join(' '.join(x) for x in columns)
if not isinstance(columns_types, dict):
raise ValueError('The columns_types parameter should be a dictionary of column names and types.')

if if_exists not in COLLISION_STRATEGIES:
raise ValueError('Please provide a valid if_exists value among {}'.format(', '.join(COLLISION_STRATEGIES)))

columns = ['{0} {1}'.format(cname, ctype) for cname, ctype in columns_types.items()]
schema = self._context_manager.get_schema()
query = '''
BEGIN;
Expand All @@ -162,31 +208,29 @@ def create_table(self, table_name, columns, cartodbfy=True):
{cartodbfy};
COMMIT;
'''.format(
drop='DROP TABLE IF EXISTS {}'.format(table_name),
create='CREATE TABLE {0} ({1})'.format(table_name, columns),
drop='DROP TABLE IF EXISTS {}'.format(table_name) if if_exists == 'replace' else '',
create='CREATE TABLE {0} ({1})'.format(table_name, ','.join(columns)),
cartodbfy='SELECT CDB_CartoDBFyTable(\'{0}\', \'{1}\')'.format(
schema, table_name) if cartodbfy else ''
)
return self.execute(query)
self.execute(query)

def insert_table(self, table_name, column_names, column_values):
def insert_table(self, table_name, columns_values):
"""Insert one or more rows into the table.
Args:
table_name (str): name of the table.
column_names (str, list of str): names of the columns.
column_values (str, list of str): values of the columns.
columns_values (dict): dictionary mapping each column name to the list of its values
(one entry per row to insert; all lists should have the same length).
Example:
>>> sql.insert_table('table_name', {'column1': ['value1', 'value2'], 'column2': [1, 2]})
"""
if isinstance(column_names, str):
column_names = [column_names]
if isinstance(column_values, str):
column_names = [column_values]
sql_values = [self._sql_format(x) for x in column_values]
cnames = columns_values.keys()
cvalues = [self._row_values_format(v) for v in zip(*columns_values.values())]
query = '''
INSERT INTO {0} ({1}) VALUES({2});
'''.format(table_name, ','.join(column_names), ','.join(sql_values))
return self.execute(query)
INSERT INTO {0} ({1}) VALUES {2};
'''.format(table_name, ','.join(cnames), ','.join(cvalues))
self.execute(query)

def update_table(self, table_name, column_name, column_value, condition):
"""Update the column's value for the rows that match the condition.
Expand All @@ -197,12 +241,15 @@ def update_table(self, table_name, column_name, column_value, condition):
column_value (str): value of the column.
condition (str): "where" condition of the request.
Example:
>>> sql.update_table('table_name', 'column1', 'VALUE1', 'column1=\'value1\'')
"""
value = self._sql_format(column_value)
query = '''
UPDATE {0} SET {1}={2} WHERE {3};
'''.format(table_name, column_name, value, condition)
return self.execute(query)
self.execute(query)

def rename_table(self, table_name, new_table_name):
"""Rename a table from its table name.
Expand All @@ -211,19 +258,25 @@ def rename_table(self, table_name, new_table_name):
table_name (str): name of the original table.
new_table_name (str): name of the new table.
Example:
>>> sql.rename_table('table_name', 'table_name2')
"""
query = 'ALTER TABLE {0} RENAME TO {1};'.format(table_name, new_table_name)
return self.execute(query)
self.execute(query)

def drop_table(self, table_name):
"""Remove a table from its table name.
Args:
table_name (str): name of the table.
Example:
>>> sql.drop_table('table_name')
"""
query = 'DROP TABLE IF EXISTS {0};'.format(table_name)
return self.execute(query)
self.execute(query)

def _get_column_type(self, table_name, column_name):
query = 'SELECT {0} FROM {1} LIMIT 0;'.format(column_name, table_name)
Expand All @@ -239,6 +292,9 @@ def _sql_format(self, value):
return 'TRUE' if value else 'FALSE'
return str(value)

def _row_values_format(self, row_values):
return '({})'.format(','.join([self._sql_format(value) for value in row_values]))

def _print_table(self, rows, columns=None, padding=None):
row_format = ''
index = 0
Expand Down
6 changes: 1 addition & 5 deletions cartoframes/data/observatory/catalog/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,13 +286,9 @@ def subscriptions(self, credentials=None):
CatalogError: if there's a problem when connecting to the catalog or no datasets are found.
"""
_no_filters = {}
_credentials = get_credentials(credentials)

return Subscriptions(
Dataset.get_all(_no_filters, _credentials),
Geography.get_all(_no_filters, _credentials)
)
return Subscriptions(_credentials)

def datasets_filter(self, filter_dataset):
"""Get all the datasets in the Catalog filtered
Expand Down
10 changes: 9 additions & 1 deletion cartoframes/data/observatory/catalog/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,11 +293,14 @@ def geom_coverage(self):
"""
return geom_coverage(self.geography)

def describe(self):
def describe(self, autoformat=True):
"""Shows a summary of the actual stats of the variables (columns) of the dataset.
Some of the stats provided per variable are: avg, max, min, sum, range,
stdev, q1, q3, median and interquartile_range
Args:
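autoformat (bool, optional): if True, set the pandas ``display.float_format`` option so that
values are displayed with three decimals instead of scientific notation. Default True.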
Returns:
pandas.DataFrame
Expand All @@ -317,6 +320,11 @@ def describe(self):
# interquartile_range
"""
FLOAT_FORMAT = 'display.float_format'

if autoformat:
pd.set_option(FLOAT_FORMAT, lambda x: '%.3f' % x)

return dataset_describe(self.variables)

@classmethod
Expand Down
Loading

0 comments on commit bf35d2f

Please sign in to comment.