Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

698: library API for non-path input, accept markdown and dict input #712

Merged
merged 2 commits into from
Jul 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 0 additions & 24 deletions clean_for_build.py

This file was deleted.

1 change: 1 addition & 0 deletions pyxform/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
SUBMISSION_URL = "submission_url"
AUTO_SEND = "auto_send"
AUTO_DELETE = "auto_delete"
DEFAULT_FORM_NAME = "data"
lindsay-stevens marked this conversation as resolved.
Show resolved Hide resolved
DEFAULT_LANGUAGE_KEY = "default_language"
DEFAULT_LANGUAGE_VALUE = "default"
LABEL = "label"
Expand Down
4 changes: 2 additions & 2 deletions pyxform/entities/entities_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def get_validated_dataset_name(entity):

if not is_valid_xml_tag(dataset):
if isinstance(dataset, bytes):
dataset = dataset.encode("utf-8")
dataset = dataset.decode("utf-8")

raise PyXFormError(
f"Invalid entity list name: '{dataset}'. Names must begin with a letter, colon, or underscore. Other characters can include numbers or dashes."
Expand Down Expand Up @@ -117,7 +117,7 @@ def validate_entity_saveto(

if not is_valid_xml_tag(save_to):
if isinstance(save_to, bytes):
save_to = save_to.encode("utf-8")
save_to = save_to.decode("utf-8")

raise PyXFormError(
f"{error_start} '{save_to}'. Entity property names {const.XML_IDENTIFIER_ERROR_MESSAGE}"
Expand Down
4 changes: 4 additions & 0 deletions pyxform/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,7 @@ class PyXFormError(Exception):

class ValidationError(PyXFormError):
"""Common base class for pyxform validation exceptions."""


class PyXFormReadError(PyXFormError):
"""Common base class for pyxform exceptions occurring during reading XLSForm data."""
4 changes: 2 additions & 2 deletions pyxform/instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
SurveyInstance class module.
"""

import os.path

from pyxform.errors import PyXFormError
from pyxform.xform_instance_parser import parse_xform_instance

Expand Down Expand Up @@ -76,8 +78,6 @@ def answers(self):
return self._answers

def import_from_xml(self, xml_string_or_filename):
import os.path

if os.path.isfile(xml_string_or_filename):
xml_str = open(xml_string_or_filename, encoding="utf-8").read()
else:
Expand Down
28 changes: 13 additions & 15 deletions pyxform/survey.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from collections.abc import Generator, Iterator
from datetime import datetime
from functools import lru_cache
from pathlib import Path

from pyxform import aliases, constants
from pyxform.constants import EXTERNAL_INSTANCE_EXTENSIONS, NSMAP
Expand Down Expand Up @@ -970,10 +971,10 @@ def date_stamp(self):
"""Returns a date string with the format of %Y_%m_%d."""
return self._created.strftime("%Y_%m_%d")

def _to_ugly_xml(self):
def _to_ugly_xml(self) -> str:
return '<?xml version="1.0"?>' + self.xml().toxml()

def _to_pretty_xml(self):
def _to_pretty_xml(self) -> str:
"""Get the XForm with human readable formatting."""
return '<?xml version="1.0"?>\n' + self.xml().toprettyxml(indent=" ")

Expand Down Expand Up @@ -1171,10 +1172,9 @@ def _var_repl_output_function(matchobj):
else:
return text, False

# pylint: disable=too-many-arguments
def print_xform_to_file(
self, path=None, validate=True, pretty_print=True, warnings=None, enketo=False
):
) -> str:
"""
Print the xForm to a file and optionally validate it as well by
throwing exceptions and adding warnings to the warnings array.
Expand All @@ -1183,12 +1183,13 @@ def print_xform_to_file(
warnings = []
if not path:
path = self._print_name + ".xml"
if pretty_print:
xml = self._to_pretty_xml()
else:
xml = self._to_ugly_xml()
try:
with open(path, mode="w", encoding="utf-8") as file_obj:
if pretty_print:
file_obj.write(self._to_pretty_xml())
else:
file_obj.write(self._to_ugly_xml())
file_obj.write(xml)
except Exception:
if os.path.exists(path):
os.unlink(path)
Expand All @@ -1210,6 +1211,7 @@ def print_xform_to_file(
+ ". "
+ "Learn more: http://xlsform.org#multiple-language-support"
)
return xml

def to_xml(self, validate=True, pretty_print=True, warnings=None, enketo=False):
"""
Expand All @@ -1227,20 +1229,16 @@ def to_xml(self, validate=True, pretty_print=True, warnings=None, enketo=False):
tmp.close()
try:
# this will throw an exception if the xml is not valid
self.print_xform_to_file(
xml = self.print_xform_to_file(
path=tmp.name,
validate=validate,
pretty_print=pretty_print,
warnings=warnings,
enketo=enketo,
)
finally:
if os.path.exists(tmp.name):
os.remove(tmp.name)
if pretty_print:
return self._to_pretty_xml()

return self._to_ugly_xml()
Path(tmp.name).unlink(missing_ok=True)
return xml

def instantiate(self):
"""
Expand Down
95 changes: 32 additions & 63 deletions pyxform/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,16 @@
import json
import os
import re
from io import StringIO
from json.decoder import JSONDecodeError
from typing import NamedTuple
from typing import Any, NamedTuple
from xml.dom import Node
from xml.dom.minidom import Element, Text, _write_data

import openpyxl
import xlrd
from defusedxml.minidom import parseString

from pyxform import constants as const
from pyxform.errors import PyXFormError
from pyxform.xls2json_backends import is_empty, xls_value_to_unicode, xlsx_value_to_str

SEP = "_"

Expand Down Expand Up @@ -167,66 +166,32 @@ def flatten(li):
yield from subli


def sheet_to_csv(workbook_path, csv_path, sheet_name):
if workbook_path.endswith(".xls"):
return xls_sheet_to_csv(workbook_path, csv_path, sheet_name)
else:
return xlsx_sheet_to_csv(workbook_path, csv_path, sheet_name)

def external_choices_to_csv(
workbook_dict: dict[str, Any], warnings: list | None = None
) -> str | None:
"""
Convert the 'external_choices' sheet data to CSV.

def xls_sheet_to_csv(workbook_path, csv_path, sheet_name):
wb = xlrd.open_workbook(workbook_path)
try:
sheet = wb.sheet_by_name(sheet_name)
except xlrd.biffh.XLRDError:
return False
if not sheet or sheet.nrows < 2:
return False
with open(csv_path, mode="w", encoding="utf-8", newline="") as f:
writer = csv.writer(f, quoting=csv.QUOTE_ALL)
mask = [v and len(v.strip()) > 0 for v in sheet.row_values(0)]
for row_idx in range(sheet.nrows):
csv_data = []
try:
for v, m in zip(sheet.row(row_idx), mask, strict=False):
if m:
value = v.value
value_type = v.ctype
data = xls_value_to_unicode(value, value_type, wb.datemode)
# clean the values of leading and trailing whitespaces
data = data.strip()
csv_data.append(data)
except TypeError:
continue
writer.writerow(csv_data)

return True


def xlsx_sheet_to_csv(workbook_path, csv_path, sheet_name):
wb = openpyxl.open(workbook_path, read_only=True, data_only=True)
:param workbook_dict: The result from xls2json.workbook_to_json.
:param warnings: The conversion warnings list.
"""
warnings = coalesce(warnings, [])
if const.EXTERNAL_CHOICES not in workbook_dict:
warnings.append(
f"Could not export itemsets.csv, the '{const.EXTERNAL_CHOICES}' sheet is missing."
)
return None

itemsets = StringIO(newline="")
csv_writer = csv.writer(itemsets, quoting=csv.QUOTE_ALL)
try:
sheet = wb[sheet_name]
except KeyError:
return False

with open(csv_path, mode="w", encoding="utf-8", newline="") as f:
writer = csv.writer(f, quoting=csv.QUOTE_ALL)
mask = [not is_empty(cell.value) for cell in sheet[1]]
for row in sheet.rows:
csv_data = []
try:
for v, m in zip(row, mask, strict=False):
if m:
data = xlsx_value_to_str(v.value)
# clean the values of leading and trailing whitespaces
data = data.strip()
csv_data.append(data)
except TypeError:
continue
writer.writerow(csv_data)
wb.close()
return True
header = workbook_dict["external_choices_header"][0]
except (IndexError, KeyError, TypeError):
header = {k for d in workbook_dict[const.EXTERNAL_CHOICES] for k in d}
csv_writer.writerow(header)
for row in workbook_dict[const.EXTERNAL_CHOICES]:
csv_writer.writerow(row.values())
return itemsets.getvalue()


def has_external_choices(json_struct):
Expand All @@ -235,7 +200,11 @@ def has_external_choices(json_struct):
"""
if isinstance(json_struct, dict):
for k, v in json_struct.items():
if k == "type" and isinstance(v, str) and v.startswith("select one external"):
if (
k == const.TYPE
and isinstance(v, str)
and v.startswith(const.SELECT_ONE_EXTERNAL)
):
return True
elif has_external_choices(v):
return True
Expand Down
19 changes: 10 additions & 9 deletions pyxform/xls2json.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
)
from pyxform.errors import PyXFormError
from pyxform.parsing.expression import is_single_token_expression
from pyxform.utils import PYXFORM_REFERENCE_REGEX, default_is_dynamic
from pyxform.utils import PYXFORM_REFERENCE_REGEX, coalesce, default_is_dynamic
from pyxform.validators.pyxform import parameters_generic, select_from_file
from pyxform.validators.pyxform.android_package_name import validate_android_package_name
from pyxform.validators.pyxform.translations_checks import SheetTranslations
Expand Down Expand Up @@ -395,7 +395,7 @@ def workbook_to_json(
workbook_dict,
form_name: str | None = None,
fallback_form_name: str | None = None,
default_language: str = constants.DEFAULT_LANGUAGE_VALUE,
default_language: str | None = None,
warnings: list[str] | None = None,
) -> dict[str, Any]:
"""
Expand All @@ -416,8 +416,7 @@ def workbook_to_json(
returns a nested dictionary equivalent to the format specified in the
json form spec.
"""
if warnings is None:
warnings = []
warnings = coalesce(warnings, [])
is_valid = False
# Sheet names should be case-insensitive
workbook_dict = {x.lower(): y for x, y in workbook_dict.items()}
Expand All @@ -441,8 +440,8 @@ def workbook_to_json(
)

# Make sure the passed in vars are unicode
form_name = str(form_name)
default_language = str(default_language)
form_name = str(coalesce(form_name, constants.DEFAULT_FORM_NAME))
default_language = str(coalesce(default_language, constants.DEFAULT_LANGUAGE_VALUE))

# We check for double columns to determine whether to use them
# or single colons to delimit grouped headers.
Expand Down Expand Up @@ -500,7 +499,9 @@ def workbook_to_json(
)

# Here we create our json dict root with default settings:
id_string = settings.get(constants.ID_STRING, fallback_form_name)
id_string = settings.get(
constants.ID_STRING, coalesce(fallback_form_name, constants.DEFAULT_FORM_NAME)
)
sms_keyword = settings.get(constants.SMS_KEYWORD, id_string)
json_dict = {
constants.TYPE: constants.SURVEY,
Expand Down Expand Up @@ -970,7 +971,7 @@ def workbook_to_json(
question_name = str(row[constants.NAME])
if not is_valid_xml_tag(question_name):
if isinstance(question_name, bytes):
question_name = question_name.encode("utf-8")
question_name = question_name.decode("utf-8")

raise PyXFormError(
f"{ROW_FORMAT_STRING % row_number} Invalid question name '{question_name}'. Names {XML_IDENTIFIER_ERROR_MESSAGE}"
Expand Down Expand Up @@ -1591,7 +1592,7 @@ def get_filename(path):

def parse_file_to_json(
path: str,
default_name: str = "data",
default_name: str = constants.DEFAULT_FORM_NAME,
default_language: str = constants.DEFAULT_LANGUAGE_VALUE,
warnings: list[str] | None = None,
file_object: IO | None = None,
Expand Down
Loading
Loading