Skip to content

Commit

Permalink
improve csv params selection
Browse files Browse the repository at this point in the history
  • Loading branch information
jeromedockes committed Jul 17, 2024
1 parent e802dd3 commit 9cb7cd3
Show file tree
Hide file tree
Showing 4 changed files with 114 additions and 86 deletions.
101 changes: 57 additions & 44 deletions index.html
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,13 @@
href="https://cdn.jsdelivr.net/npm/purecss@3.0.0/build/pure-min.css"
integrity="sha384-X38yfunGUhNzHpBaEBsWLO+A0HDYOQi8ufWDkZ0k9e0eXz/tH3II7uKZ9msv++Ls"
crossorigin="anonymous">
<link rel="stylesheet" href="skrub-online-report.css?version800e">
<link rel="stylesheet" href="skrub-online-report.css?versiona8da">
<link rel="icon" href="skrub.svg">
<!-- <script src="https://cdn.jsdelivr.net/pyodide/v0.26.1/full/pyodide.js"></script> -->
<script src="skrub-online-report.js?version800e" defer></script>
<script src="skrub-online-report.js?versiona8da" defer></script>
</head>

<body>
<main>
<main class="flow">
<h1>Skrub table report generator</h1>
<div class="flex">
<label for="file-input" id="file-input-label" class="button">Choose CSV
Expand All @@ -28,15 +27,19 @@ <h1>Skrub table report generator</h1>
<div class="flex" id="large-file-warning" data-hidden="">
<div class="highlighted-block warning-block">
<div>
The selected file may be too large (<span id="file-size-display"></span>)
The selected file may be too large (<span
id="file-size-display"></span>)
</div>
<div class="text-wrapper">
Python is running in the browser with limited memory. If you experience difficulties this may be due to the relatively large size of the selected file. For large files it is recommended to install Skrub and use it from a Python script.
Python is running in the browser with limited memory. If you
experience difficulties this may be due to the relatively
large size of the selected file. For large files it is
recommended to install Skrub and use it from a Python
script.
</div>
</div>
</div>
<div id="status-container" data-hidden-top-level="">
</div>
<div id="status-container" data-hidden-top-level=""></div>
<div id="csv-dialog">
<details id="csv-dialog-details">
<summary>CSV parsing parameters</summary>
Expand All @@ -45,49 +48,59 @@ <h1>Skrub table report generator</h1>
parse the CSV. When the table preview looks like the
parameters are correct, click "Create report".
</p>
<form id="csv-dialog-form">
<fieldset id="csv-dialog-form-fieldset" class="flex">
<div>
<label for="delimiter-input">Delimiter ("\t" for
tab):</label>
<input type="text" id="delimiter-input" maxlength="2"
value="," class="char-input" />
</div>
<div class="flow">
<form id="csv-dialog-form">
<fieldset id="csv-dialog-form-fieldset" class="flex">
<div>
<label for="delimiter-input">Delimiter:</label>
<select id="delimiter-input" value=",">
<option value="," selected>,</option>
<option value="&#9;">TAB</option>
<option value=";">;</option>
<option value="|">|</option>
</select>
</div>

<div>
<label for="quote-input">Quote:</label>
<input type="text" id="quote-input" maxlength="1" value='"'
class="char-input" />
</div>
<div>
<label for="quote-input">Quote:</label>
<select id="quote-input">
<option value='"' selected>"</option>
<option value="'">'</option>
</select>
</div>

<div>
<label for="escape-input">Escape:</label>
<select id="escape-input">
<option value="none" selected>none</option>
<option value="&#92;">&#92;</option>
</select>
</div>
<div>
<label for="encoding-input">Text encoding:</label>
<select id="encoding-input"></select>
</div>
<button type="button" class="button blue-bg"
id="csv-params-submit">Create report</button>
</fieldset>
</form>

<div>
<label for="escape-input">Escape:</label>
<input type="text" id="escape-input" maxlength="1" value='"'
class="char-input" />
<div id="csv-raw-text-section">
<h2>File contents</h2>
<div class="scroll">
<div id="csv-raw-text"></div>
</div>
<div>
<label for="encoding-input">Text encoding:</label>
<select id="encoding-input"></select>
</div>
<div id="csv-preview-section">
<h2>Table preview</h2>
<div class="scroll">
<div id="csv-preview"></div>
</div>
<button type="button" class="button blue-bg"
id="csv-params-submit">Create report</button>
</fieldset>
</form>

<div id="csv-raw-text-section">
<h2>File contents</h2>
<div class="scroll">
<div id="csv-raw-text"></div>
</div>
</div>
<div id="csv-preview-section">
<h2>Table preview</h2>
<div class="scroll">
<div id="csv-preview"></div>
<div id="csv-error-section"
class="highlighted-block error-block wrapper">
</div>
</div>
<div id="csv-error-section" class="highlighted-block error-block wrapper">
</div>
</details>
</div>
<div id="report">
Expand Down
22 changes: 14 additions & 8 deletions skrub-online-report.css
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,6 @@ main {
padding: 1.2rem;
}

main > * + * {
margin-bottom: 1rem;
}

h1 {
font-size: 1.5rem;
margin-bottom: 2rem;
Expand All @@ -40,18 +36,27 @@ h2 {
pre {
overflow-x: auto;
overflow-y: auto;
margin: 0;
}

fieldset {
border: none;
}

select {
min-width: 5ch;
}

details > summary {
cursor: pointer;
}

/* Utilities */

.flow > * + * {
margin-bottom: 1rem;
}

.blue-bg {
--bg-color: var(--lightblue);
}
Expand All @@ -66,6 +71,11 @@ details > summary {
max-height: 15rem;
}

.scroll * {
overflow-x: visible;
overflow-y: visible;
}

.flex {
display: flex;
flex-wrap: wrap;
Expand Down Expand Up @@ -219,10 +229,6 @@ label:has(:focus-visible),
text-overflow: ellipsis;
}

.char-input {
max-width: 1.5ch;
}

#report .error-block {
margin-top: 1rem;
}
12 changes: 6 additions & 6 deletions skrub-online-report.js
Original file line number Diff line number Diff line change
Expand Up @@ -195,11 +195,11 @@ function DONE_COMPUTING_CSV_PREVIEW(data) {
disableControls: false
});

if (data.sniffedCsvParams) {
if (data.sniffedCsvParams !== undefined) {
const form = document.getElementById("csv-dialog-form");
for (let param of AllCsvParamNames) {
if (data[param] !== undefined){
form.elements[`${param}-input`].value = data[param];
if (data.sniffedCsvParams[param] !== undefined){
form.elements[`${param}-input`].value = data.sniffedCsvParams[param];
}
}
}
Expand Down Expand Up @@ -249,7 +249,7 @@ function getCsvParams() {
const result = {};
const form = document.getElementById("csv-dialog-form");
for (let param of AllCsvParamNames) {
result[param] = form.elements[`${param}-input`].value.trim();
result[param] = form.elements[`${param}-input`].value;
}
return result;
}
Expand All @@ -269,7 +269,7 @@ function csvParamsChanged() {
const params = getCsvParams();
worker.postMessage({
kind: "CSV_PARAMS",
...params
csvParams: params
});
}

Expand Down Expand Up @@ -322,7 +322,7 @@ setElementStates({
disableControls: false,
hide: ["csv-dialog", "status-container"]
});
const worker = new Worker("skrub-worker.js?version800e");
const worker = new Worker("skrub-worker.js?versiona8da");
worker.onmessage = (e) => {
window[e.data.kind](e.data);
};
65 changes: 37 additions & 28 deletions skrub-worker.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,16 @@ import clevercsv
import chardet
import skrub
import js
def escape_str_to_py(escape_str):
    """Convert the escape value received from the page into a Python value.

    The string "none" (the value of the "none" entry of the escape
    selector) is mapped to ``None``; any other value is returned unchanged.
    """
    if escape_str == "none":
        return None
    return escape_str


def escape_py_to_str(escape_py):
    """Inverse of ``escape_str_to_py``.

    ``None`` is mapped to the string "none"; any other value is returned
    unchanged.
    """
    if escape_py is None:
        return "none"
    return escape_py
`);
self.postMessage({
kind: "DONE_LOADING_PYODIDE"
Expand All @@ -49,10 +59,10 @@ def get_report():
else:
df = pd.read_csv(
data,
encoding=js.encoding,
sep=js.delimiter,
escapechar=js.escape,
quotechar=js.quote,
encoding=js.csvParams.encoding,
sep=js.csvParams.delimiter,
escapechar=escape_str_to_py(js.csvParams.escape),
quotechar=js.csvParams.quote,
encoding_errors="replace",
na_values=["?"],
keep_default_na=True,
Expand Down Expand Up @@ -102,8 +112,9 @@ import clevercsv
from chardet.universaldetector import UniversalDetector
import js
error = error_type = decoded_text = preview = None
csv_params = {}
encoding = delimiter = quote = escape = None
error = error_type = decoded_text = preview = None
def get_preview():
global encoding, delimiter, quote, escape, decoded_text, preview
Expand All @@ -115,18 +126,22 @@ def get_preview():
encoding = detector.result["encoding"]
if encoding in (None, "ascii"):
encoding = "utf-8"
csv_params["encoding"] = encoding
decoder = codecs.getincrementaldecoder(encoding)()
decoded_text = decoder.decode(data_bytes[:1_000_000])
dialect = clevercsv.Sniffer().sniff(decoded_text)
decoded_text = decoded_text[:4000]
delimiter = dialect.delimiter or ","
csv_params["delimiter"] = delimiter
quote = dialect.quotechar or '"'
escape = dialect.escapechar or ("\\u005c" if quote == "'" else quote)
csv_params["quote"] = quote
escape = dialect.escapechar or None
csv_params["escape"] = escape_py_to_str(escape)
else:
encoding = js.encoding
delimiter = js.delimiter
escape = js.escape
quote = js.quote
encoding = js.csvParams.encoding
delimiter = js.csvParams.delimiter
escape = escape_str_to_py(js.csvParams.escape)
quote = js.csvParams.quote
decoder = codecs.getincrementaldecoder(encoding)()
decoded_text = decoder.decode(data_bytes[:4000])
Expand Down Expand Up @@ -157,19 +172,19 @@ except Exception as e:
preview: pyodide.globals.get("preview"),
errorType: pyodide.globals.get("error_type"),
error: pyodide.globals.get("error"),
sniffedCsvParams: false,
fileName: self.fileName
};
if (self.sniff) {
data.sniffedCsvParams = true;
self.encoding = pyodide.globals.get("encoding");
data.encoding = self.encoding;
self.delimiter = pyodide.globals.get("delimiter");
data.delimiter = self.delimiter;
self.quote = pyodide.globals.get("quote");
data.quote = self.quote;
self.escape = pyodide.globals.get("escape");
data.escape = self.escape;
const proxy = pyodide.globals.get("csv_params");
try {
self.csvParams = proxy.toJs({
create_proxies: false,
dict_converter: Object.fromEntries
});
} finally {
proxy.destroy();
}
data.sniffedCsvParams = self.csvParams;
}
self.postMessage(data);
}
Expand All @@ -196,14 +211,9 @@ async function FILE_SELECTED(data) {


/**
 * Handle a "CSV_PARAMS" message: the page sends the CSV parsing parameters
 * currently selected in the form.
 *
 * When at least one parameter differs from the ones stored on the worker,
 * sniffing is turned off, the new parameters are stored in `self.csvParams`
 * and the preview is recomputed with `csvPreview()`. When nothing changed,
 * the message is ignored.
 *
 * @param {{csvParams: Object}} data - message payload; `csvParams` maps
 *     parameter names to their values.
 */
function CSV_PARAMS(data) {
  const current = self.csvParams || {};
  const next = data.csvParams || {};
  // The payload is structured-cloned by postMessage, so `data.csvParams` is
  // never the same object as `self.csvParams`: compare the values, not the
  // references, otherwise every message would trigger a new preview.
  const names = new Set([...Object.keys(current), ...Object.keys(next)]);
  const changed = [...names].some((name) => current[name] !== next[name]);
  if (changed) {
    self.sniff = false;
    self.csvParams = data.csvParams;
    csvPreview();
  }
}
Expand All @@ -213,7 +223,6 @@ async function CSV_COMMIT(data) {
getReport();
}


const pyodideLoaded = startPyodide();

self.onmessage = (e) => {
Expand Down

0 comments on commit 9cb7cd3

Please sign in to comment.