Skip to content

Commit

Permalink
update from end_token_id to stop_token_ids (#1849)
Browse files: browse the repository at this point in the history
  • Loading branch information
sachinprasadhs authored May 1, 2024
1 parent 66feb8b commit 127e671
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 58 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
"\n",
"**Author:** [Abheesht Sharma](https://github.com/abheesht17/)<br>\n",
"**Date created:** 2022/05/26<br>\n",
"**Last modified:** 2022/12/21<br>\n",
"**Last modified:** 2024/04/30<br>\n",
"**Description:** Use KerasNLP to train a sequence-to-sequence Transformer model on the machine translation task."
]
},
Expand Down Expand Up @@ -662,7 +662,9 @@
" encoder_input_tokens = ops.convert_to_tensor(eng_tokenizer(input_sentences))\n",
" if len(encoder_input_tokens[0]) < MAX_SEQUENCE_LENGTH:\n",
" pads = ops.full((1, MAX_SEQUENCE_LENGTH - len(encoder_input_tokens[0])), 0)\n",
" encoder_input_tokens = ops.concatenate([encoder_input_tokens, pads], 1)\n",
" encoder_input_tokens = ops.concatenate(\n",
" [encoder_input_tokens.to_tensor(), pads], 1\n",
" )\n",
"\n",
" # Define a function that outputs the next token's probability given the\n",
" # input sequence.\n",
Expand All @@ -681,7 +683,7 @@
" generated_tokens = keras_nlp.samplers.GreedySampler()(\n",
" next,\n",
" prompt,\n",
" end_token_id=spa_tokenizer.token_to_id(\"[END]\"),\n",
" stop_token_ids=[spa_tokenizer.token_to_id(\"[END]\")],\n",
" index=1, # Start sampling after start token.\n",
" )\n",
" generated_sentences = spa_tokenizer.detokenize(generated_tokens)\n",
Expand Down
108 changes: 57 additions & 51 deletions examples/nlp/md/neural_machine_translation_with_keras_nlp.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

**Author:** [Abheesht Sharma](https://github.com/abheesht17/)<br>
**Date created:** 2022/05/26<br>
**Last modified:** 2022/12/21<br>
**Last modified:** 2024/04/30<br>
**Description:** Use KerasNLP to train a sequence-to-sequence Transformer model on the machine translation task.


Expand Down Expand Up @@ -51,9 +51,6 @@ Before we start implementing the pipeline, let's import all the libraries we nee
!pip install -q --upgrade keras # Upgrade to Keras 3.
```




```python
import keras_nlp
import pathlib
Expand All @@ -69,8 +66,9 @@ from tensorflow_text.tools.wordpiece_vocab import (
```
<div class="k-default-codeblock">
```
['\x1b[33mWARNING: There was an error checking the latest version of pip.\x1b[0m\x1b[33m',
'\x1b[0m']
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow 2.15.1 requires keras<2.16,>=2.15.0, but you have keras 3.3.3 which is incompatible.
```
</div>
Expand Down Expand Up @@ -105,6 +103,13 @@ text_file = keras.utils.get_file(
text_file = pathlib.Path(text_file).parent / "spa-eng" / "spa.txt"
```

<div class="k-default-codeblock">
```
Downloading data from http://storage.googleapis.com/download.tensorflow.org/data/spa-eng.zip
2638744/2638744 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step
```
</div>
---
## Parsing the data

Expand Down Expand Up @@ -134,11 +139,11 @@ for _ in range(5):

<div class="k-default-codeblock">
```
('will the coffee stain ruin the carpet?', '¿la mancha de café va a arruinar la alfombra?')
('is it only about money?', '¿sólo se trata de dinero?')
('most students come to school on foot.', 'la mayoría de los estudiantes vienen a la escuela de a pie.')
("tom doesn't want to make mary angry.", 'tom no quiere hacer enojar a mary.')
('i can fly.', 'puedo volar.')
('tom heard that mary had bought a new computer.', 'tom oyó que mary se había comprado un computador nuevo.')
('will you stay at home?', '¿te vas a quedar en casa?')
('where is this train going?', '¿adónde va este tren?')
('tom panicked.', 'tom entró en pánico.')
("we'll help you rescue tom.", 'te ayudaremos a rescatar a tom.')
```
</div>
Expand Down Expand Up @@ -231,8 +236,8 @@ print("Spanish Tokens: ", spa_vocab[100:110])

<div class="k-default-codeblock">
```
English Tokens: ['at', 'know', 'him', 'there', 'they', 'go', 'her', 'has', 'will', 're']
Spanish Tokens: ['qué', 'le', 'ella', 'para', 'te', 'mary', 'las', 'más', 'al', 'yo']
English Tokens: ['at', 'know', 'him', 'there', 'go', 'they', 'her', 'has', 'time', 'will']
Spanish Tokens: ['le', 'para', 'te', 'mary', 'las', 'más', 'al', 'yo', 'tu', 'estoy']
```
</div>
Expand Down Expand Up @@ -278,21 +283,17 @@ print(

<div class="k-default-codeblock">
```
English sentence: tom thinks mary should apologize to john for not doing what she said she'd do.
Tokens: tf.Tensor(
[ 69 640 86 151 1274 67 309 82 97 288 85 84 181 84
8 29 77 11], shape=(18,), dtype=int32)
Recovered text after detokenizing: tf.Tensor(b"tom thinks mary should apologize to john for not doing what she said she ' d do .", shape=(), dtype=string)
English sentence: i am leaving the books here.
Tokens: tf.Tensor([ 35 163 931 66 356 119 12], shape=(7,), dtype=int32)
Recovered text after detokenizing: tf.Tensor(b'i am leaving the books here .', shape=(), dtype=string)
```
</div>

<div class="k-default-codeblock">
```
Spanish sentence: tom piensa que mary debería pedirle perdón a john por no hacer lo que había dicho que haría.
Tokens: tf.Tensor(
[ 82 704 80 105 262 1666 1894 29 314 91 81 125 92 80
179 464 80 915 14], shape=(19,), dtype=int32)
Recovered text after detokenizing: tf.Tensor(b'tom piensa que mary deber\xc3\xada pedirle perd\xc3\xb3n a john por no hacer lo que hab\xc3\xada dicho que har\xc3\xada .', shape=(), dtype=string)
Spanish sentence: dejo los libros aquí.
Tokens: tf.Tensor([2962 93 350 122 14], shape=(5,), dtype=int32)
Recovered text after detokenizing: tf.Tensor(b'dejo los libros aqu\xc3\xad .', shape=(), dtype=string)
```
</div>
Expand Down Expand Up @@ -492,24 +493,24 @@ transformer.fit(train_ds, epochs=EPOCHS, validation_data=val_ds)



<pre style="white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace">┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃<span style="font-weight: bold"> Layer (type) </span>┃<span style="font-weight: bold"> Output Shape </span>┃<span style="font-weight: bold"> Param # </span>┃<span style="font-weight: bold"> Connected to </span>┃
┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ encoder_inputs │ (<span style="color: #00d7ff; text-decoration-color: #00d7ff">None</span>, <span style="color: #00d7ff; text-decoration-color: #00d7ff">None</span>) │ <span style="color: #00af00; text-decoration-color: #00af00">0</span> │ -
│ (<span style="color: #0087ff; text-decoration-color: #0087ff">InputLayer</span>) │ │
├─────────────────────┼───────────────────┼───────────────────────────────┤
│ token_and_position… │ (<span style="color: #00d7ff; text-decoration-color: #00d7ff">None</span>, <span style="color: #00d7ff; text-decoration-color: #00d7ff">None</span>, <span style="color: #00af00; text-decoration-color: #00af00">256</span>) │ <span style="color: #00af00; text-decoration-color: #00af00">3,850,</span> │ encoder_inputs[<span style="color: #00af00; text-decoration-color: #00af00">0</span>][<span style="color: #00af00; text-decoration-color: #00af00">0</span>]
│ (<span style="color: #0087ff; text-decoration-color: #0087ff">TokenAndPositionE…</span> │ │
├─────────────────────┼───────────────────┼───────────────────────────────┤
│ decoder_inputs │ (<span style="color: #00d7ff; text-decoration-color: #00d7ff">None</span>, <span style="color: #00d7ff; text-decoration-color: #00d7ff">None</span>) │ <span style="color: #00af00; text-decoration-color: #00af00">0</span> │ -
│ (<span style="color: #0087ff; text-decoration-color: #0087ff">InputLayer</span>) │ │
├─────────────────────┼───────────────────┼───────────────────────────────┤
│ transformer_encoder │ (<span style="color: #00d7ff; text-decoration-color: #00d7ff">None</span>, <span style="color: #00d7ff; text-decoration-color: #00d7ff">None</span>, <span style="color: #00af00; text-decoration-color: #00af00">256</span>) │ <span style="color: #00af00; text-decoration-color: #00af00">1,315,</span> │ token_and_position_… │
│ (<span style="color: #0087ff; text-decoration-color: #0087ff">TransformerEncode…</span> │ │
├─────────────────────┼───────────────────┼───────────────────────────────┤
│ functional_3 │ (<span style="color: #00d7ff; text-decoration-color: #00d7ff">None</span>, <span style="color: #00d7ff; text-decoration-color: #00d7ff">None</span>, │ <span style="color: #00af00; text-decoration-color: #00af00">9,283,</span> │ decoder_inputs[<span style="color: #00af00; text-decoration-color: #00af00">0</span>][<span style="color: #00af00; text-decoration-color: #00af00">0</span>… │
│ (<span style="color: #0087ff; text-decoration-color: #0087ff">Functional</span>) │ <span style="color: #00af00; text-decoration-color: #00af00">15000</span>) │ │ transformer_encoder… │
└─────────────────────┴───────────────────┴───────────────────────────────┘
<pre style="white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace">┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃<span style="font-weight: bold"> Layer (type) </span>┃<span style="font-weight: bold"> Output Shape </span>┃<span style="font-weight: bold"> Param # </span>┃<span style="font-weight: bold"> Connected to </span>┃
┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ encoder_inputs │ (<span style="color: #00d7ff; text-decoration-color: #00d7ff">None</span>, <span style="color: #00d7ff; text-decoration-color: #00d7ff">None</span>) │ <span style="color: #00af00; text-decoration-color: #00af00">0</span> │ - │
│ (<span style="color: #0087ff; text-decoration-color: #0087ff">InputLayer</span>) │ │
├─────────────────────┼───────────────────┼───────────────────────────────┤
│ token_and_position… │ (<span style="color: #00d7ff; text-decoration-color: #00d7ff">None</span>, <span style="color: #00d7ff; text-decoration-color: #00d7ff">None</span>, <span style="color: #00af00; text-decoration-color: #00af00">256</span>) │ <span style="color: #00af00; text-decoration-color: #00af00">3,850,240</span> │ encoder_inputs[<span style="color: #00af00; text-decoration-color: #00af00">0</span>
│ (<span style="color: #0087ff; text-decoration-color: #0087ff">TokenAndPositionE…</span> │ │
├─────────────────────┼───────────────────┼───────────────────────────────┤
│ decoder_inputs │ (<span style="color: #00d7ff; text-decoration-color: #00d7ff">None</span>, <span style="color: #00d7ff; text-decoration-color: #00d7ff">None</span>) │ <span style="color: #00af00; text-decoration-color: #00af00">0</span> │ - │
│ (<span style="color: #0087ff; text-decoration-color: #0087ff">InputLayer</span>) │ │
├─────────────────────┼───────────────────┼───────────────────────────────┤
│ transformer_encoder │ (<span style="color: #00d7ff; text-decoration-color: #00d7ff">None</span>, <span style="color: #00d7ff; text-decoration-color: #00d7ff">None</span>, <span style="color: #00af00; text-decoration-color: #00af00">256</span>) │ <span style="color: #00af00; text-decoration-color: #00af00">1,315,072</span> │ token_and_positi… │
│ (<span style="color: #0087ff; text-decoration-color: #0087ff">TransformerEncode…</span> │ │
├─────────────────────┼───────────────────┼───────────────────────────────┤
│ functional_3 │ (<span style="color: #00d7ff; text-decoration-color: #00d7ff">None</span>, <span style="color: #00d7ff; text-decoration-color: #00d7ff">None</span>, │ <span style="color: #00af00; text-decoration-color: #00af00">9,283,992</span> │ decoder_inputs[<span style="color: #00af00; text-decoration-color: #00af00">0</span>… │
│ (<span style="color: #0087ff; text-decoration-color: #0087ff">Functional</span>) │ <span style="color: #00af00; text-decoration-color: #00af00">15000</span>) │ │ transformer_enco… │
└─────────────────────┴───────────────────┴───────────────────────────────┘
</pre>


Expand All @@ -534,9 +535,9 @@ transformer.fit(train_ds, epochs=EPOCHS, validation_data=val_ds)

<div class="k-default-codeblock">
```
1302/1302 ━━━━━━━━━━━━━━━━━━━━ 22s 15ms/step - accuracy: 0.8164 - loss: 1.4953 - val_accuracy: 0.8683 - val_loss: 0.7952
1302/1302 ━━━━━━━━━━━━━━━━━━━━ 1701s 1s/step - accuracy: 0.8168 - loss: 1.4819 - val_accuracy: 0.8650 - val_loss: 0.8129
<keras.src.callbacks.history.History at 0x7f6563fd2140>
<keras.src.callbacks.history.History at 0x7efdd7ee6a50>
```
</div>
Expand All @@ -563,7 +564,9 @@ def decode_sequences(input_sentences):
encoder_input_tokens = ops.convert_to_tensor(eng_tokenizer(input_sentences))
if len(encoder_input_tokens[0]) < MAX_SEQUENCE_LENGTH:
pads = ops.full((1, MAX_SEQUENCE_LENGTH - len(encoder_input_tokens[0])), 0)
encoder_input_tokens = ops.concatenate([encoder_input_tokens, pads], 1)
encoder_input_tokens = ops.concatenate(
[encoder_input_tokens.to_tensor(), pads], 1
)

# Define a function that outputs the next token's probability given the
# input sequence.
Expand All @@ -582,7 +585,7 @@ def decode_sequences(input_sentences):
generated_tokens = keras_nlp.samplers.GreedySampler()(
next,
prompt,
end_token_id=spa_tokenizer.token_to_id("[END]"),
stop_token_ids=[spa_tokenizer.token_to_id("[END]")],
index=1, # Start sampling after start token.
)
generated_sentences = spa_tokenizer.detokenize(generated_tokens)
Expand All @@ -608,17 +611,20 @@ for i in range(2):

<div class="k-default-codeblock">
```
WARNING: All log messages before absl::InitializeLog() is called are written to STDERR
I0000 00:00:1714519073.816969 34774 device_compiler.h:186] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.
** Example 0 **
he is always complaining.
él siempre está en la escuela .
i got the ticket free of charge.
me pregunto la comprome .
```
</div>

<div class="k-default-codeblock">
```
** Example 1 **
i think you're all wrong.
creo que te representan todos los días .
i think maybe that's all you have to do.
creo que tom le dije que hacer eso .
```
</div>

Expand Down Expand Up @@ -663,8 +669,8 @@ print("ROUGE-2 Score: ", rouge_2.result())

<div class="k-default-codeblock">
```
ROUGE-1 Score: {'precision': Array(0.33075738, dtype=float32), 'recall': Array(0.33867723, dtype=float32), 'f1_score': Array(0.3302676, dtype=float32)}
ROUGE-2 Score: {'precision': Array(0.13534392, dtype=float32), 'recall': Array(0.13344036, dtype=float32), 'f1_score': Array(0.13272808, dtype=float32)}
ROUGE-1 Score: {'precision': <tf.Tensor: shape=(), dtype=float32, numpy=0.30989552>, 'recall': <tf.Tensor: shape=(), dtype=float32, numpy=0.37136248>, 'f1_score': <tf.Tensor: shape=(), dtype=float32, numpy=0.33032653>}
ROUGE-2 Score: {'precision': <tf.Tensor: shape=(), dtype=float32, numpy=0.08999339>, 'recall': <tf.Tensor: shape=(), dtype=float32, numpy=0.09524643>, 'f1_score': <tf.Tensor: shape=(), dtype=float32, numpy=0.08855649>}
```
</div>
Expand Down
8 changes: 5 additions & 3 deletions examples/nlp/neural_machine_translation_with_keras_nlp.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Title: English-to-Spanish translation with KerasNLP
Author: [Abheesht Sharma](https://github.com/abheesht17/)
Date created: 2022/05/26
Last modified: 2022/12/21
Last modified: 2024/04/30
Description: Use KerasNLP to train a sequence-to-sequence Transformer model on the machine translation task.
Accelerator: GPU
"""
Expand Down Expand Up @@ -420,7 +420,9 @@ def decode_sequences(input_sentences):
encoder_input_tokens = ops.convert_to_tensor(eng_tokenizer(input_sentences))
if len(encoder_input_tokens[0]) < MAX_SEQUENCE_LENGTH:
pads = ops.full((1, MAX_SEQUENCE_LENGTH - len(encoder_input_tokens[0])), 0)
encoder_input_tokens = ops.concatenate([encoder_input_tokens, pads], 1)
encoder_input_tokens = ops.concatenate(
[encoder_input_tokens.to_tensor(), pads], 1
)

# Define a function that outputs the next token's probability given the
# input sequence.
Expand All @@ -439,7 +441,7 @@ def next(prompt, cache, index):
generated_tokens = keras_nlp.samplers.GreedySampler()(
next,
prompt,
end_token_id=spa_tokenizer.token_to_id("[END]"),
stop_token_ids=[spa_tokenizer.token_to_id("[END]")],
index=1, # Start sampling after start token.
)
generated_sentences = spa_tokenizer.detokenize(generated_tokens)
Expand Down
2 changes: 1 addition & 1 deletion scripts/autogen.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
PROJECT_URL = {
"keras": f"{KERAS_TEAM_GH}/keras/tree/v3.3.3/",
"keras_tuner": f"{KERAS_TEAM_GH}/keras-tuner/tree/v1.4.7/",
"keras_cv": f"{KERAS_TEAM_GH}/keras-cv/tree/v0.8.2/",
"keras_cv": f"{KERAS_TEAM_GH}/keras-cv/tree/v0.9.0/",
"keras_nlp": f"{KERAS_TEAM_GH}/keras-nlp/tree/v0.10.0/",
"tf_keras": f"{KERAS_TEAM_GH}/tf-keras/tree/v2.16.0/",
}
Expand Down

0 comments on commit 127e671

Please sign in to comment.