diff --git a/docs/_static/css/custom.css b/docs/_static/css/custom.css index 66f69c111c..d7d40d6773 100644 --- a/docs/_static/css/custom.css +++ b/docs/_static/css/custom.css @@ -209,3 +209,35 @@ html[data-theme="dark"] .button-primary:visited:hover { padding-inline: 50px; padding-bottom: 24px; } + +/* Inline inputs for editable commands */ +.inline-input { + background-color: rgba(255, 255, 255, 0.1); + color: inherit; + border: 1px solid #555; + border-radius: 3px; + padding: 2px 6px; + font-family: inherit; + font-size: inherit; + display: inline-block; + vertical-align: middle; + margin: 0 2px; + box-sizing: content-box; +} + +.inline-input:focus { + outline: none; + border-color: #1A73E8; + background-color: rgba(255, 255, 255, 0.2); +} + +html[data-theme="light"] .inline-input { + background-color: rgba(0, 0, 0, 0.05); + border-color: #ccc; + color: #333; +} + +html[data-theme="light"] .inline-input:focus { + background-color: rgba(0, 0, 0, 0.1); + border-color: #1A73E8; +} \ No newline at end of file diff --git a/docs/_static/js/editable_commands.js b/docs/_static/js/editable_commands.js new file mode 100644 index 0000000000..a5301c3d1a --- /dev/null +++ b/docs/_static/js/editable_commands.js @@ -0,0 +1,130 @@ +/** + * Handles inline editable commands in documentation. + * Replaces placeholders in code blocks with inline input fields. + */ +document.addEventListener('DOMContentLoaded', () => { + const codeBlocks = document.querySelectorAll('div.highlight-sh pre, div.highlight-bash pre, div.highlight-default pre'); + + codeBlocks.forEach(block => { + + const originalHTML = block.innerHTML; + + const placeholders = [ + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + ]; + + let newHTML = originalHTML; + + placeholders.forEach(placeholder => { + // 1. create robust regex for this placeholder + // escape chars + const escapeRegex = (string) => string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); + + const htmlEscapedKey = placeholder + .replace(/&/g, '&') + .replace(//g, '>'); + + let pattern = ''; + for (let i = 0; i < htmlEscapedKey.length; i++) { + const char = htmlEscapedKey[i]; + pattern += escapeRegex(char) + '(?:<[^>]+>)*'; + } + + const regex = new RegExp(pattern, 'g'); + + // Replace with an input element + // We use the original placeholder text as placeholder for the input + const inputHTML = ``; + + newHTML = newHTML.replace(regex, inputHTML); + }); + + if (newHTML !== originalHTML) { + block.innerHTML = newHTML; + } + }); + + // Add event listeners to newly created inputs to auto-resize + document.querySelectorAll('.inline-input').forEach(input => { + input.addEventListener('input', function () { + this.style.width = Math.max(this.value.length, this.placeholder.length) + 2 + 'ch'; + }); + }); + + /** + * Intercept copy button clicks to include user input values. + * Runs in capture phase to precede sphinx-copybutton's listener. + */ + document.addEventListener('click', (event) => { + // Check if the clicked element is a copy button or inside one + const button = event.target.closest('.copybtn'); + if (!button) return; + + // Find the associated code block + // Sphinx-copybutton places the button inside .highlight usually + const highlightDiv = button.closest('.highlight'); + if (!highlightDiv) return; + + const inputs = highlightDiv.querySelectorAll('input.inline-input'); + if (inputs.length === 0) return; + + const swaps = []; + inputs.forEach(input => { + // Create a temporary span with the input's current value + const span = document.createElement('span'); + // If value is empty, fallback to placeholder to match original text behavior + const val = input.value; + span.textContent = val ? val : input.placeholder; + + // Mimic input appearance slightly if needed, but plain text is what we want copied + span.style.color = val ? 'inherit' : 'gray'; + + input.replaceWith(span); + swaps.push({ input, span }); + }); + + // Revert immediately after the current event loop + setTimeout(() => { + swaps.forEach(({ input, span }) => { + span.replaceWith(input); + }); + }, 0); + }, true); +}); diff --git a/docs/conf.py b/docs/conf.py index 646b60f86e..cee80f9866 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -48,6 +48,7 @@ html_theme = "sphinx_book_theme" html_static_path = ["_static"] html_css_files = ["css/custom.css"] +html_js_files = ["js/editable_commands.js"] html_logo = "_static/maxtext.png" # -- Options for myst ---------------------------------------------- diff --git a/docs/tutorials/posttraining/rl.md b/docs/tutorials/posttraining/rl.md index d28a308cf8..166bbb7954 100644 --- a/docs/tutorials/posttraining/rl.md +++ b/docs/tutorials/posttraining/rl.md @@ -69,9 +69,9 @@ Setup following environment variables before running GRPO/GSPO: ```bash # -- Model configuration -- -export HF_MODEL='llama3.1-8b-Instruct' -export MODEL='llama3.1-8b' -export TOKENIZER='meta-llama/Llama-3.1-8B-Instruct' +export HF_MODEL= # e.g. 'llama3.1-8b-Instruct' +export MODEL= # e.g. 'llama3.1-8b' +export TOKENIZER= # e.g. 'meta-llama/Llama-3.1-8B-Instruct' export HF_TOKEN= # -- MaxText configuration -- diff --git a/docs/tutorials/posttraining/rl_on_multi_host.md b/docs/tutorials/posttraining/rl_on_multi_host.md index eaeef3d5e2..5262745a15 100644 --- a/docs/tutorials/posttraining/rl_on_multi_host.md +++ b/docs/tutorials/posttraining/rl_on_multi_host.md @@ -39,19 +39,19 @@ Setup following environment variables: ```bash # -- Model configuration -- -export HF_MODEL='llama3.1-70b-Instruct' -export MODEL='llama3.1-70b' -export TOKENIZER='meta-llama/Llama-3.1-70B-Instruct' +export HF_MODEL= # e.g. 'llama3.1-70b-Instruct' +export MODEL= # e.g. 'llama3.1-70b' +export TOKENIZER= # e.g. 'meta-llama/Llama-3.1-70B-Instruct' export HF_TOKEN= # -- MaxText configuration -- export BASE_OUTPUT_DIRECTORY= # e.g., gs://my-bucket/my-output-directory -export RUN_NAME=llama-3-70b-grpo +export RUN_NAME= # e.g., llama-3-70b-grpo export MAXTEXT_CKPT_PATH=${BASE_OUTPUT_DIRECTORY}/${RUN_NAME}/0/items # -- Workload configuration -- export WORKLOAD=${RUN_NAME} -export TPU_TYPE='v5p-128' +export TPU_TYPE= # e.g., 'v5p-128' export TPU_CLUSTER= export PROJECT_ID= export ZONE= @@ -59,6 +59,16 @@ export ZONE= ## Get your model checkpoint +### Option 1: Using an existing MaxText checkpoint + +If you already have a MaxText-compatible model checkpoint, simply set the following environment variable and move on to the next section. + +```bash +export MAXTEXT_CKPT_PATH= # e.g., gs://my-bucket/my-model-checkpoint/0/items +``` + +### Option 2: Converting from a Hugging Face checkpoint + You can convert a Hugging Face checkpoint to MaxText format using the `src/MaxText/utils/ckpt_conversion/to_maxtext.py` script. This is useful if you have a pre-trained model from Hugging Face that you want to use with MaxText. First, ensure you have the necessary dependencies installed. Then, run the conversion script on a CPU machine. For large models, it is recommended to use the `--lazy_load_tensors` flag to reduce memory usage during conversion. \ diff --git a/docs/tutorials/posttraining/sft.md b/docs/tutorials/posttraining/sft.md index 1566a0859a..bcd6bdd250 100644 --- a/docs/tutorials/posttraining/sft.md +++ b/docs/tutorials/posttraining/sft.md @@ -24,6 +24,7 @@ We use [Tunix](https://github.com/google/tunix), a JAX-based library designed fo In this tutorial we use a single host TPU VM such as `v6e-8/v5p-8`. Let's get started! ## Install dependencies + ```sh # 1. Clone the repository git clone https://github.com/AI-Hypercomputer/maxtext.git diff --git a/docs/tutorials/posttraining/sft_on_multi_host.md b/docs/tutorials/posttraining/sft_on_multi_host.md index 1cdcc3402f..8ebd7f0575 100644 --- a/docs/tutorials/posttraining/sft_on_multi_host.md +++ b/docs/tutorials/posttraining/sft_on_multi_host.md @@ -51,8 +51,7 @@ bash dependencies/scripts/docker_build_dependency_image.sh WORKFLOW=post-trainin ### 1.3. Upload the Docker image to Artifact Registry > **Note:** You will need the [**Artifact Registry Writer**](https://docs.cloud.google.com/artifact-registry/docs/access-control#permissions) role to push Docker images to your project's Artifact Registry and to allow the cluster to pull them during workload execution. If you don't have this permission, contact your project administrator to grant you this role through "Google Cloud Console -> IAM -> Grant access". ```bash -# Replace `$USER_runner` with your desired image name -export DOCKER_IMAGE_NAME=${USER}_runner +export DOCKER_IMAGE_NAME= bash dependencies/scripts/docker_upload_runner.sh CLOUD_IMAGE_NAME=$DOCKER_IMAGE_NAME ``` The `docker_upload_runner.sh` script uploads your Docker image to Artifact Registry. @@ -73,7 +72,7 @@ export ZONE= # -- Workload Configuration -- export WORKLOAD_NAME= # e.g., sft-$(date +%s) export TPU_TYPE= # e.g., v6e-256 -export TPU_SLICE=1 +export TPU_SLICE= export DOCKER_IMAGE="gcr.io/${PROJECT}/${DOCKER_IMAGE_NAME}" # -- MaxText Configuration --