add back the playwright mcp; update node version in coderunner

mkagenius · mkagenius · commit b5c61de80445 · 2025-07-26T00:47:46.000+05:30
diff --git a/Dockerfile b/Dockerfile
@@ -51,7 +51,7 @@ RUN python -m pip install --no-cache-dir --upgrade pip
 COPY ./requirements.txt /app/requirements.txt
 
 # Install Python dependencies
-RUN pip install --no-cache-dir -r requirements.txt
+RUN pip install -r requirements.txt
 
 
 # Install the bash kernel spec for Jupyter (not working with uv)
@@ -89,7 +89,7 @@ COPY entrypoint.sh /entrypoint.sh
 RUN chmod +x /entrypoint.sh
 
 # Ensure Node.js, npm (and npx) are set up
-RUN curl -fsSL https://deb.nodesource.com/setup_18.x | bash -
+RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash -
 RUN apt-get install -y nodejs
 
 ENV PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
diff --git a/requirements.txt b/requirements.txt
@@ -34,3 +34,5 @@ mcp[cli]
 fastmcp
 
 openai-agents
+
+playwright==1.53.0
diff --git a/server.py b/server.py
@@ -11,8 +11,11 @@
 
 import aiofiles
 import websockets
-from mcp.server.fastmcp import FastMCP
-
+# Import Context for progress reporting
+from mcp.server.fastmcp import FastMCP, Context
+from playwright.async_api import async_playwright
+from bs4 import BeautifulSoup
+import socket
 # --- CONFIGURATION & SETUP ---
 logging.basicConfig(
     level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
@@ -76,20 +79,21 @@ def create_jupyter_request(code: str) -> tuple[str, str]:
 
 
 # --- MCP TOOLS ---
-
 @mcp.tool()
-async def execute_python_code(command: str) -> str:
+async def execute_python_code(command: str, ctx: Context) -> str:
     """
-    Executes a string of Python code in a persistent Jupyter kernel and returns the output.
-    This is suitable for calculations, data analysis, and interacting with previously defined variables.
+    Executes a string of Python code in a persistent Jupyter kernel and returns the final output.
+    Streams intermediate output (stdout) as progress updates.
 
     Args:
         command: The Python code to execute as a single string.
+        ctx: The MCP Context object, used for reporting progress.
     """
     # 1. Get Kernel ID
     if not os.path.exists(KERNEL_ID_FILE_PATH):
-        logger.error(f"Kernel ID file not found at: {KERNEL_ID_FILE_PATH}")
-        return "Error: Kernel is not running. The kernel ID file was not found."
+        error_msg = f"Error: Kernel is not running. The kernel ID file was not found at: {KERNEL_ID_FILE_PATH}"
+        logger.error(error_msg)
+        return error_msg
 
     with open(KERNEL_ID_FILE_PATH, 'r') as file:
         kernel_id = file.read().strip()
@@ -99,7 +103,7 @@ async def execute_python_code(command: str) -> str:
 
     # 2. Connect and Execute via WebSocket
     jupyter_ws_url = f"{JUPYTER_WS_URL}/api/kernels/{kernel_id}/channels"
-    output_lines = []
+    final_output_lines = []
     sent_msg_id = None
 
     try:
@@ -109,50 +113,87 @@ async def execute_python_code(command: str) -> str:
             logger.info(f"Sent execute_request (msg_id: {sent_msg_id})")
 
             execution_complete = False
-            loop_timeout = 3600.0  # Total time to wait for a result
+            loop_timeout = 3600.0
             start_time = time.time()
 
             while not execution_complete and (time.time() - start_time) < loop_timeout:
                 try:
-                    # Wait for a message with a short timeout to keep the loop responsive
                     message_str = await asyncio.wait_for(jupyter_ws.recv(), timeout=1.0)
                 except asyncio.TimeoutError:
                     continue
 
                 message_data = json.loads(message_str)
                 parent_msg_id = message_data.get("parent_header", {}).get("msg_id")
 
-                # Ignore messages not related to our request
                 if parent_msg_id != sent_msg_id:
                     continue
 
                 msg_type = message_data.get("header", {}).get("msg_type")
                 content = message_data.get("content", {})
 
                 if msg_type == "stream":
-                    output_lines.append(content.get("text", ""))
-                elif msg_type == "execute_result" or msg_type == "display_data":
-                    output_lines.append(content.get("data", {}).get("text/plain", ""))
+                    stream_text = content.get("text", "")
+                    final_output_lines.append(stream_text)
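+                    # Relay this stdout chunk to the client. NOTE(review): the MCP SDK documents `progress` as numeric; text may belong in `message=` — confirm.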
+                    await ctx.report_progress(progress=stream_text)
+
+                elif msg_type in ["execute_result", "display_data"]:
+                    final_output_lines.append(content.get("data", {}).get("text/plain", ""))
                 elif msg_type == "error":
                     error_traceback = "\n".join(content.get("traceback", []))
                     logger.error(f"Execution error for msg_id {sent_msg_id}:\n{error_traceback}")
                     return f"Execution Error:\n{error_traceback}"
+
                 elif msg_type == "status" and content.get("execution_state") == "idle":
-                    # The kernel is idle, meaning our execution is finished.
                     execution_complete = True
 
             if not execution_complete:
+                 timeout_msg = f"Error: Execution timed out after {loop_timeout} seconds."
                  logger.error(f"Execution timed out for msg_id: {sent_msg_id}")
-                 return f"Error: Execution timed out after {loop_timeout} seconds."
+                 return timeout_msg
 
-            return "".join(output_lines) if output_lines else "[Execution successful with no output]"
+            return "".join(final_output_lines) if final_output_lines else "[Execution successful with no output]"
 
     except websockets.exceptions.ConnectionClosed as e:
+        error_msg = f"Error: Could not connect to the Jupyter kernel. It may be offline. Details: {e}"
         logger.error(f"WebSocket connection failed: {e}")
-        return f"Error: Could not connect to the Jupyter kernel. It may be offline. Details: {e}"
+        return error_msg
     except Exception as e:
         logger.error(f"An unexpected error occurred during execution: {e}", exc_info=True)
         return f"Error: An internal server error occurred: {str(e)}"
 
+@mcp.tool()
+async def navigate_and_get_all_visible_text(url: str) -> str:
+    """
+    Retrieves all visible text from the entire webpage using Playwright.
+
+    Args:
+        url: The URL of the webpage from which to retrieve text.
+    """
+    # No intermediate progress is reported here; the tool returns only the final text or an error message.
+    try:
+        # Note: 'async with async_playwright() as p:' can be slow.
+        # For performance, consider managing a single Playwright instance
+        # outside the tool function if this tool is called frequently.
+        async with async_playwright() as p:
+            browser = await p.chromium.connect(PLAYWRIGHT_WS_URL)
+            page = await browser.new_page()
+            await page.goto(url)
+
+            html_content = await page.content()
+            soup = BeautifulSoup(html_content, 'html.parser')
+            visible_text = soup.get_text(separator="\n", strip=True)
+
+            await browser.close()
+
+            # The operation is complete, return the final result.
+            return visible_text
+
+    except Exception as e:
+        logger.error(f"Failed to retrieve all visible text: {e}")
+        # An error occurred, return the final error message.
+        return f"Error: Failed to retrieve all visible text: {str(e)}"
+
 
-app = mcp.sse_app()
+# Use the streamable_http_app as it's the modern standard
+app = mcp.streamable_http_app()