Skip to content

Commit 6292dd2

Browse files
committed
Addressing review feedback.
1 parent 644baca commit 6292dd2

File tree

4 files changed

+104
-79
lines changed

4 files changed

+104
-79
lines changed

jupyter_server_documents/app.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ def get_fileid_manager():
6868
self.settings["yroom_manager"] = self.yroom_manager
6969

7070
# Initialize OutputsManager
71-
self.outputs_manager = self.outputs_manager_class(parent=self, config=self.config)
71+
self.outputs_manager = self.outputs_manager_class(parent=self)
7272
self.settings["outputs_manager"] = self.outputs_manager
7373

7474
# Serve Jupyter Collaboration API on

jupyter_server_documents/outputs/manager.py

Lines changed: 96 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,10 @@ def write_output(self, file_id, cell_id, output, display_id=None, asdict: bool =
134134
f.write(data)
135135
url = create_output_url(file_id, cell_id, index)
136136
self.log.info(f"Wrote output: {url}")
137-
return create_placeholder_output(output["output_type"], url, asdict=asdict)
137+
placeholder = create_placeholder_dict(output["output_type"], url)
138+
if not asdict:
139+
placeholder = Map(placeholder)
140+
return placeholder
138141

139142
def write_stream(self, file_id, cell_id, output, placeholder, asdict : bool = False) -> Map | dict:
140143
# How many stream outputs have been written for this cell previously
@@ -158,7 +161,9 @@ def write_stream(self, file_id, cell_id, output, placeholder, asdict : bool = Fa
158161
placeholder = placeholder
159162
elif count == self.stream_limit:
160163
# Return a link to the full stream output
161-
placeholder = create_placeholder_output("display_data", url, full=True, asdict=asdict)
164+
placeholder = create_placeholder_dict("display_data", url, full=True)
165+
if not asdict:
166+
placeholder = Map(placeholder)
162167
elif count > self.stream_limit:
163168
# Return None to indicate that no placeholder should be written to the ydoc
164169
placeholder = None
@@ -183,8 +188,7 @@ def clear(self, file_id, cell_id=None):
183188
pass
184189

185190
def process_loaded_notebook(self, file_id: str, file_data: dict) -> dict:
186-
"""
187-
Process a loaded notebook and handle outputs through the outputs manager.
191+
"""Process a loaded notebook and handle outputs through the outputs manager.
188192
189193
This method processes a notebook that has been loaded from disk.
190194
If the notebook metadata has placeholder_outputs set to True,
@@ -193,6 +197,7 @@ def process_loaded_notebook(self, file_id: str, file_data: dict) -> dict:
193197
Args:
194198
file_id (str): The file identifier
195199
file_data (dict): The file data containing the notebook content
200+
from calling ContentsManager.get()
196201
197202
Returns:
198203
dict: The modified file data with processed outputs
@@ -205,58 +210,94 @@ def process_loaded_notebook(self, file_id: str, file_data: dict) -> dict:
205210

206211
# Check if the notebook metadata has placeholder_outputs set to True
207212
if nb.get('metadata', {}).get('placeholder_outputs') is True:
208-
for cell in nb.get('cells', []):
209-
if cell.get('cell_type') == 'code':
210-
cell_id = cell.get('id', str(uuid.uuid4()))
211-
try:
212-
# Try to get outputs from disk
213-
output_strings = self.get_outputs(file_id, cell_id)
214-
outputs = []
215-
for output_string in output_strings:
216-
output_dict = json.loads(output_string)
217-
placeholder = create_placeholder_output(
218-
output_dict["output_type"],
219-
url=create_output_url(file_id, cell_id),
220-
asdict=True,
221-
)
222-
outputs.append(placeholder)
223-
cell['outputs'] = outputs
224-
except FileNotFoundError:
225-
# No outputs on disk for this cell, set empty outputs
226-
cell['outputs'] = []
213+
nb = self._process_loaded_placeholders(file_id=file_id, nb=nb)
227214
else:
228-
for cell in nb.get('cells', []):
229-
# Only process cells that have outputs
230-
if cell.get('cell_type') == 'code' and 'outputs' in cell:
231-
cell_id = cell.get('id', str(uuid.uuid4()))
232-
processed_outputs = []
233-
for output in cell.get('outputs', []):
234-
display_id = output.get('metadata', {}).get('display_id')
235-
url = output.get('metadata', {}).get('url')
236-
if url is None:
237-
# Save output to disk and replace with placeholder
238-
placeholder = self.write(
239-
file_id,
240-
cell_id,
241-
output,
242-
display_id,
243-
asdict=True,
244-
)
245-
if placeholder is None:
246-
placeholder = output
247-
else:
248-
placeholder = output
249-
250-
processed_outputs.append(nbformat.from_dict(placeholder))
251-
252-
# Replace the outputs with processed ones
253-
cell['outputs'] = processed_outputs
215+
nb = self._process_loaded_no_placeholders(file_id=file_id, nb=nb)
254216

217+
file_data['content'] = nb
255218
return file_data
256219

257-
def process_saving_notebook(self, nb: dict) -> dict:
220+
def _process_loaded_placeholders(self, file_id: str, nb: dict) -> dict:
221+
"""Process a notebook with placeholder_outputs metadata set to True.
222+
223+
This method processes notebooks that have been saved with placeholder outputs.
224+
It attempts to load actual outputs from disk and creates placeholder outputs
225+
for each code cell. If no outputs exist on disk for a cell, the cell's
226+
outputs are set to an empty list.
227+
228+
Args:
229+
file_id (str): The file identifier
230+
nb (dict): The notebook dictionary
231+
232+
Returns:
233+
dict: The notebook with placeholder outputs loaded from disk
258234
"""
259-
Process a notebook before saving to disk.
235+
for cell in nb.get('cells', []):
236+
if cell.get('cell_type') == 'code':
237+
cell_id = cell.get('id', str(uuid.uuid4()))
238+
try:
239+
# Try to get outputs from disk
240+
output_strings = self.get_outputs(file_id=file_id, cell_id=cell_id)
241+
outputs = []
242+
for output_string in output_strings:
243+
output_dict = json.loads(output_string)
244+
placeholder = create_placeholder_dict(
245+
output_dict["output_type"],
246+
url=create_output_url(file_id, cell_id)
247+
)
248+
outputs.append(placeholder)
249+
cell['outputs'] = outputs
250+
except FileNotFoundError:
251+
# No outputs on disk for this cell, set empty outputs
252+
cell['outputs'] = []
253+
return nb
254+
255+
def _process_loaded_no_placeholders(self, file_id: str, nb: dict) -> dict:
256+
"""Process a notebook that doesn't have placeholder_outputs metadata.
257+
258+
This method processes notebooks with actual output data in the cells.
259+
It saves existing outputs to disk and replaces them with placeholder
260+
outputs that reference the saved files. Outputs that already have
261+
a URL in their metadata are left as-is.
262+
263+
Args:
264+
file_id (str): The file identifier
265+
nb (dict): The notebook dictionary
266+
267+
Returns:
268+
dict: The notebook with outputs saved to disk and replaced with placeholders
269+
"""
270+
for cell in nb.get('cells', []):
271+
if cell.get('cell_type') != 'code' or 'outputs' not in cell:
272+
continue
273+
274+
cell_id = cell.get('id', str(uuid.uuid4()))
275+
processed_outputs = []
276+
for output in cell.get('outputs', []):
277+
display_id = output.get('metadata', {}).get('display_id')
278+
url = output.get('metadata', {}).get('url')
279+
if url is None:
280+
# Save output to disk and replace with placeholder
281+
placeholder = self.write(
282+
file_id,
283+
cell_id,
284+
output,
285+
display_id,
286+
asdict=True,
287+
)
288+
if placeholder is None:
289+
placeholder = output
290+
else:
291+
placeholder = output
292+
293+
processed_outputs.append(nbformat.from_dict(placeholder))
294+
295+
# Replace the outputs with processed ones
296+
cell['outputs'] = processed_outputs
297+
return nb
298+
299+
def process_saving_notebook(self, nb: dict) -> dict:
300+
"""Process a notebook before saving to disk.
260301
261302
This method is called when the yroom_file_api saves notebooks.
262303
It sets the placeholder_outputs key to True in the notebook metadata
@@ -285,8 +326,7 @@ def process_saving_notebook(self, nb: dict) -> dict:
285326

286327

287328
def create_output_url(file_id: str, cell_id: str, output_index: int = None) -> str:
288-
"""
289-
Create the URL for an output or stream.
329+
"""Create the URL for an output or stream.
290330
291331
Parameters:
292332
- file_id (str): The ID of the file.
@@ -301,9 +341,9 @@ def create_output_url(file_id: str, cell_id: str, output_index: int = None) -> s
301341
else:
302342
return f"/api/outputs/{file_id}/{cell_id}/{output_index}.output"
303343

304-
def create_placeholder_dict(output_type: str, url: str, full: bool = False):
305-
"""
306-
Build a placeholder output dict for the given output_type and url.
344+
def create_placeholder_dict(output_type: str, url: str, full: bool = False) -> dict:
345+
"""Build a placeholder output dict for the given output_type and url.
346+
307347
If full is True and output_type is "display_data", returns a display_data output
308348
with an HTML link to the full stream output.
309349
@@ -337,22 +377,3 @@ def create_placeholder_dict(output_type: str, url: str, full: bool = False):
337377
else:
338378
raise ValueError(f"Unknown output_type: {output_type}")
339379

340-
def create_placeholder_output(output_type: str, url: str, full: bool = False, asdict: bool = False):
341-
"""
342-
Creates a placeholder output Map for the given output_type and url.
343-
If full is True and output_type is "display_data", creates a display_data output with an HTML link.
344-
345-
Parameters:
346-
- output_type (str): The type of the output.
347-
- url (str): The URL associated with the output.
348-
- full (bool): Whether to create a full output placeholder with a link.
349-
- asdict (bool): Whether to return a dict or a YMap.
350-
351-
Returns:
352-
- Map: The placeholder output `ycrdt.Map`.
353-
"""
354-
output_dict = create_placeholder_dict(output_type, url, full=full)
355-
if asdict:
356-
return output_dict
357-
else:
358-
return Map(output_dict)

jupyter_server_documents/outputs/output_processor.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,12 @@ async def output_task(self, msg_type, cell_id, content):
142142
# Convert from the message spec to the nbformat output structure
143143
if self.use_outputs_service:
144144
output = self.transform_output(msg_type, content, ydoc=False)
145-
output = self.outputs_manager.write(file_id, cell_id, output, display_id)
145+
output = self.outputs_manager.write(
146+
file_id=file_id,
147+
cell_id=cell_id,
148+
output=output,
149+
display_id=display_id
150+
)
146151
else:
147152
output = self.transform_output(msg_type, content, ydoc=True)
148153

jupyter_server_documents/rooms/yroom_file_api.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,7 @@ async def _load_content(self, jupyter_ydoc: YBaseDoc) -> None:
201201
))
202202

203203
if self.file_type == "notebook":
204-
self.log.critical(f"Processing outputs for notebook: '{self.room_id}'.")
204+
self.log.info(f"Processing outputs for loaded notebook: '{self.room_id}'.")
205205
file_data = self.outputs_manager.process_loaded_notebook(file_id=self.file_id, file_data=file_data)
206206

207207
# Set JupyterYDoc content and set `dirty = False` to hide the "unsaved
@@ -441,7 +441,6 @@ def restart(self) -> None:
441441
self._save_scheduled = False
442442
self._last_modified = None
443443
self._last_path = None
444-
self._outputs_processing_complete = asyncio.Event()
445444
self.log.info(f"Restarted FileAPI for room '{self.room_id}'.")
446445

447446

0 commit comments

Comments
 (0)