Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 18 additions & 5 deletions packages/markitdown/src/markitdown/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,12 +206,25 @@ def _handle_output(args, result: DocumentConverterResult):
with open(args.output, "w", encoding="utf-8") as f:
f.write(result.markdown)
else:
# Handle stdout encoding errors more gracefully
print(
result.markdown.encode(sys.stdout.encoding, errors="replace").decode(
sys.stdout.encoding
# Write UTF-8 directly to the underlying binary buffer when available.
# This avoids UnicodeEncodeError on systems whose locale encoding
# (e.g. GBK on Chinese Windows) cannot represent all Unicode characters
# in the markdown output, and also handles the case where
# sys.stdout.encoding is None (e.g. when stdout is a raw pipe).
if hasattr(sys.stdout, "buffer"):
data = result.markdown.encode("utf-8")
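# `print()` (used in the fallback branch and previously here) always appends
# a newline. Here we only guarantee that the output ends with one: a newline
# is added when the markdown lacks it, so downstream tools that expect a
# final newline still get it, but markdown that already ends with a newline
# is not given a second one as `print()` would.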
if not data.endswith(b"\n"):
data += b"\n"
sys.stdout.buffer.write(data)
sys.stdout.buffer.flush()
else:
encoding = sys.stdout.encoding or "utf-8"
print(
result.markdown.encode(encoding, errors="replace").decode(encoding)
)
)


def _exit_with_error(message: str):
Expand Down