comments: add Comments.add_comment()

scanny · scanny · commit 8ac9fc4f6b50 · 2025-06-16T14:24:39.000-07:00
Only with `text` parameter so far. Author and initials parameters to
follow.
diff --git a/features/cmt-mutations.feature b/features/cmt-mutations.feature
@@ -4,7 +4,6 @@ Feature: Comment mutations
   I need mutation methods on Comment objects
 
 
-  @wip
   Scenario: Comments.add_comment()
     Given a Comments object with 0 comments
      When I assign comment = comments.add_comment()
@@ -15,15 +14,13 @@ Feature: Comment mutations
       And comments.get(0) == comment
 
 
-  @wip
   Scenario: Comments.add_comment() specifying author and initials
     Given a Comments object with 0 comments
      When I assign comment = comments.add_comment(author="John Doe", initials="JD")
      Then comment.author == "John Doe"
       And comment.initials == "JD"
 
 
-  @wip
   Scenario: Comment.add_paragraph() specifying text and style
     Given a default Comment object
      When I assign paragraph = comment.add_paragraph(text, style)
@@ -33,7 +30,6 @@ Feature: Comment mutations
       And comment.paragraphs[-1] == paragraph
 
 
-  @wip
   Scenario: Comment.add_paragraph() not specifying text or style
     Given a default Comment object
      When I assign paragraph = comment.add_paragraph()
@@ -43,7 +39,6 @@ Feature: Comment mutations
       And comment.paragraphs[-1] == paragraph
 
 
-  @wip
   Scenario: Add image to comment
     Given a default Comment object
      When I assign paragraph = comment.add_paragraph()
diff --git a/src/docx/comments.py b/src/docx/comments.py
@@ -10,6 +10,8 @@
 if TYPE_CHECKING:
     from docx.oxml.comments import CT_Comment, CT_Comments
     from docx.parts.comments import CommentsPart
+    from docx.styles.style import ParagraphStyle
+    from docx.text.paragraph import Paragraph
 
 
 class Comments:
@@ -30,6 +32,48 @@ def __len__(self) -> int:
         """The number of comments in this collection."""
         return len(self._comments_elm.comment_lst)
 
+    def add_comment(self, text: str = "", author: str = "", initials: str | None = "") -> Comment:
+        """Add a new comment to the document and return it.
+
+        The comment is added to the end of the comments collection and is assigned a unique
+        comment-id.
+
+        If `text` is provided, it is added to the comment. This option provides for the common
+        case where a comment contains a modest passage of plain text. Multiple paragraphs can be
+        added using the `text` argument by separating their text with newlines (`"\\n"`).
+        Between newlines, text is interpreted as it is in `Document.add_paragraph(text=...)`.
+
+        The default is to place a single empty paragraph in the comment, which is the same
+        behavior as the Word UI when you add a comment. New runs can be added to the first
+        paragraph in the empty comment with `comment.paragraphs[0].add_run()` to add more
+        complex text with emphasis or images. Additional paragraphs can be added using
+        `.add_paragraph()`.
+
+        `author` is a required attribute, set to the empty string by default.
+
+        `initials` is an optional attribute, set to the empty string by default. Passing |None|
+        for the `initials` parameter causes that attribute to be omitted from the XML.
+        """
+        comment_elm = self._comments_elm.add_comment()
+        comment_elm.author = author
+        comment_elm.initials = initials
+        comment_elm.date = dt.datetime.now(dt.timezone.utc)
+        comment = Comment(comment_elm, self._comments_part)
+
+        if text == "":
+            return comment
+
+        para_text_iter = iter(text.split("\n"))
+
+        first_para_text = next(para_text_iter)
+        first_para = comment.paragraphs[0]
+        first_para.add_run(first_para_text)
+
+        for s in para_text_iter:
+            comment.add_paragraph(text=s)
+
+        return comment
+
     def get(self, comment_id: int) -> Comment | None:
         """Return the comment identified by `comment_id`, or |None| if not found."""
         comment_elm = self._comments_elm.get_comment_by_id(comment_id)
@@ -54,6 +98,22 @@ def __init__(self, comment_elm: CT_Comment, comments_part: CommentsPart):
         super().__init__(comment_elm, comments_part)
         self._comment_elm = comment_elm
 
+    def add_paragraph(self, text: str = "", style: str | ParagraphStyle | None = None) -> Paragraph:
+        """Return paragraph newly added to the end of the content in this container.
+
+        The paragraph has `text` in a single run if present, and is given paragraph style `style`.
+        When `style` is |None| or omitted, the "CommentText" paragraph style is applied, which is
+        the default style for comments.
+        """
+        paragraph = super().add_paragraph(text, style)
+
+        # -- have to assign style directly to element because `paragraph.style` raises when
+        # -- a style is not present in the styles part
+        if style is None:
+            paragraph._p.style = "CommentText"  # pyright: ignore[reportPrivateUsage]
+
+        return paragraph
+
     @property
     def author(self) -> str:
         """The recorded author of this comment."""
diff --git a/src/docx/oxml/comments.py b/src/docx/oxml/comments.py
@@ -3,8 +3,10 @@
 from __future__ import annotations
 
 import datetime as dt
-from typing import TYPE_CHECKING, Callable
+from typing import TYPE_CHECKING, Callable, cast
 
+from docx.oxml.ns import nsdecls
+from docx.oxml.parser import parse_xml
 from docx.oxml.simpletypes import ST_DateTime, ST_DecimalNumber, ST_String
 from docx.oxml.xmlchemy import BaseOxmlElement, OptionalAttribute, RequiredAttribute, ZeroOrMore
 
@@ -27,11 +29,64 @@ class CT_Comments(BaseOxmlElement):
 
     comment = ZeroOrMore("w:comment")
 
+    def add_comment(self) -> CT_Comment:
+        """Return newly added `w:comment` child of this `w:comments`.
+
+        The returned `w:comment` element is the minimum valid value, having a `w:id` value unique
+        within the existing comments and the required `w:author` attribute present but set to the
+        empty string. Its content is limited to a single paragraph whose only run contains the
+        necessary annotation reference but no text. Content is added by adding runs to this first
+        paragraph and by adding additional paragraphs as needed.
+        """
+        next_id = self._next_available_comment_id()
+        comment = cast(
+            CT_Comment,
+            parse_xml(
+                f'<w:comment {nsdecls("w")} w:id="{next_id}" w:author="">'
+                f"  <w:p>"
+                f"    <w:pPr>"
+                f'      <w:pStyle w:val="CommentText"/>'
+                f"    </w:pPr>"
+                f"    <w:r>"
+                f"      <w:rPr>"
+                f'        <w:rStyle w:val="CommentReference"/>'
+                f"      </w:rPr>"
+                f"      <w:annotationRef/>"
+                f"    </w:r>"
+                f"  </w:p>"
+                f"</w:comment>"
+            ),
+        )
+        self.append(comment)
+        return comment
+
     def get_comment_by_id(self, comment_id: int) -> CT_Comment | None:
         """Return the `w:comment` element identified by `comment_id`, or |None| if not found."""
         comment_elms = self.xpath(f"(./w:comment[@w:id='{comment_id}'])[1]")
         return comment_elms[0] if comment_elms else None
 
+    def _next_available_comment_id(self) -> int:
+        """The next available comment id.
+
+        According to the schema, this can be any positive integer, as big as you like, and the
+        default mechanism is to use `max() + 1`. However, if that yields a value larger than will
+        fit in a 32-bit signed integer, we take a more deliberate approach to use the first
+        unused integer starting from 0.
+        """
+        used_ids = [int(x) for x in self.xpath("./w:comment/@w:id")]
+
+        next_id = max(used_ids, default=-1) + 1
+
+        if next_id <= 2**31 - 1:
+            return next_id
+
+        # -- fall back to enumerating all used ids to find the first unused one --
+        for expected, actual in enumerate(sorted(used_ids)):
+            if expected != actual:
+                return expected
+
+        return len(used_ids)
+
 
 class CT_Comment(BaseOxmlElement):
     """`w:comment` element, representing a single comment.
diff --git a/tests/oxml/test_comments.py b/tests/oxml/test_comments.py
@@ -0,0 +1,31 @@
+# pyright: reportPrivateUsage=false
+
+"""Unit-test suite for `docx.oxml.comments` module."""
+
+from __future__ import annotations
+
+from typing import cast
+
+import pytest
+
+from docx.oxml.comments import CT_Comments
+
+from ..unitutil.cxml import element
+
+
+class DescribeCT_Comments:
+    """Unit-test suite for `docx.oxml.comments.CT_Comments`."""
+
+    @pytest.mark.parametrize(
+        ("cxml", "expected_value"),
+        [
+            ("w:comments", 0),
+            ("w:comments/(w:comment{w:id=1})", 2),
+            ("w:comments/(w:comment{w:id=4},w:comment{w:id=2147483646})", 2147483647),
+            ("w:comments/(w:comment{w:id=1},w:comment{w:id=2147483647})", 0),
+            ("w:comments/(w:comment{w:id=1},w:comment{w:id=2},w:comment{w:id=3})", 4),
+        ],
+    )
+    def it_finds_the_next_available_comment_id_to_help(self, cxml: str, expected_value: int):
+        comments_elm = cast(CT_Comments, element(cxml))
+        assert comments_elm._next_available_comment_id() == expected_value
diff --git a/tests/test_comments.py b/tests/test_comments.py
@@ -13,6 +13,7 @@
 from docx.opc.constants import CONTENT_TYPE as CT
 from docx.opc.packuri import PackURI
 from docx.oxml.comments import CT_Comment, CT_Comments
+from docx.oxml.ns import qn
 from docx.package import Package
 from docx.parts.comments import CommentsPart
 
@@ -86,8 +87,85 @@ def it_can_get_a_comment_by_id(self, package_: Mock):
         assert type(comment) is Comment, "expected a `Comment` object"
         assert comment._comment_elm is comments_elm.comment_lst[1]
 
+    def but_it_returns_None_when_no_comment_with_that_id_exists(self, package_: Mock):
+        comments_elm = cast(
+            CT_Comments,
+            element("w:comments/(w:comment{w:id=1},w:comment{w:id=2},w:comment{w:id=3})"),
+        )
+        comments = Comments(
+            comments_elm,
+            CommentsPart(
+                PackURI("/word/comments.xml"),
+                CT.WML_COMMENTS,
+                comments_elm,
+                package_,
+            ),
+        )
+
+        comment = comments.get(4)
+
+        assert comment is None, "expected None when no comment with that id exists"
+
+    def it_can_add_a_new_comment(self, package_: Mock):
+        comments_elm = cast(CT_Comments, element("w:comments"))
+        comments_part = CommentsPart(
+            PackURI("/word/comments.xml"),
+            CT.WML_COMMENTS,
+            comments_elm,
+            package_,
+        )
+        now_before = dt.datetime.now(dt.timezone.utc).replace(microsecond=0)
+        comments = Comments(comments_elm, comments_part)
+
+        comment = comments.add_comment()
+
+        now_after = dt.datetime.now(dt.timezone.utc).replace(microsecond=0)
+        # -- a comment is unconditionally added, and returned for any further adjustment --
+        assert isinstance(comment, Comment)
+        # -- it is "linked" to the comments part so it can add images and hyperlinks, etc. --
+        assert comment.part is comments_part
+        # -- comment numbering starts at 0, and is incremented for each new comment --
+        assert comment.comment_id == 0
+        # -- author is a required attribute, but is the empty string by default --
+        assert comment.author == ""
+        # -- initials is an optional attribute, but defaults to the empty string, same as Word --
+        assert comment.initials == ""
+        # -- timestamp is also optional, but defaults to now-UTC --
+        assert comment.timestamp is not None
+        assert now_before <= comment.timestamp <= now_after
+        # -- by default, a new comment contains a single empty paragraph --
+        assert [p.text for p in comment.paragraphs] == [""]
+        # -- that paragraph has the "CommentText" style, same as Word applies --
+        comment_elm = comment._comment_elm
+        assert len(comment_elm.p_lst) == 1
+        p = comment_elm.p_lst[0]
+        assert p.style == "CommentText"
+        # -- and that paragraph contains a single run with the necessary annotation reference --
+        assert len(p.r_lst) == 1
+        r = comment_elm.p_lst[0].r_lst[0]
+        assert r.style == "CommentReference"
+        assert r[-1].tag == qn("w:annotationRef")
+
+    def and_it_can_add_text_to_the_comment_when_adding_it(self, comments: Comments, package_: Mock):
+        comment = comments.add_comment(text="para 1\n\npara 2")
+
+        assert len(comment.paragraphs) == 3
+        assert [p.text for p in comment.paragraphs] == ["para 1", "", "para 2"]
+        assert all(p._p.style == "CommentText" for p in comment.paragraphs)
+
     # -- fixtures --------------------------------------------------------------------------------
 
+    @pytest.fixture
+    def comments(self, package_: Mock) -> Comments:
+        comments_elm = cast(CT_Comments, element("w:comments"))
+        comments_part = CommentsPart(
+            PackURI("/word/comments.xml"),
+            CT.WML_COMMENTS,
+            comments_elm,
+            package_,
+        )
+        return Comments(comments_elm, comments_part)
+
     @pytest.fixture
     def package_(self, request: FixtureRequest):
         return instance_mock(request, Package)