Skip to content

Commit 0e0a362

Browse files
committed
axtree: fix text export
1 parent ff871fc commit 0e0a362

File tree

1 file changed

+70
-12
lines changed

1 file changed

+70
-12
lines changed

src/cdp/AXNode.zig

Lines changed: 70 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,8 @@ pub const Writer = struct {
4949

5050
fn toJSON(self: *const Writer, node: *const Node, w: anytype) !void {
5151
try w.beginArray();
52-
if (try self.writeNode(node, w)) {
52+
const root = try AXNode.fromNode(node._node);
53+
if (try self.writeNode(node.id, root, w)) {
5354
try w.endArray();
5455
return;
5556
}
@@ -60,13 +61,26 @@ pub const Writer = struct {
6061
while (true) {
6162
next = try walker.get_next(node._node, next, .{ .skip_children = skip_children }) orelse break;
6263

63-
if (parser.nodeType(next.?) != .element) {
64+
const node_type = parser.nodeType(next.?);
65+
if (node_type != .element and node_type != .text) {
6466
skip_children = true;
6567
continue;
6668
}
6769

70+
// special case: for a text node, the parent decides whether the
71+
// text is kept.
72+
if (node_type == .text) {
73+
if (parser.nodeParentNode(next.?)) |p| {
74+
if (try ignoreText(p)) {
75+
skip_children = true;
76+
continue;
77+
}
78+
}
79+
}
80+
6881
const n = try self.registry.register(next.?);
69-
skip_children = try self.writeNode(n, w);
82+
const axn = try AXNode.fromNode(next.?);
83+
skip_children = try self.writeNode(n.id, axn, w);
7084
}
7185

7286
try w.endArray();
@@ -149,7 +163,7 @@ pub const Writer = struct {
149163
try self.writeAXProperty(.{ .name = .focusable, .value = .{ .type = .booleanOrUndefined, .value = .{ .boolean = true } } }, w);
150164
return;
151165
},
152-
.element => {},
166+
.element, .text => {},
153167
else => {
154168
log.debug(.cdp, "invalid tag", .{ .node_type = parser.nodeType(node) });
155169
return error.InvalidTag;
@@ -191,15 +205,15 @@ pub const Writer = struct {
191205
}
192206

193207
// write a node. returns true if children must be skipped.
194-
fn writeNode(self: *const Writer, node: *const Node, w: anytype) !bool {
208+
fn writeNode(self: *const Writer, id: u32, axn: AXNode, w: anytype) !bool {
209+
// ignore empty texts
195210
try w.beginObject();
196211

197-
const axn = try AXNode.fromNode(node._node);
198212
try w.objectField("nodeId");
199-
try w.write(node.id);
213+
try w.write(id);
200214

201215
try w.objectField("backendDOMNodeId");
202-
try w.write(node.id);
216+
try w.write(id);
203217

204218
try w.objectField("role");
205219
try self.writeAXValue(.{ .type = .role, .value = .{ .string = try axn.getRole() } }, w);
@@ -250,6 +264,7 @@ pub const Writer = struct {
250264

251265
// Children
252266
const skip_children = try axn.ignoreChildren();
267+
const skip_text = try ignoreText(n);
253268

254269
try w.objectField("childIds");
255270
try w.beginArray();
@@ -263,8 +278,8 @@ pub const Writer = struct {
263278
defer i += 1;
264279
const child = (parser.nodeListItem(child_nodes, @intCast(i))) orelse break;
265280

266-
// ignore non-elements
267-
if (parser.nodeType(child) != .element) {
281+
// ignore children that are neither elements nor text; text is also ignored when skip_text is set.
282+
if (parser.nodeType(child) != .element and (parser.nodeType(child) != .text or skip_text)) {
268283
continue;
269284
}
270285

@@ -303,7 +318,7 @@ pub const AXRole = enum(u8) {
303318
form,
304319
group,
305320
heading,
306-
img,
321+
image,
307322
insertion,
308323
link,
309324
list,
@@ -335,11 +350,14 @@ pub const AXRole = enum(u8) {
335350
textbox,
336351
time,
337352
RootWebArea,
353+
LineBreak,
354+
StaticText,
338355

339356
fn fromNode(node: *parser.Node) !AXRole {
340357
switch (parser.nodeType(node)) {
341358
.document => return .RootWebArea, // Chrome specific.
342359
.element => {},
360+
.text => return .StaticText,
343361
else => {
344362
log.debug(.cdp, "invalid tag", .{ .node_type = parser.nodeType(node) });
345363
return error.InvalidTag;
@@ -491,7 +509,7 @@ pub const AXRole = enum(u8) {
491509
.dialog => .dialog,
492510

493511
// Media
494-
.img => .img,
512+
.img => .image,
495513
.figure => .figure,
496514

497515
// Tables
@@ -530,6 +548,8 @@ pub const AXRole = enum(u8) {
530548
// Deprecated/Obsolete Elements
531549
.marquee => .marquee,
532550

551+
.br => .LineBreak,
552+
533553
else => .none,
534554
};
535555
}
@@ -564,6 +584,12 @@ fn writeName(axnode: AXNode, w: anytype) !?AXSource {
564584
return .title;
565585
}
566586

587+
if (parser.nodeType(node) == .text) {
588+
const content = parser.nodeTextContent(node) orelse "";
589+
try writeString(content, w);
590+
return .contents;
591+
}
592+
567593
std.debug.assert(parser.nodeType(node) == .element);
568594
const elt: *parser.Element = @ptrCast(node);
569595

@@ -581,6 +607,10 @@ fn writeName(axnode: AXNode, w: anytype) !?AXSource {
581607

582608
const tag = try parser.elementTag(elt);
583609
switch (tag) {
610+
.br => {
611+
try writeString("\n", w);
612+
return .contents;
613+
},
584614
.input => {
585615
const input_type = try parser.elementGetAttribute(elt, "type") orelse "text";
586616
switch (input_type.len) {
@@ -622,6 +652,7 @@ fn writeName(axnode: AXNode, w: anytype) !?AXSource {
622652
.object,
623653
.progress,
624654
.meter,
655+
.p,
625656
=> {},
626657
else => {
627658
if (parser.nodeTextContent(node)) |content| {
@@ -666,12 +697,35 @@ fn isHidden(elt: *parser.Element) !bool {
666697
return false;
667698
}
668699

700+
/// Returns true when text nodes found under `node` must be skipped.
/// Document and text nodes always return true. For an element the answer
/// depends on its tag: only `.p` returns false, every other tag returns true.
/// Propagates any error returned by `parser.elementTag`.
fn ignoreText(node: *parser.Node) !bool {
701+
if (parser.nodeType(node) == .document) {
702+
return true;
703+
}
704+
705+
if (parser.nodeType(node) == .text) {
706+
return true;
707+
}
708+
709+
// Past this point the node is expected to be an element; the cast below relies on it.
std.debug.assert(parser.nodeType(node) == .element);
710+
711+
const elt: *parser.Element = @ptrCast(node);
712+
const tag = try parser.elementTag(elt);
713+
return switch (tag) {
714+
.p => false,
715+
else => true,
716+
};
717+
}
718+
669719
fn ignoreChildren(self: AXNode) !bool {
670720
const node = self._node;
671721
if (parser.nodeType(node) == .document) {
672722
return false;
673723
}
674724

725+
if (parser.nodeType(node) == .text) {
726+
return false;
727+
}
728+
675729
std.debug.assert(parser.nodeType(node) == .element);
676730

677731
const elt: *parser.Element = @ptrCast(node);
@@ -690,6 +744,10 @@ fn isIgnore(self: AXNode) !bool {
690744
return false;
691745
}
692746

747+
if (parser.nodeType(node) == .text) {
748+
return false;
749+
}
750+
693751
std.debug.assert(parser.nodeType(node) == .element);
694752

695753
const elt: *parser.Element = @ptrCast(node);

0 commit comments

Comments
 (0)