# Profiling log shared by the CLI, generator and schema modules.
LOG_FILE = "/tmp/gapic_profile.log"


def _log(msg: str) -> None:
    """Append one timestamped, ``[CLI]``-tagged line to the profiling log.

    Logging is strictly best-effort: if ``LOG_FILE`` cannot be opened or
    written (no ``/tmp`` on the platform, read-only sandbox, full disk),
    the message is dropped instead of aborting code generation.

    Args:
        msg: The text to record. A ``[HH:MM:SS] [CLI] `` prefix and a
            trailing newline are added.
    """
    line = f"[{time.strftime('%H:%M:%S')}] [CLI] {msg}\n"
    try:
        with open(LOG_FILE, "a", encoding="utf-8") as log_file:
            log_file.write(line)
    except OSError:
        # Profiling output is diagnostic only; never let it break the plugin.
        pass
# Profiling log shared by the CLI, generator and schema modules.
LOG_FILE = "/tmp/gapic_profile.log"


def _log(msg: str) -> None:
    """Append one timestamped line to the profiling log.

    The file is opened in append mode so entries written by earlier steps
    of the same run are kept. Logging is best-effort: an ``OSError`` while
    opening or writing ``LOG_FILE`` drops the message rather than failing
    the generator.

    Args:
        msg: The text to record. A ``[HH:MM:SS] `` prefix and a trailing
            newline are added.
    """
    line = f"[{time.strftime('%H:%M:%S')}] {msg}\n"
    try:
        with open(LOG_FILE, "a", encoding="utf-8") as log_file:
            log_file.write(line)
    except OSError:
        # Profiling output is diagnostic only; never let it break rendering.
        pass
""" + # <--- Profiling Start ---> + _log(f"--- GENERATION STARTED (get_response) FOR {api_schema.naming.proto_package} ---") + start_time = time.time() # FIXED: Variable name matches end usage + # <--- Profiling End ---> + output_files: Dict[str, CodeGeneratorResponse.File] = OrderedDict() sample_templates, client_templates = utils.partition( lambda fname: os.path.basename(fname) == samplegen.DEFAULT_TEMPLATE_NAME, @@ -101,6 +117,7 @@ def get_response(self, api_schema: api.API, opts: Options) -> CodeGeneratorRespo # can be inserted into method docstrings. snippet_idx = snippet_index.SnippetIndex(api_schema) if sample_templates: + t_samples = time.time() sample_output, snippet_idx = self._generate_samples_and_manifest( api_schema, snippet_idx, @@ -108,6 +125,7 @@ def get_response(self, api_schema: api.API, opts: Options) -> CodeGeneratorRespo opts=opts, ) output_files.update(sample_output) + _log(f"Phase: Sample Gen took {time.time() - t_samples:.4f}s") # Iterate over each template and add the appropriate output files # based on that template. @@ -119,8 +137,9 @@ def get_response(self, api_schema: api.API, opts: Options) -> CodeGeneratorRespo filename = template_name.split("/")[-1] if filename.startswith("_") and filename != "__init__.py.j2": continue - - # Append to the output files dictionary. + + # <--- Profiling Template ---> + t_tpl = time.time() output_files.update( self._render_template( template_name, @@ -129,12 +148,18 @@ def get_response(self, api_schema: api.API, opts: Options) -> CodeGeneratorRespo snippet_index=snippet_idx, ) ) + duration = time.time() - t_tpl + if duration > 1.0: + _log(f"Phase: Template [{template_name}] took {duration:.4f}s") + # <--- End Profiling Template ---> # Return the CodeGeneratorResponse output. 
# Profiling log shared by the CLI, generator and schema modules.
LOG_FILE = "/tmp/gapic_profile.log"


def _log(msg: str) -> None:
    """Append one timestamped line to the profiling log.

    The file is opened in append mode so entries from previous steps are
    preserved. Logging is best-effort: an ``OSError`` while opening or
    writing ``LOG_FILE`` drops the message rather than failing the API
    model build.

    Args:
        msg: The text to record. A ``[HH:MM:SS] `` prefix and a trailing
            newline are added.
    """
    line = f"[{time.strftime('%H:%M:%S')}] {msg}\n"
    try:
        with open(LOG_FILE, "a", encoding="utf-8") as log_file:
            log_file.write(line)
    except OSError:
        # Profiling output is diagnostic only; never let it break the build.
        pass
@@ -138,6 +147,7 @@ def build( opts=opts, prior_protos=prior_protos or {}, load_services=load_services, + skip_context_analysis=skip_context_analysis, all_resources=all_resources or {}, ).proto @@ -456,7 +466,9 @@ def disambiguate_keyword_sanitize_fname( # load the services and methods with the full scope of types. pre_protos: Dict[str, Proto] = dict(prior_protos or {}) for fd in file_descriptors: + t0 = time.time() fd.name = disambiguate_keyword_sanitize_fname(fd.name, pre_protos) + is_target = fd.package.startswith(package) pre_protos[fd.name] = Proto.build( file_descriptor=fd, file_to_generate=fd.package.startswith(package), @@ -465,7 +477,11 @@ def disambiguate_keyword_sanitize_fname( prior_protos=pre_protos, # Ugly, ugly hack. load_services=False, + skip_context_analysis=True, ) + if is_target: + duration = time.time() - t0 + _log(f"API.build (Pass 1 - Messages Only): {fd.name} took {duration:.4f}s") # A file descriptor's file-level resources are NOT visible to any importers. # The only way to make referenced resources visible is to aggregate them at @@ -477,8 +493,12 @@ def disambiguate_keyword_sanitize_fname( # Second pass uses all the messages and enums defined in the entire API. # This allows LRO returning methods to see all the types in the API, # bypassing the above missing import problem. - protos: Dict[str, Proto] = { - name: Proto.build( + protos: Dict[str, Proto] = {} + + for name, proto in pre_protos.items(): + t0 = time.time() + + protos[name] = Proto.build( file_descriptor=proto.file_pb2, file_to_generate=proto.file_to_generate, naming=naming, @@ -486,15 +506,20 @@ def disambiguate_keyword_sanitize_fname( prior_protos=pre_protos, all_resources=MappingProxyType(all_file_resources), ) - for name, proto in pre_protos.items() - } + + # Log timing only for the target file + if proto.file_to_generate: + duration = time.time() - t0 + _log(f"API.build (Pass 2): {name} took {duration:.4f}s") # Parse the google.api.Service proto from the service_yaml data. 
+ t0_yaml = time.time() service_yaml_config = service_pb2.Service() ParseDict( opts.service_yaml_config, service_yaml_config, ignore_unknown_fields=True ) gapic_version = opts.gapic_version + _log(f"API.build (Service YAML Parse) took {time.time() - t0_yaml:.4f}s") # Third pass for various selective GAPIC settings; these require # settings in the service.yaml and so we build the API object @@ -1098,6 +1123,7 @@ def __init__( opts: Options = Options(), prior_protos: Optional[Mapping[str, Proto]] = None, load_services: bool = True, + skip_context_analysis: bool = False, all_resources: Optional[Mapping[str, wrappers.MessageType]] = None, ): self.proto_messages: Dict[str, wrappers.MessageType] = {} @@ -1107,6 +1133,7 @@ def __init__( self.file_to_generate = file_to_generate self.prior_protos = prior_protos or {} self.opts = opts + self.skip_context_analysis = skip_context_analysis # Iterate over the documentation and place it into a dictionary. # @@ -1213,7 +1240,7 @@ def proto(self) -> Proto: # If this is not a file being generated, we do not need to # do anything else. - if not self.file_to_generate: + if not self.file_to_generate or self.skip_context_analysis: return naive visited_messages: Set[wrappers.MessageType] = set() diff --git a/gapic/utils/rst.py b/gapic/utils/rst.py index a77df30332..a206217e91 100644 --- a/gapic/utils/rst.py +++ b/gapic/utils/rst.py @@ -13,12 +13,53 @@ # limitations under the License. import re -from typing import Optional +from typing import Optional, Dict import pypandoc # type: ignore from gapic.utils.lines import wrap +# Cache for the few complex items we actually send to pandoc +_RAW_RST_CACHE: Dict[str, str] = {} + +def _tuned_fast_convert(text: str) -> Optional[str]: + """ + Converts Markdown to RST using pure Python. + Only falls back to Pandoc for Tables and Images. + """ + # --- 1. FALLBACKS --- + # Tables (pipe surrounded by spaces) or Images (![). + # We allow "][" (Reference Links) to be handled by Python now. 
# Source formats whose syntax the pure-Python converter understands; any
# other ``source_format`` is always handed to pandoc.
_MARKDOWN_FORMATS = frozenset({"commonmark", "commonmark_x", "gfm", "markdown"})

# Memoized pandoc output, keyed on (text, source_format, columns). Only
# pandoc results are cached: the pure-Python conversion is cheap, and caching
# every docstring would grow without bound.
_PANDOC_CACHE: Dict[tuple, str] = {}

# Any of these characters means the text may contain markup.
_SPECIAL_CHARS_RE = re.compile(r"[|*`_[\]#]")

# Constructs the regex-based converter cannot render faithfully.
_PANDOC_ONLY_RE = re.compile(
    r" \| "  # table cell separator
    r"|\|[ \t]*$"  # table row ending in a pipe
    r"|!\["  # image
    r"|```"  # fenced code block
    r"|^#{3,} "  # heading deeper than the two levels handled here
    r"|\[[^\]\n]*`[^\]\n]*\][(\[]",  # link whose label contains inline code
    re.MULTILINE,
)

# `code` -> ``code``; skips Sphinx roles (:class:`X`), spans that are already
# double-backticked, and RST references (`text`_).
_INLINE_CODE_RE = re.compile(r"(?<![`:])`([^`\n]+)`(?![`_])")
_CODE_SPAN_RE = re.compile(r"(``.+?``)")

_REF_LINK_RE = re.compile(r"\[([^\]]+)\]\[([^\]]+)\]")
_URL_LINK_RE = re.compile(r"\[([^\]]+)\]\(([^)]+)\)")
_LINK_TEMPLATE = r"`\1 <\2>`__"

# Underscore emphasis only counts when it starts after whitespace or an
# opening bracket and ends before whitespace or closing punctuation, so
# snake_case identifiers, URLs and the ```__`` link suffix are left alone.
_STRONG_RE = re.compile(r"(?<![^\s(\[{])__(?=\S)(.+?)(?<=\S)__(?![^\s.,;:!?)\]}])")
_EMPHASIS_RE = re.compile(
    r"(?<![^\s(\[{])_(?=\S)([^_\n]+?)(?<=\S)_(?![^\s.,;:!?)\]}])"
)

_H1_RE = re.compile(r"^# +(\S.*)$", re.MULTILINE)
_H2_RE = re.compile(r"^## +(\S.*)$", re.MULTILINE)

# A non-blank, non-list line immediately followed by a list item.
_LIST_START_RE = re.compile(
    r"^(?![ \t]*[-*] )(.*\S.*)\n[ \t]*([-*] )", re.MULTILINE
)


def _underlined(marker: str):
    """Return a ``re.sub`` callback that underlines a heading with ``marker``."""

    def replace(match):
        title = match.group(1).rstrip()
        # RST rejects underlines shorter than the title text.
        return f"{title}\n{marker * len(title)}"

    return replace


def _convert_inline(segment: str) -> str:
    """Rewrite Markdown links and underscore emphasis in non-code text."""
    segment = _REF_LINK_RE.sub(_LINK_TEMPLATE, segment)
    segment = _URL_LINK_RE.sub(_LINK_TEMPLATE, segment)
    segment = _STRONG_RE.sub(r"**\1**", segment)
    return _EMPHASIS_RE.sub(r"*\1*", segment)


def _tuned_fast_convert(text: str) -> Optional[str]:
    """Convert common Markdown constructs to RST without calling pandoc.

    Handles inline code, reference and URL links, underscore emphasis,
    first- and second-level ATX headings, and the blank line RST requires
    before a list.

    Args:
        text (str): Markdown source.

    Returns:
        Optional[str]: The converted text, or ``None`` when the text contains
            a construct that must be rendered by pandoc (tables, images,
            fenced code, deeper headings, links with code labels).
    """
    if _PANDOC_ONLY_RE.search(text):
        return None

    # Normalize code spans first so the backticks introduced by link
    # conversion below are never doubled.
    pieces = _CODE_SPAN_RE.split(_INLINE_CODE_RE.sub(r"``\1``", text))

    # Even-indexed pieces lie outside code spans; only those are rewritten.
    pieces[::2] = [_convert_inline(piece) for piece in pieces[::2]]
    converted = "".join(pieces)

    converted = _H1_RE.sub(_underlined("="), converted)
    converted = _H2_RE.sub(_underlined("-"), converted)

    # RST needs a blank line between a paragraph and the list that follows.
    return _LIST_START_RE.sub(r"\1\n\n\2", converted)


def _pandoc_convert(text: str, source_format: str, columns: int) -> str:
    """Convert ``text`` to RST with pandoc, memoizing the stripped result."""
    key = (text, source_format, columns)
    cached = _PANDOC_CACHE.get(key)
    if cached is None:
        cached = pypandoc.convert_text(
            text,
            "rst",
            format=source_format,
            extra_args=[f"--columns={columns}"],
        ).strip()
        _PANDOC_CACHE[key] = cached
    return cached


# NOTE(review): the ``width`` and ``indent`` parameter lines fall in a gap
# between diff hunks; the defaults 72 and 0 are assumed -- confirm against
# the file before merging.
def rst(
    text: str,
    width: int = 72,
    indent: int = 0,
    nl: Optional[bool] = None,
    source_format: str = "commonmark",
) -> str:
    """Convert the given text to ReStructured Text.

    Text with no markup characters is only wrapped. Markdown that the
    pure-Python converter understands is converted in-process. Everything
    else is converted by pandoc, which is by far the most expensive path.

    Args:
        text (str): The text to convert.
        width (int): The number of columns.
        indent (int): The number of columns to indent each line of text
            (except the first).
        nl (bool): Whether to append a trailing newline.
            Defaults to appending a newline if the result is more than
            one line long.
        source_format (str): The source format. This is ``commonmark`` by
            default, which is what is used by convention in protocol buffers.

    Returns:
        str: The same text, in RST format.
    """
    columns = width - indent
    continuation = f"\n{' ' * indent}"

    # Quick check: text without any formatting characters is not converted.
    if not _SPECIAL_CHARS_RE.search(text):
        plain = wrap(text, indent=indent, offset=indent + 3, width=columns)
        return _finalize(plain, nl, indent)

    converted = None
    if source_format in _MARKDOWN_FORMATS:
        converted = _tuned_fast_convert(text)

    if converted is None:
        # Pandoc wraps to ``columns`` itself; its output (tables, images) is
        # layout-sensitive, so it is only indented, never re-wrapped.
        answer = _pandoc_convert(text, source_format, columns).replace(
            "\n", continuation
        )
    else:
        converted = converted.strip()
        if "::" in converted or ".. code" in converted:
            # Literal blocks must keep their line structure.
            answer = converted.replace("\n", continuation)
        else:
            answer = wrap(converted, indent=indent, offset=indent, width=columns)

    return _finalize(answer, nl, indent)


def _finalize(answer: str, nl: Optional[bool], indent: int) -> str:
    """Apply the trailing-newline and trailing-quote rules to ``answer``.

    Args:
        answer (str): The converted, wrapped text.
        nl (Optional[bool]): ``True`` always appends a newline, ``False``
            never does, ``None`` appends one only when ``answer`` already
            spans several lines.
        indent (int): Number of spaces placed after an appended newline so
            the closing triple-quote lines up.

    Returns:
        str: The text, ready to be placed inside a docstring.
    """
    if nl or (nl is None and "\n" in answer):
        answer += "\n" + " " * indent

    # A trailing double-quote directly before the closing triple-quotes
    # would be a syntax error; a period separates them.
    if answer.endswith('"'):
        answer += "."
    return answer