diff --git a/.github/workflows/ci-docs.yaml b/.github/workflows/ci-docs.yaml index fd9b16ed2..4b13d9ef9 100644 --- a/.github/workflows/ci-docs.yaml +++ b/.github/workflows/ci-docs.yaml @@ -21,6 +21,8 @@ jobs: run: | python -m pip install ".[all]" - uses: quarto-dev/quarto-actions/setup@v2 + with: + tinytex: true - name: Build docs run: | make docs-build diff --git a/.gitignore b/.gitignore index 20f5bf690..9b8167194 100644 --- a/.gitignore +++ b/.gitignore @@ -107,10 +107,12 @@ docs/source/generated/ docs/source/reference/ # Playground Scripts and temporary outputs +.DS_Store playground*.py playground*.qmd playground.html playground_files manual-testing.py manual-testing.ipynb -.DS_Store +latex_testing.qmd +latex_testing.pdf diff --git a/docs/_quarto.yml b/docs/_quarto.yml index a0ca0dd64..a24a088d8 100644 --- a/docs/_quarto.yml +++ b/docs/_quarto.yml @@ -17,6 +17,8 @@ website: text: Reference - href: blog/index.qmd text: Blog + - text: LaTeX Examples + file: a-latex_examples/index.qmd right: - icon: github href: https://github.com/posit-dev/great-tables diff --git a/docs/a-latex_examples/index.qmd b/docs/a-latex_examples/index.qmd new file mode 100644 index 000000000..e12c472a0 --- /dev/null +++ b/docs/a-latex_examples/index.qmd @@ -0,0 +1,96 @@ +--- +format: pdf +--- + +```{python} +#| label: setup +#| include: false + +import polars as pl +from great_tables import GT +from great_tables.data import gtcars, islands, airquality +``` + +```{python} +# | output: asis +# | echo: false + +gtcars_pl = ( + pl.from_pandas(gtcars) + .select(["mfr", "model", "hp", "trq", "mpg_c", "msrp"]) +) + +print(( + GT( + gtcars[["mfr", "model", "hp", "trq", "msrp"]].head(5), + #rowname_col="model", + #groupname_col="mfr", + ) + .tab_header( + title="Some Cars from the gtcars Dataset", + subtitle="Five Cars are shown here" + ) + .tab_spanner( + label="Make and Model", + columns=["mfr", "model"] + ) + .tab_spanner( + label="Performance", + columns=["hp", "trq", "msrp"] + ) + 
def as_latex(self: GT) -> str:
    """
    Render a GT object as a LaTeX string.

    The table is first built for the `"latex"` context and the built table is then rendered
    through its `_render_as_latex()` method.

    Returns
    -------
    str
        The value produced by `_render_as_latex()` for the built table (annotated as a string
        holding the LaTeX for the table).
    """
    # Build with the LaTeX context so that context-aware formatters emit LaTeX-safe text
    return self._build_data(context="latex")._render_as_latex()
def fmt_number_context(
    x: float | None,
    data: GTData,
    decimals: int,
    n_sigfig: int | None,
    drop_trailing_zeros: bool,
    drop_trailing_dec_mark: bool,
    use_seps: bool,
    scale_by: float,
    compact: bool,
    sep_mark: str,
    dec_mark: str,
    force_sign: bool,
    pattern: str,
    context: str,
):
    """
    Format one cell value as a number for a specific rendering context.

    Parameters
    ----------
    x
        The cell value. Values for which `is_na()` is true are returned unchanged.
    data
        The table data object; only its `_tbl_data` attribute is read (for the NA check).
    decimals, n_sigfig, drop_trailing_zeros, drop_trailing_dec_mark, use_seps, sep_mark, dec_mark, force_sign
        Passed through unchanged to the underlying number formatter.
    scale_by
        Multiplier applied to `x` before formatting.
    compact
        If true, `_format_number_compactly()` is used; otherwise `_value_to_decimal_notation()`.
    pattern
        Decoration pattern; every `{x}` in it is replaced by the formatted value.
    context
        Rendering context name, used to pick the minus mark.

    Returns
    -------
    The formatted string, or `x` itself when it is an NA value.
    """
    # NA values pass through untouched
    if is_na(data._tbl_data, x):
        return x

    scaled = x * scale_by

    # The sign is determined from the scaled value, before any text is produced
    negative = _has_negative_value(value=scaled)

    # Both formatters accept the same keyword arguments, so select one and call it once
    formatter = _format_number_compactly if compact else _value_to_decimal_notation
    text = formatter(
        value=scaled,
        decimals=decimals,
        n_sigfig=n_sigfig,
        drop_trailing_zeros=drop_trailing_zeros,
        drop_trailing_dec_mark=drop_trailing_dec_mark,
        use_seps=use_seps,
        sep_mark=sep_mark,
        dec_mark=dec_mark,
        force_sign=force_sign,
    )

    # Swap in the context-appropriate minus mark
    if negative:
        text = _replace_minus(text, minus_mark=_context_minus_mark(context=context))

    # The default pattern needs no substitution
    if pattern == "{x}":
        return text

    return pattern.replace("{x}", text)
def fmt_integer_context(
    x: float | None,
    data: GTData,
    use_seps: bool,
    scale_by: float,
    compact: bool,
    sep_mark: str,
    force_sign: bool,
    pattern: str,
    context: str,
):
    """
    Format one cell value as an integer for a specific rendering context.

    Parameters
    ----------
    x
        The cell value. Values for which `is_na()` is true are returned unchanged.
    data
        The table data object; only its `_tbl_data` attribute is read (for the NA check).
    use_seps, sep_mark, force_sign
        Passed through unchanged to the underlying number formatter.
    scale_by
        Multiplier applied to `x` before formatting.
    compact
        If true, `_format_number_compactly()` is used; otherwise `_value_to_decimal_notation()`.
    pattern
        Decoration pattern; every `{x}` in it is replaced by the formatted value.
    context
        Rendering context name, used to pick the minus mark.

    Returns
    -------
    The formatted string, or `x` itself when it is an NA value.
    """
    # NA values pass through untouched
    if is_na(data._tbl_data, x):
        return x

    scaled = x * scale_by

    # The sign is determined from the scaled value, before any text is produced
    negative = _has_negative_value(value=scaled)

    # Integer formatting is number formatting with zero decimals; the decimal mark is
    # passed as a placeholder value
    formatter = _format_number_compactly if compact else _value_to_decimal_notation
    text = formatter(
        value=scaled,
        decimals=0,
        n_sigfig=None,
        drop_trailing_zeros=False,
        drop_trailing_dec_mark=True,
        use_seps=use_seps,
        sep_mark=sep_mark,
        dec_mark="not used",
        force_sign=force_sign,
    )

    # Swap in the context-appropriate minus mark
    if negative:
        text = _replace_minus(text, minus_mark=_context_minus_mark(context=context))

    # The default pattern needs no substitution
    if pattern == "{x}":
        return text

    return pattern.replace("{x}", text)
def fmt_scientific_context(
    x: float | None,
    data: GTData,
    decimals: int,
    n_sigfig: int | None,
    drop_trailing_zeros: bool,
    drop_trailing_dec_mark: bool,
    scale_by: float,
    exp_style: str,
    sep_mark: str,
    dec_mark: str,
    force_sign_m: bool,
    force_sign_n: bool,
    pattern: str,
    context: str,
):
    """
    Format one cell value in scientific notation for a specific rendering context.

    The scaled value is converted by `_value_to_scientific_notation()` and split on `"E"` into
    a mantissa part and an exponent part, which are decorated separately and then joined.

    Parameters
    ----------
    x
        The cell value. Values for which `is_na()` is true are returned unchanged.
    data
        The table data object; only its `_tbl_data` attribute is read (for the NA check).
    decimals, n_sigfig, dec_mark
        Passed through to `_value_to_scientific_notation()`.
    drop_trailing_zeros
        If true, trailing `"0"` characters are stripped from the mantissa.
    drop_trailing_dec_mark
        If true, trailing `"."` characters are stripped from the mantissa.
    scale_by
        Multiplier applied to `x` before formatting.
    exp_style
        `"x10n"` selects the context-specific exponent marks; any other value is resolved
        through `_context_exp_str()`.
    sep_mark
        Accepted but not read by this function.
    force_sign_m, force_sign_n
        Whether to prefix `"+"` on a positive mantissa / non-negative exponent.
    pattern
        Decoration pattern; every `{x}` in it is replaced by the formatted value.
    context
        Rendering context name, used to pick the minus mark and exponent marks.

    Returns
    -------
    The formatted string, or `x` itself when it is an NA value.
    """
    # NA values pass through untouched
    if is_na(data._tbl_data, x):
        return x

    x = x * scale_by

    positive = _has_positive_value(value=x)
    minus_mark = _context_minus_mark(context=context)

    # Split "<mantissa>E<exponent>" into its two components
    mantissa, exponent = _value_to_scientific_notation(
        value=x,
        decimals=decimals,
        n_sigfig=n_sigfig,
        dec_mark=dec_mark,
    ).split("E")

    # Tidy the mantissa
    if drop_trailing_zeros:
        mantissa = mantissa.rstrip("0")
    if drop_trailing_dec_mark:
        mantissa = mantissa.rstrip(".")

    if positive and force_sign_m:
        mantissa = "+" + mantissa

    if exp_style == "x10n":
        # Default style: rendered as "m x 10^n" using context-specific marks

        # Values of order zero are shown as the mantissa alone
        order_zero = _has_sci_order_zero(value=x)

        if force_sign_n and not _str_detect(exponent, "-"):
            exponent = "+" + exponent

        mantissa = _replace_minus(mantissa, minus_mark=minus_mark)
        exponent = _replace_minus(exponent, minus_mark=minus_mark)

        if order_zero:
            result = mantissa
        else:
            marks = _context_exp_marks(context=context)
            result = mantissa + marks[0] + exponent + marks[1]

    else:
        # Any other style: a fixed exponent string (resolved from `exp_style`) separates
        # the two parts
        exp_str = _context_exp_str(exp_style=exp_style)

        # Styles matching a single letter followed by "1" use a minimum width of 1
        min_width = 1 if _str_detect(exp_style, r"^[a-zA-Z]1$") else 2

        # NOTE(review): `ljust` pads with "0" on the right-hand side; confirm against the
        # output of `_value_to_scientific_notation()` that this is the intended padding side
        if _str_detect(exponent, "-"):
            exponent = "-" + _str_replace(exponent, "-", "").ljust(min_width, "0")
        else:
            exponent = exponent.ljust(min_width, "0")
            if force_sign_n:
                exponent = "+" + exponent

        mantissa = _replace_minus(mantissa, minus_mark=minus_mark)
        exponent = _replace_minus(exponent, minus_mark=minus_mark)

        result = mantissa + exp_str + exponent

    # The default pattern needs no substitution
    if pattern == "{x}":
        return result

    return pattern.replace("{x}", result)
def fmt_percent_context(
    x: float | None,
    data: GTData,
    decimals: int,
    drop_trailing_zeros: bool,
    drop_trailing_dec_mark: bool,
    use_seps: bool,
    scale_by: float,
    sep_mark: str,
    dec_mark: str,
    force_sign: bool,
    placement: str,
    incl_space: bool,
    pattern: str,
    context: str,
):
    """
    Format one cell value as a percentage for a specific rendering context.

    Parameters
    ----------
    x
        The cell value. Values for which `is_na()` is true are returned unchanged.
    data
        The table data object; only its `_tbl_data` attribute is read (for the NA check).
    decimals, drop_trailing_zeros, drop_trailing_dec_mark, use_seps, sep_mark, dec_mark, force_sign
        Passed through unchanged to `_value_to_decimal_notation()`.
    scale_by
        Multiplier applied to `x` before formatting.
    placement
        `"right"` puts the percent mark after the number; any other value puts it before.
    incl_space
        Whether a single space separates the number and the percent mark.
    pattern
        Decoration pattern; every `{x}` in it is replaced by the formatted value.
    context
        Rendering context name, used to pick both the percent mark and the minus mark.

    Returns
    -------
    The formatted string, or `x` itself when it is an NA value.
    """
    # NA values pass through untouched
    if is_na(data._tbl_data, x):
        return x

    # Scale `x` value by a defined `scale_by` value
    x = x * scale_by

    # Determine properties of the value
    is_negative = _has_negative_value(value=x)
    is_positive = _has_positive_value(value=x)

    x_formatted = _value_to_decimal_notation(
        value=x,
        decimals=decimals,
        n_sigfig=None,
        drop_trailing_zeros=drop_trailing_zeros,
        drop_trailing_dec_mark=drop_trailing_dec_mark,
        use_seps=use_seps,
        sep_mark=sep_mark,
        dec_mark=dec_mark,
        force_sign=force_sign,
    )

    # Get the context-specific percent mark
    percent_mark = _context_percent_mark(context=context)

    # Create a percent pattern for affixing the percent sign
    space_character = " " if incl_space else ""
    percent_pattern = (
        f"{{x}}{space_character}{percent_mark}"
        if placement == "right"
        else f"{percent_mark}{space_character}{{x}}"
    )

    # With left placement the sign must stay in front of the percent mark, so it is
    # removed from the number and re-attached after the mark has been affixed
    if is_negative and placement == "left":
        x_formatted = x_formatted.replace("-", "")
        x_formatted = percent_pattern.replace("{x}", x_formatted)
        x_formatted = "-" + x_formatted
    elif is_positive and force_sign and placement == "left":
        x_formatted = x_formatted.replace("+", "")
        x_formatted = percent_pattern.replace("{x}", x_formatted)
        x_formatted = "+" + x_formatted
    else:
        x_formatted = percent_pattern.replace("{x}", x_formatted)

    # Implement minus sign replacement for `x_formatted`; the mark must follow the active
    # rendering context (as the other `fmt_*_context` functions do) rather than always
    # using the HTML mark
    if is_negative:
        minus_mark = _context_minus_mark(context=context)
        x_formatted = _replace_minus(x_formatted, minus_mark=minus_mark)

    # Use a supplied pattern specification to decorate the formatted value
    if pattern != "{x}":
        x_formatted = pattern.replace("{x}", x_formatted)

    return x_formatted
# Generate a function that will operate on single `x` values in the table body
def fmt_currency_context(
    x: float | None,
    data: GTData,
    currency: str,
    decimals: int,
    drop_trailing_dec_mark: bool,
    use_seps: bool,
    scale_by: float,
    sep_mark: str,
    dec_mark: str,
    force_sign: bool,
    placement: str,
    incl_space: bool,
    pattern: str,
    context: str,
):
    """
    Format one cell value as a currency amount for a specific rendering context.

    Parameters
    ----------
    x
        The cell value. Values for which `is_na()` is true are returned unchanged.
    data
        The table data object; only its `_tbl_data` attribute is read (for the NA check).
    currency
        Currency identifier resolved to a symbol through `_get_currency_str()`.
    decimals, drop_trailing_dec_mark, use_seps, sep_mark, dec_mark, force_sign
        Passed through unchanged to `_value_to_decimal_notation()`.
    scale_by
        Multiplier applied to `x` before formatting.
    placement
        `"right"` puts the currency symbol after the number; any other value puts it before.
    incl_space
        Whether a single space separates the number and the currency symbol.
    pattern
        Decoration pattern; every `{x}` in it is replaced by the formatted value.
    context
        Rendering context name, used to pick the dollar mark and the minus mark.

    Returns
    -------
    The formatted string, or `x` itself when it is an NA value.
    """
    # NA values pass through untouched
    if is_na(data._tbl_data, x):
        return x

    # Scale `x` value by a defined `scale_by` value
    x = x * scale_by

    # Determine properties of the value
    is_negative = _has_negative_value(value=x)
    is_positive = _has_positive_value(value=x)

    # Get the currency symbol on the basis of a valid currency code
    currency_symbol = _get_currency_str(currency=currency)

    # Swap every dollar sign for the context-specific dollar mark. This covers a bare "$"
    # as well as symbols that merely contain one, so that no raw "$" is emitted in a
    # context whose dollar mark differs from the plain character.
    if "$" in currency_symbol:
        currency_symbol = currency_symbol.replace("$", _context_dollar_mark(context=context))

    # Format the value to decimal notation; this is done before the currency symbol is
    # affixed to the value
    x_formatted = _value_to_decimal_notation(
        value=x,
        decimals=decimals,
        n_sigfig=None,
        drop_trailing_zeros=False,
        drop_trailing_dec_mark=drop_trailing_dec_mark,
        use_seps=use_seps,
        sep_mark=sep_mark,
        dec_mark=dec_mark,
        force_sign=force_sign,
    )

    # Create a currency pattern for affixing the currency symbol
    space_character = " " if incl_space else ""
    currency_pattern = (
        f"{{x}}{space_character}{currency_symbol}"
        if placement == "right"
        else f"{currency_symbol}{space_character}{{x}}"
    )

    # With left placement the sign must stay in front of the symbol, so it is removed
    # from the number and re-attached after the symbol has been affixed
    if is_negative and placement == "left":
        x_formatted = x_formatted.replace("-", "")
        x_formatted = currency_pattern.replace("{x}", x_formatted)
        x_formatted = "-" + x_formatted
    elif is_positive and force_sign and placement == "left":
        x_formatted = x_formatted.replace("+", "")
        x_formatted = currency_pattern.replace("{x}", x_formatted)
        x_formatted = "+" + x_formatted
    else:
        x_formatted = currency_pattern.replace("{x}", x_formatted)

    # Implement minus sign replacement for `x_formatted`
    if is_negative:
        minus_mark = _context_minus_mark(context=context)
        x_formatted = _replace_minus(x_formatted, minus_mark=minus_mark)

    # Use a supplied pattern specification to decorate the formatted value
    if pattern != "{x}":
        x_formatted = pattern.replace("{x}", x_formatted)

    return x_formatted
def _context_exp_marks(context: str) -> list[str]:
    """
    Return the opening and closing marks that wrap the exponent in 'x10n' notation.

    Parameters
    ----------
    context
        Rendering context name. `"html"` and `"latex"` have dedicated markup; any other
        value gets a plain-text caret form.

    Returns
    -------
    list[str]
        A two-element list: the text placed before the exponent and the text placed after it.
    """
    if context == "html":
        # The exponent has to be raised with markup; without it the power runs straight
        # into the "10" (e.g., "10" followed by "5" would read as "105")
        return [" \u00D7 10<sup style='font-size: 65%;'>", "</sup>"]

    if context == "latex":
        return [" $\\times$ 10\\textsuperscript{", "}"]

    return [" \u00D7 10^", ""]


def _context_minus_mark(context: str) -> str:
    """
    Return the minus mark for a context: U+2212 for `"html"`, an ASCII hyphen otherwise.
    """
    return "\u2212" if context == "html" else "-"


def _context_percent_mark(context: str) -> str:
    """
    Return the percent mark for a context: backslash-escaped for `"latex"`, plain otherwise.
    """
    return "\\%" if context == "latex" else "%"


def _context_dollar_mark(context: str) -> str:
    """
    Return the dollar mark for a context: backslash-escaped for `"latex"`, plain otherwise.
    """
    return "\\$" if context == "latex" else "$"
to_html(self) -> str: return built_units + # TODO: add `to_latex` method + def to_latex(self) -> str: + + raise NotImplementedError("LaTeX conversion of units is not yet supported.") + def _repr_html_(self): return self.to_html() diff --git a/great_tables/_options.py b/great_tables/_options.py index e2d62101a..98abbd15a 100644 --- a/great_tables/_options.py +++ b/great_tables/_options.py @@ -164,6 +164,8 @@ def tab_options( row_striping_background_color: str | None = None, row_striping_include_stub: bool | None = None, row_striping_include_table_body: bool | None = None, + latex_use_longtable: bool | None = None, + latex_tbl_pos: str | None = None, ) -> GTSelf: """ Modify the table output options. @@ -473,6 +475,13 @@ def tab_options( An option for whether to include the stub when striping rows. row_striping_include_table_body An option for whether to include the table body when striping rows. + latex_use_longtable + An option to use the `longtable` environment in LaTeX output. This is useful for tables that + span multiple pages. + latex_tbl_pos + The position of the table in the LaTeX output. The default value is `"!t"` which means that + the table is placed at the top of the page. Other valid values are `"!b"` (bottom of the + page), `"!h"` (here), `"!p"` (on a separate page), and `"!H"` (exactly here). Returns ------- diff --git a/great_tables/_text.py b/great_tables/_text.py index 74239776a..c723093f7 100644 --- a/great_tables/_text.py +++ b/great_tables/_text.py @@ -26,24 +26,53 @@ def _md_html(x: str) -> str: return re.sub(r"^
|
\n$", "", str) -def _process_text(x: str | Text | None) -> str: +def _md_latex(x: str) -> str: + + # TODO: Implement commonmark to LaTeX conversion (through a different library as + # commonmark-py does not support it) + raise NotImplementedError("Markdown to LaTeX conversion is not supported yet") + + +def _process_text(x: str | Text | None, context: str = "html") -> str: + from great_tables._helpers import UnitStr if x is None: return "" - if isinstance(x, Md): - return _md_html(x.text) - elif isinstance(x, Html): - return x.text - elif isinstance(x, str): - return _html_escape(x) - elif isinstance(x, Text): - return x.text - elif isinstance(x, UnitStr): - return x.to_html() + if context == "html": + + if isinstance(x, Md): + return _md_html(x.text) + elif isinstance(x, Html): + return x.text + elif isinstance(x, str): + return _html_escape(x) + elif isinstance(x, Text): + return x.text + elif isinstance(x, UnitStr): + return x.to_html() + else: + raise TypeError(f"Invalid type: {type(x)}") + + elif context == "latex": + + if isinstance(x, Md): + return _md_latex(x.text) + elif isinstance(x, Html): + return x.text + elif isinstance(x, str): + return x + elif isinstance(x, Text): + return x.text + elif isinstance(x, UnitStr): + # TODO: this is currently not implemented + return x.to_latex() + else: + raise TypeError(f"Invalid type: {type(x)}") + else: - raise TypeError(f"Invalid type: {type(x)}") + raise ValueError(f"Invalid context: {context}") def _process_text_id(x: str | Text | None) -> str: diff --git a/great_tables/_utils_render_latex.py b/great_tables/_utils_render_latex.py new file mode 100644 index 000000000..e2cca28e5 --- /dev/null +++ b/great_tables/_utils_render_latex.py @@ -0,0 +1,810 @@ +from __future__ import annotations + +from itertools import chain + +import re + +from ._gt_data import GTData, GroupRowInfo +from ._tbl_data import _get_cell, cast_frame_to_string, replace_null_frame +from .quarto import check_quarto +from great_tables._spanners 
import spanners_print_matrix +from great_tables._utils import heading_has_subtitle, heading_has_title, seq_groups +from great_tables._utils_render_html import _get_spanners_matrix_height +from great_tables._text import _process_text + +from typing import TypedDict, List + + +LENGTH_TRANSLATIONS_TO_PX = { + "px": 1.0, + "pt": 4 / 3, + "in": 96.0, + "cm": 37.7952755906, + "emu": 1 / 9525, + "em": 16.0, +} + + +class WidthDict(TypedDict): + type: List[str] + unspec: List[int] + lw: List[float] + pt: List[float] + column_align: List[str] + tbl_width: str | None + + +def is_css_length_string(x: str) -> bool: + + # This checks if there is a number followed by an optional string (only of letters) + return re.match(r"^[0-9.]+[a-zA-Z]*$", x) is not None + + +def is_number_without_units(x: str) -> bool: + + # This check if the string is a number without any text + return re.match(r"^[0-9.]+$", x) is not None + + +def css_length_has_supported_units(x: str, no_units_valid: bool = True) -> bool: + + # Check if the the string is a valid CSS length string with a text string + + if not is_css_length_string(x): + return False + + # If the string is a number without units, we can return the value of `no_units_valid` + if is_number_without_units(x): + return no_units_valid + + units = get_units_from_length_string(x) + + return units in LENGTH_TRANSLATIONS_TO_PX.keys() + + +def get_units_from_length_string(length: str) -> str: + + # Extract the units from a string that is likely in the form of '123px' or '3.23in' in + # order to return 'px' or 'in' respectively; we'll also need to trim any whitespace and + # convert the string to lowercase + units_str = re.sub(r"[0-9.]+", "", length).strip().lower() + + if units_str == "": + return "px" + + return units_str + + +def get_px_conversion(length: str) -> float: + + input_units = get_units_from_length_string(length) + + if input_units == "px": + return 1.0 + + valid_units = list(LENGTH_TRANSLATIONS_TO_PX.keys()) + + if input_units not in 
valid_units: + raise ValueError(f"Invalid units: {input_units}") + + return LENGTH_TRANSLATIONS_TO_PX.get(input_units, 0.0) + + +def convert_to_px(length: str) -> float: + + # Extract the units from a string that is likely in the form of '123px' or '3.23in' + units = get_units_from_length_string(length=length) + + # Extract the numeric value from the string and convert to a float + value = float(re.sub(r"[a-zA-Z\s]", "", length)) + + # If the units are already in pixels, we can return the value as-is (w/o rounding) + if units == "px": + return value + + # Get the conversion factor for the units + # - this defaults to 1.0 if the units are 'px' + # - otherwise, it will be a value that converts the units `value` to pixels + px_conversion = get_px_conversion(length=units) + + return round(value * px_conversion) + + +def convert_to_pt(x: str) -> float: + + px_value = convert_to_px(x) + + return px_value * 3 / 4 + + +def escape_latex(text: str) -> str: + + # Replace characters in a string that's to be used in a LaTeX context + + latex_escape_regex = "[\\\\&%$#_{}~^]" + text = re.sub(latex_escape_regex, lambda match: "\\" + match.group(), text) + + return text + + +def create_width_dict_l(data: GTData) -> WidthDict: + + boxhead = data._boxhead + + # Get the table width value + tbl_width = data._options.table_width.value + + # Get list representation of stub layout + stub_layout = data._stub._get_stub_layout(options=data._options) + + n = len(boxhead) + + width_dict: WidthDict = { + "type": [boxhead[i].type.name for i in range(n)], + "unspec": [0] * n, # Ensure this is initialized as a list of integers + "lw": [0] * n, + "pt": [0] * n, + "column_align": [ + boxhead[i].column_align if boxhead[i].column_align else "" for i in range(n) + ], + } + + for i in range(n): + + raw_val = boxhead[i].column_width + + if raw_val is None or raw_val == "": + + width_dict["unspec"][i] = 1 + + continue + + elif raw_val.endswith("%"): + + pct = float(raw_val.strip("%")) + + if tbl_width == 
"auto": + width_dict["lw"][i] = pct / 100 + + elif tbl_width.endswith("%"): + width_dict["lw"][i] = (pct * float(tbl_width.strip("%"))) / 1e4 + + else: + width_dict["pt"][i] = (pct / 100) * convert_to_pt(tbl_width) + + if tbl_width == "auto": + + if any(x > 0 for x in width_dict["unspec"]): + + # If any of the column widths are unspecified, a table width can't be inferred + width_dict["tbl_width"] = None + + else: + pt_total = sum(width_dict["pt"]) + lw_total = sum(width_dict["lw"]) + + if pt_total <= 0: + width_dict["tbl_width"] = f"{lw_total}\\linewidth" + elif lw_total <= 0: + width_dict["tbl_width"] = f"{pt_total}pt" + else: + width_dict["tbl_width"] = f"{pt_total}pt+{lw_total}\\linewidth" + + elif tbl_width.endswith("%"): + + lw_multiple = float(tbl_width.strip("%")) / 100 + width_dict["tbl_width"] = f"{lw_multiple}\\linewidth" + + else: + + tbl_width_pt = convert_to_pt(tbl_width) + + width_dict["tbl_width"] = f"{tbl_width_pt}pt" + + return width_dict + + +def create_singlecolumn_width_text_l(pt: float, lw: float) -> str: + + if pt <= 0 and lw <= 0: + out_txt = "0pt" + elif pt <= 0: + out_txt = "\\dimexpr {:.2f}\\linewidth -2\\tabcolsep-1.5\\arrayrulewidth".format(lw) + elif lw <= 0: + out_txt = "\\dimexpr {:.2f}pt -2\\tabcolsep-1.5\\arrayrulewidth".format(pt) + else: + out_txt = "\\dimexpr {:.2f}pt + {:.2f}\\linewidth -2\\tabcolsep-1.5\\arrayrulewidth".format( + pt, lw + ) + + return out_txt + + +def calculate_multicolumn_width_text_l(begins: list[str], ends: list[str], width_dict: WidthDict): + pass + + +def latex_heading_row(content: list[str]) -> str: + + return "".join([" & ".join(content) + " \\\\ \n", "\\midrule\\addlinespace[2.5pt]"]) + + +def consolidate_cell_styles_l(): + pass + + +def create_table_start_l(data: GTData, width_dict: WidthDict) -> str: + + # Get list representation of stub layout + stub_layout = data._stub._get_stub_layout(options=data._options) + + # Is the longtable environment being used? 
+ latex_use_longtable = data._options.latex_use_longtable.value + + # Extract only visible columns of `colwidth_df` based on stub_layout + types = ["default"] + + if "rowname" in stub_layout: + types.append("stub") + + if "group_label" in stub_layout: + types.append("row_group") + + # Get the `tbl_width` value from `width_dict` as a local variable + table_width = width_dict.get("tbl_width", None) + + # Remove the `tbl_width` key from `width_dict` without using `pop()` + width_dict = WidthDict({k: v for k, v in width_dict.items() if k != "tbl_width"}) + + # Get indices of the types in `types` that are in the `type` key of `width_dict` + width_dict_visible_idx = [i for i, v in enumerate(width_dict["type"]) if v in types] + + # Filter the `width_dict` dict entries based on the indices in `width_dict_visible_idx` + width_dict_visible = {k: [width_dict[k][i] for i in width_dict_visible_idx] for k in width_dict} + + # Ensure that the `width_dict_visible` entries are sorted such that the + # `"row_group"` entry is first (only if it's located in the stub), then `"stub"`, + # and then everything else + if "stub" in width_dict_visible["type"]: + + stub_idx = width_dict_visible["type"].index("stub") + othr_idx = [i for i in range(len(width_dict_visible["type"])) if i != stub_idx] + width_dict_visible["type"] = ["row_group", "stub"] + width_dict_visible["type"][othr_idx] + + if "row_group" in width_dict_visible["type"]: + + row_group_idx = width_dict_visible["type"].index("row_group") + othr_idx = [i for i in range(len(width_dict_visible["type"])) if i != row_group_idx] + width_dict_visible["type"] = ["row_group"] + width_dict_visible["type"][othr_idx] + + # Determine if there are any footnotes or source notes; if any, + # add a `\setlength` command that will pull up the minipage environment + # for the footnotes block + + source_notes = data._source_notes + + if len(source_notes) > 0: + + longtable_post_length = "\\setlength{\\LTpost}{0mm}\n" + + else: + + 
longtable_post_length = "" + + # Generate the column definitions for visible columns + # these can either be simple `l`, `c`, `r` directive if a width isn't set + # for a column, or, use `p{