ref: 8f43d324d12431d297ca2f8136b221a58ff52269
parent: b070264521386666dd0cbbedec469c84b26489ba
author: Alexei Podtelezhnikov <apodtele@gmail.com>
date: Tue May 11 20:04:59 EDT 2021
[smooth] Faster bitmap sweeping. Selecting the fill rule or checking the direct mode each time we call `gray_hline' is sub-optimal. This effectively splits the direct mode into a separate code path while inlining `gray_hline' and saving 5-7% of rendering time. * src/smooth/ftgrays.c (gray_hline): Eliminated in favor of... (FT_FILL_RULE, FT_GRAY_SET): ... these new macros... (gray_sweep): ... inlined here. (gray_sweep_direct): New function that handles the direct span buffer. (gray_TWorker): Remove the span buffer. (gray_raster_render, gray_convert_glyph): Updated.
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,19 @@
+2021-05-07 Alexei Podtelezhnikov <apodtele@gmail.com>
+
+ [smooth] Faster bitmap sweeping.
+
+ Selecting the fill rule or checking the direct mode each time we call
+ `gray_hline' is sub-optimal. This effectively splits the direct mode
+ into a separate code path while inlining `gray_hline' and saving 5-7%
+ of rendering time.
+
+ * src/smooth/ftgrays.c (gray_hline): Eliminated in favor of...
+ (FT_FILL_RULE, FT_GRAY_SET): ... these new macros...
+ (gray_sweep): ... inlined here.
+ (gray_sweep_direct): New function that handles the direct span buffer.
+ (gray_TWorker): Remove the span buffer.
+ (gray_raster_render, gray_convert_glyph): Updated.
+
2021-05-10 Alexei Podtelezhnikov <apodtele@gmail.com>
* src/smooth/ftgrays.c (gray_hline): Simplify even-odd computations.
--- a/src/smooth/ftgrays.c
+++ b/src/smooth/ftgrays.c
@@ -392,6 +392,42 @@
( sizeof( long ) * FT_CHAR_BIT - PIXEL_BITS ) )
+ /* Scale area and apply fill rule to calculate the coverage byte. */
+ /* The top fill bit is used for the non-zero rule. The eighth */
+ /* fill bit is used for the even-odd rule. The higher coverage */
+ /* bytes are either clamped for the non-zero-rule or discarded */
+ /* later for the even-odd rule. */
+#define FT_FILL_RULE( coverage, area, fill ) \
+ FT_BEGIN_STMNT \
+ coverage = (int)( area >> ( PIXEL_BITS * 2 + 1 - 8 ) ); \
+ if ( coverage & fill ) \
+ coverage = ~coverage; \
+ if ( coverage > 255 && fill & INT_MIN ) \
+ coverage = 255; \
+ FT_END_STMNT
+
+
+ /* It is faster to write small spans byte-by-byte than calling */
+ /* `memset'. This is mainly due to the cost of the function call. */
+#define FT_GRAY_SET( d, s, count ) \
+ FT_BEGIN_STMNT \
+ unsigned char* q = d; \
+ unsigned char c = (unsigned char)s; \
+ switch ( count ) \
+ { \
+ case 7: *q++ = c; /* fall through */ \
+ case 6: *q++ = c; /* fall through */ \
+ case 5: *q++ = c; /* fall through */ \
+ case 4: *q++ = c; /* fall through */ \
+ case 3: *q++ = c; /* fall through */ \
+ case 2: *q++ = c; /* fall through */ \
+ case 1: *q = c; /* fall through */ \
+ case 0: break; \
+ default: FT_MEM_SET( d, s, count ); \
+ } \
+ FT_END_STMNT
+
+
/**************************************************************************
*
* TYPE DEFINITIONS
@@ -463,8 +499,6 @@
FT_Raster_Span_Func render_span;
void* render_span_data;
- FT_Span spans[FT_MAX_GRAY_SPANS];
- int num_spans;
} gray_TWorker, *gray_PWorker;
@@ -1171,84 +1205,47 @@
static void
- gray_hline( RAS_ARG_ TCoord x,
- TCoord y,
- TArea coverage,
- TCoord acount )
+ gray_sweep( RAS_ARG )
{
- /* scale the coverage from 0..(ONE_PIXEL*ONE_PIXEL*2) to 0..256 */
- coverage >>= PIXEL_BITS * 2 + 1 - 8;
+ int fill = ras.outline.flags & FT_OUTLINE_EVEN_ODD_FILL ? 0x100 : INT_MIN;
+ int coverage;
+ int y;
- /* compute the line's coverage depending on the outline fill rule */
- if ( ras.outline.flags & FT_OUTLINE_EVEN_ODD_FILL )
- {
- if ( coverage & 256 ) /* odd bit */
- coverage = ~coverage;
- /* higher bits discarded below */
- }
- else /* default non-zero winding rule */
+ for ( y = ras.min_ey; y < ras.max_ey; y++ )
{
- if ( coverage < 0 )
- coverage = ~coverage; /* the same as -coverage - 1 */
+ PCell cell = ras.ycells[y - ras.min_ey];
+ TCoord x = ras.min_ex;
+ TArea cover = 0;
+ TArea area;
- if ( coverage >= 256 )
- coverage = 255;
- }
+ unsigned char* line = ras.target.origin - ras.target.pitch * y;
- if ( ras.num_spans >= 0 ) /* for FT_RASTER_FLAG_DIRECT only */
- {
- FT_Span* span = ras.spans + ras.num_spans++;
+ for ( ; cell != NULL; cell = cell->next )
+ {
+ if ( cover != 0 && cell->x > x )
+ {
+ FT_FILL_RULE( coverage, cover, fill );
+ FT_GRAY_SET( line + x, coverage, cell->x - x );
+ }
- span->x = (short)x;
- span->len = (unsigned short)acount;
- span->coverage = (unsigned char)coverage;
+ cover += (TArea)cell->cover * ( ONE_PIXEL * 2 );
+ area = cover - cell->area;
- if ( ras.num_spans == FT_MAX_GRAY_SPANS )
- {
- /* flush the span buffer and reset the count */
- ras.render_span( y, ras.num_spans, ras.spans, ras.render_span_data );
- ras.num_spans = 0;
+ if ( area != 0 && cell->x >= ras.min_ex )
+ {
+ FT_FILL_RULE( coverage, area, fill );
+ line[cell->x] = (unsigned char)coverage;
+ }
+
+ x = cell->x + 1;
}
- }
- else
- {
- unsigned char* q = ras.target.origin - ras.target.pitch * y + x;
- unsigned char c = (unsigned char)coverage;
-
- /* For small-spans it is faster to do it by ourselves than
- * calling `memset'. This is mainly due to the cost of the
- * function call.
- */
- switch ( acount )
+ if ( cover != 0 ) /* only if cropped */
{
- case 7:
- *q++ = c;
- /* fall through */
- case 6:
- *q++ = c;
- /* fall through */
- case 5:
- *q++ = c;
- /* fall through */
- case 4:
- *q++ = c;
- /* fall through */
- case 3:
- *q++ = c;
- /* fall through */
- case 2:
- *q++ = c;
- /* fall through */
- case 1:
- *q = c;
- /* fall through */
- case 0:
- break;
- default:
- FT_MEM_SET( q, c, acount );
+ FT_FILL_RULE( coverage, cover, fill );
+ FT_GRAY_SET( line + x, coverage, ras.max_ex - x );
}
}
}
@@ -1255,11 +1252,16 @@
static void
- gray_sweep( RAS_ARG )
+ gray_sweep_direct( RAS_ARG )
{
+ int fill = ras.outline.flags & FT_OUTLINE_EVEN_ODD_FILL ? 0x100 : INT_MIN;
+ int coverage;
int y;
+ FT_Span span[FT_MAX_GRAY_SPANS];
+ int n = 0;
+
for ( y = ras.min_ey; y < ras.max_ey; y++ )
{
PCell cell = ras.ycells[y - ras.min_ey];
@@ -1271,25 +1273,59 @@
for ( ; cell != NULL; cell = cell->next )
{
if ( cover != 0 && cell->x > x )
- gray_hline( RAS_VAR_ x, y, cover, cell->x - x );
+ {
+ FT_FILL_RULE( coverage, cover, fill );
+ span[n].coverage = (unsigned char)coverage;
+ span[n].x = (short)x;
+ span[n].len = (unsigned short)( cell->x - x );
+
+ if ( ++n == FT_MAX_GRAY_SPANS )
+ {
+ /* flush the span buffer and reset the count */
+ ras.render_span( y, n, span, ras.render_span_data );
+ n = 0;
+ }
+ }
+
cover += (TArea)cell->cover * ( ONE_PIXEL * 2 );
area = cover - cell->area;
if ( area != 0 && cell->x >= ras.min_ex )
- gray_hline( RAS_VAR_ cell->x, y, area, 1 );
+ {
+ FT_FILL_RULE( coverage, area, fill );
+ span[n].coverage = (unsigned char)coverage;
+ span[n].x = (short)cell->x;
+ span[n].len = 1;
+
+ if ( ++n == FT_MAX_GRAY_SPANS )
+ {
+ /* flush the span buffer and reset the count */
+ ras.render_span( y, n, span, ras.render_span_data );
+ n = 0;
+ }
+ }
+
x = cell->x + 1;
}
- if ( cover != 0 )
- gray_hline( RAS_VAR_ x, y, cover, ras.max_ex - x );
+ if ( cover != 0 ) /* only if cropped */
+ {
+ FT_FILL_RULE( coverage, cover, fill );
- if ( ras.num_spans > 0 ) /* for FT_RASTER_FLAG_DIRECT only */
+ span[n].coverage = (unsigned char)coverage;
+ span[n].x = (short)x;
+ span[n].len = (unsigned short)( ras.max_ex - x );
+
+ ++n;
+ }
+
+ if ( n )
{
/* flush the span buffer and reset the count */
- ras.render_span( y, ras.num_spans, ras.spans, ras.render_span_data );
- ras.num_spans = 0;
+ ras.render_span( y, n, span, ras.render_span_data );
+ n = 0;
}
}
}
@@ -1688,7 +1724,10 @@
if ( !error )
{
- gray_sweep( RAS_VAR );
+ if ( ras.render_span ) /* for FT_RASTER_FLAG_DIRECT only */
+ gray_sweep_direct( RAS_VAR );
+ else
+ gray_sweep( RAS_VAR );
band--;
continue;
}
@@ -1757,7 +1796,6 @@
ras.render_span = (FT_Raster_Span_Func)params->gray_spans;
ras.render_span_data = params->user;
- ras.num_spans = 0;
ras.min_ex = params->clip_box.xMin;
ras.min_ey = params->clip_box.yMin;
@@ -1787,7 +1825,6 @@
ras.render_span = (FT_Raster_Span_Func)NULL;
ras.render_span_data = NULL;
- ras.num_spans = -1; /* invalid */
ras.min_ex = 0;
ras.min_ey = 0;