[PATCH 3/9] ansi2html: optimize lines without any control characters

Paolo Bonzini posted 9 patches 7 years, 2 months ago
[PATCH 3/9] ansi2html: optimize lines without any control characters
Posted by Paolo Bonzini 7 years, 2 months ago
Store the initial part of the line, up to the first control character,
in a string.  If it is followed immediately by \n, \r\n or \f, it can
be printed without going through the cursor tracking logic.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 patchew/logviewer.py | 31 +++++++++++++++++++++++++++++--
 1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/patchew/logviewer.py b/patchew/logviewer.py
index 1e3495b..4523d1c 100644
--- a/patchew/logviewer.py
+++ b/patchew/logviewer.py
@@ -44,6 +44,8 @@ class ANSI2HTMLConverter(object):
     def _reset(self):
         self.line = []
         self.pos = 0
+        self.lazy_contents = ''
+        self.lazy_accumulate = True
 
     # self.line holds the characters for the current line.
     # Writing can overwrite some characters if self.pos is
@@ -51,6 +53,8 @@ class ANSI2HTMLConverter(object):
     # Moving the cursor right can add spaces to the end.
 
     def _write(self, chars):
+        assert not self.lazy_accumulate or self.lazy_contents == ''
+        self.lazy_accumulate = False
         cur_len = len(self.line)
         if self.pos < cur_len:
             last = min(cur_len - self.pos, len(chars))
@@ -65,6 +69,8 @@ class ANSI2HTMLConverter(object):
     def _set_pos(self, pos):
         self.pos = pos
         if self.pos > len(self.line):
+            assert not self.lazy_accumulate or self.lazy_contents == ''
+            self.lazy_accumulate = False
             num = self.pos - len(self.line)
             self.line += [' '] * num
 
@@ -77,6 +83,14 @@ class ANSI2HTMLConverter(object):
         yield self.RE_ENTITIES.sub(lambda x: self.ENTITIES[x.group(0)], text)
 
     def _write_line(self, suffix):
+        # If the line consists of a single string of text with no escapes
+        # or control characters, convert() special cases it
+        if self.lazy_contents != '':
+            yield from self._write_span(self.lazy_contents)
+            yield suffix
+            self._reset()
+            return
+
         text = "".join(self.line)
         yield from self._write_span(text)
         yield suffix
@@ -86,14 +100,27 @@ class ANSI2HTMLConverter(object):
         yield from self._write_prefix()
         for m in self.RE.finditer(input):
             if m.group(1):
-                self._write(m.group(1))
+                if self.lazy_accumulate:
+                    self.lazy_contents += m.group(1)
+                else:
+                    self._write(m.group(1))
             else:
                 seq = m.group(2)
+                # _write_line can deal with lazy storage.  Everything else
+                # must be flushed to self.line with _write.
                 if seq == '\n':
                     yield from self._write_line('\n')
+                    continue
                 elif seq == '\f':
                     yield from self._write_line('\n<hr>')
-                elif seq == '\b':
+                    continue
+
+                if self.lazy_contents != '':
+                    content = self.lazy_contents
+                    self.lazy_contents = ''
+                    self._write(content)
+
+                if seq == '\b':
                     if self.pos > 0:
                         self.pos -= 1
                 elif seq == '\t':
-- 
2.14.3


[patchew-devel] Re: [PATCH 3/9] ansi2html: optimize lines without any control characters
Posted by Fam Zheng 7 years, 2 months ago
On Mon, 02/26 12:27, Paolo Bonzini wrote:
> Store the initial part of the line, up to the first control characteer,
> in a string.  If it is followed immediately by \n, \r\n or \f, it can
> be printed without going through the cursor tracking logic.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  patchew/logviewer.py | 31 +++++++++++++++++++++++++++++--
>  1 file changed, 29 insertions(+), 2 deletions(-)
> 
> diff --git a/patchew/logviewer.py b/patchew/logviewer.py
> index 1e3495b..4523d1c 100644
> --- a/patchew/logviewer.py
> +++ b/patchew/logviewer.py
> @@ -44,6 +44,8 @@ class ANSI2HTMLConverter(object):
>      def _reset(self):
>          self.line = []
>          self.pos = 0
> +        self.lazy_contents = ''
> +        self.lazy_accumulate = True
>  
>      # self.line holds the characters for the current line.
>      # Writing can overwrite some characters if self.pos is
> @@ -51,6 +53,8 @@ class ANSI2HTMLConverter(object):
>      # Moving the cursor right can add spaces to the end.
>  
>      def _write(self, chars):
> +        assert not self.lazy_accumulate or self.lazy_contents == ''
> +        self.lazy_accumulate = False
>          cur_len = len(self.line)
>          if self.pos < cur_len:
>              last = min(cur_len - self.pos, len(chars))
> @@ -65,6 +69,8 @@ class ANSI2HTMLConverter(object):
>      def _set_pos(self, pos):
>          self.pos = pos
>          if self.pos > len(self.line):
> +            assert not self.lazy_accumulate or self.lazy_contents == ''
> +            self.lazy_accumulate = False
>              num = self.pos - len(self.line)
>              self.line += [' '] * num
>  
> @@ -77,6 +83,14 @@ class ANSI2HTMLConverter(object):
>          yield self.RE_ENTITIES.sub(lambda x: self.ENTITIES[x.group(0)], text)
>  
>      def _write_line(self, suffix):
> +        # If the line consists of a single string of text without no escapes

"with no escapes"?

> +        # or control chararcters, convert() special cases it

What do you mean by "convert()"?

> +        if self.lazy_contents != '':
> +            yield from self._write_span(self.lazy_contents)
> +            yield suffix
> +            self._reset()
> +            return
> +
>          text = "".join(self.line)
>          yield from self._write_span(text)
>          yield suffix
> @@ -86,14 +100,27 @@ class ANSI2HTMLConverter(object):
>          yield from self._write_prefix()
>          for m in self.RE.finditer(input):
>              if m.group(1):
> -                self._write(m.group(1))
> +                if self.lazy_accumulate:
> +                    self.lazy_contents += m.group(1)
> +                else:
> +                    self._write(m.group(1))
>              else:
>                  seq = m.group(2)
> +                # _write_line can deal with lazy storage.  Everything else
> +                # must be flushed to self.line with _write.
>                  if seq == '\n':
>                      yield from self._write_line('\n')
> +                    continue
>                  elif seq == '\f':
>                      yield from self._write_line('\n<hr>')
> -                elif seq == '\b':
> +                    continue
> +
> +                if self.lazy_contents != '':
> +                    content = self.lazy_contents
> +                    self.lazy_contents = ''
> +                    self._write(content)
> +
> +                if seq == '\b':
>                      if self.pos > 0:
>                          self.pos -= 1
>                  elif seq == '\t':
> -- 
> 2.14.3
> 
> 

Fam

[patchew-devel] Re: [PATCH 3/9] ansi2html: optimize lines without any control characters
Posted by Paolo Bonzini 7 years, 2 months ago
On 01/03/2018 07:33, Fam Zheng wrote:
> On Mon, 02/26 12:27, Paolo Bonzini wrote:
>> Store the initial part of the line, up to the first control characteer,
>> in a string.  If it is followed immediately by \n, \r\n or \f, it can
>> be printed without going through the cursor tracking logic.
>>
>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>> ---
>>  patchew/logviewer.py | 31 +++++++++++++++++++++++++++++--
>>  1 file changed, 29 insertions(+), 2 deletions(-)
>>
>> diff --git a/patchew/logviewer.py b/patchew/logviewer.py
>> index 1e3495b..4523d1c 100644
>> --- a/patchew/logviewer.py
>> +++ b/patchew/logviewer.py
>> @@ -44,6 +44,8 @@ class ANSI2HTMLConverter(object):
>>      def _reset(self):
>>          self.line = []
>>          self.pos = 0
>> +        self.lazy_contents = ''
>> +        self.lazy_accumulate = True
>>  
>>      # self.line holds the characters for the current line.
>>      # Writing can overwrite some characters if self.pos is
>> @@ -51,6 +53,8 @@ class ANSI2HTMLConverter(object):
>>      # Moving the cursor right can add spaces to the end.
>>  
>>      def _write(self, chars):
>> +        assert not self.lazy_accumulate or self.lazy_contents == ''
>> +        self.lazy_accumulate = False
>>          cur_len = len(self.line)
>>          if self.pos < cur_len:
>>              last = min(cur_len - self.pos, len(chars))
>> @@ -65,6 +69,8 @@ class ANSI2HTMLConverter(object):
>>      def _set_pos(self, pos):
>>          self.pos = pos
>>          if self.pos > len(self.line):
>> +            assert not self.lazy_accumulate or self.lazy_contents == ''
>> +            self.lazy_accumulate = False
>>              num = self.pos - len(self.line)
>>              self.line += [' '] * num
>>  
>> @@ -77,6 +83,14 @@ class ANSI2HTMLConverter(object):
>>          yield self.RE_ENTITIES.sub(lambda x: self.ENTITIES[x.group(0)], text)
>>  
>>      def _write_line(self, suffix):
>> +        # If the line consists of a single string of text without no escapes
> 
> "with no escapes"?

Yes, fixed.

>> +        # or control chararcters, convert() special cases it
> 
> What do you mean by "convert()?

The convert() method in ANSI2HTMLConverter.

Paolo