hwdb: PNP/ACPI lists on uefi.org are now in CSV format

Adjust the parsing, as the lists are no longer HTML files. Some IDs now end with unquoted trailing whitespace, which seems like a mistake as the IDs in the old HTML tables had none, so strip the ID columns before applying them.
author: Luca Boccassi <bluca@debian.org> 2023-11-14 20:46:12 +0000
committer: Luca Boccassi <bluca@debian.org> 2023-11-14 21:07:08 +0000
commit: 9a86f0841200218a5fe9b9aca676d01f22c983a1 (patch)
tree: de26ed5dc5068796e0cede1bc0e04f7914fcfdb0
parent: docs/RELEASE.md: retain systemd.io in IRC topic update (diff)
download: systemd-9a86f0841200218a5fe9b9aca676d01f22c983a1.tar.xz
systemd-9a86f0841200218a5fe9b9aca676d01f22c983a1.zip
1 files changed, 15 insertions, 63 deletions
diff --git a/hwdb.d/acpi-update.py b/hwdb.d/acpi-update.py
index e669d0d21f1..f65147c91d4 100755
--- a/hwdb.d/acpi-update.py
+++ b/hwdb.d/acpi-update.py
@@ -1,74 +1,26 @@
 #!/usr/bin/env python3
 # SPDX-License-Identifier: LGPL-2.1-or-later
 
-from html.parser import HTMLParser
+from csv import reader
 from enum import Enum
 
-class State(Enum):
-    NOWHERE = 0
-    COMPANY = 1
-    AFTER_COMPANY = 2
-    PNPID = 3
-    AFTER_PNPID = 4
-    DATE = 5
-
-class PNPTableParser(HTMLParser):
-
-    def __init__(self):
-        HTMLParser.__init__(self)
-        self.state = State.NOWHERE
-        self.data = ""
-        self.pnpid = None
-        self.company = None
-        self.table = []
-
-    def handle_starttag(self, tag, attrs):
-
-        if tag == "td":
-            if self.state == State.NOWHERE:
-                self.state = State.COMPANY
-            elif self.state == State.AFTER_COMPANY:
-                self.state = State.PNPID
-            elif self.state == State.AFTER_PNPID:
-                self.state = State.DATE
-            else:
-                raise ValueError
-
-            self.data = ""
-
-    def handle_endtag(self, tag):
-
-        if tag == "td":
-            if self.state == State.COMPANY:
-                self.company = ' '.join(self.data.strip().split())
-                self.state = State.AFTER_COMPANY
-            elif self.state == State.PNPID:
-                self.pnpid = self.data.strip()
-                self.state = State.AFTER_PNPID
-                self.table.append((self.pnpid, self.company))
-            elif self.state == State.DATE:
-                self.state = State.NOWHERE
-            else:
-                raise ValueError
-
-    def handle_data(self, data):
-        self.data += data
-
 def read_table(a):
 
-    parser = PNPTableParser()
-
-    for line in a:
-        parser.feed(line)
+    table = []
 
-    parser.close()
-    parser.table.sort()
+    with open(a, newline='') as csvfile:
+        for row in reader(csvfile):
+            if row[0] == "Company":
+                # Skip header
+                continue
+            table.append(row)
 
-    for pnpid, company in parser.table:
-        print("\nacpi:{0}*:\n ID_VENDOR_FROM_DATABASE={1}".format(pnpid, company))
+    table.sort(key=lambda x: x[1])
 
-a = open("acpi_id_registry.html")
-b = open("pnp_id_registry.html")
+    for row in table:
+        # Some IDs end with whitespace, while they didn't in the old HTML table, so it's probably
+        # a mistake, strip it.
+        print("\nacpi:{0}*:\n ID_VENDOR_FROM_DATABASE={1}".format(row[1].strip(), row[0].strip()))
 
 print('# This file is part of systemd.\n'
       '#\n'
@@ -76,5 +28,5 @@ print('# This file is part of systemd.\n'
       '#     https://uefi.org/uefi-pnp-export\n'
       '#     https://uefi.org/uefi-acpi-export')
 
-read_table(a)
-read_table(b)
+read_table("acpi_id_registry.html")
+read_table("pnp_id_registry.html")
author	Luca Boccassi <bluca@debian.org>	2023-11-14 20:46:12 +0000
committer	Luca Boccassi <bluca@debian.org>	2023-11-14 21:07:08 +0000
commit	9a86f0841200218a5fe9b9aca676d01f22c983a1 (patch)
tree	de26ed5dc5068796e0cede1bc0e04f7914fcfdb0
parent	docs/RELEASE.md: retain systemd.io in IRC topic update (diff)
download	systemd-9a86f0841200218a5fe9b9aca676d01f22c983a1.tar.xz systemd-9a86f0841200218a5fe9b9aca676d01f22c983a1.zip